Skip to content

Commit

Permalink
fixups
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Oct 1, 2017
1 parent b868cbc commit 950de20
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 26 deletions.
35 changes: 22 additions & 13 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,13 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
def ax(self):
return self.grouper

def _get_grouper(self, obj):
def _get_grouper(self, obj, validate=True):
"""
Parameters
----------
obj : the subject object
validate : boolean, default True
if True, validate the grouper
Returns
-------
Expand All @@ -271,7 +273,8 @@ def _get_grouper(self, obj):
self.grouper, exclusions, self.obj = _get_grouper(self.obj, [self.key],
axis=self.axis,
level=self.level,
sort=self.sort)
sort=self.sort,
validate=validate)
return self.binner, self.grouper, self.obj

def _set_grouper(self, obj, sort=False):
Expand Down Expand Up @@ -1739,8 +1742,9 @@ class BaseGrouper(object):
whether this grouper will give sorted result or not
group_keys : boolean, default True
mutated : boolean, default False
indexer : the indexer created by Grouper
some grouper (TimeGrouper eg) will sort its axis and its
indexer : intp array, optional
the indexer created by Grouper
some groupers (e.g. TimeGrouper) sort their axis and their
group_info is also sorted, so the indexer is needed to reorder
"""
Expand Down Expand Up @@ -2514,8 +2518,11 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
# a passed Grouper like, directly get the grouper in the same way
# as single grouper groupby, use the group_info to get labels
elif isinstance(self.grouper, Grouper):
# get the new grouper
_, grouper, _ = self.grouper._get_grouper(self.obj)
# get the new grouper; what key/level refer to has already
# been disambiguated, so there is no need to check again:
# by this point they have been converted to actual values
# (rather than a pd.Grouper)
_, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
if self.name is None:
self.name = grouper.result_index.name
self.obj = self.grouper.obj
Expand Down Expand Up @@ -2587,12 +2594,12 @@ def ngroups(self):

@cache_readonly
def indices(self):
# for the situation of groupby list of groupers
# we have a list of groupers
if isinstance(self.grouper, BaseGrouper):
return self.grouper.indices
else:
values = _ensure_categorical(self.grouper)
return values._reverse_indexer()

values = _ensure_categorical(self.grouper)
return values._reverse_indexer()

@property
def labels(self):
Expand All @@ -2608,7 +2615,7 @@ def group_index(self):

def _make_labels(self):
if self._labels is None or self._group_index is None:
# for the situation of groupby list of groupers
# we have a list of groupers
if isinstance(self.grouper, BaseGrouper):
labels = self.grouper.label_info
uniques = self.grouper.result_index
Expand All @@ -2626,7 +2633,7 @@ def groups(self):


def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
mutated=False):
mutated=False, validate=True):
"""
create and return a BaseGrouper, which is an internal
mapping of how to create the grouper indexers.
Expand All @@ -2643,6 +2650,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
are and then creates a Grouping for each one, combined into
a BaseGrouper.
If validate is True, check for key/level overlaps.
"""
group_axis = obj._get_axis(axis)

Expand Down Expand Up @@ -2767,7 +2776,7 @@ def is_in_obj(gpr):

elif is_in_axis(gpr): # df.groupby('name')
if gpr in obj:
if gpr in obj.index.names:
if validate and gpr in obj.index.names:
warnings.warn(
("'%s' is both a column name and an index level.\n"
"Defaulting to column but "
Expand Down
11 changes: 2 additions & 9 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,14 +879,7 @@ def _downsample(self, how, **kwargs):

if is_subperiod(ax.freq, self.freq):
# Downsampling
if len(new_index) == 0:
bins = []
else:
i8 = memb.asi8
rng = np.arange(i8[0], i8[-1] + 1)
bins = memb.searchsorted(rng, side='right')
grouper = BinGrouper(bins, new_index, indexer=self.groupby.indexer)
return self._groupby_and_aggregate(how, grouper=grouper)
return self._groupby_and_aggregate(how, grouper=self.grouper)
elif is_superperiod(ax.freq, self.freq):
if how == 'ohlc':
# GH #13083
Expand Down Expand Up @@ -1112,7 +1105,7 @@ def _get_resampler(self, obj, kind=None):
"TimedeltaIndex or PeriodIndex, "
"but got an instance of %r" % type(ax).__name__)

def _get_grouper(self, obj):
def _get_grouper(self, obj, validate=True):
# create the resampler and return our binner
r = self._get_resampler(obj)
r._set_binner()
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,9 +634,11 @@ def test_scalar_call_versus_list_call(self):
'value': [1, 2, 3]
}
data_frame = pd.DataFrame(data_frame).set_index('time')
grouper = pd.TimeGrouper('D')
grouper = pd.Grouper(freq='D')

grouped = data_frame.groupby(grouper)
data1 = grouped.count()
result = grouped.count()
grouped = data_frame.groupby([grouper])
data2 = grouped.count()
assert_frame_equal(data1, data2)
expected = grouped.count()

assert_frame_equal(result, expected)

0 comments on commit 950de20

Please sign in to comment.