-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Define boxplot inside DataFrameGroupBy definition #17179
Changes from all commits
ead477b
b4a469b
1d59eff
3627aaf
b639c4d
58e00a7
c3c0e0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
import types | ||
from functools import wraps | ||
from functools import wraps, partial | ||
import numpy as np | ||
import datetime | ||
import collections | ||
|
@@ -54,15 +54,16 @@ | |
from pandas.core.sorting import (get_group_index_sorter, get_group_index, | ||
compress_group_index, get_flattened_iterator, | ||
decons_obs_group_ids, get_indexer_dict) | ||
from pandas.util._decorators import (cache_readonly, Substitution, | ||
Appender, make_signature) | ||
from pandas.util._decorators import cache_readonly, Substitution, Appender | ||
from pandas.io.formats.printing import pprint_thing | ||
from pandas.util._validators import validate_kwargs | ||
|
||
import pandas.core.algorithms as algorithms | ||
import pandas.core.common as com | ||
from pandas.core.config import option_context | ||
|
||
from pandas.plotting._core import boxplot_frame_groupby | ||
|
||
from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT | ||
from pandas._libs.lib import count_level_2d | ||
|
||
|
@@ -950,7 +951,6 @@ def _apply_filter(self, indices, dropna): | |
|
||
|
||
class GroupBy(_GroupBy): | ||
|
||
""" | ||
Class for grouping and aggregating relational data. See aggregate, | ||
transform, and apply functions on this object. | ||
|
@@ -2553,8 +2553,7 @@ def groups(self): | |
self.group_index)) | ||
|
||
|
||
def _get_grouper(obj, key=None, axis=0, level=None, sort=True, | ||
mutated=False): | ||
def _get_grouper(obj, key=None, axis=0, level=None, sort=True, mutated=False): | ||
""" | ||
create and return a BaseGrouper, which is an internal | ||
mapping of how to create the grouper indexers. | ||
|
@@ -2742,74 +2741,124 @@ def _convert_grouper(axis, grouper): | |
return grouper | ||
|
||
|
||
def _whitelist_method_generator(klass, whitelist): | ||
""" | ||
Yields all GroupBy member defs for DataFrame/Series names in _whitelist. | ||
|
||
Parameters | ||
---------- | ||
klass - class where members are defined. Should be Series or DataFrame | ||
|
||
whitelist - list of names of klass methods to be constructed | ||
|
||
Returns | ||
------- | ||
The generator yields a sequence of strings, each suitable for exec'ing, | ||
that define implementations of the named methods for DataFrameGroupBy | ||
or SeriesGroupBy. | ||
|
||
Since we don't want to override methods explicitly defined in the | ||
base class, any such name is skipped. | ||
""" | ||
|
||
method_wrapper_template = \ | ||
"""def %(name)s(%(sig)s) : | ||
\""" | ||
%(doc)s | ||
\""" | ||
f = %(self)s.__getattr__('%(name)s') | ||
return f(%(args)s)""" | ||
property_wrapper_template = \ | ||
"""@property | ||
def %(name)s(self) : | ||
\""" | ||
%(doc)s | ||
\""" | ||
return self.__getattr__('%(name)s')""" | ||
for name in whitelist: | ||
# don't override anything that was explicitly defined | ||
# in the base class | ||
if hasattr(GroupBy, name): | ||
continue | ||
# ugly, but we need the name string itself in the method. | ||
f = getattr(klass, name) | ||
doc = f.__doc__ | ||
doc = doc if type(doc) == str else '' | ||
if isinstance(f, types.MethodType): | ||
wrapper_template = method_wrapper_template | ||
decl, args = make_signature(f) | ||
# pass args by name to f because otherwise | ||
# GroupBy._make_wrapper won't know whether | ||
# we passed in an axis parameter. | ||
args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]] | ||
params = {'name': name, | ||
'doc': doc, | ||
'sig': ','.join(decl), | ||
'self': args[0], | ||
'args': ','.join(args_by_name)} | ||
else: | ||
wrapper_template = property_wrapper_template | ||
params = {'name': name, 'doc': doc} | ||
yield wrapper_template % params | ||
|
||
|
||
# TODO: *args/**kwargs get ignored in many of the methods | ||
# inherited from Series/DataFrame. Should they be used, dropped ...? | ||
class SeriesGroupBy(GroupBy): | ||
# | ||
# Make class defs of attributes on SeriesGroupBy whitelist | ||
_apply_whitelist = _series_apply_whitelist | ||
for _def_str in _whitelist_method_generator(Series, | ||
_series_apply_whitelist): | ||
exec(_def_str) | ||
|
||
@Appender(Series.all.__doc__) | ||
def all(self, axis=None, bool_only=None, skipna=None, level=None, | ||
**kwargs): | ||
f = self.__getattr__('all') | ||
return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) | ||
|
||
@property | ||
def dtype(self): | ||
""" | ||
return the dtype object of the underlying data | ||
""" | ||
return self.__getattr__('dtype') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this indirection is extremely odd to do, simply use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. Again: this is the code that is generated by |
||
|
||
@Appender(Series.idxmax.__doc__) | ||
def idxmax(self, axis=None, skipna=True, *args, **kwargs): | ||
f = self.__getattr__('idxmax') | ||
return f(axis=axis, skipna=skipna) | ||
|
||
@Appender(Series.rank.__doc__) | ||
def rank(self, axis=0, method='average', numeric_only=None, | ||
na_option='keep', ascending=True, pct=False): | ||
f = self.__getattr__('rank') | ||
return f(axis=axis, method=method, numeric_only=numeric_only, | ||
na_option=na_option, ascending=ascending, pct=pct) | ||
|
||
@Appender(Series.diff.__doc__) | ||
def diff(self, periods=1): | ||
f = self.__getattr__('diff') | ||
return f(periods=periods) | ||
|
||
@Appender(Series.any.__doc__) | ||
def any(self, axis=None, bool_only=None, skipna=None, level=None, | ||
**kwargs): | ||
f = self.__getattr__('any') | ||
return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) | ||
|
||
@Appender(Series.nsmallest.__doc__) | ||
def nsmallest(self, n=5, keep='first'): | ||
f = self.__getattr__('nsmallest') | ||
return f(n=n, keep=keep) | ||
|
||
@Appender(Series.quantile.__doc__) | ||
def quantile(self, q=0.5, interpolation='linear'): | ||
f = self.__getattr__('quantile') | ||
return f(q=q, interpolation=interpolation) | ||
|
||
@Appender(Series.hist.__doc__) | ||
def hist(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, | ||
ylabelsize=None, yrot=None, figsize=None, bins=10, **kwds): | ||
f = self.__getattr__('hist') | ||
return f(by=by, ax=ax, grid=grid, xlabelsize=xlabelsize, xrot=xrot, | ||
ylabelsize=ylabelsize, yrot=yrot, figsize=figsize, bins=bins) | ||
|
||
@Appender(Series.take.__doc__) | ||
def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): | ||
f = self.__getattr__('take') | ||
return f(indices=indices, axis=axis, convert=convert, is_copy=is_copy) | ||
|
||
@Appender(Series.mad.__doc__) | ||
def mad(self, axis=None, skipna=None, level=None): | ||
f = self.__getattr__('mad') | ||
return f(axis=axis, skipna=skipna, level=level) | ||
|
||
@Appender(Series.corr.__doc__) | ||
def corr(self, other, method='pearson', min_periods=None): | ||
f = self.__getattr__('corr') | ||
return f(other=other, method=method, min_periods=min_periods) | ||
|
||
@Appender(Series.fillna.__doc__) | ||
def fillna(self, value=None, method=None, axis=None, inplace=False, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. many of these keywords need to be eliminated. e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above, I suggest this should happen in a follow-up. This is not a new problem, but an existing problem that was hidden by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is not enough testing around this. i am concerned you are actually changing the signatures. can you add covereage for all of the exposed functions (just assert that they work with a full spread of arguments). xfail the ones that are problematic. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you didn't address this point. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right. This point in my "get back to it later" pile, as it is several steps removed from the original goal of the PR. |
||
limit=None, downcast=None, **kwargs): | ||
f = self.__getattr__('fillna') | ||
return f(value=value, method=method, axis=axis, inplace=inplace, | ||
limit=limit, downcast=downcast) | ||
|
||
@Appender(Series.unique.__doc__) | ||
def unique(self): | ||
f = self.__getattr__('unique') | ||
return f() | ||
|
||
@Appender(Series.idxmin.__doc__) | ||
def idxmin(self, axis=None, skipna=True, *args, **kwargs): | ||
f = self.__getattr__('idxmin') | ||
return f(axis=axis, skipna=skipna) | ||
|
||
@Appender(Series.cov.__doc__) | ||
def cov(self, other, min_periods=None): | ||
f = self.__getattr__('cov') | ||
return f(other=other, min_periods=min_periods) | ||
|
||
@Appender(Series.tshift.__doc__) | ||
def tshift(self, periods=1, freq=None, axis=0): | ||
f = self.__getattr__('tshift') | ||
return f(periods=periods, freq=freq, axis=axis) | ||
|
||
@Appender(Series.pct_change.__doc__) | ||
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, | ||
**kwargs): | ||
f = self.__getattr__('pct_change') | ||
return f(periods=periods, fill_method=fill_method, limit=limit, | ||
freq=freq) | ||
|
||
@Appender(Series.skew.__doc__) | ||
def skew(self, axis=None, skipna=None, level=None, numeric_only=None, | ||
**kwargs): | ||
f = self.__getattr__('skew') | ||
return f(axis=axis, skipna=skipna, level=level, | ||
numeric_only=numeric_only) | ||
|
||
@Appender(Series.nlargest.__doc__) | ||
def nlargest(self, n=5, keep='first'): | ||
f = self.__getattr__('nlargest') | ||
return f(n=n, keep=keep) | ||
|
||
@property | ||
def _selection_name(self): | ||
|
@@ -3189,7 +3238,6 @@ def describe(self, **kwargs): | |
def value_counts(self, normalize=False, sort=True, ascending=False, | ||
bins=None, dropna=True): | ||
|
||
from functools import partial | ||
from pandas.core.reshape.tile import cut | ||
from pandas.core.reshape.merge import _get_join_indexers | ||
|
||
|
@@ -3964,13 +4012,117 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa | |
|
||
class DataFrameGroupBy(NDFrameGroupBy): | ||
_apply_whitelist = _dataframe_apply_whitelist | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this concept of a whitelist may not be necessary anymore There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is still referenced in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should not be referenced in the tests at all There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See tests/groupby/test_whitelist.py Looks like it also shows up in
AFAICT There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i would remove the whitelist entirely; since you are defining the whitelist the methods no longer are referenced thru that mechanism (and instead thru explicit methods). you can simply setup a list in the tests that is a list of these methods and check that they are defined. |
||
# | ||
# Make class defs of attributes on DataFrameGroupBy whitelist. | ||
for _def_str in _whitelist_method_generator(DataFrame, _apply_whitelist): | ||
exec(_def_str) | ||
|
||
_block_agg_axis = 1 | ||
|
||
@Appender(DataFrame.all.__doc__) | ||
def all(self, axis=None, bool_only=None, skipna=None, level=None, | ||
**kwargs): | ||
f = self.__getattr__('all') | ||
return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) | ||
|
||
@Appender(DataFrame.idxmax.__doc__) | ||
def idxmax(self, axis=0, skipna=True): | ||
f = self.__getattr__('idxmax') | ||
return f(axis=axis, skipna=skipna) | ||
|
||
@Appender(DataFrame.rank.__doc__) | ||
def rank(self, axis=0, method='average', numeric_only=None, | ||
na_option='keep', ascending=True, pct=False): | ||
f = self.__getattr__('rank') | ||
return f(axis=axis, method=method, numeric_only=numeric_only, | ||
na_option=na_option, ascending=ascending, pct=pct) | ||
|
||
@Appender(DataFrame.diff.__doc__) | ||
def diff(self, periods=1, axis=0): | ||
f = self.__getattr__('diff') | ||
return f(periods=periods, axis=axis) | ||
|
||
@Appender(DataFrame.any.__doc__) | ||
def any(self, axis=None, bool_only=None, skipna=None, level=None, | ||
**kwargs): | ||
f = self.__getattr__('any') | ||
return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) | ||
|
||
@Appender(DataFrame.quantile.__doc__) | ||
def quantile(self, q=0.5, axis=0, numeric_only=True, | ||
interpolation='linear'): | ||
f = self.__getattr__('quantile') | ||
return f(q=q, axis=axis, numeric_only=numeric_only, | ||
interpolation=interpolation) | ||
|
||
@Appender(DataFrame.hist.__doc__) | ||
def hist(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, | ||
ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, | ||
figsize=None, layout=None, bins=10, **kwds): | ||
f = data.__getattr__('hist') | ||
return f(column=column, by=by, grid=grid, xlabelsize=xlabelsize, | ||
xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, ax=ax, | ||
sharex=sharex, sharey=sharey, figsize=figsize, layout=layout, | ||
bins=bins) | ||
|
||
@Appender(DataFrame.take.__doc__) | ||
def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): | ||
f = self.__getattr__('take') | ||
return f(indices=indices, axis=axis, convert=convert, is_copy=is_copy) | ||
|
||
@property | ||
def dtypes(self): | ||
""" | ||
Return the dtypes in this object. | ||
""" | ||
return self.__getattr__('dtypes') | ||
|
||
@Appender(DataFrame.mad.__doc__) | ||
def mad(self, axis=None, skipna=None, level=None): | ||
f = self.__getattr__('mad') | ||
return f(axis=axis, skipna=skipna, level=level) | ||
|
||
@Appender(DataFrame.corr.__doc__) | ||
def corr(self, method='pearson', min_periods=1): | ||
f = self.__getattr__('corr') | ||
return f(method=method, min_periods=min_periods) | ||
|
||
@Appender(DataFrame.fillna.__doc__) | ||
def fillna(self, value=None, method=None, axis=None, inplace=False, | ||
limit=None, downcast=None, **kwargs): | ||
f = self.__getattr__('fillna') | ||
return f(value=value, method=method, axis=axis, inplace=inplace, | ||
limit=limit, downcast=downcast) | ||
|
||
@Appender(DataFrame.idxmin.__doc__) | ||
def idxmin(self, axis=0, skipna=True): | ||
f = self.__getattr__('idxmin') | ||
return f(axis=axis, skipna=skipna) | ||
|
||
@Appender(DataFrame.cov.__doc__) | ||
def cov(self, min_periods=None): | ||
f = self.__getattr__('cov') | ||
return f(min_periods=min_periods) | ||
|
||
@Appender(DataFrame.tshift.__doc__) | ||
def tshift(self, periods=1, freq=None, axis=0): | ||
f = self.__getattr__('tshift') | ||
return f(periods=periods, freq=freq, axis=axis) | ||
|
||
@Appender(DataFrame.pct_change.__doc__) | ||
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, | ||
**kwargs): | ||
f = self.__getattr__('pct_change') | ||
return f(periods=periods, fill_method=fill_method, limit=limit, | ||
freq=freq) | ||
|
||
@Appender(DataFrame.skew.__doc__) | ||
def skew(self, axis=None, skipna=None, level=None, numeric_only=None, | ||
**kwargs): | ||
f = self.__getattr__('skew') | ||
return f(axis=axis, skipna=skipna, level=level, | ||
numeric_only=numeric_only) | ||
|
||
@Appender(DataFrame.corrwith.__doc__) | ||
def corrwith(self, other, axis=0, drop=False): | ||
f = self.__getattr__('corrwith') | ||
return f(other=other, axis=axis, drop=drop) | ||
|
||
_agg_doc = dedent(""" | ||
Examples | ||
-------- | ||
|
@@ -4203,7 +4355,6 @@ def _apply_to_column_groupbys(self, func): | |
|
||
def count(self): | ||
""" Compute count of group, excluding missing values """ | ||
from functools import partial | ||
from pandas.core.dtypes.missing import _isna_ndarraylike as isna | ||
|
||
data, _ = self._get_data_to_aggregate() | ||
|
@@ -4283,9 +4434,7 @@ def groupby_series(obj, col=None): | |
results.index = _default_index(len(results)) | ||
return results | ||
|
||
|
||
from pandas.plotting._core import boxplot_frame_groupby # noqa | ||
DataFrameGroupBy.boxplot = boxplot_frame_groupby | ||
boxplot = boxplot_frame_groupby | ||
|
||
|
||
class PanelGroupBy(NDFrameGroupBy): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just adding the doc-string is not really enough. you need to have a bit of substitution and/or mention that this is actually a groupby method.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure. Under the status-quo the docstring is attached verbatim but with some extra whitespace. My preference would be to address this in a follow-up PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that is fine