From df865a11ce9ca4a0c7ba7356a8f14edb842ae7a0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 16 May 2018 16:36:42 -0700 Subject: [PATCH 1/9] Added test for rolling --- pandas/tests/test_window.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index d8e90ae0e1b35..dfe791f4f9ba1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -520,6 +520,39 @@ def test_iter_raises(self, klass): with pytest.raises(NotImplementedError): iter(obj.rolling(2)) + def test_agg(self): + # GH 15072 + df = pd.DataFrame([ + ['A', 10, 20], + ['A', 20, 30], + ['A', 30, 40], + ['B', 10, 30], + ['B', 30, 40], + ['B', 40, 80], + ['B', 80, 90]], columns=['stock', 'low', 'high']) + + index = pd.MultiIndex.from_tuples([ + ('A', 0), ('A', 1), ('A', 2), + ('B', 3), ('B', 4), ('B', 5), ('B', 6)], names=['stock', None]) + columns = pd.MultiIndex.from_tuples([ + ('A', 'mean'), ('A', 'max'), ('B', 'mean'), ('B', 'min')]) + expected = pd.DataFrame([ + [np.nan, np.nan, np.nan, np.nan], + [15., 20., 25., 20.], + [25., 30., 35., 30.], + [np.nan, np.nan, np.nan, np.nan], + [20., 30., 35., 30.], + [35., 40., 60., 40.], + [60., 80., 85., 80]], index=index, columns=columns) + + result = df.groupby('stock').rolling(2).agg({ + 'low': ['mean', 'max'], + 'high': ['mean', 'min'] + }) + + tm.assert_frame_equal(result, expected) + + class TestExpanding(Base): From 36afc13f0834174f427188a0d17be34dbe16ea29 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 4 Jun 2018 21:42:59 -0700 Subject: [PATCH 2/9] Fixed GroupByMixin class constructor with SeriesGroupBy --- pandas/core/base.py | 8 +++++++- pandas/tests/test_window.py | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index c331ead8d2fef..ec3d6b7ef0adc 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -684,8 +684,14 @@ def _gotitem(self, key, ndim, subset=None): # with the same groupby kwargs = dict([(attr, getattr(self, attr)) for attr in self._attributes]) + + if self._groupby.ndim == 1 and self._groupby.obj.name == key: + groupby = self._groupby + else: + groupby = self._groupby[key] + self = self.__class__(subset, - groupby=self._groupby[key], + groupby=groupby, parent=self, **kwargs) self._reset_cache() diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index a775b1fe91863..b72cb4eb5b976 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -535,7 +535,8 @@ def test_agg(self): ('A', 0), ('A', 1), ('A', 2), ('B', 3), ('B', 4), ('B', 5), ('B', 6)], names=['stock', None]) columns = pd.MultiIndex.from_tuples([ - ('A', 'mean'), ('A', 'max'), ('B', 'mean'), ('B', 'min')]) + ('low', 'mean'), ('low', 'max'), ('high', 'mean'), + ('high', 'min')]) expected = pd.DataFrame([ [np.nan, np.nan, np.nan, np.nan], [15., 20., 25., 20.], @@ -553,7 +554,6 @@ def test_agg(self): tm.assert_frame_equal(result, expected) - class TestExpanding(Base): def setup_method(self, method): From 3635ce99f3c7fb34d4efaf7b3a737220f30e85d2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 5 Jun 2018 09:08:02 -0700 Subject: [PATCH 3/9] Replaced introspection with try...except --- pandas/core/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index ec3d6b7ef0adc..68a3ec845117e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -685,10 +685,10 @@ def _gotitem(self, key, ndim, subset=None): kwargs = dict([(attr, getattr(self, attr)) for attr in self._attributes]) - if self._groupby.ndim == 1 and self._groupby.obj.name == key: - groupby = self._groupby - else: - groupby = self._groupby[key] + try: + groupby = self._groupby[key] # get slice of frame + except Exception: + groupby = self._groupby # working with a Series self = self.__class__(subset, groupby=groupby, From d6e77d6a0bc8d0590cdbbddfd02995be5952acec Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 5 Jun 2018 09:23:24 -0700 Subject: [PATCH 4/9] Moved and parametrized test for rolling/expanding --- pandas/tests/test_window.py | 80 ++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index b72cb4eb5b976..8265f6fc688ae 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -308,6 +308,53 @@ def test_preserve_metadata(self): assert s2.name == 'foo' assert s3.name == 'foo' + @pytest.mark.parametrize("func,window_size,expected_vals", [ + ('rolling', 2, [[np.nan, np.nan, np.nan, np.nan], + [15., 20., 25., 20.], + [25., 30., 35., 30.], + [np.nan, np.nan, np.nan, np.nan], + [20., 30., 35., 30.], + [35., 40., 60., 40.], + [60., 80., 85., 80]]), + ('expanding', None, [[10., 10., 20., 20.], + [15., 20., 25., 20.], + [20., 30., 30., 20.], + [10., 10., 30., 30.], + [20., 30., 35., 30.], + [26.666667, 40., 50., 30.], + [40., 80., 60., 30.]])]) + def test_multiple_agg_funcs(self, func, window_size, expected_vals): + # GH 15072 + df = pd.DataFrame([ + ['A', 10, 20], + ['A', 20, 30], + ['A', 30, 40], + ['B', 10, 30], + ['B', 30, 40], + ['B', 40, 80], + ['B', 80, 90]], columns=['stock', 'low', 'high']) + + f = getattr(df.groupby('stock'), func) + if window_size: + window = f(window_size) + else: + window = f() + + index = pd.MultiIndex.from_tuples([ + ('A', 0), ('A', 1), ('A', 2), + ('B', 3), ('B', 4), ('B', 5), ('B', 6)], names=['stock', None]) + columns = pd.MultiIndex.from_tuples([ + ('low', 'mean'), ('low', 'max'), ('high', 'mean'), + ('high', 'min')]) + expected = pd.DataFrame(expected_vals, index=index, columns=columns) + + result = window.agg({ + 'low': ['mean', 'max'], + 'high': ['mean', 'min'] + }) + + tm.assert_frame_equal(result, expected) + class TestWindow(Base): @@ -520,39 +567,6 @@ def test_iter_raises(self, klass): with pytest.raises(NotImplementedError): iter(obj.rolling(2)) - def test_agg(self): - # GH 15072 - df = pd.DataFrame([ - ['A', 10, 20], - ['A', 20, 30], - ['A', 30, 40], - ['B', 10, 30], - ['B', 30, 40], - ['B', 40, 80], - ['B', 80, 90]], columns=['stock', 'low', 'high']) - - index = pd.MultiIndex.from_tuples([ - ('A', 0), ('A', 1), ('A', 2), - ('B', 3), ('B', 4), ('B', 5), ('B', 6)], names=['stock', None]) - columns = pd.MultiIndex.from_tuples([ - ('low', 'mean'), ('low', 'max'), ('high', 'mean'), - ('high', 'min')]) - expected = pd.DataFrame([ - [np.nan, np.nan, np.nan, np.nan], - [15., 20., 25., 20.], - [25., 30., 35., 30.], - [np.nan, np.nan, np.nan, np.nan], - [20., 30., 35., 30.], - [35., 40., 60., 40.], - [60., 80., 85., 80]], index=index, columns=columns) - - result = df.groupby('stock').rolling(2).agg({ - 'low': ['mean', 'max'], - 'high': ['mean', 'min'] - }) - - tm.assert_frame_equal(result, expected) - class TestExpanding(Base): From c8563a97b2c6c83955d65877bd42443e0f13331a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 5 Jun 2018 09:28:10 -0700 Subject: [PATCH 5/9] Updated whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6cbc19cca99e1..c4f3b12a1ab4c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -161,7 +161,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`) - - From a275aec8f2939ea0029ce3104f509df2d4167ce0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 5 Jun 2018 17:56:12 -0700 Subject: [PATCH 6/9] OrderedDict for test case --- pandas/tests/test_window.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 8265f6fc688ae..bf6b04c717155 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,3 +1,4 @@ +from collections import OrderedDict from itertools import product import pytest import warnings @@ -348,10 +349,10 @@ def test_multiple_agg_funcs(self, func, window_size, expected_vals): ('high', 'min')]) expected = pd.DataFrame(expected_vals, index=index, columns=columns) - result = window.agg({ - 'low': ['mean', 'max'], - 'high': ['mean', 'min'] - }) + result = window.agg(OrderedDict(( + ('low', ['mean', 'max']), + ('high', ['mean', 'min']), + ))) tm.assert_frame_equal(result, expected) From faf6846b3ae8e6291149fae5eb55b750621923c7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 5 Jun 2018 18:11:07 -0700 Subject: [PATCH 7/9] More Detailed Exception for __getitem__ --- pandas/core/base.py | 6 +++--- pandas/tests/groupby/test_groupby.py | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 68a3ec845117e..1392ad15dacaf 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -246,8 +246,8 @@ def _obj_with_exclusions(self): def __getitem__(self, key): if self._selection is not None: - raise Exception('Column(s) {selection} already selected' - .format(selection=self._selection)) + raise IndexError('Column(s) {selection} already selected' + .format(selection=self._selection)) if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): @@ -687,7 +687,7 @@ def _gotitem(self, key, ndim, subset=None): try: groupby = self._groupby[key] # get slice of frame - except Exception: + except IndexError: groupby = self._groupby # working with a Series self = self.__class__(subset, diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e05f9de5ea7f4..27b134c8f4a1c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -624,8 +624,13 @@ def test_as_index_series_return_frame(df): assert isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) - # corner case - pytest.raises(Exception, grouped['C'].__getitem__, 'D') + +def test_as_index_series_column_slice_raises(df): + grouped = df.groupby('A', as_index=False) + msg = "Column\(s\) C already selected" + + with tm.assert_raises_regex(IndexError, msg): + grouped['C'].__getitem__('D') def test_groupby_as_index_cython(df): From 1fe6559573b830569d65041424974c7aac27b95f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 25 Sep 2018 09:51:48 -0700 Subject: [PATCH 8/9] Updated comments and implementation --- pandas/core/groupby/base.py | 9 ++++++++- pandas/tests/groupby/test_groupby.py | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 96c74f7fd4d75..ac84971de08d8 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -44,8 +44,15 @@ def _gotitem(self, key, ndim, subset=None): # we need to make a shallow copy of ourselves # with the same groupby kwargs = {attr: getattr(self, attr) for attr in self._attributes} + + # Try to select from a DataFrame, falling back to a Series + try: + groupby = self._groupby[key] + except IndexError: + groupby = self._groupby + self = self.__class__(subset, - groupby=self._groupby[key], + groupby=groupby, parent=self, **kwargs) self._reset_cache() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b5cdb3dd45530..e9a51052b86dc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -625,6 +625,7 @@ def test_as_index_series_return_frame(df): def test_as_index_series_column_slice_raises(df): + # GH15072 grouped = df.groupby('A', as_index=False) msg = "Column\(s\) C already selected" From c234b364847a3e06dd471d805e554f1c852f8419 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 25 Sep 2018 12:09:10 -0700 Subject: [PATCH 9/9] Raw regex for LINT fixup --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e9a51052b86dc..3cdd0965ccfd0 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -627,7 +627,7 @@ def test_as_index_series_return_frame(df): def test_as_index_series_column_slice_raises(df): # GH15072 grouped = df.groupby('A', as_index=False) - msg = "Column\(s\) C already selected" + msg = r"Column\(s\) C already selected" with tm.assert_raises_regex(IndexError, msg): grouped['C'].__getitem__('D')