Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: RollingGroupby with closed and column selection no longer raises ValueError #35639

Merged
merged 2 commits into from
Aug 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v1.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ Categorical
-
-

**Groupby/resample/rolling**

- Bug in :class:`pandas.core.groupby.RollingGroupby` where passing ``closed`` with column selection would raise a ``ValueError`` (:issue:`35549`)
jreback marked this conversation as resolved.
Show resolved Hide resolved

**Plotting**

-
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, obj, *args, **kwargs):
kwargs.pop("parent", None)
groupby = kwargs.pop("groupby", None)
if groupby is None:
groupby, obj = obj, obj.obj
groupby, obj = obj, obj._selected_obj
self._groupby = groupby
self._groupby.mutated = True
self._groupby.grouper.mutated = True
Expand Down
10 changes: 3 additions & 7 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2212,7 +2212,7 @@ def _apply(
# Cannot use _wrap_outputs because we calculate the result all at once
# Compose MultiIndex result from grouping levels then rolling level
# Aggregate the MultiIndex data as tuples then the level names
grouped_object_index = self._groupby._selected_obj.index
grouped_object_index = self.obj.index
grouped_index_name = [grouped_object_index.name]
groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
result_index_names = groupby_keys + grouped_index_name
Expand All @@ -2236,10 +2236,6 @@ def _apply(
def _constructor(self):
return Rolling

@cache_readonly
def _selected_obj(self):
return self._groupby._selected_obj

def _create_blocks(self, obj: FrameOrSeries):
"""
Split data into blocks & return conformed data.
Expand Down Expand Up @@ -2278,7 +2274,7 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
rolling_indexer: Union[Type[FixedWindowIndexer], Type[VariableWindowIndexer]]
if self.is_freq_type:
rolling_indexer = VariableWindowIndexer
index_array = self._groupby._selected_obj.index.asi8
index_array = self.obj.index.asi8
else:
rolling_indexer = FixedWindowIndexer
index_array = None
Expand All @@ -2295,7 +2291,7 @@ def _gotitem(self, key, ndim, subset=None):
# here so our index is carried thru to the selected obj
# when we do the splitting for the groupby
if self.on is not None:
self._groupby.obj = self._groupby.obj.set_index(self._on)
self.obj = self.obj.set_index(self._on)
self.on = None
return super()._gotitem(key, ndim, subset=subset)

Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/window/test_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,54 @@ def test_groupby_subselect_rolling(self):
name="b",
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_subset_with_closed(self):
    """
    Column selection after a groupby-rolling that passes ``closed``.

    Builds a six-row frame whose rows alternate between groups "A" and
    "B" and all carry the same timestamp, rolls a "1D" window over the
    "date" column with ``closed="left"``, selects "column1" and sums it.
    The result is compared with a Series indexed by (group, date).
    """
    # GH 35549
    stamp = pd.Timestamp("2019-01-01")
    frame = pd.DataFrame(
        {
            "column1": range(6),
            "column2": range(6),
            "group": ["A", "B"] * 3,
            "date": [stamp] * 6,
        }
    )

    # Roll first, then pick the column: this is the call order under test.
    roller = frame.groupby("group").rolling("1D", on="date", closed="left")
    result = roller["column1"].sum()

    # Three rows per group, "A" rows first.
    index_tuples = [("A", stamp)] * 3 + [("B", stamp)] * 3
    expected_index = pd.MultiIndex.from_tuples(
        index_tuples, names=["group", "date"]
    )
    expected = Series(
        [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
        index=expected_index,
        name="column1",
    )
    tm.assert_series_equal(result, expected)

def test_groupby_subset_rolling_subset_with_closed(self):
    """
    Column selection both before and after a rolling that passes ``closed``.

    Builds a six-row frame whose rows alternate between groups "A" and
    "B" and all carry the same timestamp. The groupby is first narrowed
    to ["column1", "date"], then a "1D" window is rolled over "date" with
    ``closed="left"``, "column1" is selected again and summed. The result
    is compared with a Series indexed by (group, date).
    """
    # GH 35549
    stamp = pd.Timestamp("2019-01-01")
    frame = pd.DataFrame(
        {
            "column1": range(6),
            "column2": range(6),
            "group": ["A", "B"] * 3,
            "date": [stamp] * 6,
        }
    )

    # Subset on the groupby itself before rolling, then subset again after.
    narrowed = frame.groupby("group")[["column1", "date"]]
    roller = narrowed.rolling("1D", on="date", closed="left")
    result = roller["column1"].sum()

    # Three rows per group, "A" rows first.
    index_tuples = [("A", stamp)] * 3 + [("B", stamp)] * 3
    expected_index = pd.MultiIndex.from_tuples(
        index_tuples, names=["group", "date"]
    )
    expected = Series(
        [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
        index=expected_index,
        name="column1",
    )
    tm.assert_series_equal(result, expected)