DEPR: Enforce deprecation of include_groups in groupby.apply
rhshadrach committed Dec 14, 2024
1 parent 9501650 commit 23fdb9f
Showing 24 changed files with 273 additions and 680 deletions.
4 changes: 2 additions & 2 deletions doc/source/user_guide/cookbook.rst
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df
# List the size of the animals with the highest weight.
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
`Using get_group
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])
- expected_df = gb.apply(GrowUp, include_groups=False)
+ expected_df = gb.apply(GrowUp)
expected_df
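
A simplified, illustrative take on this recipe under the new default; the data and the body of grow_up below are stand-ins, not the cookbook's exact GrowUp:

    import pandas as pd

    df = pd.DataFrame(
        {"animal": ["cat", "cat", "dog"], "size": ["S", "L", "M"], "weight": [8, 10, 20]}
    )
    gb = df.groupby("animal")

    def grow_up(x: pd.DataFrame) -> pd.Series:
        # x holds only the non-grouping columns ("size" and "weight").
        avg_weight = x["weight"].mean()
        return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])

    expected_df = gb.apply(grow_up)  # no include_groups argument needed
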
`Expanding apply
8 changes: 4 additions & 4 deletions doc/source/user_guide/groupby.rst
@@ -1074,7 +1074,7 @@ missing values with the ``ffill()`` method.
).set_index("date")
df_re
df_re.groupby("group").resample("1D", include_groups=False).ffill()
df_re.groupby("group").resample("1D").ffill()
.. _groupby.filter:

@@ -1252,13 +1252,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare

.. ipython:: python
df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
df.groupby("A", group_keys=True).apply(lambda x: x)
with

.. ipython:: python
df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
df.groupby("A", group_keys=False).apply(lambda x: x)
Numba accelerated routines
@@ -1742,7 +1742,7 @@ column index name will be used as the name of the inserted column:
result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
return pd.Series(result, name="metrics")
result = df.groupby("a").apply(compute_metrics, include_groups=False)
result = df.groupby("a").apply(compute_metrics)
result
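
A self-contained sketch of this example; the data values are assumed:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [1.0, 2.0, 3.0], "c": [4.0, 5.0, 6.0]})

    def compute_metrics(x: pd.DataFrame) -> pd.Series:
        # x contains only "b" and "c"; the grouping column "a" is excluded.
        result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
        return pd.Series(result, name="metrics")

    result = df.groupby("a").apply(compute_metrics)
    # One row per group; the Series name "metrics" supplies the column index name.
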
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -554,6 +554,7 @@ Other Removals
- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
+ - Removed specifying ``include_groups=True`` in :meth:`.DataFrameGroupBy.apply` and :meth:`.Resampler.apply` (:issue:`7155`)
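
For migrating code, a hedged sketch of the replacement the 2.x deprecation message suggested: select the grouping columns explicitly when ``func`` still needs them.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})

    # pandas < 3.0: include_groups=True exposed "a" inside func (deprecated).
    # pandas >= 3.0: select the grouping column explicitly instead.
    out = df.groupby("a")[["a", "b"]].apply(lambda g: g["a"].iloc[0] + g["b"].sum())
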

.. ---------------------------------------------------------------------------
.. _whatsnew_300.performance:
89 changes: 23 additions & 66 deletions pandas/core/groupby/groupby.py
@@ -1393,7 +1393,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
# -----------------------------------------------------------------
# apply/agg/transform

- def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
+ def apply(self, func, *args, include_groups: bool = False, **kwargs) -> NDFrameT:
"""
Apply function ``func`` group-wise and combine the results together.
@@ -1419,18 +1419,17 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
*args : tuple
Optional positional arguments to pass to ``func``.
- include_groups : bool, default True
+ include_groups : bool, default False
When True, will attempt to apply ``func`` to the groupings in
the case that they are columns of the DataFrame. If this raises a
TypeError, the result will be computed with the groupings excluded.
When False, the groupings will be excluded when applying ``func``.
.. versionadded:: 2.2.0
- .. deprecated:: 2.2.0
+ .. versionchanged:: 3.0.0
-    Setting include_groups to True is deprecated. Only the value
-    False will be allowed in a future version of pandas.
+    The default changed from True to False, and True is no longer allowed.
**kwargs : dict
Optional keyword arguments to pass to ``func``.
@@ -1520,7 +1519,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
each group together into a Series, including setting the index as
appropriate:
- >>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False)
+ >>> g1.apply(lambda x: x.C.max() - x.B.min())
A
a 5
b 2
@@ -1529,11 +1528,13 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
Example 4: The function passed to ``apply`` returns ``None`` for one of the
group. This group is filtered from the result:
- >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
+ >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x)
B C
0 1 4
1 2 6
"""
+ if include_groups:
+     raise ValueError("include_groups=True is no longer allowed.")
if isinstance(func, str):
if hasattr(self, func):
res = getattr(self, func)
@@ -1560,33 +1561,7 @@ def f(g):
else:
f = func

- if not include_groups:
-     return self._python_apply_general(f, self._obj_with_exclusions)
-
- try:
-     result = self._python_apply_general(f, self._selected_obj)
-     if (
-         not isinstance(self.obj, Series)
-         and self._selection is None
-         and self._selected_obj.shape != self._obj_with_exclusions.shape
-     ):
-         warnings.warn(
-             message=_apply_groupings_depr.format(type(self).__name__, "apply"),
-             category=DeprecationWarning,
-             stacklevel=find_stack_level(),
-         )
- except TypeError:
-     # gh-20949
-     # try again, with .apply acting as a filtering
-     # operation, by excluding the grouping column
-     # This would normally not be triggered
-     # except if the udf is trying an operation that
-     # fails on *some* columns, e.g. a numeric operation
-     # on a string grouper column
-
-     return self._python_apply_general(f, self._obj_with_exclusions)
-
- return result
+ return self._python_apply_general(f, self._obj_with_exclusions)

@final
def _python_apply_general(
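
To make the enforcement concrete, a quick assumed demonstration of the new guard in ``apply``:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})

    # Passing include_groups=True now raises instead of emitting a warning.
    try:
        df.groupby("a").apply(lambda g: g.sum(), include_groups=True)
    except ValueError as err:
        print(err)  # include_groups=True is no longer allowed.
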
@@ -3424,7 +3399,9 @@ def describe(
return result

@final
- def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
+ def resample(
+     self, rule, *args, include_groups: bool = False, **kwargs
+ ) -> Resampler:
"""
Provide resampling when using a TimeGrouper.
@@ -3449,10 +3426,9 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
.. versionadded:: 2.2.0
- .. deprecated:: 2.2.0
+ .. versionchanged:: 3.0
-    Setting include_groups to True is deprecated. Only the value
-    False will be allowed in a future version of pandas.
+    The default was changed to False, and True is no longer allowed.
**kwargs
Possible arguments are `how`, `fill_method`, `limit`, `kind` and
@@ -3485,7 +3461,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Downsample the DataFrame into 3 minute bins and sum the values of
the timestamps falling into a bin.
>>> df.groupby("a").resample("3min", include_groups=False).sum()
>>> df.groupby("a").resample("3min").sum()
b
a
0 2000-01-01 00:00:00 2
@@ -3494,7 +3470,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Upsample the series into 30 second bins.
>>> df.groupby("a").resample("30s", include_groups=False).sum()
>>> df.groupby("a").resample("30s").sum()
b
a
0 2000-01-01 00:00:00 1
@@ -3508,7 +3484,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Resample by month. Values are assigned to the month of the period.
>>> df.groupby("a").resample("ME", include_groups=False).sum()
>>> df.groupby("a").resample("ME").sum()
b
a
0 2000-01-31 3
@@ -3517,11 +3493,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Downsample the series into 3 minute bins as above, but close the right
side of the bin interval.
- >>> (
- ...     df.groupby("a")
- ...     .resample("3min", closed="right", include_groups=False)
- ...     .sum()
- ... )
+ >>> (df.groupby("a").resample("3min", closed="right").sum())
b
a
0 1999-12-31 23:57:00 1
@@ -3532,11 +3504,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
the bin interval, but label each bin using the right edge instead of
the left.
- >>> (
- ...     df.groupby("a")
- ...     .resample("3min", closed="right", label="right", include_groups=False)
- ...     .sum()
- ... )
+ >>> (df.groupby("a").resample("3min", closed="right", label="right").sum())
b
a
0 2000-01-01 00:00:00 1
@@ -3545,11 +3513,10 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
"""
from pandas.core.resample import get_resampler_for_grouping

- # mypy flags that include_groups could be specified via `*args` or `**kwargs`
- # GH#54961 would resolve.
- return get_resampler_for_grouping(  # type: ignore[misc]
-     self, rule, *args, include_groups=include_groups, **kwargs
- )
+ if include_groups:
+     raise ValueError("include_groups=True is no longer allowed.")
+
+ return get_resampler_for_grouping(self, rule, *args, **kwargs)

@final
def rolling(
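
The same enforcement applies to the grouped resampler; a small assumed example:

    import pandas as pd

    df = pd.DataFrame(
        {"a": [0, 0, 1], "b": [1, 2, 3]},
        index=pd.date_range("2000-01-01", periods=3, freq="min"),
    )

    # The guard runs when the grouped resampler is constructed,
    # before any aggregation happens.
    try:
        df.groupby("a").resample("3min", include_groups=True).sum()
    except ValueError as err:
        print(err)  # include_groups=True is no longer allowed.
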
@@ -5561,13 +5528,3 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None])

return mi


- # GH#7155
- _apply_groupings_depr = (
-     "{}.{} operated on the grouping columns. This behavior is deprecated, "
-     "and in a future version of pandas the grouping columns will be excluded "
-     "from the operation. Either pass `include_groups=False` to exclude the "
-     "groupings or explicitly select the grouping columns after groupby to silence "
-     "this warning."
- )
[Diffs for the remaining 20 changed files did not load.]
