Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: enforce nested-renaming deprecation #29608

Merged
6 commits merged on Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 8 additions & 36 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,7 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs):
# people may try to aggregate on a non-callable attribute
# but don't let them think they can pass args to it
assert len(args) == 0
assert (
len([kwarg for kwarg in kwargs if kwarg not in ["axis", "_level"]]) == 0
)
assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
return f

f = getattr(np, arg, None)
Expand Down Expand Up @@ -324,34 +322,17 @@ def _aggregate(self, arg, *args, **kwargs):
_axis = kwargs.pop("_axis", None)
if _axis is None:
_axis = getattr(self, "axis", 0)
_level = kwargs.pop("_level", None)

if isinstance(arg, str):
return self._try_aggregate_string_function(arg, *args, **kwargs), None

if isinstance(arg, dict):

# aggregate based on the passed dict
if _axis != 0: # pragma: no cover
raise ValueError("Can only pass dict with axis=0")

obj = self._selected_obj

def nested_renaming_depr(level: int = 4):
# deprecation of nested renaming
# GH 15931
msg = textwrap.dedent(
"""\
using a dict with renaming is deprecated and will be removed
in a future version.

For column-specific groupby renaming, use named aggregation

>>> df.groupby(...).agg(name=('column', aggfunc))
"""
)
warnings.warn(msg, FutureWarning, stacklevel=level)

# if we have a dict of any non-scalars
# eg. {'A' : ['mean']}, normalize all to
# be list-likes
Expand All @@ -374,18 +355,9 @@ def nested_renaming_depr(level: int = 4):
# not ok
# {'ra' : { 'A' : 'mean' }}
if isinstance(v, dict):
is_nested_renamer = True

if k not in obj.columns:
msg = (
"cannot perform renaming for {key} with a "
"nested dictionary"
).format(key=k)
raise SpecificationError(msg)
nested_renaming_depr(4 + (_level or 0))

raise SpecificationError("nested renamer is not supported")
elif isinstance(obj, ABCSeries):
nested_renaming_depr()
raise SpecificationError("nested renamer is not supported")
elif isinstance(obj, ABCDataFrame) and k not in obj.columns:
raise KeyError("Column '{col}' does not exist!".format(col=k))

Expand All @@ -398,7 +370,7 @@ def nested_renaming_depr(level: int = 4):
if isinstance(obj, ABCDataFrame) and len(
obj.columns.intersection(keys)
) != len(keys):
nested_renaming_depr()
raise SpecificationError("nested renamer is not supported")

from pandas.core.reshape.concat import concat

Expand All @@ -411,14 +383,14 @@ def _agg_1dim(name, how, subset=None):
raise SpecificationError(
"nested dictionary is ambiguous in aggregation"
)
return colg.aggregate(how, _level=(_level or 0) + 1)
return colg.aggregate(how)

def _agg_2dim(name, how):
"""
aggregate a 2-dim with how
"""
colg = self._gotitem(self._selection, ndim=2, subset=obj)
return colg.aggregate(how, _level=None)
return colg.aggregate(how)

def _agg(arg, func):
"""
Expand Down Expand Up @@ -535,7 +507,7 @@ def is_any_frame() -> bool:
return result, True
elif is_list_like(arg):
# we require a list, but not an 'str'
return self._aggregate_multiple_funcs(arg, _level=_level, _axis=_axis), None
return self._aggregate_multiple_funcs(arg, _axis=_axis), None
else:
result = None

Expand All @@ -546,7 +518,7 @@ def is_any_frame() -> bool:
# caller can react
return result, True

def _aggregate_multiple_funcs(self, arg, _level, _axis):
def _aggregate_multiple_funcs(self, arg, _axis):
from pandas.core.reshape.concat import concat

if _axis != 0:
Expand Down
32 changes: 8 additions & 24 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from textwrap import dedent
import typing
from typing import Any, Callable, FrozenSet, Iterable, Sequence, Type, Union, cast
import warnings

import numpy as np

Expand Down Expand Up @@ -213,7 +212,6 @@ def apply(self, func, *args, **kwargs):
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func=None, *args, **kwargs):
_level = kwargs.pop("_level", None)

relabeling = func is None
columns = None
Expand All @@ -232,7 +230,7 @@ def aggregate(self, func=None, *args, **kwargs):
# Catch instances of lists / tuples
# but not the class list / tuple itself.
func = _maybe_mangle_lambdas(func)
ret = self._aggregate_multiple_funcs(func, (_level or 0) + 1)
ret = self._aggregate_multiple_funcs(func)
if relabeling:
ret.columns = columns
else:
Expand All @@ -256,32 +254,22 @@ def aggregate(self, func=None, *args, **kwargs):
if not self.as_index: # pragma: no cover
print("Warning, ignoring as_index=True")

# _level handled at higher
if not _level and isinstance(ret, dict):
if isinstance(ret, dict):
from pandas import concat

ret = concat(ret, axis=1)
return ret

agg = aggregate

def _aggregate_multiple_funcs(self, arg, _level):
def _aggregate_multiple_funcs(self, arg):
if isinstance(arg, dict):

# show the deprecation, but only if we
# have not shown a higher level one
# GH 15931
if isinstance(self._selected_obj, Series) and _level <= 1:
msg = dedent(
"""\
using a dict on a Series for aggregation
is deprecated and will be removed in a future version. Use \
named aggregation instead.

>>> grouper.agg(name_1=func_1, name_2=func_2)
"""
)
warnings.warn(msg, FutureWarning, stacklevel=3)
if isinstance(self._selected_obj, Series):
raise SpecificationError("nested renamer is not supported")

columns = list(arg.keys())
arg = arg.items()
Expand Down Expand Up @@ -317,8 +305,7 @@ def _aggregate_multiple_funcs(self, arg, _level):

if any(isinstance(x, DataFrame) for x in results.values()):
# let higher level handle
if _level:
return results
return results

return DataFrame(results, columns=columns)

Expand Down Expand Up @@ -844,7 +831,6 @@ class DataFrameGroupBy(GroupBy):
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func=None, *args, **kwargs):
_level = kwargs.pop("_level", None)

relabeling = func is None and _is_multi_agg_with_relabel(**kwargs)
if relabeling:
Expand All @@ -857,7 +843,7 @@ def aggregate(self, func=None, *args, **kwargs):

func = _maybe_mangle_lambdas(func)

result, how = self._aggregate(func, _level=_level, *args, **kwargs)
result, how = self._aggregate(func, *args, **kwargs)
if how is None:
return result

Expand All @@ -877,9 +863,7 @@ def aggregate(self, func=None, *args, **kwargs):

# try to treat as if we are passing a list
try:
result = self._aggregate_multiple_funcs(
[func], _level=_level, _axis=self.axis
)
result = self._aggregate_multiple_funcs([func], _axis=self.axis)
except ValueError as err:
if "no results" not in str(err):
# raised directly by _aggregate_multiple_funcs
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, notna
from pandas.conftest import _get_cython_table_params
from pandas.core.apply import frame_apply
from pandas.core.base import SpecificationError
import pandas.util.testing as tm


Expand Down Expand Up @@ -1094,7 +1095,8 @@ def test_agg_dict_nested_renaming_depr(self):
df = pd.DataFrame({"A": range(5), "B": 5})

# nested renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
df.agg({"A": {"foo": "min"}, "B": {"bar": "max"}})

def test_agg_reduce(self, axis, float_frame):
Expand Down
25 changes: 12 additions & 13 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,16 +267,16 @@ def bar(x):
return np.std(x, ddof=1)

# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
d = OrderedDict(
[["C", np.mean], ["D", OrderedDict([["foo", np.mean], ["bar", np.std]])]]
)
result = grouped.aggregate(d)
grouped.aggregate(d)

# But without renaming, these functions are OK
d = OrderedDict([["C", [np.mean]], ["D", [foo, bar]]])
expected = grouped.aggregate(d)

tm.assert_frame_equal(result, expected)
grouped.aggregate(d)


def test_multi_function_flexible_mix(df):
Expand All @@ -288,26 +288,25 @@ def test_multi_function_flexible_mix(df):
[["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", {"sum": "sum"}]]
)
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = grouped.aggregate(d)
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
grouped.aggregate(d)

# Test 1
d = OrderedDict(
[["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", "sum"]]
)
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)
with pytest.raises(SpecificationError, match=msg):
grouped.aggregate(d)

# Test 2
d = OrderedDict(
[["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", ["sum"]]]
)
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)
with pytest.raises(SpecificationError, match=msg):
grouped.aggregate(d)


def test_groupby_agg_coercing_bools():
Expand Down
58 changes: 19 additions & 39 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,31 +211,26 @@ def test_aggregate_api_consistency():
expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]])

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean})
expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
expected.columns = MultiIndex.from_product([["r", "r2"], ["D", "C"]])
tm.assert_frame_equal(result, expected, check_like=True)
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean})


def test_agg_dict_renaming_deprecation():
# 15931
df = pd.DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False) as w:
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
df.groupby("A").agg(
{"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}}
)
assert "using a dict with renaming" in str(w[0].message)
assert "named aggregation" in str(w[0].message)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(SpecificationError, match=msg):
df.groupby("A")[["B", "C"]].agg({"ma": "max"})

with tm.assert_produces_warning(FutureWarning) as w:
with pytest.raises(SpecificationError, match=msg):
df.groupby("A").B.agg({"foo": "count"})
assert "using a dict on a Series for aggregation" in str(w[0].message)
assert "named aggregation instead." in str(w[0].message)


def test_agg_compat():
Expand All @@ -251,18 +246,12 @@ def test_agg_compat():

g = df.groupby(["A", "B"])

expected = pd.concat([g["D"].sum(), g["D"].std()], axis=1)
expected.columns = MultiIndex.from_tuples([("C", "sum"), ("C", "std")])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g["D"].agg({"C": ["sum", "std"]})
tm.assert_frame_equal(result, expected, check_like=True)

expected = pd.concat([g["D"].sum(), g["D"].std()], axis=1)
expected.columns = ["C", "D"]
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"C": ["sum", "std"]})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g["D"].agg({"C": "sum", "D": "std"})
tm.assert_frame_equal(result, expected, check_like=True)
with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"C": "sum", "D": "std"})


def test_agg_nested_dicts():
Expand All @@ -278,29 +267,20 @@ def test_agg_nested_dicts():

g = df.groupby(["A", "B"])

msg = r"cannot perform renaming for r[1-2] with a nested dictionary"
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}})
expected = pd.concat(
[g["C"].mean(), g["C"].std(), g["D"].mean(), g["D"].std()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
)
tm.assert_frame_equal(result, expected, check_like=True)
with pytest.raises(SpecificationError, match=msg):
g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}})

# same name as the original column
# GH9052
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = g["D"].agg({"result1": np.sum, "result2": np.mean})
expected = expected.rename(columns={"result1": "D"})
with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"result1": np.sum, "result2": np.mean})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g["D"].agg({"D": np.sum, "result2": np.mean})
tm.assert_frame_equal(result, expected, check_like=True)
with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"D": np.sum, "result2": np.mean})


def test_agg_item_by_item_raise_typeerror():
Expand Down
Loading