Skip to content

Commit

Permalink
correct apply(axis=1) and related bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
tp committed Jul 16, 2018
1 parent 17dc5b9 commit 4b5b2c3
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 50 deletions.
6 changes: 5 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,11 @@ Numeric

- Bug in :class:`Series` ``__rmatmul__`` doesn't support matrix vector multiplication (:issue:`21530`)
- Bug in :func:`factorize` fails with read-only array (:issue:`12813`)
-
- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` when ``axis=1``.
Using ``apply`` with a list of functions and ``axis=1`` (e.g. ``df.apply(['abs'], axis=1)``)
previously raised a ``TypeError``, while the same operation worked with ``axis=0``. This is now fixed.
As ``agg`` and ``transform`` in many cases delegate to ``apply``, this
fixes the issue for them as well (:issue:`16679`).
-

Strings
Expand Down
14 changes: 5 additions & 9 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def agg_axis(self):
def get_result(self):
""" compute the results """

# dispatch to agg
if isinstance(self.f, (list, dict)):
return self.obj.aggregate(self.f, axis=self.axis,
*self.args, **self.kwds)

# all empty
if len(self.columns) == 0 and len(self.index) == 0:
return self.apply_empty_result()
Expand Down Expand Up @@ -308,15 +313,6 @@ def wrap_results(self):
class FrameRowApply(FrameApply):
axis = 0

def get_result(self):

# dispatch to agg
if isinstance(self.f, (list, dict)):
return self.obj.aggregate(self.f, axis=self.axis,
*self.args, **self.kwds)

return super(FrameRowApply, self).get_result()

def apply_broadcast(self):
return super(FrameRowApply, self).apply_broadcast(self.obj)

Expand Down
13 changes: 11 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6077,11 +6077,20 @@ def aggregate(self, func, axis=0, *args, **kwargs):
return result

def _aggregate(self, arg, axis=0, *args, **kwargs):
obj = self.T if axis == 1 else self
return super(DataFrame, obj)._aggregate(arg, *args, **kwargs)
if axis == 1:
result, how = (super(DataFrame, self.T)
._aggregate(arg, *args, **kwargs))
result = result.T if result is not None else result
return result, how
return super(DataFrame, self)._aggregate(arg, *args, **kwargs)

agg = aggregate

def transform(self, func, axis=0, *args, **kwargs):
    """
    Transform the frame with ``func`` along the requested axis.

    Parameters
    ----------
    func : function, str, list or dict
        Passed through unchanged to the parent class ``transform``.
    axis : {0, 1 or 'columns'}, default 0
        When 1 (or its string alias ``'columns'``) the frame is
        transposed, transformed, and the result transposed back.
        Any other value takes the default path.
    *args, **kwargs
        Forwarded to the parent class ``transform``.

    Returns
    -------
    Whatever the parent class ``transform`` returns, transposed back
    when operating along axis 1.
    """
    # Accept the string alias too; comparing against the integer alone
    # would silently send ``axis='columns'`` down the axis-0 path.
    if axis in (1, 'columns'):
        # The parent implementation has no axis argument, so operate on
        # the transpose and flip the result back.
        return super(DataFrame, self.T).transform(func, *args, **kwargs).T
    return super(DataFrame, self).transform(func, *args, **kwargs)

def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None,
result_type=None, args=(), **kwds):
"""
Expand Down
16 changes: 7 additions & 9 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9090,16 +9090,14 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None,

cls.ewm = ewm

@Appender(_shared_docs['transform'] % _shared_doc_kwargs)
def transform(self, func, *args, **kwargs):
result = self.agg(func, *args, **kwargs)
if is_scalar(result) or len(result) != len(self):
raise ValueError("transforms cannot produce "
"aggregated results")
@Appender(_shared_docs['transform'] % _shared_doc_kwargs)
def transform(self, func, *args, **kwargs):
result = self.agg(func, *args, **kwargs)
if is_scalar(result) or len(result) != len(self):
raise ValueError("transforms cannot produce "
"aggregated results")

return result

cls.transform = transform
return result

# ----------------------------------------------------------------------
# Misc methods
Expand Down
79 changes: 50 additions & 29 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,58 +846,74 @@ def test_consistency_for_boxed(self, box):
assert_frame_equal(result, expected)


def zip_frames(*frames):
def zip_frames(*frames, axis=1):
"""
take a list of frames, zip the columns together for each
assume that these all have the first frame columns
take a list of frames, zip them together under the
assumption that these all have the first frame's index/columns.
return a new frame
Returns
-------
new_frame : DataFrame
"""
columns = frames[0].columns
zipped = [f[c] for c in columns for f in frames]
return pd.concat(zipped, axis=1)
if axis == 1:
columns = frames[0].columns
zipped = [f.loc[:, c] for c in columns for f in frames]
return pd.concat(zipped, axis=1)
else:
index = frames[0].index
zipped = [f.loc[i, :] for i in index for f in frames]
return pd.DataFrame(zipped)


class TestDataFrameAggregate(TestData):

def test_agg_transform(self):
def test_agg_transform(self, axis):
other_axis = abs(axis - 1)

with np.errstate(all='ignore'):

f_sqrt = np.sqrt(self.frame)
f_abs = np.abs(self.frame)
f_sqrt = np.sqrt(self.frame)

# ufunc
result = self.frame.transform(np.sqrt)
result = self.frame.transform(np.sqrt, axis=axis)
expected = f_sqrt.copy()
assert_frame_equal(result, expected)

result = self.frame.apply(np.sqrt)
result = self.frame.apply(np.sqrt, axis=axis)
assert_frame_equal(result, expected)

result = self.frame.transform(np.sqrt)
result = self.frame.transform(np.sqrt, axis=axis)
assert_frame_equal(result, expected)

# list-like
result = self.frame.apply([np.sqrt])
result = self.frame.apply([np.sqrt], axis=axis)
expected = f_sqrt.copy()
expected.columns = pd.MultiIndex.from_product(
[self.frame.columns, ['sqrt']])
if axis == 0:
expected.columns = pd.MultiIndex.from_product(
[self.frame.columns, ['sqrt']])
else:
expected.index = pd.MultiIndex.from_product(
[self.frame.index, ['sqrt']])
assert_frame_equal(result, expected)

result = self.frame.transform([np.sqrt])
result = self.frame.transform([np.sqrt], axis=axis)
assert_frame_equal(result, expected)

# multiple items in list
# these are in the order as if we are applying both
# functions per series and then concatting
expected = zip_frames(f_sqrt, f_abs)
expected.columns = pd.MultiIndex.from_product(
[self.frame.columns, ['sqrt', 'absolute']])
result = self.frame.apply([np.sqrt, np.abs])
result = self.frame.apply([np.abs, np.sqrt], axis=axis)
expected = zip_frames(f_abs, f_sqrt, axis=other_axis)
if axis == 0:
expected.columns = pd.MultiIndex.from_product(
[self.frame.columns, ['absolute', 'sqrt']])
else:
expected.index = pd.MultiIndex.from_product(
[self.frame.index, ['absolute', 'sqrt']])
assert_frame_equal(result, expected)

result = self.frame.transform(['sqrt', np.abs])
result = self.frame.transform([np.abs, 'sqrt'], axis=axis)
assert_frame_equal(result, expected)

def test_transform_and_agg_err(self, axis):
Expand Down Expand Up @@ -985,13 +1001,16 @@ def test_agg_dict_nested_renaming_depr(self):

def test_agg_reduce(self, axis):
other_axis = abs(axis - 1)
name1, name2 = self.frame.axes[other_axis].unique()[:2]
name1, name2 = self.frame.axes[other_axis].unique()[:2].sort_values()

# all reducers
expected = zip_frames(self.frame.mean(axis=axis).to_frame(),
self.frame.max(axis=axis).to_frame(),
self.frame.sum(axis=axis).to_frame()).T
expected.index = ['mean', 'max', 'sum']
expected = pd.concat([self.frame.mean(axis=axis),
self.frame.max(axis=axis),
self.frame.sum(axis=axis),
], axis=1)
expected.columns = ['mean', 'max', 'sum']
expected = expected.T if axis == 0 else expected

result = self.frame.agg(['mean', 'max', 'sum'], axis=axis)
assert_frame_equal(result, expected)

Expand All @@ -1001,7 +1020,7 @@ def test_agg_reduce(self, axis):
expected = Series([self.frame.loc(other_axis)[name1].mean(),
self.frame.loc(other_axis)[name2].sum()],
index=[name1, name2])
assert_series_equal(result.reindex_like(expected), expected)
assert_series_equal(result, expected)

# dict input with lists
func = {name1: ['mean'], name2: ['sum']}
Expand All @@ -1011,7 +1030,8 @@ def test_agg_reduce(self, axis):
index=['mean']),
name2: Series([self.frame.loc(other_axis)[name2].sum()],
index=['sum'])})
assert_frame_equal(result.reindex_like(expected), expected)
expected = expected.T if axis == 1 else expected
assert_frame_equal(result, expected)

# dict input with lists with multiple
func = {name1: ['mean', 'sum'],
Expand All @@ -1024,7 +1044,8 @@ def test_agg_reduce(self, axis):
name2: Series([self.frame.loc(other_axis)[name2].sum(),
self.frame.loc(other_axis)[name2].max()],
index=['sum', 'max'])})
assert_frame_equal(result.reindex_like(expected), expected)
expected = expected.T if axis == 1 else expected
assert_frame_equal(result, expected)

def test_nuiscance_columns(self):

Expand Down

0 comments on commit 4b5b2c3

Please sign in to comment.