Skip to content

Commit

Permalink
BUG: dropna incorrect with categoricals in pivot_table (pandas-dev#21252)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored and jorisvandenbossche committed Jun 7, 2018
1 parent c460710 commit abfac97
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Fixed Regressions
- Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`)
- Bug preventing pandas from being importable with -OO optimization (:issue:`21071`)
- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`)
- Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing
values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`)


.. _whatsnew_0231.performance:
Expand Down
20 changes: 18 additions & 2 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# pylint: disable=E1103


from pandas.core.dtypes.common import is_list_like, is_scalar
from pandas.core.dtypes.common import (
is_list_like, is_scalar, is_integer_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.cast import maybe_downcast_to_dtype

from pandas.core.reshape.concat import concat
from pandas.core.series import Series
Expand Down Expand Up @@ -79,8 +81,22 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
pass
values = list(values)

grouped = data.groupby(keys, observed=dropna)
# group by the cartesian product of the grouper
# if we have a categorical
grouped = data.groupby(keys, observed=False)
agged = grouped.agg(aggfunc)
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
agged = agged.dropna(how='all')

# gh-21133
# we want to down cast if
# the original values are ints
# as we grouped with a NaN value
# and then dropped, coercing to floats
for v in [v for v in values if v in data and v in agged]:
if (is_integer_dtype(data[v]) and
not is_integer_dtype(agged[v])):
agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)

table = agged
if table.index.nlevels > 1:
Expand Down
26 changes: 25 additions & 1 deletion pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-

from datetime import datetime, date, timedelta

Expand All @@ -16,6 +17,11 @@
from pandas.api.types import CategoricalDtype as CDT


@pytest.fixture(params=[True, False])
def dropna(request):
    """Parametrized fixture yielding each ``dropna`` flag: True, then False."""
    flag = request.param
    return flag


class TestPivotTable(object):

def setup_method(self, method):
Expand Down Expand Up @@ -109,7 +115,6 @@ def test_pivot_table_categorical(self):
index=exp_index)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize('dropna', [True, False])
def test_pivot_table_dropna_categoricals(self, dropna):
# GH 15193
categories = ['a', 'b', 'c', 'd']
Expand Down Expand Up @@ -137,6 +142,25 @@ def test_pivot_table_dropna_categoricals(self, dropna):

tm.assert_frame_equal(result, expected)

def test_pivot_with_non_observable_dropna(self, dropna):
    # gh-21133
    # Pivot on an ordered categorical whose first entry is missing; the
    # result must stay aligned with the 'low'/'high' categories and keep
    # integer values for whichever ``dropna`` flag is supplied.
    levels = ['low', 'high']
    labels = pd.Categorical([np.nan, 'low', 'high', 'low', 'high'],
                            categories=levels,
                            ordered=True)
    frame = pd.DataFrame({'A': labels, 'B': range(5)})

    # 'low' rows hold B = 1, 3 (mean 2); 'high' rows hold B = 2, 4 (mean 3)
    expected_index = pd.Index(
        pd.Categorical.from_codes([0, 1],
                                  categories=levels,
                                  ordered=True),
        name='A')
    expected = pd.DataFrame({'B': [2, 3]}, index=expected_index)

    result = frame.pivot_table(index='A', values='B', dropna=dropna)

    tm.assert_frame_equal(result, expected)

def test_pass_array(self):
result = self.data.pivot_table(
'D', index=self.data.A, columns=self.data.C)
Expand Down

0 comments on commit abfac97

Please sign in to comment.