Skip to content

Commit

Permalink
BUG: Respect dtype when calling pivot_table with margins=True
Browse files Browse the repository at this point in the history
closes #17013

This fix actually exposed an occurrence of #17035 in an existing test
(as well as in one I added).

Author: Pietro Battiston <me@pietrobattiston.it>

Closes #17062 from toobaz/pivot_margin_int and squashes the following commits:

2737600 [Pietro Battiston] Removed now obsolete workaround
956c4f9 [Pietro Battiston] BUG: respect dtype when calling pivot_table with margins=True
  • Loading branch information
toobaz authored and jreback committed Jul 26, 2017
1 parent 5c185e0 commit e3b7840
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 14 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ Reshaping
- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`)
- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
- Bug in :func:`pivot_table` where the dtype of the result was not preserved for integer dtype input when called with ``margins=True`` (:issue:`17013`)

Numeric
^^^^^^^
Expand Down
14 changes: 6 additions & 8 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
data = data[data.notna().all(axis=1)]
table = _add_margins(table, data, values, rows=index,
cols=columns, aggfunc=aggfunc,
margins_name=margins_name)
margins_name=margins_name, fill_value=fill_value)

# discard the top level
if values_passed and not values_multi and not table.empty and \
Expand All @@ -199,7 +199,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',


def _add_margins(table, data, values, rows, cols, aggfunc,
margins_name='All'):
margins_name='All', fill_value=None):
if not isinstance(margins_name, compat.string_types):
raise ValueError('margins_name argument must be a string')

Expand Down Expand Up @@ -240,8 +240,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
if not isinstance(marginal_result_set, tuple):
return marginal_result_set
result, margin_keys, row_margin = marginal_result_set

row_margin = row_margin.reindex(result.columns)
row_margin = row_margin.reindex(result.columns, fill_value=fill_value)
# populate grand margin
for k in margin_keys:
if isinstance(k, compat.string_types):
Expand All @@ -253,6 +252,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc,

row_names = result.index.names
try:
for dtype in set(result.dtypes):
cols = result.select_dtypes([dtype]).columns
margin_dummy[cols] = margin_dummy[cols].astype(dtype)
result = result.append(margin_dummy)
except TypeError:

Expand Down Expand Up @@ -524,10 +526,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
margins=margins, margins_name=margins_name,
dropna=dropna, **kwargs)

# GH 17013:
if values is None and margins:
table = table.fillna(0).astype(np.int64)

# Post-process
if normalize is not False:
table = _normalize(table, normalize=normalize, margins=margins,
Expand Down
58 changes: 52 additions & 6 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,41 @@ def _check_output(result, values_col, index=['A', 'B'],

tm.assert_frame_equal(result['SALARY'], expected['SALARY'])

def test_margins_dtype(self):
# GH 17013
# Checks pivot_table(..., margins=True) on integer-valued data: the result
# of summing with fill_value=0 is compared against an expected frame that
# is built entirely from Python ints (data cells plus the 'All' margins).

# Work on a copy so the fixture held in self.data is not modified.
df = self.data.copy()
# Overwrite D, E and F with consecutive integers, three per row, so the
# aggregated values column holds integers.
df[['D', 'E', 'F']] = np.arange(len(df) * 3).reshape(len(df), 3)

# Expected row index: every (A, B) combination of bar/foo x one/two,
# followed by the ('All', '') margin row.
mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')]
mi = MultiIndex.from_tuples(mi_val, names=('A', 'B'))
# Last entry of each list is the margin row for that column.
expected = DataFrame({'dull': [12, 21, 3, 9, 45],
'shiny': [33, 0, 36, 51, 120]},
index=mi).rename_axis('C', axis=1)
# The 'All' margin column is the row-wise sum of the two data columns.
expected['All'] = expected['dull'] + expected['shiny']

result = df.pivot_table(values='D', index=['A', 'B'],
columns='C', margins=True,
aggfunc=np.sum, fill_value=0)

# NOTE(review): presumably assert_frame_equal also compares dtypes, which
# is what makes this a dtype regression test -- confirm.
tm.assert_frame_equal(expected, result)

@pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to '
'floats)')
def test_margins_dtype_len(self):
# Checks pivot_table(..., margins=True) when aggregating with len: the
# expected frame holds integer counts per (A, B) x C cell plus the 'All'
# margins. Marked xfail; the decorator attributes the failure to GH 17035.

# Expected row index: every (A, B) combination of bar/foo x one/two,
# followed by the ('All', '') margin row.
mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')]
mi = MultiIndex.from_tuples(mi_val, names=('A', 'B'))
# Last entry of each list is the margin row for that column.
expected = DataFrame({'dull': [1, 1, 2, 1, 5],
'shiny': [2, 0, 2, 2, 6]},
index=mi).rename_axis('C', axis=1)
# The 'All' margin column is the row-wise sum of the two data columns.
expected['All'] = expected['dull'] + expected['shiny']

# Uses the fixture in self.data directly; it is only read here.
result = self.data.pivot_table(values='D', index=['A', 'B'],
columns='C', margins=True,
aggfunc=len, fill_value=0)

tm.assert_frame_equal(expected, result)

def test_pivot_integer_columns(self):
# caused by upstream bug in unstack

Expand Down Expand Up @@ -894,6 +929,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self):
expected = pd.DataFrame(table.values, index=ix, columns=cols)
tm.assert_frame_equal(table, expected)

@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to '
'ints)')
def test_categorical_margins(self):
# GH 10989
df = pd.DataFrame({'x': np.arange(8),
Expand All @@ -904,14 +941,23 @@ def test_categorical_margins(self):
expected.index = Index([0, 1, 'All'], name='y')
expected.columns = Index([0, 1, 'All'], name='z')

data = df.copy()
table = data.pivot_table('x', 'y', 'z', margins=True)
table = df.pivot_table('x', 'y', 'z', margins=True)
tm.assert_frame_equal(table, expected)

data = df.copy()
data.y = data.y.astype('category')
data.z = data.z.astype('category')
table = data.pivot_table('x', 'y', 'z', margins=True)
@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to '
'ints)')
def test_categorical_margins_category(self):
# Checks pivot_table(..., margins=True) when the index and column keys
# are of category dtype. Marked xfail; the decorator attributes the
# failure to GH 17035.

# x runs 0..7; y splits the rows into two halves (0 or 1) and z
# alternates 0/1, giving two x values per (y, z) cell.
df = pd.DataFrame({'x': np.arange(8),
'y': np.arange(8) // 4,
'z': np.arange(8) % 2})

# Expected per-cell means of x, with an 'All' margin as the last row and
# last column.
expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]])
expected.index = Index([0, 1, 'All'], name='y')
expected.columns = Index([0, 1, 'All'], name='z')

# Convert both grouping keys to category dtype before pivoting.
df.y = df.y.astype('category')
df.z = df.z.astype('category')
# No aggfunc is passed, so pivot_table's default aggregation is used.
table = df.pivot_table('x', 'y', 'z', margins=True)
tm.assert_frame_equal(table, expected)

def test_categorical_aggfunc(self):
Expand Down

0 comments on commit e3b7840

Please sign in to comment.