diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fed89bb48ca23d..d5eb64c81b263d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -311,6 +311,7 @@ Reshaping - Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`) - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c2fb81178433e7..fe525eb0a3c87a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -178,7 +178,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', data = data[data.notna().all(axis=1)] table = _add_margins(table, data, values, rows=index, cols=columns, aggfunc=aggfunc, - margins_name=margins_name) + margins_name=margins_name, fill_value=fill_value) # discard the top level if values_passed and not values_multi and not table.empty and \ @@ -199,7 +199,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', def _add_margins(table, data, values, rows, cols, aggfunc, - margins_name='All'): + margins_name='All', fill_value=None): if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be a string') @@ -240,8 +240,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(marginal_result_set, tuple): return marginal_result_set result, margin_keys, row_margin = marginal_result_set - - row_margin = row_margin.reindex(result.columns) + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) # populate grand margin for k in margin_keys: if isinstance(k, compat.string_types): @@ -253,6 +252,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc, row_names = result.index.names try: + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].astype(dtype) result = result.append(margin_dummy) except TypeError: @@ -524,10 +526,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, margins=margins, margins_name=margins_name, dropna=dropna, **kwargs) - # GH 17013: - if values is None and margins: - table = table.fillna(0).astype(np.int64) - # Post-process if normalize is not False: table = _normalize(table, normalize=normalize, margins=margins, diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5e5852ac5381da..ee6c32cd0a2083 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -459,6 +459,41 @@ def _check_output(result, values_col, index=['A', 'B'], tm.assert_frame_equal(result['SALARY'], expected['SALARY']) + def test_margins_dtype(self): + # GH 17013 + + df = self.data.copy() + df[['D', 'E', 'F']] = np.arange(len(df) * 3).reshape(len(df), 3) + + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [12, 21, 3, 9, 45], + 'shiny': [33, 0, 36, 51, 120]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = df.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=np.sum, fill_value=0) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to ' + 'floats)') + def test_margins_dtype_len(self): + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [1, 1, 2, 1, 5], + 'shiny': [2, 0, 2, 2, 6]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = self.data.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=len, fill_value=0) + + tm.assert_frame_equal(expected, result) + def test_pivot_integer_columns(self): # caused by upstream bug in unstack @@ -894,6 +929,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) + @pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') def test_categorical_margins(self): # GH 10989 df = pd.DataFrame({'x': np.arange(8), @@ -904,14 +941,23 @@ def test_categorical_margins(self): expected.index = Index([0, 1, 'All'], name='y') expected.columns = Index([0, 1, 'All'], name='z') - data = df.copy() - table = data.pivot_table('x', 'y', 'z', margins=True) + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) - data = df.copy() - data.y = data.y.astype('category') - data.z = data.z.astype('category') - table = data.pivot_table('x', 'y', 'z', margins=True) + @pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') + def test_categorical_margins_category(self): + df = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, 'All'], name='y') + expected.columns = Index([0, 1, 'All'], name='z') + + df.y = df.y.astype('category') + df.z = df.z.astype('category') + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) def test_categorical_aggfunc(self):