Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect dtype when calling pivot_table with margins=True #17062

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ Reshaping
- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`)
- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
- Fixes dtype of result from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`)

Numeric
^^^^^^^
Expand Down
14 changes: 6 additions & 8 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
data = data[data.notnull().all(axis=1)]
table = _add_margins(table, data, values, rows=index,
cols=columns, aggfunc=aggfunc,
margins_name=margins_name)
margins_name=margins_name, fill_value=fill_value)

# discard the top level
if values_passed and not values_multi and not table.empty and \
Expand All @@ -199,7 +199,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',


def _add_margins(table, data, values, rows, cols, aggfunc,
margins_name='All'):
margins_name='All', fill_value=None):
if not isinstance(margins_name, compat.string_types):
raise ValueError('margins_name argument must be a string')

Expand Down Expand Up @@ -240,8 +240,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
if not isinstance(marginal_result_set, tuple):
return marginal_result_set
result, margin_keys, row_margin = marginal_result_set

row_margin = row_margin.reindex(result.columns)
row_margin = row_margin.reindex(result.columns, fill_value=fill_value)
# populate grand margin
for k in margin_keys:
if isinstance(k, compat.string_types):
Expand All @@ -253,6 +252,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc,

row_names = result.index.names
try:
for dtype in set(result.dtypes):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would break if dtype == category.

Instead, use select_dtypes to get the columns you need, and then do:
margin_dummy[cols] = margin_dummy[cols].astype(dtype)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(done)

cols = result.select_dtypes([dtype]).columns
margin_dummy[cols] = margin_dummy[cols].astype(dtype)
result = result.append(margin_dummy)
except TypeError:

Expand Down Expand Up @@ -524,10 +526,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
margins=margins, margins_name=margins_name,
dropna=dropna, **kwargs)

# GH 17013:
if values is None and margins:
table = table.fillna(0).astype(np.int64)

# Post-process
if normalize is not False:
table = _normalize(table, normalize=normalize, margins=margins,
Expand Down
58 changes: 52 additions & 6 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,41 @@ def _check_output(result, values_col, index=['A', 'B'],

tm.assert_frame_equal(result['SALARY'], expected['SALARY'])

def test_margins_dtype(self):
    # GH 17013
    # Pivot with margins=True and a sum aggregation, then compare against
    # an expected frame built from plain integer lists.
    frame = self.data.copy()
    n_rows = len(frame)
    # Overwrite D/E/F with consecutive integers laid out row by row.
    frame[['D', 'E', 'F']] = np.arange(n_rows * 3).reshape(n_rows, 3)

    # Row labels: every (A, B) combination followed by the margin row.
    index_tuples = list(product(['bar', 'foo'], ['one', 'two']))
    index_tuples = index_tuples + [('All', '')]
    row_index = MultiIndex.from_tuples(index_tuples, names=('A', 'B'))

    sums = {'dull': [12, 21, 3, 9, 45],
            'shiny': [33, 0, 36, 51, 120]}
    expected = DataFrame(sums, index=row_index)
    expected = expected.rename_axis('C', axis=1)
    # The column margin is the element-wise total of the two value columns.
    expected['All'] = expected['dull'] + expected['shiny']

    pivot_kwargs = dict(values='D', index=['A', 'B'], columns='C',
                        margins=True, aggfunc=np.sum, fill_value=0)
    result = frame.pivot_table(**pivot_kwargs)

    tm.assert_frame_equal(expected, result)

@pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to '
                          'floats)')
def test_margins_dtype_len(self):
    # Pivot with margins=True and aggfunc=len, then compare against an
    # expected frame of integer counts. Marked xfail, see GH 17035.

    # Row labels: every (A, B) combination followed by the margin row.
    index_tuples = list(product(['bar', 'foo'], ['one', 'two']))
    index_tuples = index_tuples + [('All', '')]
    row_index = MultiIndex.from_tuples(index_tuples, names=('A', 'B'))

    counts = {'dull': [1, 1, 2, 1, 5],
              'shiny': [2, 0, 2, 2, 6]}
    expected = DataFrame(counts, index=row_index)
    expected = expected.rename_axis('C', axis=1)
    # The column margin is the element-wise total of the two value columns.
    expected['All'] = expected['dull'] + expected['shiny']

    pivot_kwargs = dict(values='D', index=['A', 'B'], columns='C',
                        margins=True, aggfunc=len, fill_value=0)
    result = self.data.pivot_table(**pivot_kwargs)

    tm.assert_frame_equal(expected, result)

def test_pivot_integer_columns(self):
# caused by upstream bug in unstack

Expand Down Expand Up @@ -894,6 +929,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self):
expected = pd.DataFrame(table.values, index=ix, columns=cols)
tm.assert_frame_equal(table, expected)

@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to '
'ints)')
def test_categorical_margins(self):
# GH 10989
df = pd.DataFrame({'x': np.arange(8),
Expand All @@ -904,14 +941,23 @@ def test_categorical_margins(self):
expected.index = Index([0, 1, 'All'], name='y')
expected.columns = Index([0, 1, 'All'], name='z')

data = df.copy()
table = data.pivot_table('x', 'y', 'z', margins=True)
table = df.pivot_table('x', 'y', 'z', margins=True)
tm.assert_frame_equal(table, expected)

data = df.copy()
data.y = data.y.astype('category')
data.z = data.z.astype('category')
table = data.pivot_table('x', 'y', 'z', margins=True)
@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to '
                          'ints)')
def test_categorical_margins_category(self):
    # Pivot 'x' by categorical 'y' (rows) and 'z' (columns) with
    # margins=True and the default aggregation. Marked xfail, see GH 17035.
    frame = pd.DataFrame({'x': np.arange(8),
                          'y': np.arange(8) // 4,
                          'z': np.arange(8) % 2})

    expected_values = [[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]
    expected = pd.DataFrame(expected_values)
    # Label both axes, including the 'All' margin entry on each.
    expected.columns = Index([0, 1, 'All'], name='z')
    expected.index = Index([0, 1, 'All'], name='y')

    # Convert both grouping keys to categorical dtype before pivoting.
    frame['y'] = frame['y'].astype('category')
    frame['z'] = frame['z'].astype('category')

    table = frame.pivot_table('x', 'y', 'z', margins=True)
    tm.assert_frame_equal(table, expected)

def test_categorical_aggfunc(self):
Expand Down