Skip to content

Commit

Permalink
BUG: do not cast ints to floats if inputs to crosstab are not aligned (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz authored and jreback committed Jul 21, 2017
1 parent 7d9d6d3 commit a2d03d4
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 12 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,10 @@ Sparse
Reshaping
^^^^^^^^^
- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`)
- Bug in :func:`crosstab` where non-aligned series of integers were cast to float (:issue:`17005`)
- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)


Numeric
^^^^^^^
- Bug in ``.clip()`` when ``axis=1`` and a list-like ``threshold`` are passed; previously this raised ``ValueError`` (:issue:`15390`)
Expand Down
31 changes: 20 additions & 11 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pandas import Series, DataFrame, MultiIndex, Index
from pandas.core.groupby import Grouper
from pandas.core.reshape.util import cartesian_product
from pandas.core.index import _get_combined_index
from pandas.compat import range, lrange, zip
from pandas import compat
import pandas.core.common as com
Expand Down Expand Up @@ -493,6 +494,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
rownames = _get_names(index, rownames, prefix='row')
colnames = _get_names(columns, colnames, prefix='col')

obs_idxes = [obj.index for objs in (index, columns) for obj in objs
if hasattr(obj, 'index')]
if obs_idxes:
common_idx = _get_combined_index(obs_idxes, intersect=True)
else:
common_idx = None

data = {}
data.update(zip(rownames, index))
data.update(zip(colnames, columns))
Expand All @@ -503,20 +511,21 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
if values is not None and aggfunc is None:
raise ValueError("values cannot be used without an aggfunc.")

df = DataFrame(data, index=common_idx)
if values is None:
df = DataFrame(data)
df['__dummy__'] = 0
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
aggfunc=len, margins=margins,
margins_name=margins_name, dropna=dropna)
table = table.fillna(0).astype(np.int64)

kwargs = {'aggfunc': len, 'fill_value': 0}
else:
data['__dummy__'] = values
df = DataFrame(data)
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
aggfunc=aggfunc, margins=margins,
margins_name=margins_name, dropna=dropna)
df['__dummy__'] = values
kwargs = {'aggfunc': aggfunc}

table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
margins=margins, margins_name=margins_name,
dropna=dropna, **kwargs)

# GH 17013:
if values is None and margins:
table = table.fillna(0).astype(np.int64)

# Post-process
if normalize is not False:
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,22 @@ def test_crosstab_ndarray(self):
assert result.index.name == 'row_0'
assert result.columns.name == 'col_0'

def test_crosstab_non_aligned(self):
# GH 17005
# Regression test: crosstab inputs whose indexes do not line up must still
# produce integer counts instead of being cast to float.
# `a` is indexed by a-c only; `b` carries two extra labels (d, f) that
# have no counterpart in `a`.
a = pd.Series([0, 1, 1], index=['a', 'b', 'c'])
b = pd.Series([3, 4, 3, 4, 3], index=['a', 'b', 'c', 'd', 'f'])
# Index-less ndarray holding the same values as the first three entries of b.
c = np.array([3, 4, 3])

# Counts for the three shared rows, i.e. the pairs (0, 3), (1, 4), (1, 3).
# Built from Python ints, so the expected frame is integer-valued.
expected = pd.DataFrame([[1, 0], [1, 1]],
index=Index([0, 1], name='row_0'),
columns=Index([3, 4], name='col_0'))

# Series vs. Series with differing indexes: only the common labels count.
result = crosstab(a, b)
tm.assert_frame_equal(result, expected)

# Series vs. plain array (no index to align on): same table expected.
result = crosstab(a, c)
tm.assert_frame_equal(result, expected)

def test_crosstab_margins(self):
a = np.random.randint(0, 7, size=100)
b = np.random.randint(0, 3, size=100)
Expand Down

0 comments on commit a2d03d4

Please sign in to comment.