Skip to content

Commit

Permalink
BUG: coercing of bools in groupby transform (pandas-dev#16895)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeetjitsu authored and alanbato committed Nov 10, 2017
1 parent 5643ec8 commit 5816d1a
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 5 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ Groupby/Resample/Rolling
- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`)

- Bug in ``groupby.transform()`` that would coerce boolean results back to a float dtype (:issue:`16875`)

Sparse
^^^^^^
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,7 @@ def trans(x): # noqa
np.prod(result.shape)):
return result

if issubclass(dtype.type, np.floating):
return result.astype(dtype)
elif is_bool_dtype(dtype) or is_integer_dtype(dtype):
if is_bool_dtype(dtype) or is_integer_dtype(dtype):

# if we don't have any elements, just astype it
if not np.prod(result.shape):
Expand Down Expand Up @@ -144,6 +142,9 @@ def trans(x): # noqa
# hit here
if (new_result == result).all():
return new_result
elif (issubclass(dtype.type, np.floating) and
not is_bool_dtype(result.dtype)):
return result.astype(dtype)

# a datetimelike
# GH12821, iNaT is cast to float
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/dtypes/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datetime import datetime, timedelta, date
import numpy as np

from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT
from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT, Series

from pandas.core.dtypes.cast import (
maybe_downcast_to_dtype,
Expand Down Expand Up @@ -45,6 +45,12 @@ def test_downcast_conv(self):
expected = np.array([8, 8, 8, 8, 9])
assert (np.array_equal(result, expected))

# GH16875 coercing of bools
ser = Series([True, True, False])
result = maybe_downcast_to_dtype(ser, np.dtype(np.float64))
expected = ser
tm.assert_series_equal(result, expected)

# conversions

expected = np.array([1, 2])
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,19 @@ def test_transform_bug(self):
expected = Series(np.arange(5, 0, step=-1), name='B')
assert_series_equal(result, expected)

def test_transform_numeric_to_boolean(self):
# GH 16875
# inconsistency in transforming boolean values
# The transform function used below returns the constant True for every
# group, so the result is expected to be a boolean Series named 'A',
# whatever the dtype of the source column.
expected = pd.Series([True, True], name='A')

# case 1: column 'A' holds floats; 'B' has two distinct keys (two groups)
df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]})
result = df.groupby('B').A.transform(lambda x: True)
assert_series_equal(result, expected)

# case 2: same check with column 'A' holding integers
df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]})
result = df.groupby('B').A.transform(lambda x: True)
assert_series_equal(result, expected)

def test_transform_datetime_to_timedelta(self):
# GH 15429
# transforming a datetime to timedelta
Expand Down

0 comments on commit 5816d1a

Please sign in to comment.