Skip to content

Commit

Permalink
dispatch scalar DataFrame ops to Series (#22163)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Aug 14, 2018
1 parent 1ea9664 commit f7f266c
Show file tree
Hide file tree
Showing 12 changed files with 238 additions and 154 deletions.
41 changes: 40 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ New Behavior:
idx = pd.interval_range(0, 4)
idx.values

This mirrors ``CateogricalIndex.values``, which returns a ``Categorical``.
This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``.

For situations where you need an ``ndarray`` of ``Interval`` objects, use
:meth:`numpy.asarray` or ``idx.astype(object)``.
Expand Down Expand Up @@ -406,6 +406,34 @@ Previous Behavior:
In [3]: pi - pi[0]
Out[3]: Int64Index([0, 1, 2], dtype='int64')


.. _whatsnew_0240.api.timedelta64_subtract_nan:

Addition/Subtraction of ``NaN`` from :class:`DataFrame`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Adding or subtracting ``NaN`` from a :class:`DataFrame` column with
``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning
all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and
``Series`` behavior (:issue:`22163`).

.. ipython:: python

df = pd.DataFrame([pd.Timedelta(days=1)])
df - np.nan

Previous Behavior:

.. code-block:: ipython

In [4]: df = pd.DataFrame([pd.Timedelta(days=1)])

In [5]: df - np.nan
Out[5]:
0
0 NaT


.. _whatsnew_0240.api.extension:

ExtensionType Changes
Expand Down Expand Up @@ -539,6 +567,16 @@ Datetimelike
- Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`)
- Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`)
- Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`)
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`,:issue:`22163`)
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`,:issue:`22163`)
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`,:issue:`22163`)
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`,:issue:`22163`)
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`,:issue:`22163`)
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
-

Timedelta
^^^^^^^^^
Expand Down Expand Up @@ -586,6 +624,7 @@ Numeric
when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``),
a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`).
- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`,:issue:`22163`)
-

Strings
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4835,6 +4835,14 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
return self._constructor(new_data)

def _combine_const(self, other, func, errors='raise', try_cast=True):
if lib.is_scalar(other) or np.ndim(other) == 0:
new_data = {i: func(self.iloc[:, i], other)
for i, col in enumerate(self.columns)}

result = self._constructor(new_data, index=self.index, copy=False)
result.columns = self.columns
return result

new_data = self._data.eval(func=func, other=other,
errors=errors,
try_cast=try_cast)
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,7 @@ def na_op(x, y):
with np.errstate(all='ignore'):
result = method(y)
if result is NotImplemented:
raise TypeError("invalid type comparison")
return invalid_comparison(x, y, op)
else:
result = op(x, y)

Expand All @@ -1366,6 +1366,10 @@ def wrapper(self, other, axis=None):

res_name = get_op_result_name(self, other)

if isinstance(other, list):
# TODO: same for tuples?
other = np.asarray(other)

if isinstance(other, ABCDataFrame): # pragma: no cover
# Defer to DataFrame implementation; fail early
return NotImplemented
Expand Down Expand Up @@ -1459,8 +1463,6 @@ def wrapper(self, other, axis=None):

else:
values = self.get_values()
if isinstance(other, list):
other = np.asarray(other)

with np.errstate(all='ignore'):
res = na_op(values, other)
Expand Down Expand Up @@ -1741,7 +1743,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
if fill_value is not None:
self = self.fillna(fill_value)

return self._combine_const(other, na_op, try_cast=True)
pass_op = op if lib.is_scalar(other) else na_op
return self._combine_const(other, pass_op, try_cast=True)

f.__name__ = op_name

Expand Down
47 changes: 34 additions & 13 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ def test_tz_aware_scalar_comparison(self, timestamps):
expected = pd.DataFrame({'test': [False, False]})
tm.assert_frame_equal(df == -1, expected)

def test_dt64_nat_comparison(self):
# GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly
ts = pd.Timestamp.now()
df = pd.DataFrame([ts, pd.NaT])
expected = pd.DataFrame([True, False])

result = df == ts
tm.assert_frame_equal(result, expected)


class TestDatetime64SeriesComparison(object):
# TODO: moved from tests.series.test_operators; needs cleanup
Expand Down Expand Up @@ -640,10 +649,22 @@ def test_dti_cmp_object_dtype(self):
# Arithmetic

class TestFrameArithmetic(object):
def test_dt64arr_sub_dtscalar(self, box):
# GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype
idx = pd.date_range('2013-01-01', periods=3)
idx = tm.box_expected(idx, box)

ts = pd.Timestamp('2013-01-01')
# TODO: parametrize over scalar types

expected = pd.TimedeltaIndex(['0 Days', '1 Day', '2 Days'])
expected = tm.box_expected(expected, box)

result = idx - ts
tm.assert_equal(result, expected)

@pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
strict=True)
def test_df_sub_datetime64_not_ns(self):
# GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano
df = pd.DataFrame(pd.date_range('20130101', periods=3))
dt64 = np.datetime64('2013-01-01')
assert dt64.dtype == 'datetime64[D]'
Expand Down Expand Up @@ -992,9 +1013,11 @@ def test_dti_add_sub_float(self, op, other):
with pytest.raises(TypeError):
op(dti, other)

def test_dti_add_timestamp_raises(self):
def test_dti_add_timestamp_raises(self, box):
# GH#22163 ensure DataFrame doesn't cast Timestamp to i8
idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
msg = "cannot add DatetimeIndex and Timestamp"
idx = tm.box_expected(idx, box)
msg = "cannot add"
with tm.assert_raises_regex(TypeError, msg):
idx + Timestamp('2011-01-01')

Expand Down Expand Up @@ -1090,13 +1113,17 @@ def test_dti_add_intarray_no_freq(self, box):
# -------------------------------------------------------------
# Binary operations DatetimeIndex and timedelta-like

def test_dti_add_timedeltalike(self, tz_naive_fixture, delta):
def test_dti_add_timedeltalike(self, tz_naive_fixture, delta, box):
# GH#22005, GH#22163 check DataFrame doesn't raise TypeError
tz = tz_naive_fixture
rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
rng = tm.box_expected(rng, box)

result = rng + delta
expected = pd.date_range('2000-01-01 02:00',
'2000-02-01 02:00', tz=tz)
tm.assert_index_equal(result, expected)
expected = tm.box_expected(expected, box)
tm.assert_equal(result, expected)

def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta):
tz = tz_naive_fixture
Expand Down Expand Up @@ -1662,14 +1689,8 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
res3 = dti - other
tm.assert_series_equal(res3, expected_sub)

@pytest.mark.parametrize('box', [
pd.Index,
pd.Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns object dtype",
strict=True))
], ids=lambda x: x.__name__)
def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
# GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
timezone = tz_aware_fixture
if timezone == 'US/Pacific':
dates = date_range('2012-11-01', periods=3, tz=timezone)
Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,6 @@ def test_ops_series(self):
tm.assert_series_equal(expected, td * other)
tm.assert_series_equal(expected, other * td)

@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="block.eval incorrect",
strict=True))
])
@pytest.mark.parametrize('index', [
pd.Int64Index(range(1, 11)),
pd.UInt64Index(range(1, 11)),
Expand All @@ -79,7 +72,7 @@ def test_ops_series(self):
def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
# GH#19333

if (box is Series and
if (box in [Series, pd.DataFrame] and
type(scalar_td) is timedelta and index.dtype == 'f8'):
raise pytest.xfail(reason="Cannot multiply timedelta by float")

Expand Down
68 changes: 66 additions & 2 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,53 @@
# Comparisons

class TestFrameComparisons(object):
def test_flex_comparison_nat(self):
# GH#15697, GH#22163 df.eq(pd.NaT) should behave like df == pd.NaT,
# and _definitely_ not be NaN
df = pd.DataFrame([pd.NaT])

result = df == pd.NaT
# result.iloc[0, 0] is a np.bool_ object
assert result.iloc[0, 0].item() is False

result = df.eq(pd.NaT)
assert result.iloc[0, 0].item() is False

result = df != pd.NaT
assert result.iloc[0, 0].item() is True

result = df.ne(pd.NaT)
assert result.iloc[0, 0].item() is True

def test_mixed_comparison(self):
# GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
# not raise TypeError
# (this appears to be fixed before #22163, not sure when)
df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]])
other = pd.DataFrame([['a', 'b'], ['c', 'd']])

result = df == other
assert not result.any().any()

result = df != other
assert result.all().all()

def test_df_numeric_cmp_dt64_raises(self):
    """Order an integer frame against a Timestamp; only ==/!= may succeed."""
    # GH#8932, GH#22163
    stamp = pd.Timestamp.now()
    frame = pd.DataFrame({'x': range(5)})

    # Inequalities must raise with the frame on either side.
    orderings = (
        lambda: frame > stamp,
        lambda: frame < stamp,
        lambda: stamp < frame,
        lambda: stamp > frame,
    )
    for ordering in orderings:
        with pytest.raises(TypeError):
            ordering()

    # Equality checks do not raise: nothing matches, everything differs.
    assert not (frame == stamp).any().any()
    assert (frame != stamp).all().all()

def test_df_boolean_comparison_error(self):
# GH#4576
# boolean comparisons with a tuple/list give unexpected results
Expand All @@ -32,8 +79,8 @@ def test_df_float_none_comparison(self):
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
columns=['A', 'B', 'C'])

with pytest.raises(TypeError):
df.__eq__(None)
result = df.__eq__(None)
assert not result.any().any()

def test_df_string_comparison(self):
df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
Expand Down Expand Up @@ -251,3 +298,20 @@ def test_arith_flex_zero_len_raises(self):

with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
df_len0.sub(df['A'], axis=None, fill_value=3)


class TestFrameArithmetic(object):
    """DataFrame arithmetic tests."""

    def test_df_bool_mul_int(self):
        """Multiply a boolean frame by 1 from either side; expect ints."""
        # GH#22047, GH#22163 multiplication by 1 should result in int dtype,
        # not object dtype
        frame = pd.DataFrame([[False, True], [False, False]])

        # On appveyor this comes back as np.int32 instead of np.int64,
        # so we check dtype.kind instead of just dtype
        for multiply in (lambda: frame * 1, lambda: 1 * frame):
            kinds = multiply().dtypes.apply(lambda dtype: dtype.kind)
            assert (kinds == 'i').all()
28 changes: 25 additions & 3 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ def test_getitem_boolean(self):
# test df[df > 0]
for df in [self.tsframe, self.mixed_frame,
self.mixed_float, self.mixed_int]:
if compat.PY3 and df is self.mixed_frame:
continue

data = df._get_numeric_data()
bif = df[df > 0]
Expand Down Expand Up @@ -2468,8 +2470,11 @@ def test_boolean_indexing_mixed(self):
assert_frame_equal(df2, expected)

df['foo'] = 'test'
with tm.assert_raises_regex(TypeError, 'boolean setting '
'on mixed-type'):
msg = ("boolean setting on mixed-type|"
"not supported between|"
"unorderable types")
with tm.assert_raises_regex(TypeError, msg):
# TODO: This message should be the same in PY2/PY3
df[df > 0.3] = 1

def test_where(self):
Expand Down Expand Up @@ -2502,6 +2507,10 @@ def _check_get(df, cond, check_dtypes=True):
# check getting
for df in [default_frame, self.mixed_frame,
self.mixed_float, self.mixed_int]:
if compat.PY3 and df is self.mixed_frame:
with pytest.raises(TypeError):
df > 0
continue
cond = df > 0
_check_get(df, cond)

Expand Down Expand Up @@ -2549,6 +2558,10 @@ def _check_align(df, cond, other, check_dtypes=True):
assert (rs.dtypes == df.dtypes).all()

for df in [self.mixed_frame, self.mixed_float, self.mixed_int]:
if compat.PY3 and df is self.mixed_frame:
with pytest.raises(TypeError):
df > 0
continue

# other is a frame
cond = (df > 0)[1:]
Expand Down Expand Up @@ -2594,6 +2607,10 @@ def _check_set(df, cond, check_dtypes=True):

for df in [default_frame, self.mixed_frame, self.mixed_float,
self.mixed_int]:
if compat.PY3 and df is self.mixed_frame:
with pytest.raises(TypeError):
df > 0
continue

cond = df > 0
_check_set(df, cond)
Expand Down Expand Up @@ -2759,9 +2776,14 @@ def test_where_datetime(self):
C=np.random.randn(5)))

stamp = datetime(2013, 1, 3)
result = df[df > stamp]
with pytest.raises(TypeError):
df > stamp

result = df[df.iloc[:, :-1] > stamp]

expected = df.copy()
expected.loc[[0, 1], 'A'] = np.nan
expected.loc[:, 'C'] = np.nan
assert_frame_equal(result, expected)

def test_where_none(self):
Expand Down
Loading

0 comments on commit f7f266c

Please sign in to comment.