diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d2d5d40393b62..643ecfe70d433 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -496,6 +496,9 @@ Datetimelike - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`,:issue:`21365`) - Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) +- Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`) Timedelta ^^^^^^^^^ @@ -518,6 +521,7 @@ Timezones - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`) - Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) +- Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) Offsets ^^^^^^^ @@ -535,6 +539,7 @@ Numeric - Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` where, when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), a ``TypeError`` was wrongly raised. For all three methods such calculation are now done correctly. (:issue:`16679`). +- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) - Strings diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 00d53ad82b2dc..31ffde0ea12d7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -5,7 +5,7 @@ import numpy as np from pytz import utc -from pandas._libs import tslib +from pandas._libs import lib, tslib from pandas._libs.tslib import Timestamp, NaT, iNaT from pandas._libs.tslibs import ( normalize_date, @@ -18,7 +18,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, - is_datetimelike, + is_object_dtype, is_datetime64tz_dtype, is_datetime64_dtype, is_timedelta64_dtype, @@ -29,6 +29,7 @@ import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr +from pandas.core import ops from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick, Day, generate_range @@ -99,31 +100,40 @@ def wrapper(self, other): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) if isinstance(other, (datetime, np.datetime64, compat.string_types)): - if isinstance(other, datetime): + if isinstance(other, (datetime, np.datetime64)): # GH#18435 strings get a pass from tzawareness compat self._assert_tzawareness_compat(other) - other = _to_m8(other, tz=self.tz) + try: + other = _to_m8(other, tz=self.tz) + except ValueError: + # string that cannot be parsed to Timestamp + return ops.invalid_comparison(self, other, op) + result = meth(self, other) if isna(other): result.fill(nat_result) + elif lib.is_scalar(other): + return ops.invalid_comparison(self, other, op) else: if isinstance(other, list): + # FIXME: This can break for object-dtype with mixed types other = type(self)(other) elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): # Following Timestamp convention, __eq__ is all-False # and __ne__ is all True, others raise TypeError. - if opname == '__eq__': - return np.zeros(shape=self.shape, dtype=bool) - elif opname == '__ne__': - return np.ones(shape=self.shape, dtype=bool) - raise TypeError('%s type object %s' % - (type(other), str(other))) - - if is_datetimelike(other): + return ops.invalid_comparison(self, other, op) + + if is_object_dtype(other): + result = op(self.astype('O'), np.array(other)) + elif not (is_datetime64_dtype(other) or + is_datetime64tz_dtype(other)): + # e.g. is_timedelta64_dtype(other) + return ops.invalid_comparison(self, other, op) + else: self._assert_tzawareness_compat(other) + result = meth(self, np.asarray(other)) - result = meth(self, np.asarray(other)) result = com.values_from_object(result) # Make sure to pass an array to result[...]; indexing with @@ -152,6 +162,10 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): 'is_year_end', 'is_leap_year'] _object_ops = ['weekday_name', 'freq', 'tz'] + # dummy attribute so that datetime.__eq__(DatetimeArray) defers + # by returning NotImplemented + timetuple = None + # ----------------------------------------------------------------- # Constructors diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6d407c41daea6..f7d863bba82a7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -788,6 +788,35 @@ def mask_cmp_op(x, y, op, allowed_types): return result +def invalid_comparison(left, right, op): + """ + If a comparison has mismatched types and is not necessarily meaningful, + follow python3 conventions by: + + - returning all-False for equality + - returning all-True for inequality + - raising TypeError otherwise + + Parameters + ---------- + left : array-like + right : scalar, array-like + op : operator.{eq, ne, lt, le, gt} + + Raises + ------ + TypeError : on inequality comparisons + """ + if op is operator.eq: + res_values = np.zeros(left.shape, dtype=bool) + elif op is operator.ne: + res_values = np.ones(left.shape, dtype=bool) + else: + raise TypeError("Invalid comparison between dtype={dtype} and {typ}" + .format(dtype=left.dtype, typ=type(right).__name__)) + return res_values + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -1259,7 +1288,7 @@ def na_op(x, y): result = _comp_method_OBJECT_ARRAY(op, x, y) elif is_datetimelike_v_numeric(x, y): - raise TypeError("invalid type comparison") + return invalid_comparison(x, y, op) else: diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index fdf50805ad818..b6b783134b424 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -157,8 +157,20 @@ def test_comparison_invalid(self): def check(df, df2): for (x, y) in [(df, df2), (df2, df)]: - pytest.raises(TypeError, lambda: x == y) - pytest.raises(TypeError, lambda: x != y) + # we expect the result to match Series comparisons for + # == and !=, inequalities should raise + result = x == y + expected = DataFrame({col: x[col] == y[col] + for col in x.columns}, + index=x.index, columns=x.columns) + assert_frame_equal(result, expected) + + result = x != y + expected = DataFrame({col: x[col] != y[col] + for col in x.columns}, + index=x.index, columns=x.columns) + assert_frame_equal(result, expected) + pytest.raises(TypeError, lambda: x >= y) pytest.raises(TypeError, lambda: x > y) pytest.raises(TypeError, lambda: x < y) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a226f8de3c8bd..92035850806cb 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -463,9 +463,13 @@ def test_date_query_with_non_date(self): df = DataFrame({'dates': date_range('1/1/2012', periods=n), 'nondate': np.arange(n)}) - ops = '==', '!=', '<', '>', '<=', '>=' + result = df.query('dates == nondate', parser=parser, engine=engine) + assert len(result) == 0 - for op in ops: + result = df.query('dates != nondate', parser=parser, engine=engine) + assert_frame_equal(result, df) + + for op in ['<', '>', '<=', '>=']: with pytest.raises(TypeError): df.query('dates %s nondate' % op, parser=parser, engine=engine) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 1e54e6563d598..f54cb32b0a036 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -275,6 +275,20 @@ def test_comparison_tzawareness_compat(self, op): with pytest.raises(TypeError): op(ts, dz) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + @pytest.mark.parametrize('other', [datetime(2016, 1, 1), + Timestamp('2016-01-01'), + np.datetime64('2016-01-01')]) + def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture): + tz = tz_aware_fixture + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + with pytest.raises(TypeError): + op(dti, other) + with pytest.raises(TypeError): + op(other, dti) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le]) @@ -290,12 +304,60 @@ def test_nat_comparison_tzawareness(self, op): result = op(dti.tz_localize('US/Pacific'), pd.NaT) tm.assert_numpy_array_equal(result, expected) - def test_dti_cmp_int_raises(self): - rng = date_range('1/1/2000', periods=10) + def test_dti_cmp_str(self, tz_naive_fixture): + # GH#22074 + # regardless of tz, we expect these comparisons are valid + tz = tz_naive_fixture + rng = date_range('1/1/2000', periods=10, tz=tz) + other = '1/1/2000' + + result = rng == other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng < other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = rng <= other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng > other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng >= other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('other', ['foo', 99, 4.0, + object(), timedelta(days=2)]) + def test_dti_cmp_scalar_invalid(self, other, tz_naive_fixture): + # GH#22074 + tz = tz_naive_fixture + rng = date_range('1/1/2000', periods=10, tz=tz) + + result = rng == other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) - # raise TypeError for now with pytest.raises(TypeError): - rng < rng[3].value + rng < other + with pytest.raises(TypeError): + rng <= other + with pytest.raises(TypeError): + rng > other + with pytest.raises(TypeError): + rng >= other def test_dti_cmp_list(self): rng = date_range('1/1/2000', periods=10) @@ -304,6 +366,57 @@ def test_dti_cmp_list(self): expected = rng == rng tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize('other', [ + pd.timedelta_range('1D', periods=10), + pd.timedelta_range('1D', periods=10).to_series(), + pd.timedelta_range('1D', periods=10).asi8.view('m8[ns]') + ], ids=lambda x: type(x).__name__) + def test_dti_cmp_tdi_tzawareness(self, other): + # GH#22074 + # reversion test that we _don't_ call _assert_tzawareness_compat + # when comparing against TimedeltaIndex + dti = date_range('2000-01-01', periods=10, tz='Asia/Tokyo') + + result = dti == other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = dti != other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises(TypeError): + dti < other + with pytest.raises(TypeError): + dti <= other + with pytest.raises(TypeError): + dti > other + with pytest.raises(TypeError): + dti >= other + + def test_dti_cmp_object_dtype(self): + # GH#22074 + dti = date_range('2000-01-01', periods=10, tz='Asia/Tokyo') + + other = dti.astype('O') + + result = dti == other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + + other = dti.tz_localize(None) + with pytest.raises(TypeError): + # tzawareness failure + dti != other + + other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5) + result = dti == other + expected = np.array([True] * 5 + [False] * 5) + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises(TypeError): + dti >= other + class TestDatetimeIndexArithmetic(object): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index ecb74622edf10..fb6e7c412d884 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -238,8 +238,15 @@ def test_comparison_invalid(self): s2 = Series(date_range('20010101', periods=5)) for (x, y) in [(s, s2), (s2, s)]: - pytest.raises(TypeError, lambda: x == y) - pytest.raises(TypeError, lambda: x != y) + + result = x == y + expected = Series([False] * 5) + assert_series_equal(result, expected) + + result = x != y + expected = Series([True] * 5) + assert_series_equal(result, expected) + pytest.raises(TypeError, lambda: x >= y) pytest.raises(TypeError, lambda: x > y) pytest.raises(TypeError, lambda: x < y)