From c355f2637b4386e698056013c84623c9a42acb85 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Nov 2018 08:20:04 -0800 Subject: [PATCH] BUG: Casting tz-aware DatetimeIndex to object-dtype ndarray/Index (#23524) --- doc/source/whatsnew/v0.24.0.txt | 4 ++ pandas/_libs/tslibs/offsets.pyx | 9 +++- pandas/core/arrays/datetimes.py | 10 ++++ pandas/core/indexes/base.py | 14 ++++-- pandas/tests/arrays/test_datetimelike.py | 48 ++++++++++++++++++++ pandas/tests/indexes/test_base.py | 12 ++++- pandas/tests/tseries/offsets/test_offsets.py | 13 ++++++ pandas/tests/tseries/offsets/test_ticks.py | 22 +++++++++ pandas/tseries/offsets.py | 31 ++++++++++--- 9 files changed, 149 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1a5e4144b842b..93c0c0eb745dc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1128,6 +1128,9 @@ Datetimelike - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) - Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) +- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) +- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`) +- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) Timedelta ^^^^^^^^^ @@ -1174,6 +1177,7 @@ Offsets - Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) - Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`) - Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`) +- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`) Numeric ^^^^^^^ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0495202818eb5..7ef38cba0c37f 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -308,8 +308,13 @@ class _BaseOffset(object): def __eq__(self, other): if is_string_object(other): - other = to_offset(other) - + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False try: return self._params == other._params except AttributeError: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 405056c628ceb..08b83598bb6af 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, is_object_dtype, + is_int64_dtype, is_datetime64tz_dtype, is_datetime64_dtype, ensure_int64) @@ -388,6 +389,15 @@ def _resolution(self): # ---------------------------------------------------------------- # Array-like Methods + def __array__(self, dtype=None): + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + elif is_int64_dtype(dtype): + return self.asi8 + + # TODO: warn that conversion may be lossy? + return self._data.view(np.ndarray) # follow Index.__array__ + def __iter__(self): """ Return an iterator over the boxed values diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8470bc6fec490..263de57d32f31 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -301,11 +301,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, (dtype is not None and is_datetime64_any_dtype(dtype)) or 'tz' in kwargs): from pandas import DatetimeIndex - result = DatetimeIndex(data, copy=copy, name=name, - dtype=dtype, **kwargs) + if dtype is not None and is_dtype_equal(_o_dtype, dtype): - return Index(result.to_pydatetime(), dtype=_o_dtype) + # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, + # will raise in the where `data` is already tz-aware. So + # we leave it out of this step and cast to object-dtype after + # the DatetimeIndex construction. + # Note we can pass copy=False because the .astype below + # will always make a copy + result = DatetimeIndex(data, copy=False, name=name, **kwargs) + return result.astype(object) else: + result = DatetimeIndex(data, copy=copy, name=name, + dtype=dtype, **kwargs) return result elif (is_timedelta64_dtype(data) or diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3fd03a351de7c..5ba99a48e34ad 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -57,6 +57,54 @@ def timedelta_index(request): class TestDatetimeArray(object): + def test_array_object_dtype(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + + expected = np.array(list(dti)) + + result = np.array(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # also test the DatetimeIndex method while we're at it + result = np.array(dti, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_array(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + + expected = dti.asi8.view('M8[ns]') + result = np.array(arr) + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, copy=False) + assert result.base is expected.base + assert result.base is not None + + def test_array_i8_dtype(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + + expected = dti.asi8 + result = np.array(arr, dtype='i8') + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, dtype='i8', copy=False) + assert result.base is expected.base + assert result.base is not None + def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 666420a6a9b06..4a3efe22926f7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -132,7 +132,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype): @pytest.mark.parametrize("cast_as_obj", [True, False]) @pytest.mark.parametrize("index", [ pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern'), # DTI with tz + tz='US/Eastern', name='Green Eggs & Ham'), # DTI with tz pd.date_range('2015-01-01 10:00', freq='D', periods=3), # DTI no tz pd.timedelta_range('1 days', freq='D', periods=3), # td pd.period_range('2015-01-01', freq='D', periods=3) # period @@ -145,8 +145,16 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index): tm.assert_index_equal(result, index) - if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'): + if isinstance(index, pd.DatetimeIndex): assert result.tz == index.tz + if cast_as_obj: + # GH#23524 check that Index(dti, dtype=object) does not + # incorrectly raise ValueError, and that nanoseconds are not + # dropped + index += pd.Timedelta(nanoseconds=50) + result = pd.Index(index, dtype=object) + assert result.dtype == np.object_ + assert list(result) == list(index) @pytest.mark.parametrize("index,has_tz", [ (pd.date_range('2015-01-01 10:00', freq='D', periods=3, diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index cbd3e0903b713..d68dd65c9841b 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -187,6 +187,19 @@ def testMult2(self): assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50) assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6) + def test_compare_str(self): + # GH#23524 + # comparing to strings that cannot be cast to DateOffsets should + # not raise for __eq__ or __ne__ + if self._offset is None: + return + off = self._get_offset(self._offset) + + assert not off == "infer" + assert off != "foo" + # Note: inequalities are only implemented for Tick subclasses; + # tests for this are in test_ticks + class TestCommon(Base): # exected value created by Base._get_offset diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 369c0971f1e9a..128010fe6d32c 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -267,3 +267,25 @@ def test_compare_ticks(cls): assert cls(4) > three assert cls(3) == cls(3) assert cls(3) != cls(4) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_compare_ticks_to_strs(cls): + # GH#23524 + off = cls(19) + + # These tests should work with any strings, but we particularly are + # interested in "infer" as that comparison is convenient to make in + # Datetime/Timedelta Array/Index constructors + assert not off == "infer" + assert not "foo" == off + + for left, right in [("infer", off), (off, "infer")]: + with pytest.raises(TypeError): + left < right + with pytest.raises(TypeError): + left <= right + with pytest.raises(TypeError): + left > right + with pytest.raises(TypeError): + left >= right diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 53719b71d1180..25c419e485db1 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2199,9 +2199,18 @@ def apply_index(self, i): def _tick_comp(op): + assert op not in [operator.eq, operator.ne] + def f(self, other): - return op(self.delta, other.delta) + try: + return op(self.delta, other.delta) + except AttributeError: + # comparing with a non-Tick object + raise TypeError("Invalid comparison between {cls} and {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) + f.__name__ = '__{opname}__'.format(opname=op.__name__) return f @@ -2220,8 +2229,6 @@ def __init__(self, n=1, normalize=False): __ge__ = _tick_comp(operator.ge) __lt__ = _tick_comp(operator.lt) __le__ = _tick_comp(operator.le) - __eq__ = _tick_comp(operator.eq) - __ne__ = _tick_comp(operator.ne) def __add__(self, other): if isinstance(other, Tick): @@ -2242,8 +2249,13 @@ def __add__(self, other): def __eq__(self, other): if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset - - other = to_offset(other) + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False if isinstance(other, Tick): return self.delta == other.delta @@ -2258,8 +2270,13 @@ def __hash__(self): def __ne__(self, other): if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset - - other = to_offset(other) + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return True if isinstance(other, Tick): return self.delta != other.delta