From 94e2194b7ab2b62126d306896ac42a8c00c4f41c Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 6 Apr 2014 20:46:12 +0900 Subject: [PATCH] BUG: Arithmetic, timezone and offsets operations affecting to NaT --- doc/source/release.rst | 2 ++ pandas/tseries/index.py | 3 ++ pandas/tseries/offsets.py | 33 +++++++++++++++++ pandas/tseries/tests/test_offsets.py | 49 +++++++++++++++++++++----- pandas/tseries/tests/test_timezones.py | 35 ++++++++++++++++++ pandas/tseries/tests/test_tslib.py | 41 +++++++++++++++++++++ pandas/tslib.pyx | 41 +++++++++++++++++---- 7 files changed, 189 insertions(+), 15 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index a23936ae154c0..08d7bf9b8728b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -370,6 +370,8 @@ Bug Fixes - Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332) - Bug in `Series.__unicode__` when `max_rows` is `None` and the Series has more than 1000 rows. (:issue:`6863`) - Bug in ``groupby.get_group`` where a datetlike wasn't always accepted (:issue:`5267`) +- Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` where ``NaT`` values were shifted instead of being preserved (:issue:`5546`) +- Bug in arithmetic operations where ``NaT`` was not propagated correctly (:issue:`6873`) pandas 0.13.1 ------------- diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 4de69639b8d7b..6ac21e60ea7f3 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -611,7 +611,10 @@ def __sub__(self, other): def _add_delta(self, delta): if isinstance(delta, (Tick, timedelta)): inc = offsets._delta_to_nanoseconds(delta) + mask = self.asi8 == tslib.iNaT new_values = (self.asi8 + inc).view(_NS_DTYPE) + new_values[mask] = tslib.iNaT + new_values = new_values.view(_NS_DTYPE) elif isinstance(delta, np.timedelta64): new_values = self.to_series() + delta else: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index fdbfac1b4aae1..d5eaad61b2fda 100644 --- a/pandas/tseries/offsets.py +++
b/pandas/tseries/offsets.py @@ -13,6 +13,8 @@ from pandas import _np_version_under1p7 +import functools + __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay', 'MonthBegin', 'BMonthBegin', 'MonthEnd', 'BMonthEnd', 'YearBegin', 'BYearBegin', 'YearEnd', 'BYearEnd', @@ -35,6 +37,15 @@ def as_datetime(obj): obj = f() return obj +def apply_nat(func): + @functools.wraps(func) + def wrapper(self, other): + if other is tslib.NaT: + return tslib.NaT + else: + return func(self, other) + return wrapper + #---------------------------------------------------------------------- # DateOffset @@ -102,6 +113,7 @@ def __init__(self, n=1, **kwds): else: self._offset = timedelta(1) + @apply_nat def apply(self, other): other = as_datetime(other) if len(self.kwds) > 0: @@ -382,6 +394,7 @@ def get_str(td): def isAnchored(self): return (self.n == 1) + @apply_nat def apply(self, other): if isinstance(other, datetime): n = self.n @@ -502,6 +515,7 @@ def __setstate__(self, state): self.__dict__ = state self._set_busdaycalendar() + @apply_nat def apply(self, other): if self.n <= 0: roll = 'forward' @@ -582,6 +596,7 @@ def name(self): class MonthEnd(MonthOffset): """DateOffset of one month end""" + @apply_nat def apply(self, other): other = datetime(other.year, other.month, other.day, tzinfo=other.tzinfo) @@ -606,6 +621,7 @@ def onOffset(cls, dt): class MonthBegin(MonthOffset): """DateOffset of one month at beginning""" + @apply_nat def apply(self, other): n = self.n @@ -628,6 +644,7 @@ class BusinessMonthEnd(MonthOffset): def isAnchored(self): return (self.n == 1) + @apply_nat def apply(self, other): other = datetime(other.year, other.month, other.day) @@ -653,6 +670,7 @@ def apply(self, other): class BusinessMonthBegin(MonthOffset): """DateOffset of one business month at beginning""" + @apply_nat def apply(self, other): n = self.n @@ -710,6 +728,7 @@ def __init__(self, n=1, **kwds): def isAnchored(self): return (self.n == 1 and self.weekday is not None) + @apply_nat def 
apply(self, other): if self.weekday is None: return as_timestamp(as_datetime(other) + self.n * self._inc) @@ -811,6 +830,7 @@ def __init__(self, n=1, **kwds): self.kwds = kwds + @apply_nat def apply(self, other): offsetOfMonth = self.getOffsetOfMonth(other) @@ -890,6 +910,7 @@ def __init__(self, n=1, **kwds): self.kwds = kwds + @apply_nat def apply(self, other): offsetOfMonth = self.getOffsetOfMonth(other) @@ -983,6 +1004,7 @@ class BQuarterEnd(QuarterOffset): _from_name_startingMonth = 12 _prefix = 'BQ' + @apply_nat def apply(self, other): n = self.n @@ -1037,6 +1059,7 @@ class BQuarterBegin(QuarterOffset): _from_name_startingMonth = 1 _prefix = 'BQS' + @apply_nat def apply(self, other): n = self.n other = as_datetime(other) @@ -1086,6 +1109,7 @@ def __init__(self, n=1, **kwds): def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) + @apply_nat def apply(self, other): n = self.n other = as_datetime(other) @@ -1117,6 +1141,7 @@ class QuarterBegin(QuarterOffset): def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) + @apply_nat def apply(self, other): n = self.n other = as_datetime(other) @@ -1166,6 +1191,7 @@ class BYearEnd(YearOffset): _default_month = 12 _prefix = 'BA' + @apply_nat def apply(self, other): n = self.n other = as_datetime(other) @@ -1203,6 +1229,7 @@ class BYearBegin(YearOffset): _default_month = 1 _prefix = 'BAS' + @apply_nat def apply(self, other): n = self.n other = as_datetime(other) @@ -1234,6 +1261,7 @@ class YearEnd(YearOffset): _default_month = 12 _prefix = 'A' + @apply_nat def apply(self, other): def _increment(date): if date.month == self.month: @@ -1290,6 +1318,7 @@ class YearBegin(YearOffset): _default_month = 1 _prefix = 'AS' + @apply_nat def apply(self, other): def _increment(date): year = date.year @@ -1410,6 +1439,7 @@ def onOffset(self, dt): else: return year_end == dt + @apply_nat def apply(self, other): n = self.n prev_year = self.get_year_end( @@ -1596,6 +1626,7 @@ def 
__init__(self, n=1, **kwds): def isAnchored(self): return self.n == 1 and self._offset.isAnchored() + @apply_nat def apply(self, other): other = as_datetime(other) n = self.n @@ -1693,6 +1724,7 @@ class Easter(DateOffset): def __init__(self, n=1, **kwds): super(Easter, self).__init__(n, **kwds) + @apply_nat def apply(self, other): currentEaster = easter(other.year) @@ -1786,6 +1818,7 @@ def delta(self): def nanos(self): return _delta_to_nanoseconds(self.delta) + @apply_nat def apply(self, other): if type(other) == date: other = datetime(other.year, other.month, other.day) diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index c306c9ff8b0e9..45ac1b3ac15a6 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -5,6 +5,7 @@ import nose from nose.tools import assert_raises + import numpy as np from pandas.core.datetools import ( @@ -20,7 +21,7 @@ from pandas.tseries.tools import parse_time_string import pandas.tseries.offsets as offsets -from pandas.tslib import monthrange, OutOfBoundsDatetime +from pandas.tslib import monthrange, OutOfBoundsDatetime, NaT from pandas.lib import Timestamp from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm @@ -98,14 +99,33 @@ def test_to_m8(): class TestBase(tm.TestCase): _offset = None + offset_types = [getattr(offsets, o) for o in offsets.__all__] + skip_np_u1p7 = [offsets.CustomBusinessDay, offsets.CDay, offsets.Nano] + + def _get_offset(self, klass, value=1): + # create instance from offset class + if klass is FY5253 or klass is FY5253Quarter: + klass = klass(n=value, startingMonth=1, weekday=1, + qtr_with_extra_week=1, variation='last') + elif klass is WeekOfMonth or klass is LastWeekOfMonth: + klass = LastWeekOfMonth(n=value, weekday=5) + else: + try: + klass = klass(value) + except: + klass = klass() + return klass + def test_apply_out_of_range(self): if self._offset is None: raise nose.SkipTest("_offset not defined to 
test out-of-range") + if self._offset in self.skip_np_u1p7: + raise nose.SkipTest('numpy >= 1.7 required') # try to create an out-of-bounds result timestamp; if we can't create the offset # skip try: - offset = self._offset(10000) + offset = self._get_offset(self._offset, value=10000) result = Timestamp('20080101') + offset self.assertIsInstance(result, datetime) @@ -114,16 +134,27 @@ def test_apply_out_of_range(self): except (ValueError, KeyError): raise nose.SkipTest("cannot create out_of_range offset") + +class TestOps(TestBase): + def test_return_type(self): + for offset in self.offset_types: + if _np_version_under1p7 and offset in self.skip_np_u1p7: + continue - # make sure that we are returning a Timestamp - try: - offset = self._offset(1) - except: - raise nose.SkipTest("_offset not defined to test return_type") + offset = self._get_offset(offset) + + # make sure that we are returning a Timestamp + result = Timestamp('20080101') + offset + self.assertIsInstance(result, Timestamp) + + # make sure that we are returning NaT + self.assert_(NaT + offset is NaT) + self.assert_(offset + NaT is NaT) + + self.assert_(NaT - offset is NaT) + self.assert_((-offset).apply(NaT) is NaT) - result = Timestamp('20080101') + offset - self.assertIsInstance(result, Timestamp) class TestDateOffset(TestBase): _multiprocess_can_split_ = True diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 5a265464e4e87..cc976488c19c1 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -975,6 +975,41 @@ def test_tzaware_offset(self): offset = dates + timedelta(hours=5) self.assert_(offset.equals(expected)) + def test_nat(self): + # GH 5546 + dates = [NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize('US/Pacific') + self.assert_(idx.equals(DatetimeIndex(dates, tz='US/Pacific'))) + idx = idx.tz_convert('US/Eastern') + self.assert_(idx.equals(DatetimeIndex(dates, tz='US/Eastern'))) + idx = 
idx.tz_convert('UTC') + self.assert_(idx.equals(DatetimeIndex(dates, tz='UTC'))) + + dates = ['2010-12-01 00:00', '2010-12-02 00:00', NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize('US/Pacific') + self.assert_(idx.equals(DatetimeIndex(dates, tz='US/Pacific'))) + idx = idx.tz_convert('US/Eastern') + expected = ['2010-12-01 03:00', '2010-12-02 03:00', NaT] + self.assert_(idx.equals(DatetimeIndex(expected, tz='US/Eastern'))) + + idx = idx + offsets.Hour(5) + expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] + self.assert_(idx.equals(DatetimeIndex(expected, tz='US/Eastern'))) + idx = idx.tz_convert('US/Pacific') + expected = ['2010-12-01 05:00', '2010-12-02 05:00', NaT] + self.assert_(idx.equals(DatetimeIndex(expected, tz='US/Pacific'))) + + if not _np_version_under1p7: + idx = idx + np.timedelta64(3, 'h') + expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] + self.assert_(idx.equals(DatetimeIndex(expected, tz='US/Pacific'))) + + idx = idx.tz_convert('US/Eastern') + expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] + self.assert_(idx.equals(DatetimeIndex(expected, tz='US/Eastern'))) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 8d9e4d5069f61..5ccac52cbb535 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -269,6 +269,47 @@ def test_nanosecond_string_parsing(self): self.timestamp = Timestamp('2013-05-01 07:15:45.123456789') self.assertEqual(self.timestamp.value, 1367392545123456000) + def test_nat_arithmetic(self): + # GH 6873 + nat = tslib.NaT + t = Timestamp('2014-01-01') + dt = datetime.datetime(2014, 1, 1) + delta = datetime.timedelta(3600) + + # Timestamp / datetime + for (left, right) in [(nat, nat), (nat, t), (dt, nat)]: + # NaT + Timestamp-like should raise TypeError + with tm.assertRaises(TypeError): + left + right + with 
tm.assertRaises(TypeError): + right + left + + # NaT - Timestamp-like (or inverse) returns NaT + self.assert_((left - right) is tslib.NaT) + self.assert_((right - left) is tslib.NaT) + + # timedelta-like + # offsets are tested in test_offsets.py + for (left, right) in [(nat, delta)]: + # NaT + timedelta-like returns NaT + self.assert_((left + right) is tslib.NaT) + # timedelta-like + NaT should raise TypeError + with tm.assertRaises(TypeError): + right + left + + self.assert_((left - right) is tslib.NaT) + with tm.assertRaises(TypeError): + right - left + + if _np_version_under1p7: + self.assertEqual(nat + np.timedelta64(1, 'h'), tslib.NaT) + with tm.assertRaises(TypeError): + np.timedelta64(1, 'h') + nat + + self.assertEqual(nat - np.timedelta64(1, 'h'), tslib.NaT) + with tm.assertRaises(TypeError): + np.timedelta64(1, 'h') - nat + class TestTslib(tm.TestCase): diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 278e8effb534b..ba6f03fd9bbb0 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -717,7 +717,8 @@ cdef class _Timestamp(datetime): or isinstance(other, timedelta) or hasattr(other, 'delta'): neg_other = -other return self + neg_other - + elif other is NaT: + return NaT return datetime.__sub__(self, other) cpdef _get_field(self, field): @@ -762,6 +763,26 @@ cdef class _NaT(_Timestamp): (type(self).__name__, type(other).__name__)) return PyObject_RichCompare(other, self, _reverse_ops[op]) + def __add__(self, other): + try: + result = _Timestamp.__add__(self, other) + if result is NotImplemented: + return result + except (OverflowError, OutOfBoundsDatetime): + pass + return NaT + + def __sub__(self, other): + if type(self) is datetime: + other, self = self, other + try: + result = _Timestamp.__sub__(self, other) + if result is NotImplemented: + return result + except (OverflowError, OutOfBoundsDatetime): + pass + return NaT + def _delta_to_nanoseconds(delta): if hasattr(delta, 'delta'): @@ -1764,10 +1785,13 @@ def tz_convert(ndarray[int64_t] vals, 
object tz1, object tz2): for i in range(n): v = utc_dates[i] - if (pos + 1) < trans_len and v >= trans[pos + 1]: - pos += 1 - offset = deltas[pos] - result[i] = v + offset + if vals[i] == NPY_NAT: + result[i] = vals[i] + else: + if (pos + 1) < trans_len and v >= trans[pos + 1]: + pos += 1 + offset = deltas[pos] + result[i] = v + offset return result @@ -1781,6 +1805,9 @@ def tz_convert_single(int64_t val, object tz1, object tz2): if not have_pytz: import pytz + if val == NPY_NAT: + return val + # Convert to UTC if _is_tzlocal(tz1): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) @@ -2006,7 +2033,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): for i in range(n): left = result_a[i] right = result_b[i] - if left != NPY_NAT and right != NPY_NAT: + if vals[i] == NPY_NAT: + result[i] = vals[i] + elif left != NPY_NAT and right != NPY_NAT: if left == right: result[i] = left else: