From 5067708f0199a0b614586dbbc1a1536fa4442b65 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Mar 2017 11:25:46 -0500 Subject: [PATCH] BUG: Floating point accuracy with DatetimeIndex.round (#14440) closes #14440 Employs @eoincondron's fix for float point inaccuracies when rounding by milliseconds for `DatetimeIndex.round` and `Timestamp.round` Author: Matt Roeschke Closes #15568 from mroeschke/fix_14440 and squashes the following commits: c5a7cbc [Matt Roeschke] BUG:Floating point accuracy with DatetimeIndex.round (#14440) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/indexes/datetimes/test_ops.py | 11 +++++++++++ pandas/tests/scalar/test_timestamp.py | 9 +++++++++ pandas/tseries/base.py | 2 +- pandas/tslib.pyx | 3 ++- 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fe9035106e4af..db803e6e7856b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -652,6 +652,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds (:issue:`14440`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. 
(:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 8eb9128d8d1c8..3a6402ae83ae2 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -175,6 +175,17 @@ def test_round(self): tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + # GH 14440 + index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) + result = index.round('ms') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) + tm.assert_index_equal(result, expected) + + index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) + result = index.round('ms') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) + tm.assert_index_equal(result, expected) + def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2abc83ca6109c..ae278ebfa2533 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -732,6 +732,15 @@ def test_round(self): for freq in ['Y', 'M', 'foobar']: self.assertRaises(ValueError, lambda: dti.round(freq)) + # GH 14440 + result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = pd.Timestamp('2016-10-17 12:00:00.002000') + self.assertEqual(result, expected) + + result = pd.Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = pd.Timestamp('2016-10-17 12:00:00.001000') + self.assertEqual(result, expected) + def test_class_ops_pytz(self): tm._skip_if_no_pytz() from pytz import timezone diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index ee9234d6c8237..5891481677ed2 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -83,7 +83,7 
@@ def _round(self, freq, rounder): # round the local times values = _ensure_datetimelike_to_i8(self) - result = (unit * rounder(values / float(unit))).astype('i8') + result = (unit * rounder(values / float(unit)).astype('i8')) result = self._maybe_mask_results(result, fill_value=tslib.NaT) attribs = self._get_attributes_dict() if 'freq' in attribs: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index fc6e689a35d81..b96e9434e617a 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -421,7 +421,8 @@ class Timestamp(_Timestamp): value = self.tz_localize(None).value else: value = self.value - result = Timestamp(unit * rounder(value / float(unit)), unit='ns') + result = (unit * rounder(value / float(unit)).astype('i8')) + result = Timestamp(result, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) return result