Skip to content

Commit

Permalink
BUG: Floating point accuracy with DatetimeIndex.round (#14440)
Browse files Browse the repository at this point in the history
closes #14440

Employs @eoincondron's fix for float point inaccuracies when rounding
by milliseconds for `DatetimeIndex.round` and `Timestamp.round`

Author: Matt Roeschke <emailformattr@gmail.com>

Closes #15568 from mroeschke/fix_14440 and squashes the following commits:

c5a7cbc [Matt Roeschke] BUG:Floating point accuracy with DatetimeIndex.round (#14440)
  • Loading branch information
mroeschke authored and jreback committed Mar 5, 2017
1 parent f4a03d9 commit 5067708
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@ Bug Fixes
- Bug in ``Index`` power operations with reversed operands (:issue:`14973`)
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
- Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`)
- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds (:issue: `14440`)
- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`)
- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`)
- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/datetimes/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,17 @@ def test_round(self):
tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M')
tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M')

# GH 14440
index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz)
result = index.round('ms')
expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz)
tm.assert_index_equal(result, expected)

index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz)
result = index.round('ms')
expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz)
tm.assert_index_equal(result, expected)

def test_repeat_range(self):
rng = date_range('1/1/2000', '1/1/2001')

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/scalar/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,15 @@ def test_round(self):
for freq in ['Y', 'M', 'foobar']:
self.assertRaises(ValueError, lambda: dti.round(freq))

# GH 14440
result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms')
expected = pd.Timestamp('2016-10-17 12:00:00.002000')
self.assertEqual(result, expected)

result = pd.Timestamp('2016-10-17 12:00:00.00149').round('ms')
expected = pd.Timestamp('2016-10-17 12:00:00.001000')
self.assertEqual(result, expected)

def test_class_ops_pytz(self):
tm._skip_if_no_pytz()
from pytz import timezone
Expand Down
2 changes: 1 addition & 1 deletion pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _round(self, freq, rounder):
# round the local times
values = _ensure_datetimelike_to_i8(self)

result = (unit * rounder(values / float(unit))).astype('i8')
result = (unit * rounder(values / float(unit)).astype('i8'))
result = self._maybe_mask_results(result, fill_value=tslib.NaT)
attribs = self._get_attributes_dict()
if 'freq' in attribs:
Expand Down
3 changes: 2 additions & 1 deletion pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,8 @@ class Timestamp(_Timestamp):
value = self.tz_localize(None).value
else:
value = self.value
result = Timestamp(unit * rounder(value / float(unit)), unit='ns')
result = (unit * rounder(value / float(unit)).astype('i8'))
result = Timestamp(result, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz)
return result
Expand Down

0 comments on commit 5067708

Please sign in to comment.