Skip to content

Commit

Permalink
Fix Timestamp.round errors (pandas-dev#22802)
Browse files Browse the repository at this point in the history
  • Loading branch information
miccoli authored and jreback committed Oct 1, 2018
1 parent b92b043 commit f021bbc
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 44 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ Datetimelike
- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`)
- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)

Timedelta
^^^^^^^^^
Expand Down
137 changes: 101 additions & 36 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ cimport ccalendar
from conversion import tz_localize_to_utc, normalize_i8_timestamps
from conversion cimport (tz_convert_single, _TSObject,
convert_to_tsobject, convert_datetime_to_tsobject)
import enum
from fields import get_start_end_field, get_date_name_field
from nattype import NaT
from nattype cimport NPY_NAT
Expand Down Expand Up @@ -57,50 +58,114 @@ cdef inline object create_timestamp_from_ts(int64_t value,
return ts_base


def round_ns(values, rounder, freq):
@enum.unique
class RoundTo(enum.Enum):
"""
Applies rounding function at given frequency
enumeration defining the available rounding modes
Attributes
----------
MINUS_INFTY
round towards -∞, or floor [2]_
PLUS_INFTY
round towards +∞, or ceil [3]_
NEAREST_HALF_EVEN
round to nearest, tie-break half to even [6]_
NEAREST_HALF_MINUS_INFTY
round to nearest, tie-break half to -∞ [5]_
NEAREST_HALF_PLUS_INFTY
round to nearest, tie-break half to +∞ [4]_
References
----------
.. [1] "Rounding - Wikipedia"
https://en.wikipedia.org/wiki/Rounding
.. [2] "Rounding down"
https://en.wikipedia.org/wiki/Rounding#Rounding_down
.. [3] "Rounding up"
https://en.wikipedia.org/wiki/Rounding#Rounding_up
.. [4] "Round half up"
https://en.wikipedia.org/wiki/Rounding#Round_half_up
.. [5] "Round half down"
https://en.wikipedia.org/wiki/Rounding#Round_half_down
.. [6] "Round half to even"
https://en.wikipedia.org/wiki/Rounding#Round_half_to_even
"""
MINUS_INFTY = 0
PLUS_INFTY = 1
NEAREST_HALF_EVEN = 2
NEAREST_HALF_PLUS_INFTY = 3
NEAREST_HALF_MINUS_INFTY = 4


cdef inline _npdivmod(x1, x2):
"""implement divmod for numpy < 1.13"""
return np.floor_divide(x1, x2), np.remainder(x1, x2)


try:
from numpy import divmod as npdivmod
except ImportError:
npdivmod = _npdivmod


cdef inline _floor_int64(values, unit):
return values - np.remainder(values, unit)

cdef inline _ceil_int64(values, unit):
return values + np.remainder(-values, unit)

cdef inline _rounddown_int64(values, unit):
return _ceil_int64(values - unit//2, unit)

cdef inline _roundup_int64(values, unit):
return _floor_int64(values + unit//2, unit)


def round_nsint64(values, mode, freq):
"""
Applies rounding mode at given frequency
Parameters
----------
values : :obj:`ndarray`
rounder : function, eg. 'ceil', 'floor', 'round'
mode : instance of `RoundTo` enumeration
freq : str, obj
Returns
-------
:obj:`ndarray`
"""

if not isinstance(mode, RoundTo):
raise ValueError('mode should be a RoundTo member')

unit = to_offset(freq).nanos

# GH21262 If the Timestamp is multiple of the freq str
# don't apply any rounding
mask = values % unit == 0
if mask.all():
return values
r = values.copy()

if unit < 1000:
# for nano rounding, work with the last 6 digits separately
# due to float precision
buff = 1000000
r[~mask] = (buff * (values[~mask] // buff) +
unit * (rounder((values[~mask] % buff) *
(1 / float(unit)))).astype('i8'))
else:
if unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
# GH19206
# to deal with round-off when unit is large
if unit >= 1e9:
divisor = 10 ** int(np.log10(unit / 1e7))
else:
divisor = 10
r[~mask] = (unit * rounder((values[~mask] *
(divisor / float(unit))) / divisor)
.astype('i8'))
return r
if mode is RoundTo.MINUS_INFTY:
return _floor_int64(values, unit)
elif mode is RoundTo.PLUS_INFTY:
return _ceil_int64(values, unit)
elif mode is RoundTo.NEAREST_HALF_MINUS_INFTY:
return _rounddown_int64(values, unit)
elif mode is RoundTo.NEAREST_HALF_PLUS_INFTY:
return _roundup_int64(values, unit)
elif mode is RoundTo.NEAREST_HALF_EVEN:
# for odd unit there is no need of a tie break
if unit % 2:
return _rounddown_int64(values, unit)
quotient, remainder = npdivmod(values, unit)
mask = np.logical_or(
remainder > (unit // 2),
np.logical_and(remainder == (unit // 2), quotient % 2)
)
quotient[mask] += 1
return quotient * unit

# if/elif above should catch all rounding modes defined in enum 'RoundTo':
# if flow of control arrives here, it is a bug
assert False, "round_nsint64 called with an unrecognized rounding mode"


# This is PITA. Because we inherit from datetime, which has very specific
Expand Down Expand Up @@ -656,7 +721,7 @@ class Timestamp(_Timestamp):

return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)

def _round(self, freq, rounder, ambiguous='raise'):
def _round(self, freq, mode, ambiguous='raise'):
if self.tz is not None:
value = self.tz_localize(None).value
else:
Expand All @@ -665,7 +730,7 @@ class Timestamp(_Timestamp):
value = np.array([value], dtype=np.int64)

# Will only ever contain 1 element for timestamp
r = round_ns(value, rounder, freq)[0]
r = round_nsint64(value, mode, freq)[0]
result = Timestamp(r, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz, ambiguous=ambiguous)
Expand Down Expand Up @@ -694,7 +759,7 @@ class Timestamp(_Timestamp):
------
ValueError if the freq cannot be converted
"""
return self._round(freq, np.round, ambiguous)
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)

def floor(self, freq, ambiguous='raise'):
"""
Expand All @@ -715,7 +780,7 @@ class Timestamp(_Timestamp):
------
ValueError if the freq cannot be converted
"""
return self._round(freq, np.floor, ambiguous)
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)

def ceil(self, freq, ambiguous='raise'):
"""
Expand All @@ -736,7 +801,7 @@ class Timestamp(_Timestamp):
------
ValueError if the freq cannot be converted
"""
return self._round(freq, np.ceil, ambiguous)
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)

@property
def tz(self):
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np

from pandas._libs import lib, iNaT, NaT
from pandas._libs.tslibs.timestamps import round_ns
from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo

from pandas.core.dtypes.common import (
ensure_int64,
Expand Down Expand Up @@ -180,10 +180,10 @@ class TimelikeOps(object):
"""
)

def _round(self, freq, rounder, ambiguous):
def _round(self, freq, mode, ambiguous):
# round the local times
values = _ensure_datetimelike_to_i8(self)
result = round_ns(values, rounder, freq)
result = round_nsint64(values, mode, freq)
result = self._maybe_mask_results(result, fill_value=NaT)

attribs = self._get_attributes_dict()
Expand All @@ -197,15 +197,15 @@ def _round(self, freq, rounder, ambiguous):

@Appender((_round_doc + _round_example).format(op="round"))
def round(self, freq, ambiguous='raise'):
return self._round(freq, np.round, ambiguous)
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)

@Appender((_round_doc + _floor_example).format(op="floor"))
def floor(self, freq, ambiguous='raise'):
return self._round(freq, np.floor, ambiguous)
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)

@Appender((_round_doc + _ceil_example).format(op="ceil"))
def ceil(self, freq, ambiguous='raise'):
return self._round(freq, np.ceil, ambiguous)
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)


class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
Expand Down
43 changes: 42 additions & 1 deletion pandas/tests/indexes/datetimes/test_scalar_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd

from pandas import date_range, Timestamp, DatetimeIndex
from pandas.tseries.frequencies import to_offset


class TestDatetimeIndexOps(object):
Expand Down Expand Up @@ -124,7 +125,7 @@ def test_round(self, tz_naive_fixture):
expected = DatetimeIndex(['2016-10-17 12:00:00.001501030'])
tm.assert_index_equal(result, expected)

with tm.assert_produces_warning():
with tm.assert_produces_warning(False):
ts = '2016-10-17 12:00:00.001501031'
DatetimeIndex([ts]).round('1010ns')

Expand Down Expand Up @@ -169,6 +170,46 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
expected = DatetimeIndex(list(expected))
assert expected.equals(result)

@pytest.mark.parametrize('start, index_freq, periods', [
('2018-01-01', '12H', 25),
('2018-01-01 0:0:0.124999', '1ns', 1000),
])
@pytest.mark.parametrize('round_freq', [
'2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
'250ns', '500ns', '750ns',
'1us', '19us', '250us', '500us', '750us',
'1s', '2s', '3s',
'12H', '1D',
])
def test_round_int64(self, start, index_freq, periods, round_freq):
dt = DatetimeIndex(start=start, freq=index_freq, periods=periods)
unit = to_offset(round_freq).nanos

# test floor
result = dt.floor(round_freq)
diff = dt.asi8 - result.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), "floor not a {} multiple".format(round_freq)
assert (0 <= diff).all() and (diff < unit).all(), "floor error"

# test ceil
result = dt.ceil(round_freq)
diff = result.asi8 - dt.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq)
assert (0 <= diff).all() and (diff < unit).all(), "ceil error"

# test round
result = dt.round(round_freq)
diff = abs(result.asi8 - dt.asi8)
mod = result.asi8 % unit
assert (mod == 0).all(), "round not a {} multiple".format(round_freq)
assert (diff <= unit // 2).all(), "round error"
if unit % 2 == 0:
assert (
result.asi8[diff == unit // 2] % 2 == 0
).all(), "round half to even error"

# ----------------------------------------------------------------
# DatetimeIndex.normalize

Expand Down
43 changes: 42 additions & 1 deletion pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG
from pandas import Timestamp, NaT
from pandas.tseries.frequencies import to_offset


class TestTimestampUnaryOps(object):
Expand Down Expand Up @@ -70,7 +71,7 @@ def test_round_subsecond(self):
assert result == expected

def test_round_nonstandard_freq(self):
with tm.assert_produces_warning():
with tm.assert_produces_warning(False):
Timestamp('2016-10-17 12:00:00.001501031').round('1010ns')

def test_round_invalid_arg(self):
Expand Down Expand Up @@ -154,6 +155,46 @@ def test_round_dst_border(self, method):
with pytest.raises(pytz.AmbiguousTimeError):
getattr(ts, method)('H', ambiguous='raise')

@pytest.mark.parametrize('timestamp', [
'2018-01-01 0:0:0.124999360',
'2018-01-01 0:0:0.125000367',
'2018-01-01 0:0:0.125500',
'2018-01-01 0:0:0.126500',
'2018-01-01 12:00:00',
'2019-01-01 12:00:00',
])
@pytest.mark.parametrize('freq', [
'2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
'250ns', '500ns', '750ns',
'1us', '19us', '250us', '500us', '750us',
'1s', '2s', '3s',
'1D',
])
def test_round_int64(self, timestamp, freq):
"""check that all rounding modes are accurate to int64 precision
see GH#22591
"""
dt = Timestamp(timestamp)
unit = to_offset(freq).nanos

# test floor
result = dt.floor(freq)
assert result.value % unit == 0, "floor not a {} multiple".format(freq)
assert 0 <= dt.value - result.value < unit, "floor error"

# test ceil
result = dt.ceil(freq)
assert result.value % unit == 0, "ceil not a {} multiple".format(freq)
assert 0 <= result.value - dt.value < unit, "ceil error"

# test round
result = dt.round(freq)
assert result.value % unit == 0, "round not a {} multiple".format(freq)
assert abs(result.value - dt.value) <= unit // 2, "round error"
if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2:
# round half to even
assert result.value // unit % 2 == 0, "round half to even error"

# --------------------------------------------------------------
# Timestamp.replace

Expand Down

0 comments on commit f021bbc

Please sign in to comment.