From 9e5dfedbd26b639d34bc5f32ac1a74851afd758e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 27 Oct 2018 20:14:13 -0700 Subject: [PATCH] BUG: Fix date_range overflow (#23345) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/arrays/datetimes.py | 42 ++++++++++++++++++- .../indexes/datetimes/test_date_range.py | 7 ++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 70725a347c9d03..b8d63e03e33183 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1027,6 +1027,7 @@ Datetimelike - Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`) - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) +- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cd20bcbed22118..2392bbdd87f7a3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1355,11 +1355,11 @@ def _generate_regular_range(cls, start, end, periods, freq): tz = start.tz elif start is not None: b = Timestamp(start).value - e = b + np.int64(periods) * stride + e = _generate_range_overflow_safe(b, periods, stride, side='start') tz = start.tz elif end is not None: e = Timestamp(end).value + stride - b = e - np.int64(periods) * stride + b = _generate_range_overflow_safe(e, periods, stride, side='end') tz = end.tz else: raise ValueError("at least 'start' or 'end' should be specified " @@ -1384,6 +1384,44 @@ def _generate_regular_range(cls, start, end, periods, freq): 
def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
    """
    Calculate the second endpoint for passing to np.arange, raising
    OutOfBoundsDatetime instead of wrapping around when the result
    cannot be represented as an int64.

    The other endpoint is ``endpoint + periods * stride`` when
    ``side='start'`` and ``endpoint - periods * stride`` when
    ``side='end'``.

    Parameters
    ----------
    endpoint : int
        Integer value of the known endpoint (the caller derives it
        from ``Timestamp(...).value``).
    periods : int
        Number of strides between the two endpoints.
    stride : int
        Step between consecutive points, in the same units as `endpoint`.
    side : {'start', 'end'}
        Which end of the range `endpoint` represents.

    Returns
    -------
    other_end : np.int64

    Raises
    ------
    OutOfBoundsDatetime
        If `endpoint` or the computed other endpoint lies outside the
        int64 range.
    """
    # GH#14187 raise instead of incorrectly wrapping around
    assert side in ['start', 'end']

    # Python ints have arbitrary precision, so neither the product
    # ``periods * stride`` nor the final sum can wrap here.  Doing the
    # multiplication in np.int64 would overflow silently *before* any
    # checked addition got a chance to notice.
    offset = int(periods) * int(stride)
    if side == 'end':
        offset = -offset
    other_end = int(endpoint) + offset

    bounds = np.iinfo(np.int64)
    in_bounds = (bounds.min <= int(endpoint) <= bounds.max and
                 bounds.min <= other_end <= bounds.max)
    if not in_bounds:
        raise tslib.OutOfBoundsDatetime('Cannot generate range with '
                                        '{side}={endpoint} and '
                                        'periods={periods}'
                                        .format(side=side, endpoint=endpoint,
                                                periods=periods))
    return np.int64(other_end)
date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min')