From abf0824200a34c00f6a20b283bbb968f2c2d288f Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Tue, 29 Jan 2019 17:54:37 +0200 Subject: [PATCH] fix for BUG: grouping with tz-aware: Values falls after last bin (#24973) --- doc/source/whatsnew/v0.24.1.rst | 3 +- pandas/core/resample.py | 31 ++++++++++---------- pandas/tests/resample/test_datetime_index.py | 15 ++++++++++ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 85a2ba5bb03b6..8f4c3982c745f 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -72,8 +72,7 @@ Bug Fixes **Reshaping** -- -- +- Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`) **Visualization** diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 6822225273906..7723827ff478a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -30,8 +30,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range from pandas.tseries.frequencies import to_offset -from pandas.tseries.offsets import ( - DateOffset, Day, Nano, Tick, delta_to_nanoseconds) +from pandas.tseries.offsets import DateOffset, Day, Nano, Tick _shared_docs_kwargs = dict() @@ -1613,20 +1612,20 @@ def _get_timestamp_range_edges(first, last, offset, closed='left', base=0): A tuple of length 2, containing the adjusted pd.Timestamp objects. """ if isinstance(offset, Tick): - is_day = isinstance(offset, Day) - day_nanos = delta_to_nanoseconds(timedelta(1)) - - # #1165 and #24127 - if (is_day and not offset.nanos % day_nanos) or not is_day: - first, last = _adjust_dates_anchored(first, last, offset, - closed=closed, base=base) - if is_day and first.tz is not None: - # _adjust_dates_anchored assumes 'D' means 24H, but first/last - # might contain a DST transition (23H, 24H, or 25H). - # Ensure first/last snap to midnight. - first = first.normalize() - last = last.normalize() - return first, last + if isinstance(offset, Day): + # _adjust_dates_anchored assumes 'D' means 24H, but first/last + # might contain a DST transition (23H, 24H, or 25H). + # So "pretend" the dates are naive when adjusting the endpoints + tz = first.tz + first = first.tz_localize(None) + last = last.tz_localize(None) + + first, last = _adjust_dates_anchored(first, last, offset, + closed=closed, base=base) + if isinstance(offset, Day): + first = first.tz_localize(tz) + last = last.tz_localize(tz) + return first, last else: first = first.normalize() diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 74487052f8982..856c4df5380e5 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1278,6 +1278,21 @@ def test_resample_across_dst(): assert_frame_equal(result, expected) +def test_groupby_with_dst_time_change(): + # GH 24972 + index = pd.DatetimeIndex([1478064900001000000, 1480037118776792000], + tz='UTC').tz_convert('America/Chicago') + + df = pd.DataFrame([1, 2], index=index) + result = df.groupby(pd.Grouper(freq='1d')).last() + expected_index_values = pd.date_range('2016-11-02', '2016-11-24', + freq='d', tz='America/Chicago') + + index = pd.DatetimeIndex(expected_index_values) + expected = pd.DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index) + assert_frame_equal(result, expected) + + def test_resample_dst_anchor(): # 5172 dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')