Skip to content

Commit

Permalink
BUG: Avoid AmbiguousTime or NonExistentTime Error when resampling (pa…
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and victor committed Sep 30, 2018
1 parent d3846e4 commit 13599ff
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 20 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,7 @@ Timezones
- Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`)
- Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`)
- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`)
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would be raised if a timezone-aware time series ended on a DST transition (:issue:`19375`, :issue:`10117`)

Offsets
^^^^^^^
Expand Down
36 changes: 16 additions & 20 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1328,8 +1328,7 @@ def _get_time_bins(self, ax):
data=[], freq=self.freq, name=ax.name)
return binner, [], labels

first, last = ax.min(), ax.max()
first, last = _get_range_edges(first, last, self.freq,
first, last = _get_range_edges(ax.min(), ax.max(), self.freq,
closed=self.closed,
base=self.base)
tz = ax.tz
Expand Down Expand Up @@ -1519,9 +1518,6 @@ def _take_new_index(obj, indexer, new_index, axis=0):


def _get_range_edges(first, last, offset, closed='left', base=0):
if isinstance(offset, compat.string_types):
offset = to_offset(offset)

if isinstance(offset, Tick):
is_day = isinstance(offset, Day)
day_nanos = delta_to_nanoseconds(timedelta(1))
Expand All @@ -1531,8 +1527,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
return _adjust_dates_anchored(first, last, offset,
closed=closed, base=base)

if not isinstance(offset, Tick): # and first.time() != last.time():
# hack!
else:
first = first.normalize()
last = last.normalize()

Expand All @@ -1553,19 +1548,16 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
#
# See https://github.com/pandas-dev/pandas/issues/8683

# 14682 - Since we need to drop the TZ information to perform
# the adjustment in the presence of a DST change,
# save TZ Info and the DST state of the first and last parameters
# so that we can accurately rebuild them at the end.
# GH 10117 & GH 19375. If first and last contain timezone information,
# perform the calculation in UTC in order to avoid localizing on an
# ambiguous or nonexistent time.
first_tzinfo = first.tzinfo
last_tzinfo = last.tzinfo
first_dst = bool(first.dst())
last_dst = bool(last.dst())

first = first.tz_localize(None)
last = last.tz_localize(None)

start_day_nanos = first.normalize().value
if first_tzinfo is not None:
first = first.tz_convert('UTC')
if last_tzinfo is not None:
last = last.tz_convert('UTC')

base_nanos = (base % offset.n) * offset.nanos // offset.n
start_day_nanos += base_nanos
Expand Down Expand Up @@ -1598,9 +1590,13 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
lresult = last.value + (offset.nanos - loffset)
else:
lresult = last.value + offset.nanos

return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst),
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
fresult = Timestamp(fresult)
lresult = Timestamp(lresult)
if first_tzinfo is not None:
fresult = fresult.tz_localize('UTC').tz_convert(first_tzinfo)
if last_tzinfo is not None:
lresult = lresult.tz_localize('UTC').tz_convert(last_tzinfo)
return fresult, lresult


def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2485,6 +2485,22 @@ def test_with_local_timezone_dateutil(self):
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)

def test_resample_nonexistent_time_bin_edge(self):
# GH 19375
index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T')
s = Series(np.zeros(len(index)), index=index)
expected = s.tz_localize('US/Pacific')
result = expected.resample('900S').mean()
tm.assert_series_equal(result, expected)

def test_resample_ambiguous_time_bin_edge(self):
# GH 10117
idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
freq="30T", tz="Europe/London")
expected = Series(np.zeros(len(idx)), index=idx)
result = expected.resample('30T').mean()
tm.assert_series_equal(result, expected)

def test_fill_method_and_how_upsample(self):
# GH2073
s = Series(np.arange(9, dtype='int64'),
Expand Down

0 comments on commit 13599ff

Please sign in to comment.