Skip to content

Commit

Permalink
BUG: fix infer frequency for business daily (#16683)
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamGleave authored and jreback committed Jul 7, 2017
1 parent d8cd9ca commit e832ddf
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 14 deletions.
16 changes: 13 additions & 3 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ def setup(self):
self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B')

self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000)
self.a = self.rng7[:50000].append(self.rng7[50002:])
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])

self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])

def time_add_timedelta(self):
(self.rng + dt.timedelta(minutes=2))
Expand Down Expand Up @@ -94,8 +98,14 @@ def time_infer_dst(self):
def time_timeseries_is_month_start(self):
    """Benchmark evaluating ``is_month_start`` on ``self.rng6``."""
    # Only the attribute access is being timed; the result is discarded.
    _ = self.rng6.is_month_start

def time_infer_freq(self):
    """Benchmark ``infer_freq`` on the index stored in ``self.a``."""
    # ``self.a`` is built in ``setup`` from the daily range with two
    # consecutive entries left out.
    index = self.a
    infer_freq(index)
def time_infer_freq_none(self):
    """Benchmark ``infer_freq`` on the index stored in ``self.no_freq``."""
    # ``self.no_freq`` is built in ``setup`` from the daily range with two
    # consecutive entries left out, so no frequency is expected to be found.
    index = self.no_freq
    infer_freq(index)

def time_infer_freq_daily(self):
    """Benchmark ``infer_freq`` on the index stored in ``self.d_freq``."""
    # ``self.d_freq`` is built in ``setup`` by re-joining the two halves of
    # the daily (``freq='D'``) range.
    index = self.d_freq
    infer_freq(index)

def time_infer_freq_business(self):
    """Benchmark ``infer_freq`` on the index stored in ``self.b_freq``."""
    # ``self.b_freq`` is built in ``setup`` by re-joining the two halves of
    # the business-daily (``freq='B'``) range.
    index = self.b_freq
    infer_freq(index)


class TimeDatetimeConverter(object):
Expand Down
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ Groupby/Resample/Rolling
^^^^^^^^^^^^^^^^^^^^^^^^



Sparse
^^^^^^

Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ Groupby/Resample/Rolling
^^^^^^^^^^^^^^^^^^^^^^^^
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)


- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)

Sparse
^^^^^^
Expand Down
20 changes: 14 additions & 6 deletions pandas/tests/indexes/timedeltas/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,15 +564,23 @@ def test_freq_conversion(self):


class TestSlicing(object):
@pytest.mark.parametrize('freq', ['B', 'D'])
def test_timedelta(self, freq):
index = date_range('1/1/2000', periods=50, freq=freq)

def test_timedelta(self):
# this is valid too
index = date_range('1/1/2000', periods=50, freq='B')
shifted = index + timedelta(1)
back = shifted + timedelta(-1)
assert tm.equalContents(index, back)
assert shifted.freq == index.freq
assert shifted.freq == back.freq
tm.assert_index_equal(index, back)

if freq == 'D':
expected = pd.tseries.offsets.Day(1)
assert index.freq == expected
assert shifted.freq == expected
assert back.freq == expected
else: # freq == 'B'
assert index.freq == pd.tseries.offsets.BusinessDay(1)
assert shifted.freq is None
assert back.freq == pd.tseries.offsets.BusinessDay(1)

result = index - timedelta(1)
expected = index + timedelta(-1)
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/tseries/test_frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,9 +504,14 @@ def test_raise_if_too_few(self):
pytest.raises(ValueError, frequencies.infer_freq, index)

def test_business_daily(self):
    # Friday 1 Jan 1999 followed by the next Monday and Tuesday (dates are
    # written month/day/year): the only 3-day step lands on a Monday, so
    # the index is genuinely business daily.
    index = _dti(['01/01/1999', '1/4/1999', '1/5/1999'])
    assert frequencies.infer_freq(index) == 'B'

def test_business_daily_look_alike(self):
    # GH 16624: the day deltas look like business daily, but the 'weekend'
    # gap sits in the wrong place (Thursday 31 Dec 1998 jumps to a Sunday),
    # so 'B' must not be inferred.
    look_alike = _dti(['12/31/1998', '1/3/1999', '1/4/1999'])
    inferred = frequencies.infer_freq(look_alike)
    assert inferred is None

def test_day(self):
self._check_tick(timedelta(1), 'D')

Expand Down
16 changes: 14 additions & 2 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,8 +975,7 @@ def _infer_daily_rule(self):
else:
return _maybe_add_count('D', days)

# Business daily. Maybe
if self.day_deltas == [1, 3]:
if self._is_business_daily():
return 'B'

wom_rule = self._get_wom_rule()
Expand Down Expand Up @@ -1012,6 +1011,19 @@ def _get_monthly_rule(self):
return {'cs': 'MS', 'bs': 'BMS',
'ce': 'M', 'be': 'BM'}.get(pos_check)

def _is_business_daily(self):
    """
    Check whether the index advances strictly by business days.

    The result is true only when every step in ``self.index`` is either a
    one-day move landing on weekday 1-4 or a three-day move landing on
    weekday 0. With the ``datetime.weekday()`` numbering (0 = Monday) that
    means Tuesday-Friday and Monday respectively.
    """
    # Fast rejection: anything other than exactly the day deltas [1, 3]
    # cannot be business daily.
    if self.day_deltas != [1, 3]:
        return False

    # The deltas alone are not proof: a 3-day jump could sit mid-week.
    # Rebuild the weekday each step lands on and validate every jump.
    start_weekday = self.index[0].weekday()
    # NOTE(review): assumes _ONE_DAY is one day in the units of ``asi8``.
    day_steps = np.floor_divide(np.diff(self.index.asi8), _ONE_DAY)
    landing_weekday = np.mod(start_weekday + np.cumsum(day_steps), 7)

    monday_after_weekend = (landing_weekday == 0) & (day_steps == 3)
    midweek_single_step = ((landing_weekday > 0) & (landing_weekday <= 4) &
                           (day_steps == 1))
    return np.all(monday_after_weekend | midweek_single_step)

def _get_wom_rule(self):
# wdiffs = unique(np.diff(self.index.week))
# We also need -47, -49, -48 to catch index spanning year boundary
Expand Down

0 comments on commit e832ddf

Please sign in to comment.