diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index f21230693686e2..3147c433fa8387 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -66,7 +66,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - +- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) Sparse ^^^^^^ diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 79fe0a864f2468..e719d2d6fbcc26 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -567,7 +567,7 @@ class TestSlicing(object): def test_timedelta(self): # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') + index = date_range('1/1/2000', periods=50, freq='D') shifted = index + timedelta(1) back = shifted + timedelta(-1) assert tm.equalContents(index, back) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 2edca1bd4676b9..9dd45f9515360d 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -504,9 +504,14 @@ def test_raise_if_too_few(self): pytest.raises(ValueError, frequencies.infer_freq, index) def test_business_daily(self): - index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) + index = _dti(['01/01/1999', '1/4/1999', '1/5/1999']) assert frequencies.infer_freq(index) == 'B' + def test_business_daily_look_alike(self): + # 'weekend' (2-day gap) in wrong place + index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) + assert frequencies.infer_freq(index) is None + def test_day(self): self._check_tick(timedelta(1), 'D') diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index dddf835424f67e..8714f92b595b6c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -975,8 +975,7 @@ def _infer_daily_rule(self): else: return _maybe_add_count('D', days) - # Business daily. Maybe - if self.day_deltas == [1, 3]: + if self._is_business_daily(): return 'B' wom_rule = self._get_wom_rule() @@ -1012,6 +1011,17 @@ def _get_monthly_rule(self): return {'cs': 'MS', 'bs': 'BMS', 'ce': 'M', 'be': 'BM'}.get(pos_check) + def _is_business_daily(self): + if self.day_deltas != [1, 3]: # quick check: cannot be business daily + return False + # probably business daily, but need to confirm + first_weekday = self.index[0].weekday() + shifts = np.diff(np.asarray(self.index).view('i8')) + shifts = np.floor_divide(shifts, _ONE_DAY) + weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) + return np.all(((weekdays == 0) & (shifts == 3)) | + ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) + def _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary