From cd7fd0feb19f7f7fe703a3a9efe017d9f83706a3 Mon Sep 17 00:00:00 2001 From: yrhooke Date: Mon, 19 Nov 2018 03:21:32 +0200 Subject: [PATCH] ENH: between_time, at_time accept axis parameter (#21799) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/generic.py | 31 ++++-- pandas/tests/frame/test_timeseries.py | 141 +++++++++++++++++-------- pandas/tests/series/test_timeseries.py | 11 ++ 4 files changed, 134 insertions(+), 50 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 2f9c4d2cb6d34a..bb02bbb36424a5 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -291,6 +291,7 @@ Other Enhancements - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) - :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. - :meth:`DataFrame.to_stata` and :class:` pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`) +- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7d8873fe6a6423..dde671993a56b4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7381,7 +7381,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False, return asfreq(self, freq, method=method, how=how, normalize=normalize, fill_value=fill_value) - def at_time(self, time, asof=False): + def at_time(self, time, asof=False, axis=None): """ Select values at particular time of day (e.g. 9:30AM). @@ -7393,6 +7393,10 @@ def at_time(self, time, asof=False): Parameters ---------- time : datetime.time or string + axis : {0 or 'index', 1 or 'columns'}, default 0 + + .. 
versionadded:: 0.24.0 + Returns ------- @@ -7422,14 +7426,20 @@ def at_time(self, time, asof=False): DatetimeIndex.indexer_at_time : Get just the index locations for values at particular time of the day. """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) try: - indexer = self.index.indexer_at_time(time, asof=asof) - return self._take(indexer) + indexer = index.indexer_at_time(time, asof=asof) except AttributeError: raise TypeError('Index must be DatetimeIndex') + return self._take(indexer, axis=axis) + def between_time(self, start_time, end_time, include_start=True, - include_end=True): + include_end=True, axis=None): """ Select values between particular times of the day (e.g., 9:00-9:30 AM). @@ -7447,6 +7457,9 @@ def between_time(self, start_time, end_time, include_start=True, end_time : datetime.time or string include_start : boolean, default True include_end : boolean, default True + axis : {0 or 'index', 1 or 'columns'}, default 0 + + .. versionadded:: 0.24.0 Returns ------- @@ -7484,14 +7497,20 @@ def between_time(self, start_time, end_time, include_start=True, DatetimeIndex.indexer_between_time : Get just the index locations for values between particular times of the day. 
""" + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) try: - indexer = self.index.indexer_between_time( + indexer = index.indexer_between_time( start_time, end_time, include_start=include_start, include_end=include_end) - return self._take(indexer) except AttributeError: raise TypeError('Index must be DatetimeIndex') + return self._take(indexer, axis=axis) + def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, label=None, convention='start', kind=None, loffset=None, limit=None, base=0, on=None, level=None): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 4f04169d082067..52f0b30bf0f0c3 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -25,6 +25,11 @@ from pandas.tests.frame.common import TestData +@pytest.fixture(params=product([True, False], [True, False])) +def close_open_fixture(request): + return request.param + + class TestDataFrameTimeSeriesMethods(TestData): def test_diff(self): @@ -638,33 +643,49 @@ def test_at_time_raises(self): with pytest.raises(TypeError): # index is not a DatetimeIndex df.at_time('00:00') - def test_between_time(self): + @pytest.mark.parametrize('axis', ['index', 'columns', 0, 1]) + def test_at_time_axis(self, axis): + # issue 8839 + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts.index, ts.columns = rng, rng + + indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] + + if axis in ['index', 0]: + expected = ts.loc[indices, :] + elif axis in ['columns', 1]: + expected = ts.loc[:, indices] + + result = ts.at_time('9:30', axis=axis) + assert_frame_equal(result, expected) + + def test_between_time(self, close_open_fixture): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(0, 0) etime = time(1, 0) - - 
close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert t >= stime - else: - assert t > stime - - if inc_end: - assert t <= etime - else: - assert t < etime + inc_start, inc_end = close_open_fixture + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) @@ -676,27 +697,25 @@ def test_between_time(self): stime = time(22, 0) etime = time(9, 0) - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert (t >= stime) or (t <= etime) - else: - assert (t > stime) or (t <= etime) - - if inc_end: - assert (t <= etime) or (t >= stime) - else: - assert (t < etime) or (t >= stime) + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t 
>= stime) def test_between_time_raises(self): # GH20725 @@ -704,6 +723,40 @@ def test_between_time_raises(self): with pytest.raises(TypeError): # index is not a DatetimeIndex df.between_time(start_time='00:00', end_time='12:00') + def test_between_time_axis(self, axis): + # issue 8839 + rng = date_range('1/1/2000', periods=100, freq='10min') + ts = DataFrame(np.random.randn(len(rng), len(rng))) + stime, etime = ('08:00:00', '09:00:00') + exp_len = 7 + + if axis in ['index', 0]: + ts.index = rng + assert len(ts.between_time(stime, etime)) == exp_len + assert len(ts.between_time(stime, etime, axis=0)) == exp_len + + if axis in ['columns', 1]: + ts.columns = rng + selected = ts.between_time(stime, etime, axis=1).columns + assert len(selected) == exp_len + + def test_between_time_axis_raises(self, axis): + # issue 8839 + rng = date_range('1/1/2000', periods=100, freq='10min') + mask = np.arange(0, len(rng)) + rand_data = np.random.randn(len(rng), len(rng)) + ts = DataFrame(rand_data, index=rng, columns=rng) + stime, etime = ('08:00:00', '09:00:00') + + if axis in ['columns', 1]: + ts.index = mask + pytest.raises(TypeError, ts.between_time, stime, etime) + pytest.raises(TypeError, ts.between_time, stime, etime, axis=0) + + if axis in ['index', 0]: + ts.columns = mask + pytest.raises(TypeError, ts.between_time, stime, etime, axis=1) + def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. 
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT, diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 1681255f7e6bdd..969c20601c7c8b 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -817,6 +817,17 @@ def test_between_time_formats(self): for time_string in strings: assert len(ts.between_time(*time_string)) == expected_length + def test_between_time_axis(self): + # issue 8839 + rng = date_range('1/1/2000', periods=100, freq='10min') + ts = Series(np.random.randn(len(rng)), index=rng) + stime, etime = ('08:00:00', '09:00:00') + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + pytest.raises(ValueError, ts.between_time, stime, etime, axis=1) + def test_to_period(self): from pandas.core.indexes.period import period_range