Skip to content

Commit

Permalink
ENH: between_time, at_time accept axis parameter (pandas-dev#21799)
Browse files Browse the repository at this point in the history
  • Loading branch information
yrhooke authored and Pingviinituutti committed Feb 28, 2019
1 parent 1591837 commit cd7fd0f
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 50 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ Other Enhancements
- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)

.. _whatsnew_0240.api_breaking:

Expand Down
31 changes: 25 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7381,7 +7381,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False,
return asfreq(self, freq, method=method, how=how, normalize=normalize,
fill_value=fill_value)

def at_time(self, time, asof=False):
def at_time(self, time, asof=False, axis=None):
"""
Select values at particular time of day (e.g. 9:30AM).
Expand All @@ -7393,6 +7393,10 @@ def at_time(self, time, asof=False):
Parameters
----------
time : datetime.time or string
axis : {0 or 'index', 1 or 'columns'}, default 0
.. versionadded:: 0.24.0
Returns
-------
Expand Down Expand Up @@ -7422,14 +7426,20 @@ def at_time(self, time, asof=False):
DatetimeIndex.indexer_at_time : Get just the index locations for
values at particular time of the day.
"""
if axis is None:
axis = self._stat_axis_number
axis = self._get_axis_number(axis)

index = self._get_axis(axis)
try:
indexer = self.index.indexer_at_time(time, asof=asof)
return self._take(indexer)
indexer = index.indexer_at_time(time, asof=asof)
except AttributeError:
raise TypeError('Index must be DatetimeIndex')

return self._take(indexer, axis=axis)

def between_time(self, start_time, end_time, include_start=True,
include_end=True):
include_end=True, axis=None):
"""
Select values between particular times of the day (e.g., 9:00-9:30 AM).
Expand All @@ -7447,6 +7457,9 @@ def between_time(self, start_time, end_time, include_start=True,
end_time : datetime.time or string
include_start : boolean, default True
include_end : boolean, default True
axis : {0 or 'index', 1 or 'columns'}, default 0
.. versionadded:: 0.24.0
Returns
-------
Expand Down Expand Up @@ -7484,14 +7497,20 @@ def between_time(self, start_time, end_time, include_start=True,
DatetimeIndex.indexer_between_time : Get just the index locations for
values between particular times of the day.
"""
if axis is None:
axis = self._stat_axis_number
axis = self._get_axis_number(axis)

index = self._get_axis(axis)
try:
indexer = self.index.indexer_between_time(
indexer = index.indexer_between_time(
start_time, end_time, include_start=include_start,
include_end=include_end)
return self._take(indexer)
except AttributeError:
raise TypeError('Index must be DatetimeIndex')

return self._take(indexer, axis=axis)

def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
label=None, convention='start', kind=None, loffset=None,
limit=None, base=0, on=None, level=None):
Expand Down
141 changes: 97 additions & 44 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
from pandas.tests.frame.common import TestData


@pytest.fixture(params=product([True, False], repeat=2))
def close_open_fixture(request):
    """
    Provide each (include_start, include_end) pair of booleans in turn.

    The four combinations cover closed/open behaviour at both ends of a
    ``between_time`` interval.
    """
    inclusion_flags = request.param
    return inclusion_flags


class TestDataFrameTimeSeriesMethods(TestData):

def test_diff(self):
Expand Down Expand Up @@ -638,33 +643,49 @@ def test_at_time_raises(self):
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.at_time('00:00')

def test_between_time(self):
@pytest.mark.parametrize('axis', ['index', 'columns', 0, 1])
def test_at_time_axis(self, axis):
    """Check that ``at_time`` selects along whichever axis is requested."""
    # issue 8839
    # The same DatetimeIndex labels rows and columns, so the expected
    # selection can be built with ``.loc`` on either axis.
    stamps = date_range('1/1/2000', '1/5/2000', freq='5min')
    size = len(stamps)
    frame = DataFrame(np.random.randn(size, size))
    frame.index = stamps
    frame.columns = stamps

    is_half_past_nine = ((stamps.hour == 9) & (stamps.minute == 30)
                         & (stamps.second == 0))
    matching = stamps[is_half_past_nine]

    if axis in ['index', 0]:
        expected = frame.loc[matching, :]
    elif axis in ['columns', 1]:
        expected = frame.loc[:, matching]

    result = frame.at_time('9:30', axis=axis)
    assert_frame_equal(result, expected)

def test_between_time(self, close_open_fixture):
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
stime = time(0, 0)
etime = time(1, 0)

close_open = product([True, False], [True, False])
for inc_start, inc_end in close_open:
filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = 13 * 4 + 1
if not inc_start:
exp_len -= 5
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert t >= stime
else:
assert t > stime

if inc_end:
assert t <= etime
else:
assert t < etime
inc_start, inc_end = close_open_fixture

filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = 13 * 4 + 1
if not inc_start:
exp_len -= 5
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert t >= stime
else:
assert t > stime

if inc_end:
assert t <= etime
else:
assert t < etime

result = ts.between_time('00:00', '01:00')
expected = ts.between_time(stime, etime)
Expand All @@ -676,34 +697,66 @@ def test_between_time(self):
stime = time(22, 0)
etime = time(9, 0)

close_open = product([True, False], [True, False])
for inc_start, inc_end in close_open:
filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = (12 * 11 + 1) * 4 + 1
if not inc_start:
exp_len -= 4
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert (t >= stime) or (t <= etime)
else:
assert (t > stime) or (t <= etime)

if inc_end:
assert (t <= etime) or (t >= stime)
else:
assert (t < etime) or (t >= stime)
filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = (12 * 11 + 1) * 4 + 1
if not inc_start:
exp_len -= 4
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert (t >= stime) or (t <= etime)
else:
assert (t > stime) or (t <= etime)

if inc_end:
assert (t <= etime) or (t >= stime)
else:
assert (t < etime) or (t >= stime)

def test_between_time_raises(self):
    """Check that ``between_time`` raises TypeError without a DatetimeIndex."""
    # GH20725
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
    # index is not a DatetimeIndex
    pytest.raises(TypeError, frame.between_time,
                  start_time='00:00', end_time='12:00')

def test_between_time_axis(self, axis):
    """Check the number of labels ``between_time`` keeps on each axis."""
    # issue 8839
    stamps = date_range('1/1/2000', periods=100, freq='10min')
    size = len(stamps)
    frame = DataFrame(np.random.randn(size, size))
    start, end = '08:00:00', '09:00:00'
    # 08:00 through 09:00 inclusive in 10-minute steps gives 7 labels.
    expected_count = 7

    if axis in ['index', 0]:
        frame.index = stamps
        # The default axis and an explicit axis=0 must agree.
        assert len(frame.between_time(start, end)) == expected_count
        assert len(frame.between_time(start, end, axis=0)) == expected_count

    if axis in ['columns', 1]:
        frame.columns = stamps
        kept_columns = frame.between_time(start, end, axis=1).columns
        assert len(kept_columns) == expected_count

def test_between_time_axis_raises(self, axis):
    """
    Check that ``between_time`` raises TypeError when the axis being
    filtered no longer carries datetime labels.
    """
    # issue 8839
    stamps = date_range('1/1/2000', periods=100, freq='10min')
    size = len(stamps)
    int_labels = np.arange(size)
    values = np.random.randn(size, size)
    frame = DataFrame(values, index=stamps, columns=stamps)
    start, end = '08:00:00', '09:00:00'

    if axis in ['columns', 1]:
        # Replace the row labels with integers, then filter on rows.
        frame.index = int_labels
        with pytest.raises(TypeError):
            frame.between_time(start, end)
        with pytest.raises(TypeError):
            frame.between_time(start, end, axis=0)

    if axis in ['index', 0]:
        # Replace the column labels with integers, then filter on columns.
        frame.columns = int_labels
        with pytest.raises(TypeError):
            frame.between_time(start, end, axis=1)

def test_operation_on_NaT(self):
# Both NaT and Timestamp are in DataFrame.
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT,
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/series/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,17 @@ def test_between_time_formats(self):
for time_string in strings:
assert len(ts.between_time(*time_string)) == expected_length

def test_between_time_axis(self):
    """Check ``between_time`` on a Series: axis 0 works, axis 1 raises."""
    # issue 8839
    stamps = date_range('1/1/2000', periods=100, freq='10min')
    series = Series(np.random.randn(len(stamps)), index=stamps)
    start, end = '08:00:00', '09:00:00'
    # 08:00 through 09:00 inclusive in 10-minute steps gives 7 labels.
    expected_length = 7

    assert len(series.between_time(start, end)) == expected_length
    assert len(series.between_time(start, end, axis=0)) == expected_length
    # The test expects ValueError when axis=1 is requested on a Series.
    with pytest.raises(ValueError):
        series.between_time(start, end, axis=1)

def test_to_period(self):
from pandas.core.indexes.period import period_range

Expand Down

0 comments on commit cd7fd0f

Please sign in to comment.