Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: between_time, at_time accept axis parameter #21799

Merged
merged 11 commits into from
Nov 19, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ Other Enhancements
- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)

.. _whatsnew_0240.api_breaking:

Expand Down
31 changes: 25 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7381,7 +7381,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False,
return asfreq(self, freq, method=method, how=how, normalize=normalize,
fill_value=fill_value)

def at_time(self, time, asof=False):
def at_time(self, time, asof=False, axis=None):
"""
Select values at particular time of day (e.g. 9:30AM).

Expand All @@ -7393,6 +7393,10 @@ def at_time(self, time, asof=False):
Parameters
----------
time : datetime.time or string
axis : {0 or 'index', 1 or 'columns'}, default 0

.. versionadded:: 0.24.0


Returns
-------
Expand Down Expand Up @@ -7422,14 +7426,20 @@ def at_time(self, time, asof=False):
DatetimeIndex.indexer_at_time : Get just the index locations for
values at particular time of the day.
"""
if axis is None:
axis = self._stat_axis_number
axis = self._get_axis_number(axis)

index = self._get_axis(axis)
try:
indexer = self.index.indexer_at_time(time, asof=asof)
return self._take(indexer)
indexer = index.indexer_at_time(time, asof=asof)
except AttributeError:
raise TypeError('Index must be DatetimeIndex')

return self._take(indexer, axis=axis)

def between_time(self, start_time, end_time, include_start=True,
include_end=True):
include_end=True, axis=None):
"""
Select values between particular times of the day (e.g., 9:00-9:30 AM).

Expand All @@ -7447,6 +7457,9 @@ def between_time(self, start_time, end_time, include_start=True,
end_time : datetime.time or string
include_start : boolean, default True
include_end : boolean, default True
axis : {0 or 'index', 1 or 'columns'}, default 0

.. versionadded:: 0.24.0

Returns
-------
Expand Down Expand Up @@ -7484,14 +7497,20 @@ def between_time(self, start_time, end_time, include_start=True,
DatetimeIndex.indexer_between_time : Get just the index locations for
values between particular times of the day.
"""
if axis is None:
axis = self._stat_axis_number
axis = self._get_axis_number(axis)

index = self._get_axis(axis)
try:
indexer = self.index.indexer_between_time(
indexer = index.indexer_between_time(
start_time, end_time, include_start=include_start,
include_end=include_end)
return self._take(indexer)
except AttributeError:
raise TypeError('Index must be DatetimeIndex')

return self._take(indexer, axis=axis)

def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
label=None, convention='start', kind=None, loffset=None,
limit=None, base=0, on=None, level=None):
Expand Down
141 changes: 97 additions & 44 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
from pandas.tests.frame.common import TestData


@pytest.fixture(params=product([True, False], [True, False]))
def close_open_fixture(request):
    """
    Supply one pair of booleans per parametrized run.

    The pairs are every combination of ``True``/``False`` for two flags;
    ``test_between_time`` unpacks them as ``(inc_start, inc_end)``.
    """
    flag_pair = request.param
    return flag_pair


class TestDataFrameTimeSeriesMethods(TestData):

def test_diff(self):
Expand Down Expand Up @@ -638,33 +643,49 @@ def test_at_time_raises(self):
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.at_time('00:00')

def test_between_time(self):
@pytest.mark.parametrize('axis', ['index', 'columns', 0, 1])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just remove the parametrize, the axis is already defined

def test_at_time_axis(self, axis):
    """
    Check that ``at_time`` selects along the requested axis.

    A square frame carries the same 5-minute DatetimeIndex on rows and
    columns; ``at_time('9:30', axis=axis)`` must equal a ``.loc`` selection
    of the 09:30:00 labels along that axis.
    """
    # issue 8839
    stamps = date_range('1/1/2000', '1/5/2000', freq='5min')
    frame = DataFrame(np.random.randn(len(stamps), len(stamps)))
    frame.index, frame.columns = stamps, stamps

    # Labels that fall exactly on 09:30:00 of any day.
    is_half_past_nine = (
        (stamps.hour == 9) & (stamps.minute == 30) & (stamps.second == 0))
    wanted = stamps[is_half_past_nine]

    if axis in ['index', 0]:
        expected = frame.loc[wanted, :]
    elif axis in ['columns', 1]:
        expected = frame.loc[:, wanted]

    result = frame.at_time('9:30', axis=axis)
    assert_frame_equal(result, expected)

def test_between_time(self, close_open_fixture):
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
stime = time(0, 0)
etime = time(1, 0)

close_open = product([True, False], [True, False])
for inc_start, inc_end in close_open:
filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = 13 * 4 + 1
if not inc_start:
exp_len -= 5
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert t >= stime
else:
assert t > stime

if inc_end:
assert t <= etime
else:
assert t < etime
inc_start, inc_end = close_open_fixture

filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = 13 * 4 + 1
if not inc_start:
exp_len -= 5
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert t >= stime
else:
assert t > stime

if inc_end:
assert t <= etime
else:
assert t < etime

result = ts.between_time('00:00', '01:00')
expected = ts.between_time(stime, etime)
Expand All @@ -676,34 +697,66 @@ def test_between_time(self):
stime = time(22, 0)
etime = time(9, 0)

close_open = product([True, False], [True, False])
for inc_start, inc_end in close_open:
filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = (12 * 11 + 1) * 4 + 1
if not inc_start:
exp_len -= 4
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert (t >= stime) or (t <= etime)
else:
assert (t > stime) or (t <= etime)

if inc_end:
assert (t <= etime) or (t >= stime)
else:
assert (t < etime) or (t >= stime)
filtered = ts.between_time(stime, etime, inc_start, inc_end)
exp_len = (12 * 11 + 1) * 4 + 1
if not inc_start:
exp_len -= 4
if not inc_end:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
assert (t >= stime) or (t <= etime)
else:
assert (t > stime) or (t <= etime)

if inc_end:
assert (t <= etime) or (t >= stime)
else:
assert (t < etime) or (t >= stime)

def test_between_time_raises(self):
    """
    Check that ``between_time`` raises ``TypeError`` on a frame built
    without a DatetimeIndex.
    """
    # GH20725
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
    window = {'start_time': '00:00', 'end_time': '12:00'}
    # index is not a DatetimeIndex
    with pytest.raises(TypeError):
        frame.between_time(**window)

def test_between_time_axis(self, axis):
# issue 8839
rng = date_range('1/1/2000', periods=100, freq='10min')
ts = DataFrame(np.random.randn(len(rng), len(rng)))
stime, etime = ('08:00:00', '09:00:00')
exp_len = 7

if axis in ['index', 0]:
ts.index = rng
assert len(ts.between_time(stime, etime)) == exp_len
assert len(ts.between_time(stime, etime, axis=0)) == exp_len

if axis in ['columns', 1]:
ts.columns = rng
selected = ts.between_time(stime, etime, axis=1).columns
assert len(selected) == exp_len

def test_between_time_axis_raises(self, axis):
    """
    Check that ``between_time`` raises ``TypeError`` when the axis it
    selects along has integer labels instead of datetimes.

    The frame starts with datetime labels on both axes; the axis opposite
    to ``axis`` is then replaced with integers and selection along that
    replaced axis must fail.
    """
    # issue 8839
    stamps = date_range('1/1/2000', periods=100, freq='10min')
    positions = np.arange(0, len(stamps))
    values = np.random.randn(len(stamps), len(stamps))
    frame = DataFrame(values, index=stamps, columns=stamps)
    start, end = '08:00:00', '09:00:00'

    if axis in ['columns', 1]:
        frame.index = positions
        # Both the default axis and an explicit axis=0 use the row labels.
        with pytest.raises(TypeError):
            frame.between_time(start, end)
        with pytest.raises(TypeError):
            frame.between_time(start, end, axis=0)

    if axis in ['index', 0]:
        frame.columns = positions
        with pytest.raises(TypeError):
            frame.between_time(start, end, axis=1)

def test_operation_on_NaT(self):
# Both NaT and Timestamp are in DataFrame.
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT,
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/series/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,17 @@ def test_between_time_formats(self):
for time_string in strings:
assert len(ts.between_time(*time_string)) == expected_length

def test_between_time_axis(self):
# issue 8839
rng = date_range('1/1/2000', periods=100, freq='10min')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the issue number as a comment (to all new tests)

ts = Series(np.random.randn(len(rng)), index=rng)
stime, etime = ('08:00:00', '09:00:00')
expected_length = 7

assert len(ts.between_time(stime, etime)) == expected_length
assert len(ts.between_time(stime, etime, axis=0)) == expected_length
pytest.raises(ValueError, ts.between_time, stime, etime, axis=1)

def test_to_period(self):
from pandas.core.indexes.period import period_range

Expand Down