Skip to content

Commit

Permalink
Return slices when possible from CFTimeIndex.get_loc() (#2569)
Browse files Browse the repository at this point in the history
This makes string indexing usable in a pandas.MultiIndex.
  • Loading branch information
shoyer authored Nov 24, 2018
1 parent 57fdcc5 commit 9d572a5
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 13 deletions.
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ Breaking changes
Enhancements
~~~~~~~~~~~~

- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
  :py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
  By `Stephan Hoyer <https://github.com/shoyer>`_.

Bug fixes
~~~~~~~~~

Expand Down
27 changes: 22 additions & 5 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,31 @@ def _partial_date_slice(self, resolution, parsed):
"""
start, end = _parsed_string_to_bounds(self.date_type, resolution,
parsed)
lhs_mask = (self._data >= start)
rhs_mask = (self._data <= end)
return (lhs_mask & rhs_mask).nonzero()[0]

times = self._data

if self.is_monotonic:
if (len(times) and ((start < times[0] and end < times[0]) or
(start > times[-1] and end > times[-1]))):
# we are out of range
raise KeyError

# a monotonic (sorted) series can be sliced
left = times.searchsorted(start, side='left')
right = times.searchsorted(end, side='right')
return slice(left, right)

lhs_mask = times >= start
rhs_mask = times <= end
return np.flatnonzero(lhs_mask & rhs_mask)

def _get_string_slice(self, key):
"""Adapted from pandas.tseries.index.DatetimeIndex._get_string_slice"""
parsed, resolution = _parse_iso8601_with_reso(self.date_type, key)
loc = self._partial_date_slice(resolution, parsed)
try:
loc = self._partial_date_slice(resolution, parsed)
except KeyError:
raise KeyError(key)
return loc

def get_loc(self, key, method=None, tolerance=None):
Expand Down Expand Up @@ -431,7 +448,7 @@ def to_datetimeindex(self, unsafe=False):
'calendar, {!r}, to a pandas.DatetimeIndex, which uses dates '
'from the standard calendar. This may lead to subtle errors '
'in operations that depend on the length of time between '
'dates.'.format(calendar), RuntimeWarning)
'dates.'.format(calendar), RuntimeWarning, stacklevel=2)
return pd.DatetimeIndex(nptimes)


Expand Down
23 changes: 15 additions & 8 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
_parsed_string_to_bounds, assert_all_valid_date_type, parse_iso8601)
from xarray.tests import assert_array_equal, assert_identical

from . import has_cftime, has_cftime_or_netCDF4, requires_cftime
from . import has_cftime, has_cftime_or_netCDF4, requires_cftime, raises_regex
from .test_coding_times import (_all_cftime_date_types, _ALL_CALENDARS,
_NON_STANDARD_CALENDARS)

Expand Down Expand Up @@ -251,16 +251,16 @@ def test_parsed_string_to_bounds_raises(date_type):
@pytest.mark.skipif(not has_cftime, reason='cftime not installed')
def test_get_loc(date_type, index):
result = index.get_loc('0001')
expected = [0, 1]
assert_array_equal(result, expected)
assert result == slice(0, 2)

result = index.get_loc(date_type(1, 2, 1))
expected = 1
assert result == expected
assert result == 1

result = index.get_loc('0001-02-01')
expected = 1
assert result == expected
assert result == slice(1, 2)

with raises_regex(KeyError, '1234'):
index.get_loc('1234')


@pytest.mark.skipif(not has_cftime, reason='cftime not installed')
Expand Down Expand Up @@ -758,7 +758,7 @@ def test_to_datetimeindex(calendar, unsafe):
with pytest.warns(RuntimeWarning, match='non-standard'):
result = index.to_datetimeindex()
else:
result = index.to_datetimeindex()
result = index.to_datetimeindex(unsafe=unsafe)

assert result.equals(expected)
np.testing.assert_array_equal(result, expected)
Expand All @@ -779,3 +779,10 @@ def test_to_datetimeindex_feb_29(calendar):
index = xr.cftime_range('2001-02-28', periods=2, calendar=calendar)
with pytest.raises(ValueError, match='29'):
index.to_datetimeindex()


@pytest.mark.skipif(not has_cftime, reason='cftime not installed')
def test_multiindex():
    """Check partial-date string lookup through a MultiIndex level.

    A CFTimeIndex is wrapped as the only level of a ``pandas.MultiIndex``;
    looking up the month string ``'2001-01'`` is expected to return a
    ``slice`` rather than an array of positions.
    """
    times = xr.cftime_range('2001-01-01', periods=100, calendar='360_day')
    wrapped = pd.MultiIndex.from_arrays([times])
    located = wrapped.get_loc('2001-01')
    assert located == slice(0, 30)

0 comments on commit 9d572a5

Please sign in to comment.