-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CFTimeIndex #1252
CFTimeIndex #1252
Changes from 3 commits
e1e8223
6496458
675b2f7
7beddc1
3cf03bc
53b085c
738979b
a177f89
48ec519
9e76df6
2a7b439
b942724
7845e6d
a9ed3c8
3e23ed5
a9f3548
f00f59a
b34879d
e93b62d
61e8bc6
0244f58
32d7986
9855176
8d61fdb
6b87da7
812710c
3610e6e
8f69a90
cec909c
422792b
de74037
2993e3c
f3438fd
c35364e
08f72dc
62ce0ae
ff05005
20fea63
d5a3cef
e721d26
5e1c4a8
257f086
00e8ada
c9d0454
f678714
b03e38e
890dde0
80e05ba
13c8358
ab46798
67fd335
7041a8d
9df4e11
9391463
da12ecd
a6997ec
7302d7e
9dc5539
1aa8d86
ef3f2b1
4fb5a90
1fd205a
58a0715
ca4d7dd
3947aac
a395db0
1b00bde
5fdcd20
459211c
7e9bb20
247c9eb
e66abe9
f25b0b6
b10cc73
c318755
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
import re | ||
from datetime import timedelta | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
from xarray.core import pycompat | ||
from xarray.core.utils import is_scalar | ||
|
||
|
||
def named(name, pattern):
    """Wrap a regular-expression fragment in a named capture group.

    Parameters
    ----------
    name : str
        Name under which the group is captured.
    pattern : str
        Regular-expression fragment placed inside the group.

    Returns
    -------
    str
        The fragment enclosed as ``(?P<name>pattern)``.
    """
    return ''.join(['(?P<', name, '>', pattern, ')'])
|
||
|
||
def optional(x):
    """Make a regular-expression fragment optional.

    Parameters
    ----------
    x : str
        Regular-expression fragment.

    Returns
    -------
    str
        The fragment wrapped in a non-capturing group followed by ``?``.
    """
    return ''.join(['(?:', x, ')?'])
|
||
|
||
def trailing_optional(xs):
    """Chain fragments so that every fragment after the first is optional.

    Each fragment may only match when all fragments before it matched,
    because every tail is nested inside the optional group of its
    predecessor.

    Parameters
    ----------
    xs : sequence of str
        Regular-expression fragments, in matching order.

    Returns
    -------
    str
        The nested pattern, or ``''`` when ``xs`` is empty.
    """
    if not xs:
        return ''
    # Build from the innermost (last) fragment outwards.
    nested = ''
    for fragment in reversed(xs):
        nested = fragment + optional(nested)
    return nested
|
||
|
||
def build_pattern(date_sep=r'\-', datetime_sep='T', time_sep=r'\:'):
    """Build an anchored regular expression for ISO-8601-style datetimes.

    The year is mandatory. Month, day, hour, minute and second are each
    optional, but a component can only appear when every component before
    it is present. The seconds component may carry a fractional part.

    Parameters
    ----------
    date_sep : str
        Regular-expression fragment separating year, month and day.
    datetime_sep : str
        Regular-expression fragment separating the date from the time.
    time_sep : str
        Regular-expression fragment separating hour, minute and second.

    Returns
    -------
    str
        Pattern anchored with ``^`` and ``$`` whose named groups are
        ``year``, ``month``, ``day``, ``hour``, ``minute`` and ``second``.
    """
    # Raw strings keep the regex escapes literal; the non-raw spellings
    # ('\d', '\-', ...) are invalid string escape sequences.
    pieces = [(None, 'year', r'\d{4}'),
              (date_sep, 'month', r'\d{2}'),
              (date_sep, 'day', r'\d{2}'),
              (datetime_sep, 'hour', r'\d{2}'),
              (time_sep, 'minute', r'\d{2}'),
              (time_sep, 'second', r'\d{2}' + optional(r'\.\d+'))]
    pattern_list = [(sep if sep else '') + named(name, sub_pattern)
                    for sep, name, sub_pattern in pieces]
    # TODO: allow timezone offsets?
    return '^' + trailing_optional(pattern_list) + '$'
|
||
|
||
# Regular expressions tried, in list order, by parse_iso8601.
# basic_pattern uses empty date and time separators, while
# extended_pattern uses build_pattern's default separators.
basic_pattern = build_pattern(date_sep='', time_sep='') | ||
extended_pattern = build_pattern() | ||
patterns = [basic_pattern, extended_pattern] | ||
|
||
|
||
def parse_iso8601(datetime_string):
    """Split a datetime string into its named components.

    The module-level ``patterns`` are tried in order and the first one
    that matches wins.

    Parameters
    ----------
    datetime_string : str
        String to parse.

    Returns
    -------
    dict
        Mapping from component name to the matched text, or ``None`` for
        components that were not present in the string.

    Raises
    ------
    ValueError
        If none of the patterns matches.
    """
    # The generator is lazy, so matching stops at the first success.
    attempts = (re.match(pattern, datetime_string) for pattern in patterns)
    found = next((attempt for attempt in attempts if attempt), None)
    if found is None:
        raise ValueError('no ISO-8601 match for string: %s' % datetime_string)
    return found.groupdict()
|
||
|
||
def _parse_iso8601_with_reso(date_type, timestr):
    """Parse a datetime string into a date and the resolution it specifies.

    Parameters
    ----------
    date_type : type
        Date class to instantiate. It is called as ``date_type(1, 1, 1)``
        and the result must provide a ``replace`` method accepting the
        component names as keyword arguments.
    timestr : str
        String to parse with ``parse_iso8601``.

    Returns
    -------
    tuple
        ``(date, resolution)`` where ``date`` has every component present
        in ``timestr`` filled in (other components keep the values of
        ``date_type(1, 1, 1)``) and ``resolution`` is the name of the
        finest component present in ``timestr``.

    Raises
    ------
    ValueError
        Propagated from ``parse_iso8601`` when ``timestr`` does not match.
    """
    default = date_type(1, 1, 1)
    result = parse_iso8601(timestr)
    replace = {}

    for attr in ['year', 'month', 'day', 'hour', 'minute', 'second']:
        value = result.get(attr, None)
        if value is not None:
            # The seconds pattern accepts a fractional part, and int() on
            # a string such as '12.5' raises ValueError. Converting via
            # float first truncates fractional seconds down to the whole
            # second instead of failing.
            # NOTE(review): a reviewer suggested the alternative of
            # excluding fractional seconds from the regex altogether.
            # TODO: Consider adding support for sub-second resolution?
            replace[attr] = int(float(value))
            resolution = attr

    return default.replace(**replace), resolution
|
||
|
||
def _parsed_string_to_bounds(date_type, resolution, parsed): | ||
"""Generalization of | ||
pandas.tseries.index.DatetimeIndex._parsed_string_to_bounds | ||
for use with non-standard calendars and netcdftime._netcdftime.datetime | ||
objects. | ||
""" | ||
if resolution == 'year': | ||
return (date_type(parsed.year, 1, 1), | ||
date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1)) | ||
if resolution == 'month': | ||
if parsed.month == 12: | ||
end = date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1) | ||
else: | ||
end = (date_type(parsed.year, parsed.month + 1, 1) - | ||
timedelta(microseconds=1)) | ||
return date_type(parsed.year, parsed.month, 1), end | ||
if resolution == 'day': | ||
start = date_type(parsed.year, parsed.month, parsed.day) | ||
return start, start + timedelta(days=1, microseconds=-1) | ||
if resolution == 'hour': | ||
start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour) | ||
return start, start + timedelta(hours=1, microseconds=-1) | ||
if resolution == 'minute': | ||
start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, | ||
parsed.minute) | ||
return start, start + timedelta(minutes=1, microseconds=-1) | ||
if resolution == 'second': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe I'm missing something obvious here but shouldn't all these |
||
start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, | ||
parsed.minute, parsed.second) | ||
return start, start + timedelta(seconds=1, microseconds=-1) | ||
else: | ||
raise KeyError | ||
|
||
|
||
def get_date_field(datetimes, field):
    """Adapted from pandas.tslib.get_date_field

    Collect the attribute named ``field`` from every element of
    ``datetimes`` and return the values as a list, in iteration order.
    """
    values = []
    for moment in datetimes:
        values.append(getattr(moment, field))
    return values
|
||
|
||
def _field_accessor(name, docstring=None): | ||
"""Adapted from pandas.tseries.index._field_accessor""" | ||
def f(self): | ||
return get_date_field(self._data, name) | ||
|
||
f.__name__ = name | ||
f.__doc__ = docstring | ||
return property(f) | ||
|
||
|
||
def get_date_type(self):
    """Return the type of the first element of ``self._data``."""
    first = self._data[0]
    return type(first)
|
||
|
||
def assert_all_same_netcdftime_datetimes(data):
    """Check that ``data`` holds netcdftime datetimes of one single type.

    Parameters
    ----------
    data : sequence
        Objects to validate. The first element is used as the reference,
        so ``data`` must support ``data[0]``.

    Raises
    ------
    TypeError
        If the first element is not a
        ``netcdftime._netcdftime.datetime`` instance, or if any element is
        not an instance of the first element's type.
    """
    # NOTE(review): a reviewer suggested importing through the public API
    # name instead; the author replied that the public name refers to
    # something different. Confirm before changing this import.
    from netcdftime._netcdftime import datetime

    sample = data[0]
    # Computed once rather than on every iteration of the check below.
    date_type = type(sample)
    if not isinstance(sample, datetime):
        raise TypeError(
            'NetCDFTimeIndex requires netcdftime._netcdftime.datetime '
            'objects. Got object of type {!r}.'.format(date_type))
    if not all(isinstance(value, date_type) for value in data):
        raise TypeError(
            'NetCDFTimeIndex requires using netcdftime._netcdftime.datetime '
            'objects of all the same type. Got\n{!r}'.format(data))
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same concerns as above on the error message |
||
|
||
|
||
class NetCDFTimeIndex(pd.Index):
    """pandas Index of netcdftime datetime objects.

    Supports lookup and slicing with partial datetime strings (parsed by
    ``_parse_iso8601_with_reso``) in addition to the lookups inherited
    from ``pd.Index``.
    """

    def __new__(cls, data):
        """Create the index from a sequence of netcdftime datetimes.

        Raises
        ------
        TypeError
            If ``data`` fails ``assert_all_same_netcdftime_datetimes``.
        """
        result = object.__new__(cls)
        assert_all_same_netcdftime_datetimes(data)
        result._data = np.array(data)
        return result

    # Per-element field accessors; each returns a list with one entry per
    # element of the index.
    year = _field_accessor('year', 'The year of the datetime')
    month = _field_accessor('month', 'The month of the datetime')
    day = _field_accessor('day', 'The days of the datetime')
    hour = _field_accessor('hour', 'The hours of the datetime')
    minute = _field_accessor('minute', 'The minutes of the datetime')
    second = _field_accessor('second', 'The seconds of the datetime')
    microsecond = _field_accessor('microsecond',
                                  'The microseconds of the datetime')
    # Type of the first element of the index.
    date_type = property(get_date_type)

    def _partial_date_slice(self, resolution, parsed):
        """Adapted from
        pandas.tseries.index.DatetimeIndex._partial_date_slice

        Return the integer positions of all elements that fall within the
        period of the given ``resolution`` that contains ``parsed``.
        """
        # NOTE(review): the author kept this deliberately simpler than the
        # pandas original; a reviewer asked for examples of the pandas
        # behavior that is not covered here -- TODO add them.
        start, end = _parsed_string_to_bounds(self.date_type, resolution,
                                              parsed)
        lhs_mask = (self._data >= start)
        rhs_mask = (self._data <= end)
        return (lhs_mask & rhs_mask).nonzero()[0]

    def _get_string_slice(self, key):
        """Adapted from pandas.tseries.index.DatetimeIndex._get_string_slice

        Parse the string ``key`` and return the integer positions of the
        elements that fall within the period it denotes.
        """
        parsed, resolution = _parse_iso8601_with_reso(self.date_type, key)
        loc = self._partial_date_slice(resolution, parsed)
        return loc

    def get_loc(self, key, method=None, tolerance=None):
        """Adapted from pandas.tseries.index.DatetimeIndex.get_loc

        String keys are resolved with ``_get_string_slice`` (``method``
        and ``tolerance`` are not used for them); any other key is
        delegated to ``pd.Index.get_loc``.
        """
        if isinstance(key, pycompat.basestring):
            return self._get_string_slice(key)
        else:
            return pd.Index.get_loc(self, key, method=method,
                                    tolerance=tolerance)

    def _maybe_cast_slice_bound(self, label, side, kind):
        """Adapted from
        pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound

        A string ``label`` is converted to the start or end of the period
        it denotes, depending on ``side``; any other label is returned
        unchanged. ``kind`` is accepted but not used.
        """
        if isinstance(label, pycompat.basestring):
            parsed, resolution = _parse_iso8601_with_reso(self.date_type,
                                                          label)
            start, end = _parsed_string_to_bounds(self.date_type, resolution,
                                                  parsed)
            # For a non-empty decreasing index the bounds swap roles: the
            # left edge of a slice is the later instant.
            if self.is_monotonic_decreasing and len(self):
                return end if side == 'left' else start
            return start if side == 'left' else end
        else:
            return label

    # TODO: Add ability to use integer range outside of iloc?
    # e.g. series[1:5].
    def get_value(self, series, key):
        """Adapted from pandas.tseries.index.DatetimeIndex.get_value

        Look ``key`` up in this index and return the matching entries of
        ``series``, selected positionally through ``series.iloc``.
        """
        if not isinstance(key, slice):
            return series.iloc[self.get_loc(key)]
        else:
            return series.iloc[self.slice_indexer(
                key.start, key.stop, key.step)]

    def __contains__(self, key):
        """Adapted from
        pandas.tseries.base.DatetimeIndexOpsMixin.__contains__

        Return True when ``get_loc(key)`` yields a scalar, a slice, or a
        non-empty array of positions. Return False when it yields an
        empty array or raises KeyError, TypeError or ValueError.
        """
        try:
            result = self.get_loc(key)
            # bool() keeps the return type consistent; without it a
            # matching array lookup would return the integer result.size.
            return bool(is_scalar(result) or isinstance(result, slice) or
                        (isinstance(result, np.ndarray) and result.size))
        except (KeyError, TypeError, ValueError):
            return False
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use all caps for global constants, and prefix them with an underscore to indicate that they are private variables, e.g.,
_BASIC_PATTERN