-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add DatetimeAccessor for accessing datetime fields via .dt
attribute
#1356
Changes from 1 commit
4c4447d
c2bdd40
f51c707
5308642
621d144
daaacb7
0198c3d
8c38bf9
d4fcb49
9a616ef
e303e89
e35d0d0
f621ecc
74f8756
5ae4e08
0788549
d842159
426a16a
66cdb59
9c2fe26
14ac55c
d50420b
b286313
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
from .common import is_datetime_like | ||
from .extensions import register_dataarray_accessor | ||
|
||
from pandas import tslib as libts | ||
|
||
@register_dataarray_accessor('dt') | ||
class DatetimeAccessor(object): | ||
"""Access datetime fields for DataArrays with datetime-like dtypes. | ||
|
||
Similar to pandas, fields can be accessed through the `.dt` attribute | ||
for applicable DataArrays: | ||
|
||
>>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', | ||
... freq='D', periods=100)}) | ||
>>> ds.time.dt | ||
<xarray.core.accessors.DatetimeAccessor at 0x10c369f60> | ||
>>> ds.time.dt.dayofyear[5] | ||
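There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be `ds.time.dt.dayofyear[:5]` (a slice rather than a single index, to match the five-element output shown below). |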
||
<xarray.DataArray 'dayofyear' (time: 5)> | ||
array([1, 2, 3, 4, 5], dtype=int32) | ||
Coordinates: | ||
* time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... | ||
|
||
All of the pandas fields are accessible here. Note that these fields are not | ||
calendar-aware; if your datetimes are encoded with a non-Gregorian calendar | ||
(e.g. a 360-day calendar), then some fields like `dayofyear` may not be | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add "using netcdftime" just to be super explicit about where special calendars come from. |
||
accurate. | ||
|
||
""" | ||
def __init__(self, xarray_obj): | ||
if not is_datetime_like(xarray_obj.dtype): | ||
raise TypeError("'dt' accessor only available for " | ||
"DataArray with datetime64 dtype") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add "or timedelta64 dtype" |
||
self._obj = xarray_obj | ||
self._dt = None | ||
|
||
_field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need this variable anymore. |
||
'weekofyear', 'week', 'weekday', 'dayofweek', | ||
'dayofyear', 'quarter', 'days_in_month', | ||
'daysinmonth', 'microsecond', | ||
'nanosecond'] | ||
|
||
@property | ||
def dt(self): | ||
"""Attribute to cache a view of the underlying datetime-like | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we want to expose this as public-facing API, so I would probably get rid of this property. |
||
array for passing to pandas.tslib for date_field operations | ||
""" | ||
if self._dt is None: | ||
datetimes_asi8 = self._obj.values.view('i8') | ||
self._dt = datetimes_asi8 | ||
return self._dt | ||
|
||
# Modified from https://github.com/pandas-dev/pandas/pandas/tseries/index.py#L59 | ||
def _tslib_field_accessor(name, field, docstring=None): | ||
def f(self): | ||
from .dataarray import DataArray | ||
values = self.dt | ||
result = libts.get_date_field(values, field) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is using private pandas API, so it's liable to break in the future. Instead, let's wrap things in a `pd.Series` and go through its public `.dt` accessor:
def get_dt_field(array: np.ndarray, name: str):
    series = pd.Series(array.ravel())
    field_values = getattr(series.dt, name).values
    return field_values.reshape(array.shape) |
||
return DataArray(result, name=name, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can also just do |
||
coords=self._obj.coords, dims=self._obj.dims) | ||
|
||
f.__name__ = name | ||
f.__doc__ = docstring | ||
return property(f) | ||
|
||
year = _tslib_field_accessor('year', 'Y', "The year of the datetime") | ||
month = _tslib_field_accessor( | ||
'month', 'M', "The month as January=1, December=12" | ||
) | ||
day = _tslib_field_accessor('day', 'D', "The days of the datetime") | ||
hour = _tslib_field_accessor('hour', 'h', "The hours of the datetime") | ||
minute = _tslib_field_accessor('minute', 'm', "The minutes of the datetime") | ||
second = _tslib_field_accessor('second', 's', "The seconds of the datetime") | ||
microsecond = _tslib_field_accessor( | ||
'microsecond', 'us', "The microseconds of the datetime" | ||
) | ||
nanosecond = _tslib_field_accessor( | ||
'nanosecond', 'ns', "The nanoseconds of the datetime" | ||
) | ||
weekofyear = _tslib_field_accessor( | ||
'weekofyear', 'woy', "The week ordinal of the year" | ||
) | ||
week = weekofyear | ||
dayofweek = _tslib_field_accessor( | ||
'dayofweek', 'dow', "The day of the week with Monday=0, Sunday=6" | ||
) | ||
weekday = dayofweek | ||
|
||
weekday_name = _tslib_field_accessor( | ||
'weekday_name', 'weekday_name', | ||
"The name of day in a week (ex: Friday)" | ||
) | ||
|
||
dayofyear = _tslib_field_accessor( | ||
'dayofyear', 'doy', "The ordinal day of the year" | ||
) | ||
quarter = _tslib_field_accessor('quarter', 'q', "The quarter of the date") | ||
days_in_month = _tslib_field_accessor( | ||
'days_in_month', 'dim', "The number of days in the month" | ||
) | ||
daysinmonth = days_in_month |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
try: | ||
import cPickle as pickle | ||
except ImportError: | ||
import pickle | ||
|
||
import xarray as xr | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from . import TestCase | ||
|
||
|
||
class TestDatetimeAccessor(TestCase): | ||
def setUp(self): | ||
nt = 10000 | ||
data = np.random.rand(10, 10, nt) | ||
lons = np.linspace(0, 11, 10) | ||
lats = np.linspace(0, 20, 10) | ||
self.times = pd.date_range(start="2000/01/01", freq='H', periods=nt) | ||
|
||
self.data = xr.DataArray(data, coords=[lons, lats, self.times], | ||
dims=['lon', 'lat', 'time'], name='data') | ||
|
||
def test_field_access(self): | ||
years = self.times.year | ||
months = self.times.month | ||
days = self.times.day | ||
hours = self.times.hour | ||
|
||
self.assertArrayEqual(years, self.data.time.dt.year) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
self.assertArrayEqual(months, self.data.time.dt.month) | ||
self.assertArrayEqual(days, self.data.time.dt.day) | ||
self.assertArrayEqual(hours, self.data.time.dt.hour) | ||
|
||
def test_not_datetime_type(self): | ||
nontime_data = self.data.copy() | ||
nontime_data['time'].values = \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please avoid explicit backslashes for line continuation if possible, per PEP 8. |
||
np.arange(len(self.data.time)).astype('int8') | ||
with self.assertRaisesRegexp(TypeError, 'dt'): | ||
nontime_data.time.dt.year | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. missing newline on last line |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another thought: since this isn't being defined outside of xarray, it might actually make sense to import this class from
dataarray.py
and simply add it to the DataArray class directly. That has the advantage of being more transparent about where it comes from.