-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add DatetimeAccessor for accessing datetime fields via `.dt` attribute (#1356)

* Add DatetimeAccessor for accessing datetime fields via `.dt` attribute
* Cleaning up unit tests
* Cleaning up comments and warnings in accessors
* Indirectly access pandas tslib through Series accessors
* Re-factor injection of datetime field accessor properties
* Undo loop/injection of _get_date_field accessors
* Remove public-facing dt property
* Remove extra 'field' argument from _tslib_field_accessor
* Added support for dask arrays
* Added dask test cases. Fixed a bug where data wasn't computed in correct order
* Simplified _get_date_field for both dask/numpy arrays; additional code review cleanups
* Fixing flake8 complaints
* Adding whats-new entry
* Updated timeseries docs with note about dt accessor
* Moved season accessor to DatetimeAccessor
* Re-factor virtual variable logic to lean on DateTimeAccessor
* Added "Returns" documentation to _get_date_field. Fixed imports to facilitate more direct implementation of DateTimeAccessor as a property in DataArray. Moved _access_through_series to a top-level function in accessors.py so that dask serialization will hopefully work a bit better
* Adding timestamp accessor
* Hard-coding expected dtypes for each datetime field
* Fix typo in non-datetime virtual variable access
* Update What's New and timeseries docs
- Loading branch information
Showing
8 changed files
with
287 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
from .common import is_datetime_like | ||
from .pycompat import dask_array_type | ||
|
||
from functools import partial | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
def _season_from_months(months): | ||
"""Compute season (DJF, MAM, JJA, SON) from month ordinal | ||
""" | ||
# TODO: Move "season" accessor upstream into pandas | ||
seasons = np.array(['DJF', 'MAM', 'JJA', 'SON']) | ||
months = np.asarray(months) | ||
return seasons[(months // 3) % 4] | ||
|
||
|
||
def _access_through_series(values, name):
    """Look up a datetime component by round-tripping through pandas.

    The input is flattened into a ``pd.Series`` so that the ``.dt``
    accessor can be used, and the result is reshaped back to the shape
    of ``values``.

    Parameters
    ----------
    values : array-like exposing ``ravel()`` and ``shape``
        Datetime-like values.
    name : str
        Attribute to read from the Series ``.dt`` accessor, or the
        special value "season", which is derived from the month.

    Returns
    -------
    Array of the requested component with the same shape as ``values``.
    """
    datetime_props = pd.Series(values.ravel()).dt
    if name != "season":
        component = getattr(datetime_props, name).values
    else:
        # "season" is not a pandas field; compute it from the month numbers.
        component = _season_from_months(datetime_props.month.values)
    return component.reshape(values.shape)
|
||
|
||
def _get_date_field(values, name, dtype):
    """Extract a datetime field from a numpy or dask array.

    The work is delegated to ``_access_through_series``, which wraps the
    data in a pandas Series and reads the field through ``.dt``. Dask
    arrays are processed block by block via ``map_blocks``.

    Parameters
    ----------
    values : np.ndarray or dask.array-like
        Array-like container of datetime-like values
    name : str
        Name of datetime field to access
    dtype : dtype-like
        dtype for output date field values; only forwarded to
        ``map_blocks`` on the dask path

    Returns
    -------
    datetime_fields : same type as values
        Array-like of datetime fields accessed for each element in values
    """
    if not isinstance(values, dask_array_type):
        return _access_through_series(values, name)

    # Imported here so dask is only needed when a dask array is passed in.
    from dask.array import map_blocks
    return map_blocks(_access_through_series, values, name, dtype=dtype)
|
||
|
||
class DatetimeAccessor(object):
    """Access datetime fields for DataArrays with datetime-like dtypes.

    Similar to pandas, fields can be accessed through the `.dt` attribute
    for applicable DataArrays:

       >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01',
       ...                                            freq='D', periods=100)})
       >>> ds.time.dt
       <xarray.core.accessors.DatetimeAccessor at 0x10c369f60>
       >>> ds.time.dt.dayofyear[:5]
       <xarray.DataArray 'dayofyear' (time: 5)>
       array([1, 2, 3, 4, 5], dtype=int32)
       Coordinates:
         * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...

    All of the pandas fields are accessible here. Note that these fields are
    not calendar-aware; if your datetimes are encoded with a non-Gregorian
    calendar (e.g. a 360-day calendar) using netcdftime, then some fields like
    `dayofyear` may not be accurate.

    Parameters
    ----------
    xarray_obj : object with ``dtype``, ``data``, ``coords`` and ``dims``
        The array whose datetime fields are exposed.

    Raises
    ------
    TypeError
        If ``xarray_obj.dtype`` is not datetime-like according to
        ``is_datetime_like``.
    """
    def __init__(self, xarray_obj):
        if not is_datetime_like(xarray_obj.dtype):
            raise TypeError("'dt' accessor only available for "
                            "DataArray with datetime64 or timedelta64 dtype")
        self._obj = xarray_obj

    def _tslib_field_accessor(name, docstring=None, dtype=None):
        """Build a read-only property returning the datetime field `name`.

        This is a plain helper called while the class body is executed,
        not an instance method. `dtype` is the dtype declared for the
        output; when it is None the dtype of the wrapped object is used.
        """
        def f(self, dtype=dtype):
            if dtype is None:
                dtype = self._obj.dtype
            obj_type = type(self._obj)
            result = _get_date_field(self._obj.data, name, dtype)
            # Re-wrap the raw field values in the same container type,
            # keeping the original coordinates and dimensions.
            return obj_type(result, name=name,
                            coords=self._obj.coords, dims=self._obj.dims)

        f.__name__ = name
        f.__doc__ = docstring
        return property(f)

    year = _tslib_field_accessor('year', "The year of the datetime", np.int64)
    month = _tslib_field_accessor(
        'month', "The month as January=1, December=12", np.int64
    )
    day = _tslib_field_accessor('day', "The days of the datetime", np.int64)
    hour = _tslib_field_accessor('hour', "The hours of the datetime", np.int64)
    minute = _tslib_field_accessor(
        'minute', "The minutes of the datetime", np.int64
    )
    second = _tslib_field_accessor(
        'second', "The seconds of the datetime", np.int64
    )
    microsecond = _tslib_field_accessor(
        'microsecond', "The microseconds of the datetime", np.int64
    )
    nanosecond = _tslib_field_accessor(
        'nanosecond', "The nanoseconds of the datetime", np.int64
    )
    weekofyear = _tslib_field_accessor(
        'weekofyear', "The week ordinal of the year", np.int64
    )
    week = weekofyear
    dayofweek = _tslib_field_accessor(
        'dayofweek', "The day of the week with Monday=0, Sunday=6", np.int64
    )
    weekday = dayofweek

    weekday_name = _tslib_field_accessor(
        'weekday_name', "The name of day in a week (ex: Friday)", object
    )

    dayofyear = _tslib_field_accessor(
        'dayofyear', "The ordinal day of the year", np.int64
    )
    # Declare an integer dtype like every other integer field; without it
    # the declared dtype falls back to the datetime dtype of the wrapped
    # object, which mislabels the lazily evaluated dask result.
    quarter = _tslib_field_accessor(
        'quarter', "The quarter of the date", np.int64
    )
    days_in_month = _tslib_field_accessor(
        'days_in_month', "The number of days in the month", np.int64
    )
    daysinmonth = days_in_month

    season = _tslib_field_accessor(
        "season", "Season of the year (ex: DJF)", object
    )

    time = _tslib_field_accessor(
        "time", "Timestamps corresponding to datetimes", object
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import xarray as xr | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from . import TestCase, requires_dask | ||
|
||
|
||
class TestDatetimeAccessor(TestCase):
    """Tests for the `.dt` accessor on DataArrays (numpy and dask backed)."""

    def setUp(self):
        # Fixtures: a float cube with an hourly time coordinate, plus a
        # cube whose *values* are datetimes sampled from that coordinate.
        n_times = 100
        cube = np.random.rand(10, 10, n_times)
        lon_values = np.linspace(0, 11, 10)
        lat_values = np.linspace(0, 20, 10)
        self.times = pd.date_range(start="2000/01/01", freq='H',
                                   periods=n_times)

        self.data = xr.DataArray(cube,
                                 coords=[lon_values, lat_values, self.times],
                                 dims=['lon', 'lat', 'time'], name='data')

        self.times_arr = np.random.choice(self.times,
                                          size=(10, 10, n_times))
        self.times_data = xr.DataArray(
            self.times_arr,
            coords=[lon_values, lat_values, self.times],
            dims=['lon', 'lat', 'time'],
            name='data')

    def test_field_access(self):
        # Each accessor field must match the same attribute of the
        # underlying pandas index.
        for field in ('year', 'month', 'day', 'hour'):
            expected = xr.DataArray(getattr(self.times, field), name=field,
                                    coords=[self.times, ], dims=['time', ])
            actual = getattr(self.data.time.dt, field)
            self.assertDataArrayEqual(expected, actual)

    def test_not_datetime_type(self):
        # Overwrite the time coordinate with integers; `.dt` must refuse it.
        nontime_data = self.data.copy()
        n_steps = len(self.data.time)
        nontime_data['time'].values = np.arange(n_steps).astype('int8')
        with self.assertRaisesRegexp(TypeError, 'dt'):
            nontime_data.time.dt

    @requires_dask
    def test_dask_field_access(self):
        import dask.array as da

        fields = ('year', 'month', 'day', 'hour')

        # Reference results from the numpy-backed array.
        eager = {field: getattr(self.times_data.dt, field)
                 for field in fields}

        dask_values = da.from_array(self.times_arr, chunks=(5, 5, 50))
        dask_times = xr.DataArray(dask_values,
                                  coords=self.data.coords,
                                  dims=self.data.dims,
                                  name='data')
        lazy = {field: getattr(dask_times.dt, field) for field in fields}

        # Test that the data isn't eagerly evaluated
        for field in fields:
            assert isinstance(lazy[field].data, da.Array)

        # Double check that outcome chunksize is unchanged
        expected_chunks = dask_times.chunks
        for field in fields:
            self.assertEqual(lazy[field].data.chunks, expected_chunks)

        # Check the actual output from the accessors
        for field in fields:
            self.assertDataArrayEqual(eager[field], lazy[field].compute())

    def test_seasons(self):
        # Twelve month-end dates in 2000, one per calendar month.
        month_ends = xr.DataArray(
            pd.date_range(start="2000/01/01", freq="M", periods=12))
        expected = xr.DataArray(
            ["DJF", "DJF", "MAM", "MAM", "MAM", "JJA", "JJA", "JJA",
             "SON", "SON", "SON", "DJF"])

        self.assertArrayEqual(expected.values, month_ends.dt.season.values)