Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for cftime.datetime coordinates with coarsen #2778

Merged
merged 1 commit into from
Mar 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ Enhancements
See :ref:`comput.coarsen` for details.
(:issue:`2525`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Taking the mean of arrays of :py:class:`cftime.datetime` objects, and
by extension, use of :py:meth:`~xarray.DataArray.coarsen` with
:py:class:`cftime.datetime` coordinates is now possible. By `Spencer Clark
<https://github.com/spencerkclark>`_.
- Upsampling an array via interpolation with resample is now dask-compatible,
as long as the array is not chunked along the resampling dimension.
By `Spencer Clark <https://github.com/spencerkclark>`_.
Expand Down
13 changes: 9 additions & 4 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,15 +997,15 @@ def is_np_datetime_like(dtype):
np.issubdtype(dtype, np.timedelta64))


def contains_cftime_datetimes(var):
"""Check if a variable contains cftime datetime objects"""
def _contains_cftime_datetimes(array):
"""Check if an array contains cftime.datetime objects"""
try:
from cftime import datetime as cftime_datetime
except ImportError:
return False
else:
if var.dtype == np.dtype('O') and var.data.size > 0:
sample = var.data.ravel()[0]
if array.dtype == np.dtype('O') and array.size > 0:
sample = array.ravel()[0]
if isinstance(sample, dask_array_type):
sample = sample.compute()
if isinstance(sample, np.ndarray):
Expand All @@ -1015,6 +1015,11 @@ def contains_cftime_datetimes(var):
return False


def contains_cftime_datetimes(var):
    """Report whether an xarray.Variable holds cftime.datetime objects.

    Thin wrapper that hands ``var.data`` to ``_contains_cftime_datetimes``.
    """
    data = var.data
    return _contains_cftime_datetimes(data)


def _contains_datetime_like_objects(var):
"""Check if a variable contains datetime like objects (either
np.datetime64, np.timedelta64, or cftime.datetime)"""
Expand Down
26 changes: 23 additions & 3 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):

Parameters
----------
da : array
array : np.array
Input data
offset: Scalar with the same type of array or None
If None, subtract minimum values to reduce round off error
Expand All @@ -306,6 +306,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
-------
array
"""
# TODO: make this function dask-compatible?
if offset is None:
offset = array.min()
array = array - offset
Expand All @@ -326,15 +327,34 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
return array.astype(dtype)


def _to_pytimedelta(array, unit='us'):
    """Convert numeric durations to an array of ``datetime.timedelta``.

    Parameters
    ----------
    array : np.ndarray or numpy scalar
        Durations expressed as multiples of ``unit``.
    unit : str, optional
        pandas time-unit code used to interpret ``array`` (default ``'us'``).

    Returns
    -------
    np.ndarray
        Object array of ``datetime.timedelta`` with the shape of ``array``.
    """
    # The ``unit`` keyword of the ``pd.TimedeltaIndex`` constructor is
    # deprecated in pandas; ``pd.to_timedelta`` is the supported way to
    # interpret plain numbers as durations.
    index = pd.to_timedelta(array.ravel(), unit=unit)
    return index.to_pytimedelta().reshape(array.shape)


def mean(array, axis=None, skipna=None, **kwargs):
    """In-house mean that also handles np.datetime64, np.timedelta64 and
    cftime.datetime data.

    Datetime-like values are turned into numeric offsets from ``min(array)``,
    averaged with ``_mean``, then converted back and shifted by that offset.
    Any other data is passed straight to ``_mean``.

    Raises
    ------
    NotImplementedError
        If the array contains cftime.datetime objects and is a dask array.
    """
    from .common import _contains_cftime_datetimes

    array = asarray(array)

    if array.dtype.kind in 'Mm':
        # xarray always uses np.datetime64[ns] for np.datetime64 data
        origin = min(array)
        numeric = datetime_to_numeric(array, origin)
        averaged = _mean(numeric, axis=axis, skipna=skipna, **kwargs)
        return averaged.astype('timedelta64[ns]') + origin

    if not _contains_cftime_datetimes(array):
        return _mean(array, axis=axis, skipna=skipna, **kwargs)

    if isinstance(array, dask_array_type):
        raise NotImplementedError(
            'Computing the mean of an array containing '
            'cftime.datetime objects is not yet implemented on '
            'dask arrays.')

    # cftime.datetime objects: average the microsecond offsets, then rebuild
    # datetime.timedelta values so they can be added back onto the offset.
    origin = min(array)
    microseconds = datetime_to_numeric(array, origin, datetime_unit='us')
    averaged = _mean(microseconds, axis=axis, skipna=skipna, **kwargs)
    return _to_pytimedelta(averaged, unit='us') + origin

Expand Down
11 changes: 10 additions & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
InaccessibleArray, UnexpectedDataAccess, assert_allclose,
assert_array_equal, assert_equal, assert_identical, has_cftime, has_dask,
raises_regex, requires_bottleneck, requires_dask, requires_scipy,
source_ndarray)
source_ndarray, requires_cftime)

try:
import dask.array as da
Expand Down Expand Up @@ -4510,6 +4510,15 @@ def test_coarsen_coords(ds, dask):
actual = da.coarsen(time=2).mean()


@requires_cftime
def test_coarsen_coords_cftime():
    """coarsen(...).mean() also averages a cftime.datetime coordinate."""
    dates = xr.cftime_range('2000', periods=6)
    # Named ``array`` rather than ``da`` so the local does not hide the
    # module-level ``dask.array`` alias.
    array = xr.DataArray(range(6), [('time', dates)])
    coarsened = array.coarsen(time=3).mean()
    expected_times = xr.cftime_range('2000-01-02', freq='3D', periods=2)
    np.testing.assert_array_equal(coarsened.time, expected_times)


def test_rolling_properties(ds):
# catching invalid args
with pytest.raises(ValueError) as exception:
Expand Down
25 changes: 25 additions & 0 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,31 @@ def test_datetime_reduce(dask):
assert da['time'][0].mean() == da['time'][:1].mean()


@requires_cftime
def test_cftime_datetime_mean():
    """mean() of cftime.datetime data for scalar, 1-D and 2-D inputs."""
    dates = cftime_range('2000', periods=4)
    array_1d = DataArray(dates, dims=['time'])

    # The mean of a single element is that element itself.
    first = array_1d.isel(time=0)
    assert first.mean() == first

    expected = DataArray(dates.date_type(2000, 1, 2, 12))
    assert_equal(array_1d.mean(), expected)

    # The same four values arranged as 2x2 must give the same overall mean.
    array_2d = DataArray(dates.values.reshape(2, 2))
    assert_equal(array_2d.mean(), expected)


@requires_cftime
@requires_dask
def test_cftime_datetime_mean_dask_error():
    """mean() on chunked cftime.datetime data raises NotImplementedError."""
    dates = cftime_range('2000', periods=4)
    chunked = DataArray(dates, dims=['time']).chunk()
    with pytest.raises(NotImplementedError):
        chunked.mean()


@pytest.mark.parametrize('dim_num', [1, 2])
@pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
@pytest.mark.parametrize('dask', [False, True])
Expand Down