From 849eb186f93b1e1da1ecca67329ed7bf190e7d4c Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 6 Mar 2019 14:47:47 -0500 Subject: [PATCH] Add support for cftime.datetime coordinates with coarsen (#2778) --- doc/whats-new.rst | 4 ++++ xarray/core/common.py | 13 +++++++++---- xarray/core/duck_array_ops.py | 26 +++++++++++++++++++++++--- xarray/tests/test_dataset.py | 11 ++++++++++- xarray/tests/test_duck_array_ops.py | 25 +++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3ebd4001ae5..9ef2960ad76 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,6 +45,10 @@ Enhancements See :ref:`comput.coarsen` for details. (:issue:`2525`) By `Keisuke Fujii <https://github.com/fujiisoup>`_. +- Taking the mean of arrays of :py:class:`cftime.datetime` objects, and + by extension, use of :py:meth:`~xarray.DataArray.coarsen` with + :py:class:`cftime.datetime` coordinates is now possible. By `Spencer Clark + <https://github.com/spencerkclark>`_. - Upsampling an array via interpolation with resample is now dask-compatible, as long as the array is not chunked along the resampling dimension. By `Spencer Clark <https://github.com/spencerkclark>`_. 
diff --git a/xarray/core/common.py b/xarray/core/common.py index 2f32ca941be..6ec07156160 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -997,15 +997,15 @@ def is_np_datetime_like(dtype): np.issubdtype(dtype, np.timedelta64)) -def contains_cftime_datetimes(var): - """Check if a variable contains cftime datetime objects""" +def _contains_cftime_datetimes(array): + """Check if an array contains cftime.datetime objects""" try: from cftime import datetime as cftime_datetime except ImportError: return False else: - if var.dtype == np.dtype('O') and var.data.size > 0: - sample = var.data.ravel()[0] + if array.dtype == np.dtype('O') and array.size > 0: + sample = array.ravel()[0] if isinstance(sample, dask_array_type): sample = sample.compute() if isinstance(sample, np.ndarray): @@ -1015,6 +1015,11 @@ def contains_cftime_datetimes(var): return False +def contains_cftime_datetimes(var): + """Check if an xarray.Variable contains cftime.datetime objects""" + return _contains_cftime_datetimes(var.data) + + def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or cftime.datetime)""" diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 4d6d716a164..b67a220ed4c 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -294,7 +294,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): Parameters ---------- - da : array + da : np.array Input data offset: Scalar with the same type of array or None If None, subtract minimum values to reduce round off error @@ -306,6 +306,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): ------- array """ + # TODO: make this function dask-compatible? 
if offset is None: offset = array.min() array = array - offset @@ -326,15 +327,34 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): return array.astype(dtype) +def _to_pytimedelta(array, unit='us'): + index = pd.TimedeltaIndex(array.ravel(), unit=unit) + return index.to_pytimedelta().reshape(array.shape) + + def mean(array, axis=None, skipna=None, **kwargs): - """ inhouse mean that can handle datatime dtype """ + """inhouse mean that can handle np.datetime64 or cftime.datetime + dtypes""" + from .common import _contains_cftime_datetimes + array = asarray(array) if array.dtype.kind in 'Mm': offset = min(array) - # xarray always uses datetime[ns] for datetime + # xarray always uses np.datetime64[ns] for np.datetime64 data dtype = 'timedelta64[ns]' return _mean(datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs).astype(dtype) + offset + elif _contains_cftime_datetimes(array): + if isinstance(array, dask_array_type): + raise NotImplementedError( + 'Computing the mean of an array containing ' + 'cftime.datetime objects is not yet implemented on ' + 'dask arrays.') + offset = min(array) + timedeltas = datetime_to_numeric(array, offset, datetime_unit='us') + mean_timedeltas = _mean(timedeltas, axis=axis, skipna=skipna, + **kwargs) + return _to_pytimedelta(mean_timedeltas, unit='us') + offset else: return _mean(array, axis=axis, skipna=skipna, **kwargs) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d130363a7c0..8e8c6c4b419 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -23,7 +23,7 @@ InaccessibleArray, UnexpectedDataAccess, assert_allclose, assert_array_equal, assert_equal, assert_identical, has_cftime, has_dask, raises_regex, requires_bottleneck, requires_dask, requires_scipy, - source_ndarray) + source_ndarray, requires_cftime) try: import dask.array as da @@ -4530,6 +4530,15 @@ def test_coarsen_coords(ds, dask): actual = da.coarsen(time=2).mean() 
+@requires_cftime +def test_coarsen_coords_cftime(): + times = xr.cftime_range('2000', periods=6) + da = xr.DataArray(range(6), [('time', times)]) + actual = da.coarsen(time=3).mean() + expected_times = xr.cftime_range('2000-01-02', freq='3D', periods=2) + np.testing.assert_array_equal(actual.time, expected_times) + + def test_rolling_properties(ds): # catching invalid args with pytest.raises(ValueError) as exception: diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ab3cafed449..5d425f648bd 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -270,6 +270,31 @@ def test_datetime_reduce(dask): assert da['time'][0].mean() == da['time'][:1].mean() +@requires_cftime +def test_cftime_datetime_mean(): + times = cftime_range('2000', periods=4) + da = DataArray(times, dims=['time']) + + assert da.isel(time=0).mean() == da.isel(time=0) + + expected = DataArray(times.date_type(2000, 1, 2, 12)) + result = da.mean() + assert_equal(result, expected) + + da_2d = DataArray(times.values.reshape(2, 2)) + result = da_2d.mean() + assert_equal(result, expected) + + +@requires_cftime +@requires_dask +def test_cftime_datetime_mean_dask_error(): + times = cftime_range('2000', periods=4) + da = DataArray(times, dims=['time']).chunk() + with pytest.raises(NotImplementedError): + da.mean() + + @pytest.mark.parametrize('dim_num', [1, 2]) @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) @pytest.mark.parametrize('dask', [False, True])