From 4cd56a9edb083a3eb8d11e7a367dfb9bda76fc2e Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Mon, 11 Feb 2019 18:47:08 +0900 Subject: [PATCH] fix datetime_to_numeric and Variable._to_numeric (#2668) * WIP: fix regression about datetime_to_numeric * Workaround for object array * added a whatsnew * rearrange tests * lint * Added Variable._to_numeric * Fix for cftime * Update via comments * lint * Fix via comment * Fix errors * lint --- xarray/core/dataset.py | 12 ++++---- xarray/core/duck_array_ops.py | 43 +++++++++++++++++++++++++-- xarray/core/missing.py | 11 ++++--- xarray/core/utils.py | 34 --------------------- xarray/core/variable.py | 8 +++++ xarray/tests/test_dataset.py | 9 +++--- xarray/tests/test_duck_array_ops.py | 46 +++++++++++++++++++++++++++-- xarray/tests/test_interp.py | 13 ++++++++ xarray/tests/test_utils.py | 39 ------------------------ 9 files changed, 120 insertions(+), 95 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 636cff2c3ec..7bb085848ef 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -28,6 +28,7 @@ DatasetCoordinates, LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers, ) +from .duck_array_ops import datetime_to_numeric from .indexes import Indexes, default_indexes, isel_variable_and_index from .merge import ( dataset_merge_method, dataset_update_method, merge_data_and_coords, @@ -35,9 +36,9 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .utils import ( - Frozen, SortedKeysDict, _check_inplace, datetime_to_numeric, - decode_numpy_dict_values, either_dict_or_kwargs, hashable, - maybe_wrap_array) + Frozen, SortedKeysDict, _check_inplace, + decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution, + hashable, maybe_wrap_array) from .variable import IndexVariable, Variable, as_variable, broadcast_variables if TYPE_CHECKING: from .dataarray import DataArray @@ -3997,15 +3998,14 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): datetime_unit, _ = np.datetime_data(coord_var.dtype) elif datetime_unit is None: datetime_unit = 's' # Default to seconds for cftime objects - coord_var = datetime_to_numeric( - coord_var, datetime_unit=datetime_unit) + coord_var = coord_var._to_numeric(datetime_unit=datetime_unit) variables = OrderedDict() for k, v in self.variables.items(): if (k in self.data_vars and dim in v.dims and k not in self.coords): if _contains_datetime_like_objects(v): - v = datetime_to_numeric(v, datetime_unit=datetime_unit) + v = v._to_numeric(datetime_unit=datetime_unit) grad = duck_array_ops.gradient( v.data, coord_var, edge_order=edge_order, axis=v.get_axis_num(dim)) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 36c4090297d..4d6d716a164 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from . import dask_array_ops, dtypes, npcompat, nputils, utils +from . import dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type @@ -289,14 +289,51 @@ def f(values, axis=None, skipna=None, **kwargs): _mean = _create_nan_agg_method('mean') +def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): + """Convert an array containing datetime-like data to an array of floats. + + Parameters + ---------- + da : array + Input data + offset: Scalar with the same type of array or None + If None, subtract minimum values to reduce round off error + datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', + 'us', 'ns', 'ps', 'fs', 'as'} + dtype: target dtype + + Returns + ------- + array + """ + if offset is None: + offset = array.min() + array = array - offset + + if not hasattr(array, 'dtype'): # scalar is converted to 0d-array + array = np.array(array) + + if array.dtype.kind in 'O': + # possibly convert object array containing datetime.timedelta + array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) + + if datetime_unit: + array = array / np.timedelta64(1, datetime_unit) + + # convert np.NaT to np.nan + if array.dtype.kind in 'mM': + return np.where(isnull(array), np.nan, array.astype(dtype)) + return array.astype(dtype) + + def mean(array, axis=None, skipna=None, **kwargs): """ inhouse mean that can handle datatime dtype """ array = asarray(array) - if array.dtype.kind == 'M': + if array.dtype.kind in 'Mm': offset = min(array) # xarray always uses datetime[ns] for datetime dtype = 'timedelta64[ns]' - return _mean(utils.datetime_to_numeric(array, offset), axis=axis, + return _mean(datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs).astype(dtype) + offset else: return _mean(array, axis=axis, skipna=skipna, **kwargs) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 4c9435e0bf4..50c420206cd 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -9,8 +9,8 @@ from . import utils from .common import _contains_datetime_like_objects from .computation import apply_ufunc -from .duck_array_ops import dask_array_type -from .utils import OrderedSet, datetime_to_numeric, is_scalar +from .duck_array_ops import dask_array_type, datetime_to_numeric +from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -411,10 +411,9 @@ def _floatize_x(x, new_x): # We assume that the most of the bits are used to represent the # offset (min(x)) and the variation (x - min(x)) can be # represented by float. - xmin = x[i].min() - x[i] = datetime_to_numeric(x[i], offset=xmin, dtype=np.float64) - new_x[i] = datetime_to_numeric( - new_x[i], offset=xmin, dtype=np.float64) + xmin = x[i].values.min() + x[i] = x[i]._to_numeric(offset=xmin, dtype=np.float64) + new_x[i] = new_x[i]._to_numeric(offset=xmin, dtype=np.float64) return x, new_x diff --git a/xarray/core/utils.py b/xarray/core/utils.py index b8e818693c4..053a45f01cb 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -603,40 +603,6 @@ def __len__(self): return len(self._data) - num_hidden -def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): - """Convert an array containing datetime-like data to an array of floats. - - Parameters - ---------- - da : array - Input data - offset: Scalar with the same type of array or None - If None, subtract minimum values to reduce round off error - datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', - 'us', 'ns', 'ps', 'fs', 'as'} - dtype: target dtype - - Returns - ------- - array - """ - from . import duck_array_ops - - if offset is None: - offset = array.min() - array = array - offset - - if datetime_unit: - array = array / np.timedelta64(1, datetime_unit) - # convert np.NaT to np.nan - if array.dtype.kind in 'mM': - if hasattr(array, 'isnull'): - return np.where(array.isnull(), np.nan, array.astype(dtype)) - return np.where(duck_array_ops.isnull(array), np.nan, - array.astype(dtype)) - return array - - def get_temp_dimname(dims, new_dim): """ Get an new dimension name based on new_dim, that is not used in dims. If the same name exists, we add an underscore(s) in the head. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a35f8cf02f0..85eab294619 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1782,6 +1782,14 @@ def func(self, other): return self return func + def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): + """ A (private) method to convert datetime array to numeric dtype + See duck_array_ops.datetime_to_numeric + """ + numeric_array = duck_array_ops.datetime_to_numeric( + self.data, offset, datetime_unit, dtype) + return type(self)(self.dims, numeric_array, self._attrs) + ops.inject_all_ops_and_reduce_methods(Variable) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 463c6756268..e4ffdad4260 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -16,7 +16,7 @@ ALL_DIMS, DataArray, Dataset, IndexVariable, MergeError, Variable, align, backends, broadcast, open_dataset, set_options) from xarray.core import dtypes, indexing, npcompat, utils -from xarray.core.common import full_like +from xarray.core.common import duck_array_ops, full_like from xarray.core.pycompat import integer_types from . import ( @@ -4676,7 +4676,7 @@ def test_differentiate_datetime(dask): actual = da.differentiate('x', edge_order=1, datetime_unit='D') expected_x = xr.DataArray( npcompat.gradient( - da, utils.datetime_to_numeric(da['x'], datetime_unit='D'), + da, da['x'].variable._to_numeric(datetime_unit='D'), axis=0, edge_order=1), dims=da.dims, coords=da.coords) assert_equal(expected_x, actual) @@ -4710,7 +4710,7 @@ def test_differentiate_cftime(dask): actual = da.differentiate('time', edge_order=1, datetime_unit='D') expected_data = npcompat.gradient( - da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), + da, da['time'].variable._to_numeric(datetime_unit='D'), axis=0, edge_order=1) expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims) assert_equal(expected, actual) @@ -4789,7 +4789,8 @@ def test_trapz_datetime(dask, which_datetime): actual = da.integrate('time', datetime_unit='D') expected_data = np.trapz( - da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), axis=0) + da, duck_array_ops.datetime_to_numeric(da['time'], datetime_unit='D'), + axis=0) expected = xr.DataArray( expected_data, dims=['y'], coords={k: v for k, v in da.coords.items() if 'time' not in v.dims}) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ba7f6ba5db8..ab3cafed449 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -7,16 +7,17 @@ import pytest from numpy import array, nan -from xarray import DataArray, Dataset, concat +from xarray import DataArray, Dataset, concat, cftime_range from xarray.core import dtypes, duck_array_ops from xarray.core.duck_array_ops import ( array_notnull_equiv, concatenate, count, first, gradient, last, mean, rolling_window, stack, where) from xarray.core.pycompat import dask_array_type -from xarray.testing import assert_allclose, assert_equal +from xarray.testing import assert_allclose, assert_equal, assert_identical from . import ( - assert_array_equal, has_dask, has_np113, raises_regex, requires_dask) + assert_array_equal, has_dask, has_np113, raises_regex, requires_cftime, + requires_dask) class TestOps(object): @@ -569,3 +570,42 @@ def test_docs(): indicated dimension(s) removed. """) assert actual == expected + + +def test_datetime_to_numeric_datetime64(): + times = pd.date_range('2000', periods=5, freq='7D').values + result = duck_array_ops.datetime_to_numeric(times, datetime_unit='h') + expected = 24 * np.arange(0, 35, 7) + np.testing.assert_array_equal(result, expected) + + offset = times[1] + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit='h') + expected = 24 * np.arange(-7, 28, 7) + np.testing.assert_array_equal(result, expected) + + dtype = np.float32 + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit='h', dtype=dtype) + expected = 24 * np.arange(0, 35, 7).astype(dtype) + np.testing.assert_array_equal(result, expected) + + +@requires_cftime +def test_datetime_to_numeric_cftime(): + times = cftime_range('2000', periods=5, freq='7D').values + result = duck_array_ops.datetime_to_numeric(times, datetime_unit='h') + expected = 24 * np.arange(0, 35, 7) + np.testing.assert_array_equal(result, expected) + + offset = times[1] + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit='h') + expected = 24 * np.arange(-7, 28, 7) + np.testing.assert_array_equal(result, expected) + + dtype = np.float32 + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit='h', dtype=dtype) + expected = 24 * np.arange(0, 35, 7).astype(dtype) + np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index d01929f163b..0d92f937821 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -571,3 +571,16 @@ def test_cftime_to_non_cftime_error(): with pytest.raises(TypeError): da.interp(time=0.5) + + +@requires_scipy +def test_datetime_interp_noerror(): + # GH:2667 + a = xr.DataArray( + np.arange(21).reshape(3, 7), dims=['x', 'time'], + coords={'x': [1, 2, 3], + 'time': pd.date_range('01-01-2001', periods=7, freq='D')}) + xi = xr.DataArray( + np.linspace(1, 3, 50), dims=['time'], + coords={'time': pd.date_range('01-01-2001', periods=50, freq='H')}) + a.interp(x=xi, time=xi.time) # should not raise an error diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 09152bac284..e98ab5cde4c 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -279,42 +279,3 @@ def test_either_dict_or_kwargs(): with pytest.raises(ValueError, match=r'foo'): result = either_dict_or_kwargs(dict(a=1), dict(a=1), 'foo') - - -def test_datetime_to_numeric_datetime64(): - times = pd.date_range('2000', periods=5, freq='7D') - da = xr.DataArray(times, coords=[times], dims=['time']) - result = utils.datetime_to_numeric(da, datetime_unit='h') - expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords) - assert_identical(result, expected) - - offset = da.isel(time=1) - result = utils.datetime_to_numeric(da, offset=offset, datetime_unit='h') - expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords) - assert_identical(result, expected) - - dtype = np.float32 - result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype) - expected = 24 * xr.DataArray( - np.arange(0, 35, 7), coords=da.coords).astype(dtype) - assert_identical(result, expected) - - -@requires_cftime -def test_datetime_to_numeric_cftime(): - times = xr.cftime_range('2000', periods=5, freq='7D') - da = xr.DataArray(times, coords=[times], dims=['time']) - result = utils.datetime_to_numeric(da, datetime_unit='h') - expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords) - assert_identical(result, expected) - - offset = da.isel(time=1) - result = utils.datetime_to_numeric(da, offset=offset, datetime_unit='h') - expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords) - assert_identical(result, expected) - - dtype = np.float32 - result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype) - expected = 24 * xr.DataArray( - np.arange(0, 35, 7), coords=da.coords).astype(dtype) - assert_identical(result, expected)