Skip to content

Commit

Permalink
fix datetime_to_numeric and Variable._to_numeric (#2668)
Browse files Browse the repository at this point in the history
* WIP: fix regression about datetime_to_numeric

* Workaround for object array

* added a whatsnew

* rearrange tests

* lint

* Added Variable._to_numeric

* Fix for cftime

* Update via comments

* lint

* Fix via comment

* Fix errors

* lint
  • Loading branch information
fujiisoup authored Feb 11, 2019
1 parent 6d20766 commit 4cd56a9
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 95 deletions.
12 changes: 6 additions & 6 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,17 @@
DatasetCoordinates, LevelCoordinatesSource, assert_coordinate_consistent,
remap_label_indexers,
)
from .duck_array_ops import datetime_to_numeric
from .indexes import Indexes, default_indexes, isel_variable_and_index
from .merge import (
dataset_merge_method, dataset_update_method, merge_data_and_coords,
merge_variables)
from .options import OPTIONS, _get_keep_attrs
from .pycompat import dask_array_type
from .utils import (
Frozen, SortedKeysDict, _check_inplace, datetime_to_numeric,
decode_numpy_dict_values, either_dict_or_kwargs, hashable,
maybe_wrap_array)
Frozen, SortedKeysDict, _check_inplace,
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution,
hashable, maybe_wrap_array)
from .variable import IndexVariable, Variable, as_variable, broadcast_variables
if TYPE_CHECKING:
from .dataarray import DataArray
Expand Down Expand Up @@ -3997,15 +3998,14 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
datetime_unit, _ = np.datetime_data(coord_var.dtype)
elif datetime_unit is None:
datetime_unit = 's' # Default to seconds for cftime objects
coord_var = datetime_to_numeric(
coord_var, datetime_unit=datetime_unit)
coord_var = coord_var._to_numeric(datetime_unit=datetime_unit)

variables = OrderedDict()
for k, v in self.variables.items():
if (k in self.data_vars and dim in v.dims and
k not in self.coords):
if _contains_datetime_like_objects(v):
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
v = v._to_numeric(datetime_unit=datetime_unit)
grad = duck_array_ops.gradient(
v.data, coord_var, edge_order=edge_order,
axis=v.get_axis_num(dim))
Expand Down
43 changes: 40 additions & 3 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
import pandas as pd

from . import dask_array_ops, dtypes, npcompat, nputils, utils
from . import dask_array_ops, dtypes, npcompat, nputils
from .nputils import nanfirst, nanlast
from .pycompat import dask_array_type

Expand Down Expand Up @@ -289,14 +289,51 @@ def f(values, axis=None, skipna=None, **kwargs):
_mean = _create_nan_agg_method('mean')


def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
    """Convert an array containing datetime-like data to an array of floats.

    Parameters
    ----------
    array : array
        Input data
    offset : scalar with the same type as the elements of ``array``, or None
        Value subtracted from ``array`` before the conversion.
        If None, subtract the minimum value to reduce round off error.
        For numpy datetime64/timedelta64 input, NaT entries are ignored when
        looking for that minimum.
    datetime_unit : None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
        'us', 'ns', 'ps', 'fs', 'as'}
        If given, the differences are divided by one such unit.
    dtype : target dtype

    Returns
    -------
    array
        ``array - offset`` cast to ``dtype``; NaT entries become NaN.
    """
    if offset is None:
        candidates = array
        if (isinstance(array, np.ndarray) and array.ndim > 0 and
                array.dtype.kind in 'mM'):
            # min() would return NaT as soon as one element is NaT, and
            # subtracting NaT turns every element into NaT. Look for the
            # minimum among the valid entries only.
            not_nat = array[~pd.isnull(array)]
            if not_nat.size:
                candidates = not_nat
        offset = candidates.min()
    array = array - offset

    if not hasattr(array, 'dtype'):  # scalar is converted to 0d-array
        array = np.array(array)

    if array.dtype.kind == 'O':
        # possibly convert object array containing datetime.timedelta
        array = np.asarray(pd.Series(array.ravel())).reshape(array.shape)

    if datetime_unit:
        array = array / np.timedelta64(1, datetime_unit)

    # convert np.NaT to np.nan
    if array.dtype.kind in 'mM':
        return np.where(isnull(array), np.nan, array.astype(dtype))
    return array.astype(dtype)


def mean(array, axis=None, skipna=None, **kwargs):
"""In-house mean that can also handle datetime dtypes."""
array = asarray(array)
if array.dtype.kind == 'M':
if array.dtype.kind in 'Mm':
# Average the differences from the minimum, then add the minimum back.
offset = min(array)
# xarray always uses datetime[ns] for datetime
dtype = 'timedelta64[ns]'
return _mean(utils.datetime_to_numeric(array, offset), axis=axis,
return _mean(datetime_to_numeric(array, offset), axis=axis,
skipna=skipna, **kwargs).astype(dtype) + offset
else:
return _mean(array, axis=axis, skipna=skipna, **kwargs)
Expand Down
11 changes: 5 additions & 6 deletions xarray/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from . import utils
from .common import _contains_datetime_like_objects
from .computation import apply_ufunc
from .duck_array_ops import dask_array_type
from .utils import OrderedSet, datetime_to_numeric, is_scalar
from .duck_array_ops import dask_array_type, datetime_to_numeric
from .utils import OrderedSet, is_scalar
from .variable import Variable, broadcast_variables


Expand Down Expand Up @@ -411,10 +411,9 @@ def _floatize_x(x, new_x):
# We assume that the most of the bits are used to represent the
# offset (min(x)) and the variation (x - min(x)) can be
# represented by float.
xmin = x[i].min()
x[i] = datetime_to_numeric(x[i], offset=xmin, dtype=np.float64)
new_x[i] = datetime_to_numeric(
new_x[i], offset=xmin, dtype=np.float64)
xmin = x[i].values.min()
x[i] = x[i]._to_numeric(offset=xmin, dtype=np.float64)
new_x[i] = new_x[i]._to_numeric(offset=xmin, dtype=np.float64)
return x, new_x


Expand Down
34 changes: 0 additions & 34 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,40 +603,6 @@ def __len__(self):
return len(self._data) - num_hidden


def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
    """Convert an array containing datetime-like data to an array of floats.

    Parameters
    ----------
    array : array
        Input data
    offset : scalar with the same type as the elements of ``array``, or None
        If None, subtract minimum values to reduce round off error
    datetime_unit : None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
        'us', 'ns', 'ps', 'fs', 'as'}
        If given, the differences are divided by one such unit.
    dtype : target dtype

    Returns
    -------
    array
        ``array - offset`` cast to ``dtype``; NaT entries become NaN.
    """
    if offset is None:
        offset = array.min()
    array = array - offset

    if datetime_unit:
        array = array / np.timedelta64(1, datetime_unit)

    # convert np.NaT to np.nan
    if array.dtype.kind in 'mM':
        if hasattr(array, 'isnull'):
            return np.where(array.isnull(), np.nan, array.astype(dtype))
        # Imported here, in the only branch that needs it, rather than at
        # the top of the function.
        from . import duck_array_ops
        return np.where(duck_array_ops.isnull(array), np.nan,
                        array.astype(dtype))
    # Honour the requested dtype for already-numeric results as well.
    return array.astype(dtype)


def get_temp_dimname(dims, new_dim):
""" Get an new dimension name based on new_dim, that is not used in dims.
If the same name exists, we add an underscore(s) in the head.
Expand Down
8 changes: 8 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1782,6 +1782,14 @@ def func(self, other):
return self
return func

def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
    """Private helper: build a new variable of the same type whose data is
    this variable's datetime-like data converted to a numeric dtype.

    The conversion itself is delegated to
    duck_array_ops.datetime_to_numeric; this variable's dims and attrs are
    passed on to the new object.
    """
    converted = duck_array_ops.datetime_to_numeric(
        self.data, offset=offset, datetime_unit=datetime_unit, dtype=dtype)
    cls = type(self)
    return cls(self.dims, converted, self._attrs)


ops.inject_all_ops_and_reduce_methods(Variable)

Expand Down
9 changes: 5 additions & 4 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ALL_DIMS, DataArray, Dataset, IndexVariable, MergeError, Variable, align,
backends, broadcast, open_dataset, set_options)
from xarray.core import dtypes, indexing, npcompat, utils
from xarray.core.common import full_like
from xarray.core.common import duck_array_ops, full_like
from xarray.core.pycompat import integer_types

from . import (
Expand Down Expand Up @@ -4676,7 +4676,7 @@ def test_differentiate_datetime(dask):
actual = da.differentiate('x', edge_order=1, datetime_unit='D')
expected_x = xr.DataArray(
npcompat.gradient(
da, utils.datetime_to_numeric(da['x'], datetime_unit='D'),
da, da['x'].variable._to_numeric(datetime_unit='D'),
axis=0, edge_order=1), dims=da.dims, coords=da.coords)
assert_equal(expected_x, actual)

Expand Down Expand Up @@ -4710,7 +4710,7 @@ def test_differentiate_cftime(dask):

actual = da.differentiate('time', edge_order=1, datetime_unit='D')
expected_data = npcompat.gradient(
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'),
da, da['time'].variable._to_numeric(datetime_unit='D'),
axis=0, edge_order=1)
expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
assert_equal(expected, actual)
Expand Down Expand Up @@ -4789,7 +4789,8 @@ def test_trapz_datetime(dask, which_datetime):

actual = da.integrate('time', datetime_unit='D')
expected_data = np.trapz(
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), axis=0)
da, duck_array_ops.datetime_to_numeric(da['time'], datetime_unit='D'),
axis=0)
expected = xr.DataArray(
expected_data, dims=['y'],
coords={k: v for k, v in da.coords.items() if 'time' not in v.dims})
Expand Down
46 changes: 43 additions & 3 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
import pytest
from numpy import array, nan

from xarray import DataArray, Dataset, concat
from xarray import DataArray, Dataset, concat, cftime_range
from xarray.core import dtypes, duck_array_ops
from xarray.core.duck_array_ops import (
array_notnull_equiv, concatenate, count, first, gradient, last, mean,
rolling_window, stack, where)
from xarray.core.pycompat import dask_array_type
from xarray.testing import assert_allclose, assert_equal
from xarray.testing import assert_allclose, assert_equal, assert_identical

from . import (
assert_array_equal, has_dask, has_np113, raises_regex, requires_dask)
assert_array_equal, has_dask, has_np113, raises_regex, requires_cftime,
requires_dask)


class TestOps(object):
Expand Down Expand Up @@ -569,3 +570,42 @@ def test_docs():
indicated dimension(s) removed.
""")
assert actual == expected


def test_datetime_to_numeric_datetime64():
    """datetime_to_numeric on a numpy datetime64 array with weekly steps."""
    to_numeric = duck_array_ops.datetime_to_numeric
    stamps = pd.date_range('2000', periods=5, freq='7D').values

    # No offset given.
    np.testing.assert_array_equal(
        to_numeric(stamps, datetime_unit='h'),
        24 * np.arange(0, 35, 7))

    # Explicit offset: the second timestamp.
    np.testing.assert_array_equal(
        to_numeric(stamps, offset=stamps[1], datetime_unit='h'),
        24 * np.arange(-7, 28, 7))

    # Explicit target dtype.
    target = np.float32
    np.testing.assert_array_equal(
        to_numeric(stamps, datetime_unit='h', dtype=target),
        24 * np.arange(0, 35, 7).astype(target))


@requires_cftime
def test_datetime_to_numeric_cftime():
    """datetime_to_numeric on an array of cftime dates with weekly steps."""
    to_numeric = duck_array_ops.datetime_to_numeric
    stamps = cftime_range('2000', periods=5, freq='7D').values

    # No offset given.
    np.testing.assert_array_equal(
        to_numeric(stamps, datetime_unit='h'),
        24 * np.arange(0, 35, 7))

    # Explicit offset: the second date.
    np.testing.assert_array_equal(
        to_numeric(stamps, offset=stamps[1], datetime_unit='h'),
        24 * np.arange(-7, 28, 7))

    # Explicit target dtype.
    target = np.float32
    np.testing.assert_array_equal(
        to_numeric(stamps, datetime_unit='h', dtype=target),
        24 * np.arange(0, 35, 7).astype(target))
13 changes: 13 additions & 0 deletions xarray/tests/test_interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,16 @@ def test_cftime_to_non_cftime_error():

with pytest.raises(TypeError):
da.interp(time=0.5)


@requires_scipy
def test_datetime_interp_noerror():
    """Interpolating along a datetime coordinate must not raise (GH:2667)."""
    # GH:2667
    daily = pd.date_range('01-01-2001', periods=7, freq='D')
    hourly = pd.date_range('01-01-2001', periods=50, freq='H')

    a = xr.DataArray(
        np.arange(21).reshape(3, 7),
        dims=['x', 'time'],
        coords={'x': [1, 2, 3], 'time': daily})
    xi = xr.DataArray(
        np.linspace(1, 3, 50),
        dims=['time'],
        coords={'time': hourly})

    # should not raise an error
    a.interp(x=xi, time=xi.time)
39 changes: 0 additions & 39 deletions xarray/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,42 +279,3 @@ def test_either_dict_or_kwargs():

with pytest.raises(ValueError, match=r'foo'):
result = either_dict_or_kwargs(dict(a=1), dict(a=1), 'foo')


def test_datetime_to_numeric_datetime64():
    """utils.datetime_to_numeric on a DataArray of datetime64 values."""
    index = pd.date_range('2000', periods=5, freq='7D')
    da = xr.DataArray(index, coords=[index], dims=['time'])

    # No offset given.
    actual = utils.datetime_to_numeric(da, datetime_unit='h')
    wanted = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
    assert_identical(actual, wanted)

    # Explicit offset: the second element.
    second = da.isel(time=1)
    actual = utils.datetime_to_numeric(da, offset=second, datetime_unit='h')
    wanted = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
    assert_identical(actual, wanted)

    # Explicit target dtype.
    target = np.float32
    actual = utils.datetime_to_numeric(da, datetime_unit='h', dtype=target)
    days = xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
    wanted = 24 * days.astype(target)
    assert_identical(actual, wanted)


@requires_cftime
def test_datetime_to_numeric_cftime():
    """utils.datetime_to_numeric on a DataArray of cftime dates."""
    index = xr.cftime_range('2000', periods=5, freq='7D')
    da = xr.DataArray(index, coords=[index], dims=['time'])

    # No offset given.
    actual = utils.datetime_to_numeric(da, datetime_unit='h')
    wanted = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
    assert_identical(actual, wanted)

    # Explicit offset: the second element.
    second = da.isel(time=1)
    actual = utils.datetime_to_numeric(da, offset=second, datetime_unit='h')
    wanted = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
    assert_identical(actual, wanted)

    # Explicit target dtype.
    target = np.float32
    actual = utils.datetime_to_numeric(da, datetime_unit='h', dtype=target)
    days = xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
    wanted = 24 * days.astype(target)
    assert_identical(actual, wanted)

0 comments on commit 4cd56a9

Please sign in to comment.