From ad6f35bc880761ac784fe86bfbf8049a2831ac84 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 7 Sep 2019 18:08:34 -0600 Subject: [PATCH 01/29] interpolate_na: Add maxgap support. --- doc/whats-new.rst | 6 +++++ xarray/core/dataarray.py | 10 +++++++- xarray/core/dataset.py | 15 ++++++++---- xarray/core/missing.py | 45 +++++++++++++++++++++++++++++++++-- xarray/tests/test_missing.py | 46 +++++++++++++++++++++++++++++++++++- 5 files changed, 113 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9e14120aeb3..dd94519a4fb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -232,6 +232,12 @@ Enhancements - Added ``join='override'``. When aligning, this only checks that index sizes are equal among objects and skips checking indexes for equality. +- Added the ``maxgap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian `_. + +- Added ``join='override'``. This only checks that index sizes are equal among objects and skips + checking indexes for equality. By `Deepak Cherian `_. - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. It is passed down to :py:func:`~xarray.align`. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4a48f13b86d..740a7f79be9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1986,6 +1986,7 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, + maxgap: int = None, **kwargs: Any ) -> "DataArray": """Interpolate values according to different methods. @@ -2015,7 +2016,13 @@ def interpolate_na( coordinate variariable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. + or None for no limit. 
This filling is done regardless of the size of + the gap in the data. + maxgap : int, default None + Maximum size of gap that will be filled. Must be greater than 0 or None + for no limit. + kwargs : dict(), optional + parameters passed verbatim to the underlying interpolation function Returns ------- @@ -2034,6 +2041,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + maxgap=maxgap, **kwargs ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6123b42b77e..f3289620ed1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3870,7 +3870,8 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, - **kwargs: Any, + maxgap: int = None, + **kwargs: Any ) -> "Dataset": """Interpolate values according to different methods. @@ -3899,9 +3900,13 @@ def interpolate_na( coordinate variariable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. - kwargs : any - parameters passed verbatim to the underlying interplation function + or None for no limit. This filling is done regardless of the size of + the gap in the data. + maxgap : int, default None + Maximum size of gap that will be filled. Must be greater than 0 or None + for no limit. + kwargs : dict(), optional + parameters passed verbatim to the underlying interpolation function Returns ------- @@ -3921,7 +3926,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, - **kwargs, + **kwargs ) return new diff --git a/xarray/core/missing.py b/xarray/core/missing.py index dfe209e3f7e..72ad8a74bcb 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -6,13 +6,44 @@ import pandas as pd from . 
import utils -from .common import _contains_datetime_like_objects +from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables +def _get_nan_block_lengths(obj, dim): + """ + Return an object where each NaN element in 'obj' is replaced by the + length of the gap the element is in. + """ + + # algorithm from https://stackoverflow.com/questions/53060003/how-to-get-the-maximum-time-of-gap-in-xarray-dataset/53075828#53075828 + arange = ones_like(obj) * np.arange(len(obj.indexes[dim])) + 1 + cumulative_nans = arange.where(obj.notnull()).ffill(dim=dim).fillna(0) + + num_nans = arange - cumulative_nans + + block_lengths_at_peaks = num_nans.where( + num_nans.diff(dim=dim, label="lower") < 0 + ).reindex({dim: obj[dim]}) + + # nans at the end + maybe_nans_at_end = ( + block_lengths_at_peaks.isel({dim: -1}) + .where(obj.isel({dim: -1}).notnull(), num_nans.isel({dim: -1})) + .expand_dims(dim) + .reindex_like(obj) + ) + + block_lengths_at_peaks = block_lengths_at_peaks.fillna(maybe_nans_at_end) + + nan_block_lengths = block_lengths_at_peaks.bfill(dim).where(obj.isnull()).fillna(0) + + return nan_block_lengths + + class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ @@ -220,7 +251,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( - self, dim=None, use_coordinate=True, method="linear", limit=None, **kwargs + self, + dim=None, + use_coordinate=True, + method="linear", + limit=None, + maxgap=None, + **kwargs ): """Interpolate values according to different methods. 
""" @@ -253,6 +290,10 @@ def interp_na( if limit is not None: arr = arr.where(valids) + if maxgap is not None: + nan_block_lengths = _get_nan_block_lengths(self, dim) + arr = arr.where(nan_block_lengths <= maxgap) + return arr diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index cfce5d6f645..0cdd5d69f1e 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,7 +5,12 @@ import pytest import xarray as xr -from xarray.core.missing import NumpyInterpolator, ScipyInterpolator, SplineInterpolator +from xarray.core.missing import ( + NumpyInterpolator, + ScipyInterpolator, + SplineInterpolator, + _get_nan_block_lengths, +) from xarray.core.pycompat import dask_array_type from xarray.tests import ( assert_array_equal, @@ -439,3 +444,42 @@ def test_ffill_dataset(ds): @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") + + +def test_interpolate_na_maxgap(ds): + arr = [ + [ + np.nan, + np.nan, + np.nan, + 1, + 2, + 3, + np.nan, + np.nan, + 6, + 7, + np.nan, + 9, + np.nan, + np.nan, + ] + ] + + da = xr.DataArray( + arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": np.arange(14)} + ) + + expected = xr.DataArray( + [[np.nan, np.nan, np.nan, 1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan, np.nan]] * 2, + dims=["x", "y"], + coords={"x": [0, 1], "y": np.arange(14)}, + ) + + expected_lengths = da.copy(data=[[3, 3, 3, 0, 0, 0, 2, 2, 0, 0, 1, 0, 2, 2]] * 2) + + actual_lengths = _get_nan_block_lengths(da, "y") + xr.testing.assert_equal(expected_lengths, actual_lengths) + + actual = da.interpolate_na("y", maxgap=2) + xr.testing.assert_identical(expected, actual) From 9275d89fb5039b75268c1ed0b88f1841256aea0a Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 12 Sep 2019 09:09:55 -0600 Subject: [PATCH 02/29] Add docs. 
--- doc/computation.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/computation.rst b/doc/computation.rst index ae5f4bc5c66..fdbd5797cba 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -94,7 +94,8 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification -of which values to use as the index in the interpolation. +of which values to use as the index in the interpolation. xarray also provides the ``maxgap`` keyword argument to limit the interpolation to data gaps of length ``maxgap`` or smaller. See +:py:meth:`~xarray.DataArray.interpolate_na` for more. Aggregation =========== From 47a7cf5fe7c21b1182d013ad919e02f2c92afb6d Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 12 Sep 2019 09:25:12 -0600 Subject: [PATCH 03/29] Add requires_bottleneck to test. --- xarray/tests/test_missing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 0cdd5d69f1e..0a6545766af 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -446,6 +446,7 @@ def test_bfill_dataset(ds): ds.ffill(dim="time") +@requires_bottleneck def test_interpolate_na_maxgap(ds): arr = [ [ From 711b2a9a06f0588acd1707c351877f39c6ed2c92 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 12 Sep 2019 09:52:06 -0600 Subject: [PATCH 04/29] Review comments. 
--- xarray/tests/test_missing.py | 41 +++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 0a6545766af..c1e5567aa54 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -461,26 +461,47 @@ def test_interpolate_na_maxgap(ds): 6, 7, np.nan, - 9, + np.nan, + np.nan, + 11, np.nan, np.nan, ] ] da = xr.DataArray( - arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": np.arange(14)} + arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": np.arange(len(arr[0]))} ) - expected = xr.DataArray( - [[np.nan, np.nan, np.nan, 1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan, np.nan]] * 2, - dims=["x", "y"], - coords={"x": [0, 1], "y": np.arange(14)}, + expected = da.copy( + data=[ + [ + np.nan, + np.nan, + np.nan, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + np.nan, + np.nan, + np.nan, + 11, + np.nan, + np.nan, + ] + ] + * 2 ) - expected_lengths = da.copy(data=[[3, 3, 3, 0, 0, 0, 2, 2, 0, 0, 1, 0, 2, 2]] * 2) + actual = da.interpolate_na("y", maxgap=2) + xr.testing.assert_identical(expected, actual) + expected_lengths = da.copy( + data=[[3, 3, 3, 0, 0, 0, 2, 2, 0, 0, 3, 3, 3, 0, 2, 2]] * 2 + ) actual_lengths = _get_nan_block_lengths(da, "y") xr.testing.assert_equal(expected_lengths, actual_lengths) - - actual = da.interpolate_na("y", maxgap=2) - xr.testing.assert_identical(expected, actual) From 4cad630202e1ff9cfd0590376f3a6883c4870c45 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 12 Sep 2019 20:57:42 -0600 Subject: [PATCH 05/29] =?UTF-8?q?maxgap=20=E2=86=92=20max=5Fgap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xarray/core/dataarray.py | 6 +++--- xarray/core/dataset.py | 7 ++++--- xarray/core/missing.py | 6 +++--- xarray/tests/test_missing.py | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 740a7f79be9..d077cf9297e 100644 --- 
a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1986,7 +1986,7 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, - maxgap: int = None, + max_gap: int = None, **kwargs: Any ) -> "DataArray": """Interpolate values according to different methods. @@ -2018,7 +2018,7 @@ def interpolate_na( Maximum number of consecutive NaNs to fill. Must be greater than 0 or None for no limit. This filling is done regardless of the size of the gap in the data. - maxgap : int, default None + max_gap : int, default None Maximum size of gap that will be filled. Must be greater than 0 or None for no limit. kwargs : dict(), optional @@ -2041,7 +2041,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, - maxgap=maxgap, + max_gap=max_gap, **kwargs ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3289620ed1..71eb61274f6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3870,7 +3870,7 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, - maxgap: int = None, + max_gap: int = None, **kwargs: Any ) -> "Dataset": """Interpolate values according to different methods. @@ -3902,7 +3902,7 @@ def interpolate_na( Maximum number of consecutive NaNs to fill. Must be greater than 0 or None for no limit. This filling is done regardless of the size of the gap in the data. - maxgap : int, default None + max_gap : int, default None Maximum size of gap that will be filled. Must be greater than 0 or None for no limit. 
kwargs : dict(), optional @@ -3926,7 +3926,8 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, - **kwargs + max_gap=max_gap, + **kwargs, ) return new diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 72ad8a74bcb..b48f0209b69 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -256,7 +256,7 @@ def interp_na( use_coordinate=True, method="linear", limit=None, - maxgap=None, + max_gap=None, **kwargs ): """Interpolate values according to different methods. @@ -290,9 +290,9 @@ def interp_na( if limit is not None: arr = arr.where(valids) - if maxgap is not None: + if max_gap is not None: nan_block_lengths = _get_nan_block_lengths(self, dim) - arr = arr.where(nan_block_lengths <= maxgap) + arr = arr.where(nan_block_lengths <= max_gap) return arr diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index c1e5567aa54..562129d86e1 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -447,7 +447,7 @@ def test_bfill_dataset(ds): @requires_bottleneck -def test_interpolate_na_maxgap(ds): +def test_interpolate_na_max_gap(ds): arr = [ [ np.nan, @@ -497,7 +497,7 @@ def test_interpolate_na_maxgap(ds): * 2 ) - actual = da.interpolate_na("y", maxgap=2) + actual = da.interpolate_na("y", max_gap=2) xr.testing.assert_identical(expected, actual) expected_lengths = da.copy( From 02b93c9f4144c43d0d4cec5ed6172ac831b1c302 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 12 Sep 2019 15:53:26 +0000 Subject: [PATCH 06/29] Update xarray/core/dataarray.py Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d077cf9297e..42984601cf1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2021,7 +2021,7 @@ def interpolate_na( max_gap : int, default None Maximum size of gap that will be 
filled. Must be greater than 0 or None for no limit. - kwargs : dict(), optional + kwargs : dict, optional parameters passed verbatim to the underlying interpolation function Returns From da6c5f39bd02ad74aa2027140f8fd5b1e37dc269 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 12 Sep 2019 15:53:31 +0000 Subject: [PATCH 07/29] Update xarray/core/dataset.py Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 71eb61274f6..831881d672b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3905,7 +3905,7 @@ def interpolate_na( max_gap : int, default None Maximum size of gap that will be filled. Must be greater than 0 or None for no limit. - kwargs : dict(), optional + kwargs : dict, optional parameters passed verbatim to the underlying interpolation function Returns From e1880e3a8c9f099779588b3aff9f6364e234a417 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 12 Sep 2019 21:01:29 -0600 Subject: [PATCH 08/29] update whats-new --- doc/whats-new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dd94519a4fb..4c2e6659bdb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -233,6 +233,7 @@ Enhancements - Added ``join='override'``. When aligning, this only checks that index sizes are equal among objects and skips checking indexes for equality. - Added the ``maxgap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. 
From 6c7a86954d6636336af2e5861f5bda71fe9c97ca Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 12 Sep 2019 21:01:57 -0600 Subject: [PATCH 09/29] update computation.rst --- doc/computation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/computation.rst b/doc/computation.rst index fdbd5797cba..3947063c326 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -94,7 +94,7 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification -of which values to use as the index in the interpolation. xarray also provides the ``maxgap`` keyword argument to limit the interpolation to data gaps of length ``maxgap`` or smaller. See +of which values to use as the index in the interpolation. xarray also provides the ``max_gap`` keyword argument to limit the interpolation to data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` for more. Aggregation From 49854e96f03d7b2269ad03b42e60d76b9bee5c92 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 13 Sep 2019 09:31:04 -0600 Subject: [PATCH 10/29] Better support uniformly spaced coordinates. Split legnths, interp test --- xarray/core/missing.py | 6 +++--- xarray/tests/test_missing.py | 28 ++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index b48f0209b69..ce8ffd91693 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -13,14 +13,14 @@ from .variable import Variable, broadcast_variables -def _get_nan_block_lengths(obj, dim): +def _get_nan_block_lengths(obj, dim, index): """ Return an object where each NaN element in 'obj' is replaced by the length of the gap the element is in. 
""" # algorithm from https://stackoverflow.com/questions/53060003/how-to-get-the-maximum-time-of-gap-in-xarray-dataset/53075828#53075828 - arange = ones_like(obj) * np.arange(len(obj.indexes[dim])) + 1 + arange = ones_like(obj) * index + (index[1] - index[0]) cumulative_nans = arange.where(obj.notnull()).ffill(dim=dim).fillna(0) num_nans = arange - cumulative_nans @@ -291,7 +291,7 @@ def interp_na( arr = arr.where(valids) if max_gap is not None: - nan_block_lengths = _get_nan_block_lengths(self, dim) + nan_block_lengths = _get_nan_block_lengths(self, dim, index) arr = arr.where(nan_block_lengths <= max_gap) return arr diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 562129d86e1..95c558b3fbb 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -9,6 +9,7 @@ NumpyInterpolator, ScipyInterpolator, SplineInterpolator, + get_clean_interp_index, _get_nan_block_lengths, ) from xarray.core.pycompat import dask_array_type @@ -447,7 +448,24 @@ def test_bfill_dataset(ds): @requires_bottleneck -def test_interpolate_na_max_gap(ds): +@pytest.mark.parametrize( + "y, lengths", + [ + [np.arange(9), [[3, 3, 3, 0, 2, 2, 0, 2, 2]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 2, 2, 0, 3, 3]]], + ], +) +def test_interpolate_na_nan_block_lengths(y, lengths): + arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] + da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected = da.copy(data=lengths * 2) + assert_equal(actual, expected) + + +@requires_bottleneck +def test_interpolate_na_max_gap(): arr = [ [ np.nan, @@ -498,10 +516,4 @@ def test_interpolate_na_max_gap(ds): ) actual = da.interpolate_na("y", max_gap=2) - xr.testing.assert_identical(expected, actual) - - expected_lengths = da.copy( - data=[[3, 3, 3, 0, 0, 0, 2, 2, 0, 0, 3, 3, 3, 0, 2, 2]] * 2 - ) - 
actual_lengths = _get_nan_block_lengths(da, "y") - xr.testing.assert_equal(expected_lengths, actual_lengths) + assert_equal(expected, actual) From 6a866926e2826438cafb206c4126952e7e8a1594 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 13 Sep 2019 09:57:21 -0600 Subject: [PATCH 11/29] Raise error for max_gap and irregularly spaced coordinates + test --- xarray/core/missing.py | 8 ++++++++ xarray/tests/test_missing.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index ce8ffd91693..cca795b87dc 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -291,6 +291,14 @@ def interp_na( arr = arr.where(valids) if max_gap is not None: + if use_coordinate: + delta_index = np.diff(index) + if not np.allclose(delta_index, delta_index[0] * np.ones_like(delta_index)): + coord_name = dim if use_coordinate is True else use_coordinate + raise ValueError( + "Cannot specify max_gap with irregularly spaced coordinate %s" + % coord_name + ) nan_block_lengths = _get_nan_block_lengths(self, dim, index) arr = arr.where(nan_block_lengths <= max_gap) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 95c558b3fbb..c6030fe7565 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -452,7 +452,14 @@ def test_bfill_dataset(ds): "y, lengths", [ [np.arange(9), [[3, 3, 3, 0, 2, 2, 0, 2, 2]]], - [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 2, 2, 0, 3, 3]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 6, 6, 0, 6, 6]]], + pytest.param( + [0, 2, 5, 6, 7, 8, 10, 12, 14], + [[6, 6, 6, 0, 2, 2, 0, 3, 3]], + marks=pytest.mark.xfail( + reason="max_gap with irregularly spaced coordinate." + ), + ), ], ) def test_interpolate_na_nan_block_lengths(y, lengths): From 6e7e2f5020ae11188dd3f637a33599df954b5248 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 1 Oct 2019 07:54:07 -0600 Subject: [PATCH 12/29] rework. 
--- xarray/core/missing.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index cca795b87dc..9173a40f5b0 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -20,27 +20,20 @@ def _get_nan_block_lengths(obj, dim, index): """ # algorithm from https://stackoverflow.com/questions/53060003/how-to-get-the-maximum-time-of-gap-in-xarray-dataset/53075828#53075828 - arange = ones_like(obj) * index + (index[1] - index[0]) - cumulative_nans = arange.where(obj.notnull()).ffill(dim=dim).fillna(0) - - num_nans = arange - cumulative_nans - - block_lengths_at_peaks = num_nans.where( - num_nans.diff(dim=dim, label="lower") < 0 - ).reindex({dim: obj[dim]}) - - # nans at the end - maybe_nans_at_end = ( - block_lengths_at_peaks.isel({dim: -1}) - .where(obj.isel({dim: -1}).notnull(), num_nans.isel({dim: -1})) - .expand_dims(dim) - .reindex_like(obj) + arange = ones_like(obj) * index + valid = obj.notnull() + valid_arange = arange.where(valid) + cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) + + nan_block_lengths = ( + cumulative_nans.diff(dim=dim, label="upper") + .reindex({dim: obj[dim]}) + .where(valid) + .bfill(dim=dim) + .where(~valid, 0) + .fillna(index[-1] - valid_arange.max()) ) - block_lengths_at_peaks = block_lengths_at_peaks.fillna(maybe_nans_at_end) - - nan_block_lengths = block_lengths_at_peaks.bfill(dim).where(obj.isnull()).fillna(0) - return nan_block_lengths From b74dead65bdad98accb67519ab7651a2f8d9b28e Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 3 Oct 2019 16:07:39 -0600 Subject: [PATCH 13/29] Use pandas checks for index duplication and monotonicity. 
--- xarray/core/missing.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 9173a40f5b0..72b55388770 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -222,6 +222,14 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): "Coordinates used for interpolation must be 1D, " "%s is %dD." % (use_coordinate, index.ndim) ) + index = index.to_index() + + # check index sorting now so we can skip it later + if not index.is_monotonic: + raise ValueError("Index must be monotonically increasing") + + if not index.is_unique: + raise ValueError("Index must be unique") # raise if index cannot be cast to a float (e.g. MultiIndex) try: @@ -233,9 +241,6 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): "Index must be castable to float64 to support" "interpolation, got: %s" % type(index) ) - # check index sorting now so we can skip it later - if not (np.diff(index) > 0).all(): - raise ValueError("Index must be monotonicly increasing") else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) From a139042282e6e7a0847d202f4c425a263066aad2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 3 Oct 2019 16:20:11 -0600 Subject: [PATCH 14/29] Progress + add datetime. 
--- xarray/core/missing.py | 30 ++++++++++++++++++++++-------- xarray/tests/test_missing.py | 28 ++++++++++++++++++---------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 72b55388770..38c37e5359a 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -265,6 +265,28 @@ def interp_na( if limit is not None: valids = _get_valid_fill_mask(self, dim, limit) + if max_gap is not None: + max_type = type(max_gap) + if isinstance(self.indexes[dim], pd.DatetimeIndex) and not isinstance( + max_gap, (np.timedelta64, str) + ): + raise TypeError( + "expected max_gap of type str or timedelta64 since underlying index is DatetimeIndex but received %r" + % max_type + ) + + # TODO: better time offset checks + if isinstance(max_gap, (np.timedelta64, str)): + if not use_coordinate: + raise ValueError( + "provided max_gap of type %r but use_coordinate=False. Set use_coordinate=True instead." + % max_type + ) + if isinstance(max_gap, str): + max_gap = pd.to_timedelta(max_gap).to_numpy().astype(np.float64) + else: + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) @@ -289,14 +311,6 @@ def interp_na( arr = arr.where(valids) if max_gap is not None: - if use_coordinate: - delta_index = np.diff(index) - if not np.allclose(delta_index, delta_index[0] * np.ones_like(delta_index)): - coord_name = dim if use_coordinate is True else use_coordinate - raise ValueError( - "Cannot specify max_gap with irregularly spaced coordinate %s" - % coord_name - ) nan_block_lengths = _get_nan_block_lengths(self, dim, index) arr = arr.where(nan_block_lengths <= max_gap) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index c6030fe7565..ddc308efe5d 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -451,15 +451,9 @@ def 
test_bfill_dataset(ds): @pytest.mark.parametrize( "y, lengths", [ - [np.arange(9), [[3, 3, 3, 0, 2, 2, 0, 2, 2]]], - [np.arange(9) * 3, [[9, 9, 9, 0, 6, 6, 0, 6, 6]]], - pytest.param( - [0, 2, 5, 6, 7, 8, 10, 12, 14], - [[6, 6, 6, 0, 2, 2, 0, 3, 3]], - marks=pytest.mark.xfail( - reason="max_gap with irregularly spaced coordinate." - ), - ), + [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], ], ) def test_interpolate_na_nan_block_lengths(y, lengths): @@ -522,5 +516,19 @@ def test_interpolate_na_max_gap(): * 2 ) - actual = da.interpolate_na("y", max_gap=2) + actual = da.interpolate_na("y", max_gap=3) assert_equal(expected, actual) + + +def test_interpolate_na_max_gap_datetime_errors(): + da = xr.DataArray( + [np.nan, 1, 2, np.nan, np.nan, 4], + dims=["t"], + coords={"t": pd.date_range("2001-01-01", freq="H", periods=6)}, + ) + + with raises_regex(TypeError, "expected max_gap of type"): + da.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "but use_coordinate=False"): + da.interpolate_na("t", max_gap="1H", use_coordinate=False) From 8b150a432fe1d186bc7b2879ac7d6f0a99d199e6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:44:05 -0600 Subject: [PATCH 15/29] nicer error message --- xarray/core/missing.py | 24 ++++++++++++++---------- xarray/tests/test_missing.py | 11 +++++++++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 38c37e5359a..dfd99907a2f 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -95,7 +95,7 @@ def __call__(self, x): self._yi, left=self._left, right=self._right, - **self.call_kwargs + **self.call_kwargs, ) @@ -117,7 +117,7 @@ def __init__( copy=False, bounds_error=False, order=None, - **kwargs + **kwargs, ): from scipy.interpolate import interp1d @@ -150,7 +150,7 @@ def __init__( bounds_error=False, 
assume_sorted=assume_sorted, copy=copy, - **self.cons_kwargs + **self.cons_kwargs, ) @@ -171,7 +171,7 @@ def __init__( order=3, nu=0, ext=None, - **kwargs + **kwargs, ): from scipy.interpolate import UnivariateSpline @@ -224,12 +224,15 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): ) index = index.to_index() - # check index sorting now so we can skip it later + # TODO: index.name is None for multiindexes + if isinstance(index, pd.MultiIndex): + index.name = dim + if not index.is_monotonic: - raise ValueError("Index must be monotonically increasing") + raise ValueError(f"Index {index.name} must be monotonically increasing") if not index.is_unique: - raise ValueError("Index must be unique") + raise ValueError(f"Index {index.name} has duplicate values") # raise if index cannot be cast to a float (e.g. MultiIndex) try: @@ -238,9 +241,10 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): # pandas raises a TypeError # xarray/nuppy raise a ValueError raise TypeError( - "Index must be castable to float64 to support" - "interpolation, got: %s" % type(index) + f"Index {index.name} must be castable to float64 to support" + "interpolation, got {type(index)}" ) + else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) @@ -255,7 +259,7 @@ def interp_na( method="linear", limit=None, max_gap=None, - **kwargs + **kwargs, ): """Interpolate values according to different methods. 
""" diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index ddc308efe5d..7acb570b5ff 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -159,7 +159,7 @@ def test_interpolate_pd_compat_polynomial(): def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) - with raises_regex(ValueError, "Index must be monotonicly increasing"): + with raises_regex(ValueError, "Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") @@ -175,12 +175,19 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim="x", method="foo") +def test_interpolate_duplicate_values_raises(): + data = np.random.randn(2, 3) + da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) + with raises_regex(ValueError, "Index 'x' has duplicate values"): + da.interpolate_na(dim="x", method="foo") + + def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) - with raises_regex(TypeError, "Index must be castable to float64"): + with raises_regex(TypeError, "Index 'z' must be castable to float64"): das.interpolate_na(dim="z") From 45d3c2861473cb605852132f999644c040a07e8d Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Oct 2019 09:49:07 -0600 Subject: [PATCH 16/29] A few fstrings. 
--- xarray/core/missing.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index dfd99907a2f..4801f78716f 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -229,10 +229,10 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index.name = dim if not index.is_monotonic: - raise ValueError(f"Index {index.name} must be monotonically increasing") + raise ValueError(f"Index {index.name!r} must be monotonically increasing") if not index.is_unique: - raise ValueError(f"Index {index.name} has duplicate values") + raise ValueError(f"Index {index.name!r} has duplicate values") # raise if index cannot be cast to a float (e.g. MultiIndex) try: @@ -241,7 +241,7 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): # pandas raises a TypeError # xarray/nuppy raise a ValueError raise TypeError( - f"Index {index.name} must be castable to float64 to support" + f"Index {index.name!r} must be castable to float64 to support" "interpolation, got {type(index)}" ) @@ -283,8 +283,7 @@ def interp_na( if isinstance(max_gap, (np.timedelta64, str)): if not use_coordinate: raise ValueError( - "provided max_gap of type %r but use_coordinate=False. Set use_coordinate=True instead." - % max_type + f"provided max_gap of type {max_type} but use_coordinate=False. Set use_coordinate=True instead." ) if isinstance(max_gap, str): max_gap = pd.to_timedelta(max_gap).to_numpy().astype(np.float64) From 980f475cc2d4eaa90cea21d6d22ac76d23ac0a88 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Oct 2019 10:10:51 -0600 Subject: [PATCH 17/29] finish up timedelta max_gap. 
--- doc/computation.rst | 6 ++++-- xarray/core/dataarray.py | 9 ++++++--- xarray/core/missing.py | 29 +++++++++++++++++------------ xarray/tests/test_missing.py | 32 +++++++++++++++++++++++++------- 4 files changed, 52 insertions(+), 24 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index 3947063c326..2799706b299 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -94,8 +94,10 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification -of which values to use as the index in the interpolation. xarray also provides the ``max_gap`` keyword argument to limit the interpolation to data gaps of length ``max_gap`` or smaller. See -:py:meth:`~xarray.DataArray.interpolate_na` for more. +of which values to use as the index in the interpolation. +xarray also provides the ``max_gap`` keyword argument to limit the interpolation to +data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` +for more. Aggregation =========== diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 42984601cf1..3ef6f386201 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2018,9 +2018,12 @@ def interpolate_na( Maximum number of consecutive NaNs to fill. Must be greater than 0 or None for no limit. This filling is done regardless of the size of the gap in the data. - max_gap : int, default None - Maximum size of gap that will be filled. Must be greater than 0 or None - for no limit. + max_gap : str, pandas.Timedelta or numpy.timedelta64, default None + Maximum size of gap that will be filled. Use None for no limit. 
When interpolating + along a datetime64 dimension and use_coordinate=True, max_gap can be one of the following: + - a string that is valid input for pandas.to_timedelta + - a numpy.timedelta64 object + - a pandas.Timedelta object kwargs : dict, optional parameters passed verbatim to the underlying interpolation function diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 4801f78716f..2bf026bec6a 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -271,24 +271,29 @@ def interp_na( if max_gap is not None: max_type = type(max_gap) - if isinstance(self.indexes[dim], pd.DatetimeIndex) and not isinstance( - max_gap, (np.timedelta64, str) - ): - raise TypeError( - "expected max_gap of type str or timedelta64 since underlying index is DatetimeIndex but received %r" - % max_type - ) + if isinstance(self.indexes[dim], pd.DatetimeIndex): + if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): + raise TypeError( + f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" + ) - # TODO: better time offset checks - if isinstance(max_gap, (np.timedelta64, str)): if not use_coordinate: raise ValueError( f"provided max_gap of type {max_type} but use_coordinate=False. Set use_coordinate=True instead." 
) + if isinstance(max_gap, str): - max_gap = pd.to_timedelta(max_gap).to_numpy().astype(np.float64) - else: - max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + try: + max_gap = pd.to_timedelta(max_gap).to_numpy() + except ValueError: + raise ValueError( + f"Could not convert {max_gap!r} to a pandas timedelta using pandas.to_timedelta" + ) + + if isinstance(max_gap, pd.Timedelta): + max_gap = max_gap.to_numpy() + + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 7acb570b5ff..e1a3781d95f 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -527,15 +527,33 @@ def test_interpolate_na_max_gap(): assert_equal(expected, actual) -def test_interpolate_na_max_gap_datetime_errors(): - da = xr.DataArray( - [np.nan, 1, 2, np.nan, np.nan, 4], +@pytest.fixture +def da_time(): + return xr.DataArray( + [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], dims=["t"], - coords={"t": pd.date_range("2001-01-01", freq="H", periods=6)}, + coords={"t": pd.date_range("2001-01-01", freq="H", periods=11)}, ) - with raises_regex(TypeError, "expected max_gap of type"): - da.interpolate_na("t", max_gap=1) + +def test_interpolate_na_max_gap_datetime_errors(da_time): + with raises_regex(TypeError, "Underlying index is"): + da_time.interpolate_na("t", max_gap=1) with raises_regex(ValueError, "but use_coordinate=False"): - da.interpolate_na("t", max_gap="1H", use_coordinate=False) + da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) + + with raises_regex(ValueError, "Could not convert 'huh' to a "): + da_time.interpolate_na("t", max_gap="huh") + + +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) +@pytest.mark.parametrize( + "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] +) +def 
test_interpolate_na_max_gap_time_specifier(da_time, max_gap, transform): + expected = transform( + da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) + ) + actual = transform(da_time).interpolate_na("t", max_gap=max_gap) + assert_equal(actual, expected) From 6e857f08d0795f0b01a5fbd1f9db72c801a7ed00 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Oct 2019 17:04:32 -0600 Subject: [PATCH 18/29] fix whats-new --- doc/whats-new.rst | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 61dbf291cf8..26c5c9d41c3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -20,6 +20,9 @@ v0.14.1 (unreleased) New Features ~~~~~~~~~~~~ +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian <https://github.com/dcherian>`_. - Added integration tests against `pint <https://pint.readthedocs.io/>`_. (:pull:`3238`) by `Justus Magin <https://github.com/keewis>`_. @@ -263,13 +266,6 @@ Enhancements - Added ``join='override'``. When aligning, this only checks that index sizes are equal among objects and skips checking indexes for equality. -- Added the ``maxgap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and -- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and - :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data - gap that will be filled by interpolation. By `Deepak Cherian <https://github.com/dcherian>`_. - -- Added ``join='override'``. This only checks that index sizes are equal among objects and skips - checking indexes for equality. By `Deepak Cherian <https://github.com/dcherian>`_. - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. It is passed down to :py:func:`~xarray.align`.
From 6f54616291813191f4d54c8e267db9efee0d558f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 22 Oct 2019 08:54:36 -0600 Subject: [PATCH 19/29] small fixes. --- xarray/core/missing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 2bf026bec6a..48baac97e09 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -219,12 +219,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = arr.coords[use_coordinate] if index.ndim != 1: raise ValueError( - "Coordinates used for interpolation must be 1D, " - "%s is %dD." % (use_coordinate, index.ndim) + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." ) index = index.to_index() # TODO: index.name is None for multiindexes + # set name for nice error messages below if isinstance(index, pd.MultiIndex): index.name = dim @@ -239,7 +240,7 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = index.values.astype(np.float64) except (TypeError, ValueError): # pandas raises a TypeError - # xarray/nuppy raise a ValueError + # xarray/numpy raise a ValueError raise TypeError( f"Index {index.name!r} must be castable to float64 to support" "interpolation, got {type(index)}" From db0c5f3fc9a7de84dd36c17a1b488622239e25eb Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 22 Oct 2019 09:06:07 -0600 Subject: [PATCH 20/29] fix dan's test. 
--- xarray/core/missing.py | 13 ++++++++--- xarray/tests/test_missing.py | 45 ++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 48baac97e09..061363374a5 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -13,13 +13,16 @@ from .variable import Variable, broadcast_variables -def _get_nan_block_lengths(obj, dim, index): +def _get_nan_block_lengths(obj, dim: str, index: Variable): """ Return an object where each NaN element in 'obj' is replaced by the length of the gap the element is in. """ - # algorithm from https://stackoverflow.com/questions/53060003/how-to-get-the-maximum-time-of-gap-in-xarray-dataset/53075828#53075828 + # make variable so that we get broadcasting for free + index = Variable([dim], index) + + # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 arange = ones_like(obj) * index valid = obj.notnull() valid_arange = arange.where(valid) @@ -272,7 +275,7 @@ def interp_na( if max_gap is not None: max_type = type(max_gap) - if isinstance(self.indexes[dim], pd.DatetimeIndex): + if dim in self.indexes and isinstance(self.indexes[dim], pd.DatetimeIndex): if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): raise TypeError( f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" @@ -320,6 +323,10 @@ def interp_na( arr = arr.where(valids) if max_gap is not None: + if dim not in self.coords: + raise NotImplementedError( + "max_gap not implemented for unlabeled coordinates yet." 
+ ) nan_block_lengths = _get_nan_block_lengths(self, dim, index) arr = arr.where(nan_block_lengths <= max_gap) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index e1a3781d95f..2740b951681 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -557,3 +557,48 @@ def test_interpolate_na_max_gap_time_specifier(da_time, max_gap, transform): ) actual = transform(da_time).interpolate_na("t", max_gap=max_gap) assert_equal(actual, expected) + + +@requires_bottleneck +@pytest.mark.parametrize( + "coords", + [ + pytest.param(None, marks=pytest.mark.xfail()), + {"x": np.arange(4), "y": np.arange(11)}, + ], +) +def test_interpolate_na_2d(coords): + da = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + + actual = da.interpolate_na("y", max_gap=2) + expected_y = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + ] + ) + assert_equal(actual, expected_y) + + actual = da.interpolate_na("x", max_gap=3) + expected_x = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + assert_equal(actual, expected_x) From 1127c61c6b3debf887d6fb097bd492bd13698595 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 22 Oct 2019 09:06:42 -0600 Subject: [PATCH 21/29] remove redundant test. 
--- xarray/tests/test_missing.py | 55 ------------------------------------ 1 file changed, 55 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 2740b951681..d383fe4f06d 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -472,61 +472,6 @@ def test_interpolate_na_nan_block_lengths(y, lengths): assert_equal(actual, expected) -@requires_bottleneck -def test_interpolate_na_max_gap(): - arr = [ - [ - np.nan, - np.nan, - np.nan, - 1, - 2, - 3, - np.nan, - np.nan, - 6, - 7, - np.nan, - np.nan, - np.nan, - 11, - np.nan, - np.nan, - ] - ] - - da = xr.DataArray( - arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": np.arange(len(arr[0]))} - ) - - expected = da.copy( - data=[ - [ - np.nan, - np.nan, - np.nan, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - np.nan, - np.nan, - np.nan, - 11, - np.nan, - np.nan, - ] - ] - * 2 - ) - - actual = da.interpolate_na("y", max_gap=3) - assert_equal(expected, actual) - - @pytest.fixture def da_time(): return xr.DataArray( From 4e27c94cf32d1412fc40fd5157ca0bf43620c877 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 23 Oct 2019 09:02:00 -0600 Subject: [PATCH 22/29] nicer error message. --- xarray/core/missing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 061363374a5..ce36a8769cb 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -245,8 +245,8 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): # pandas raises a TypeError # xarray/numpy raise a ValueError raise TypeError( - f"Index {index.name!r} must be castable to float64 to support" - "interpolation, got {type(index)}" + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." 
) else: @@ -274,7 +274,7 @@ def interp_na( valids = _get_valid_fill_mask(self, dim, limit) if max_gap is not None: - max_type = type(max_gap) + max_type = type(max_gap).__name__ if dim in self.indexes and isinstance(self.indexes[dim], pd.DatetimeIndex): if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): raise TypeError( From 179eff1a0bcf58c23257dd463bc38f77a53429e0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 24 Oct 2019 08:25:58 -0600 Subject: [PATCH 23/29] Add xfailed cftime tests --- xarray/tests/test_missing.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index d383fe4f06d..44fb60b366f 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -477,7 +477,6 @@ def da_time(): return xr.DataArray( [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], dims=["t"], - coords={"t": pd.date_range("2001-01-01", freq="H", periods=11)}, ) @@ -492,11 +491,18 @@ def test_interpolate_na_max_gap_datetime_errors(da_time): da_time.interpolate_na("t", max_gap="huh") +@pytest.mark.parametrize( + "time_range_func", + [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], +) @pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) @pytest.mark.parametrize( "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] ) -def test_interpolate_na_max_gap_time_specifier(da_time, max_gap, transform): +def test_interpolate_na_max_gap_time_specifier( + da_time, max_gap, transform, time_range_func +): + da_time["t"] = time_range_func("2001-01-01", freq="H", periods=11) expected = transform( da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) ) From 9de946fd4bd40702759ac155a287b8a29aac5176 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 25 Oct 2019 08:02:24 -0600 Subject: [PATCH 24/29] better error checking and tests. 
--- xarray/core/missing.py | 21 +++++++++++++++------ xarray/tests/test_missing.py | 13 +++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index ce36a8769cb..5ef0fae1cee 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,5 +1,6 @@ import warnings from functools import partial +from numbers import Number from typing import Any, Callable, Dict, Sequence import numpy as np @@ -275,17 +276,19 @@ def interp_na( if max_gap is not None: max_type = type(max_gap).__name__ - if dim in self.indexes and isinstance(self.indexes[dim], pd.DatetimeIndex): + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if ( + dim in self.indexes + and isinstance(self.indexes[dim], pd.DatetimeIndex) + and use_coordinate + ): if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): raise TypeError( f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" ) - if not use_coordinate: - raise ValueError( - f"provided max_gap of type {max_type} but use_coordinate=False. Set use_coordinate=True instead." - ) - if isinstance(max_gap, str): try: max_gap = pd.to_timedelta(max_gap).to_numpy() @@ -299,6 +302,12 @@ def interp_na( max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + if not use_coordinate: + if not isinstance(max_gap, (Number, np.number)): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." 
+ ) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 44fb60b366f..5c543166ab3 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -480,11 +480,20 @@ def da_time(): ) -def test_interpolate_na_max_gap_datetime_errors(da_time): +def test_interpolate_na_max_gap_errors(da_time): + with raises_regex( + NotImplementedError, "max_gap not implemented for unlabeled coordinates" + ): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "max_gap must be a scalar."): + da_time.interpolate_na("t", max_gap=(1,)) + + da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) with raises_regex(TypeError, "Underlying index is"): da_time.interpolate_na("t", max_gap=1) - with raises_regex(ValueError, "but use_coordinate=False"): + with raises_regex(TypeError, "Expected integer or floating point"): da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) with raises_regex(ValueError, "Could not convert 'huh' to a "): From a411cc2c34f926433ff1c7c85810fb228855246e Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 25 Oct 2019 08:22:50 -0600 Subject: [PATCH 25/29] typing. 
--- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 2 +- xarray/core/missing.py | 16 ++++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6c0144f55a7..585bf5be34d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1982,11 +1982,11 @@ def fillna(self, value: Any) -> "DataArray": def interpolate_na( self, - dim=None, + dim: Hashable = None, method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, - max_gap: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": """Interpolate values according to different methods. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 243feeae6d9..49237ef2f58 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3866,7 +3866,7 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, - max_gap: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": """Interpolate values according to different methods. diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 5ef0fae1cee..5bdf0a8de67 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,7 +1,7 @@ import warnings from functools import partial from numbers import Number -from typing import Any, Callable, Dict, Sequence +from typing import Any, Callable, Dict, Hashable, Sequence, Union import numpy as np import pandas as pd @@ -14,7 +14,7 @@ from .variable import Variable, broadcast_variables -def _get_nan_block_lengths(obj, dim: str, index: Variable): +def _get_nan_block_lengths(obj, dim: Hashable, index: Variable): """ Return an object where each NaN element in 'obj' is replaced by the length of the gap the element is in. 
@@ -206,7 +206,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True): +def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): """get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -259,11 +259,11 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( self, - dim=None, - use_coordinate=True, - method="linear", - limit=None, - max_gap=None, + dim: Hashable = None, + use_coordinate: Union[bool, str] = True, + method: str = "linear", + limit: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs, ): """Interpolate values according to different methods. From 4bda69989326c41bd03ff62c3d93267c1b602ad3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 25 Oct 2019 09:09:24 -0600 Subject: [PATCH 26/29] update docstrings --- xarray/core/dataarray.py | 55 +++++++++++++++++++++++++-------------- xarray/core/dataset.py | 56 ++++++++++++++++++++++++++-------------- 2 files changed, 72 insertions(+), 39 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 585bf5be34d..206c9172429 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1989,47 +1989,62 @@ def interpolate_na( max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline', 'akima'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). 
Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their - respective``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + eqaully-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variariable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 or None for no limit. This filling is done regardless of the size of - the gap in the data. - max_gap : str, pandas.Timedelta or numpy.timedelta64, default None - Maximum size of gap that will be filled. Use None for no limit. When interpolating - along a datetime64 dimension and use_coordinate=True, max_gap can be one of the following: + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. 
When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + - a string that is valid input for pandas.to_timedelta - - a numpy.timedelta64 object - - a pandas.Timedelta object + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + <xarray.DataArray (x: 9)> + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively kwargs : dict, optional parameters passed verbatim to the underlying interpolation function Returns ------- - DataArray + interpolated: DataArray + Filled in DataArray. See also -------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index da168c6ea91..93b18ae473d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3876,44 +3876,62 @@ def interpolate_na( max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- - dim : Hashable + dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default).
Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + eqaully-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variariable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 or None for no limit. This filling is done regardless of the size of - the gap in the data. - max_gap : int, default None - Maximum size of gap that will be filled. Must be greater than 0 or None - for no limit. + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. 
When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + <xarray.DataArray (x: 9)> + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively kwargs : dict, optional parameters passed verbatim to the underlying interpolation function Returns ------- - Dataset + interpolated: Dataset + Filled in Dataset. See also -------- From 4acdd3b8f7c345c8cb0401ac5577654073b9bd13 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 25 Oct 2019 09:09:43 -0600 Subject: [PATCH 27/29] scipy intersphinx --- doc/conf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 7c1557a1e66..0e04f8ccde8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -340,9 +340,10 @@ # Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "iris": ("http://scitools.org.uk/iris/docs/latest/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "numba": ("https://numba.pydata.org/numba-doc/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "matplotlib": ("https://matplotlib.org", None), } From d9410b13acc048774f017f9779842999ab531473 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Nov 2019 19:07:06 -0700 Subject: [PATCH 28/29] fix tests --- xarray/core/missing.py | 6 +++--- xarray/tests/test_missing.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 5bdf0a8de67..117fcaf8f81 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -291,14 +291,14 @@ def interp_na( if isinstance(max_gap, str): try: - max_gap = pd.to_timedelta(max_gap).to_numpy() + max_gap = pd.to_timedelta(max_gap) except ValueError: raise ValueError( - f"Could not convert {max_gap!r} to a pandas timedelta using pandas.to_timedelta" + f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" ) if isinstance(max_gap, pd.Timedelta): - max_gap = max_gap.to_numpy() + max_gap = np.timedelta64(max_gap.value, "ns") max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 5c543166ab3..fe1cef9a557 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -496,7 +496,7 @@ def test_interpolate_na_max_gap_errors(da_time): with raises_regex(TypeError, "Expected 
integer or floating point"): da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) - with raises_regex(ValueError, "Could not convert 'huh' to a "): + with raises_regex(ValueError, "Could not convert 'huh' to timedelta64"): da_time.interpolate_na("t", max_gap="huh") From d844ba76d6a8ae2d28a975775de335eb42b13363 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Nov 2019 19:24:36 -0700 Subject: [PATCH 29/29] add bottleneck testing decorator. --- xarray/tests/test_missing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index fe1cef9a557..0b410383a34 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -500,6 +500,7 @@ def test_interpolate_na_max_gap_errors(da_time): da_time.interpolate_na("t", max_gap="huh") +@requires_bottleneck @pytest.mark.parametrize( "time_range_func", [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)],