Skip to content
forked from pydata/xarray

Commit

Permalink
finish up timedelta max_gap.
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Oct 21, 2019
1 parent 45d3c28 commit 980f475
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 24 deletions.
6 changes: 4 additions & 2 deletions doc/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,10 @@ for filling missing values via 1D interpolation.
Note that xarray slightly diverges from the pandas ``interpolate`` syntax by
providing the ``use_coordinate`` keyword which facilitates a clear specification
of which values to use as the index in the interpolation. xarray also provides the ``max_gap`` keyword argument to limit the interpolation to data gaps of length ``max_gap`` or smaller. See
:py:meth:`~xarray.DataArray.interpolate_na` for more.
of which values to use as the index in the interpolation.
xarray also provides the ``max_gap`` keyword argument to limit the interpolation to
data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na`
for more.

Aggregation
===========
Expand Down
9 changes: 6 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2018,9 +2018,12 @@ def interpolate_na(
Maximum number of consecutive NaNs to fill. Must be greater than 0
or None for no limit. This filling is done regardless of the size of
the gap in the data.
max_gap : int, default None
Maximum size of gap that will be filled. Must be greater than 0 or None
for no limit.
max_gap : int, str, pandas.Timedelta or numpy.timedelta64, default None
Maximum size of gap that will be filled. Use None for no limit. When interpolating
along a datetime64 dimension and use_coordinate=True, max_gap can be one of the following:
- a string that is valid input for pandas.to_timedelta
- a numpy.timedelta64 object
- a pandas.Timedelta object
kwargs : dict, optional
parameters passed verbatim to the underlying interpolation function
Expand Down
29 changes: 17 additions & 12 deletions xarray/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,24 +271,29 @@ def interp_na(

if max_gap is not None:
max_type = type(max_gap)
if isinstance(self.indexes[dim], pd.DatetimeIndex) and not isinstance(
max_gap, (np.timedelta64, str)
):
raise TypeError(
"expected max_gap of type str or timedelta64 since underlying index is DatetimeIndex but received %r"
% max_type
)
if isinstance(self.indexes[dim], pd.DatetimeIndex):
if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)):
raise TypeError(
f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}"
)

# TODO: better time offset checks
if isinstance(max_gap, (np.timedelta64, str)):
if not use_coordinate:
raise ValueError(
f"provided max_gap of type {max_type} but use_coordinate=False. Set use_coordinate=True instead."
)

if isinstance(max_gap, str):
max_gap = pd.to_timedelta(max_gap).to_numpy().astype(np.float64)
else:
max_gap = np.timedelta64(max_gap, "ns").astype(np.float64)
try:
max_gap = pd.to_timedelta(max_gap).to_numpy()
except ValueError:
raise ValueError(
f"Could not convert {max_gap!r} to a pandas timedelta using pandas.to_timedelta"
)

if isinstance(max_gap, pd.Timedelta):
max_gap = max_gap.to_numpy()

max_gap = np.timedelta64(max_gap, "ns").astype(np.float64)

# method
index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate)
Expand Down
32 changes: 25 additions & 7 deletions xarray/tests/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,15 +527,33 @@ def test_interpolate_na_max_gap():
assert_equal(expected, actual)


def test_interpolate_na_max_gap_datetime_errors():
da = xr.DataArray(
[np.nan, 1, 2, np.nan, np.nan, 4],
@pytest.fixture
def da_time():
return xr.DataArray(
[np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10],
dims=["t"],
coords={"t": pd.date_range("2001-01-01", freq="H", periods=6)},
coords={"t": pd.date_range("2001-01-01", freq="H", periods=11)},
)

with raises_regex(TypeError, "expected max_gap of type"):
da.interpolate_na("t", max_gap=1)

def test_interpolate_na_max_gap_datetime_errors(da_time):
with raises_regex(TypeError, "Underlying index is"):
da_time.interpolate_na("t", max_gap=1)

with raises_regex(ValueError, "but use_coordinate=False"):
da.interpolate_na("t", max_gap="1H", use_coordinate=False)
da_time.interpolate_na("t", max_gap="1H", use_coordinate=False)

with raises_regex(ValueError, "Could not convert 'huh' to a "):
da_time.interpolate_na("t", max_gap="huh")


# Verify that interpolate_na accepts a three-hour max_gap expressed as a
# string, a numpy.timedelta64 or a pandas.Timedelta, and gives the same result
# for each. The "transform" parameter runs the check both on the DataArray
# itself (identity) and on its Dataset form (to_dataset(name="a")).
@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")])
@pytest.mark.parametrize(
"max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")]
)
def test_interpolate_na_max_gap_time_specifier(da_time, max_gap, transform):
# Expected values relative to the da_time fixture data: the two-NaN run
# between 2 and 5 is filled in (3, 4), while the four-NaN run between 5 and 10
# is left as NaN.
expected = transform(
da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10])
)
# use_coordinate is left at its default here; max_gap is the only keyword
# varied by the parametrization.
actual = transform(da_time).interpolate_na("t", max_gap=max_gap)
assert_equal(actual, expected)

0 comments on commit 980f475

Please sign in to comment.