diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst
index 53d8a52b342..3bc055ae78e 100644
--- a/doc/user-guide/interpolation.rst
+++ b/doc/user-guide/interpolation.rst
@@ -132,10 +132,11 @@ It is now possible to safely compute the difference ``other - interpolated``.
 Interpolation methods
 ---------------------
-We use :py:class:`scipy.interpolate.interp1d` for 1-dimensional interpolation.
+We use either :py:class:`scipy.interpolate.interp1d` or special interpolants from
+:py:mod:`scipy.interpolate` for 1-dimensional interpolation (see :py:meth:`~xarray.Dataset.interp`).
 For multi-dimensional interpolation, an attempt is first made to decompose the
 interpolation in a series of 1-dimensional interpolations, in which case
-:py:class:`scipy.interpolate.interp1d` is used. If a decomposition cannot be
+the relevant 1-dimensional interpolator is used. If a decomposition cannot be
 made (e.g. with advanced interpolation), :py:func:`scipy.interpolate.interpn` is
 used.
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 9b5291fc553..8d347f7d33d 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2217,19 +2217,44 @@ def interp(
         coords: Mapping[Any, Any] | None = None,
         method: InterpOptions = "linear",
         assume_sorted: bool = False,
+        reduce: bool = True,
         kwargs: Mapping[str, Any] | None = None,
         **coords_kwargs: Any,
     ) -> Self:
-        """Interpolate a DataArray onto new coordinates
+        """
+        Interpolate a DataArray onto new coordinates.
+
+        Performs univariate or multivariate interpolation of a DataArray onto new coordinates,
+        utilizing either NumPy or SciPy interpolation routines.
+
+        When interpolating along multiple dimensions, the process attempts to decompose the
+        interpolation into independent interpolations along one dimension at a time, unless
+        `reduce=False` is passed. The specific interpolation method and dimensionality
+        determine which interpolant is used:
+
+        1. **Interpolation along one dimension of 1D data (`method='linear'`)**
+           - Uses :py:func:`numpy.interp`, unless `fill_value='extrapolate'` is provided via `kwargs`.
+
+        2. **Interpolation along one dimension of N-dimensional data (N ≥ 1)**
+           - Methods {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "quintic", "polynomial"}
+             use :py:class:`scipy.interpolate.interp1d`, unless conditions permit the use of :py:func:`numpy.interp`
+             (as in the case of `method='linear'` for 1D data).
+           - If `method='polynomial'`, the `order` keyword argument must also be provided. In this case,
+             :py:class:`scipy.interpolate.interp1d` is called with `kind=order`.
+
+        3. **Special interpolants for interpolation along one dimension of N-dimensional data (N ≥ 1)**
+           - Depending on the `method`, the following interpolants from :py:mod:`scipy.interpolate` are used:
+             - `"pchip"`: :py:class:`scipy.interpolate.PchipInterpolator`
+             - `"barycentric"`: :py:class:`scipy.interpolate.BarycentricInterpolator`
+             - `"krogh"`: :py:class:`scipy.interpolate.KroghInterpolator`
+             - `"akima"` or `"makima"`: :py:class:`scipy.interpolate.Akima1DInterpolator`
+               (`makima` is handled by passing `makima` to `method`).
-        Performs univariate or multivariate interpolation of a DataArray onto
-        new coordinates using scipy's interpolation routines. If interpolating
-        along an existing dimension, either :py:class:`scipy.interpolate.interp1d`
-        or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`)
-        is called.
When interpolating along multiple existing dimensions, an - attempt is made to decompose the interpolation into multiple - 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator is called. - Otherwise, :py:func:`scipy.interpolate.interpn` is called. + 4. **Interpolation along multiple dimensions of multi-dimensional data** + - Uses :py:func:`scipy.interpolate.interpn` for methods {"linear", "nearest", "slinear", + "cubic", "quintic", "pchip"}. + + Out-of-range values are filled with NaN, unless specified otherwise via `kwargs` to the numpy/scipy interpolant. Parameters ---------- @@ -2238,20 +2263,16 @@ def interp( New coordinate can be a scalar, array-like or DataArray. If DataArrays are passed as new coordinates, their dimensions are used for the broadcasting. Missing values are skipped. - method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial"}, default: "linear" - The method used to interpolate. The method should be supported by - the scipy interpolator: - - - ``interp1d``: {"linear", "nearest", "zero", "slinear", - "quadratic", "cubic", "polynomial"} - - ``interpn``: {"linear", "nearest"} - - If ``"polynomial"`` is passed, the ``order`` keyword argument must - also be provided. + method : str + Interpolation method to use (see descriptions above). assume_sorted : bool, default: False If False, values of x can be in any order and they are sorted first. If True, x has to be an array of monotonically increasing values. + reduce : bool, default: True + If True, the interpolation is decomposed into independent interpolations along one dimension at a time, + where the interpolation coordinates are independent. Setting this to be True alters the behavior of certain + multi-dimensional interpolants compared to the default SciPy output. kwargs : dict-like or None, default: None Additional keyword arguments passed to scipy's interpolator. Valid options and their behavior depend whether ``interp1d`` or @@ -2267,12 +2288,14 @@ def interp( Notes ----- - scipy is required. + - SciPy is required for certain interpolation methods. + - Allowing `reduce=True` (the default) may alter the behavior of interpolation along multiple dimensions + compared to the default behavior in SciPy. See Also -------- - scipy.interpolate.interp1d - scipy.interpolate.interpn + Dataset.interp + Dataset.reindex_like :doc:`xarray-tutorial:fundamentals/02.2_manipulating_dimensions` Tutorial material on manipulating data resolution using :py:func:`~xarray.DataArray.interp` @@ -2354,6 +2377,7 @@ def interp( method=method, kwargs=kwargs, assume_sorted=assume_sorted, + reduce=reduce, **coords_kwargs, ) return self._from_temp_dataset(ds) @@ -2363,17 +2387,37 @@ def interp_like( other: T_Xarray, method: InterpOptions = "linear", assume_sorted: bool = False, + reduce: bool = True, kwargs: Mapping[str, Any] | None = None, ) -> Self: """Interpolate this object onto the coordinates of another object, filling out of range values with NaN. - If interpolating along a single existing dimension, - :py:class:`scipy.interpolate.interp1d` is called. When interpolating - along multiple existing dimensions, an attempt is made to decompose the - interpolation into multiple 1-dimensional interpolations. If this is - possible, :py:class:`scipy.interpolate.interp1d` is called. Otherwise, - :py:func:`scipy.interpolate.interpn` is called. 
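For orientation, here is a minimal usage sketch of the dispatch rules listed in the new ``interp``/``interp_like`` docstrings above. It is not part of the patch; the ``reduce`` keyword in the last call is the parameter this change introduces, so that line reflects intended post-patch behavior rather than released xarray:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.sin(np.linspace(0, 2 * np.pi, 10))[:, np.newaxis] * np.arange(3),
    dims=("x", "y"),
    coords={"x": np.linspace(0, 1, 10), "y": [0, 1, 2]},
)

# rule 1: 1-D data with method="linear" goes through numpy.interp
da.isel(y=0).interp(x=[0.15, 0.35])

# rule 3: a special 1-D interpolant from scipy.interpolate
da.interp(x=[0.15, 0.35], method="pchip")

# rule 4: with the new reduce=False, both dimensions are handed to
# scipy.interpolate.interpn in a single call instead of being decomposed
da.interp(x=[0.15, 0.35], y=[0.5, 1.5], method="linear", reduce=False)
```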
+        When interpolating along multiple dimensions, the process attempts to decompose the
+        interpolation into independent interpolations along one dimension at a time, unless
+        `reduce=False` is passed. The specific interpolation method and dimensionality
+        determine which interpolant is used:
+
+        1. **Interpolation along one dimension of 1D data (`method='linear'`)**
+           - Uses :py:func:`numpy.interp`, unless `fill_value='extrapolate'` is provided via `kwargs`.
+
+        2. **Interpolation along one dimension of N-dimensional data (N ≥ 1)**
+           - Methods {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "quintic", "polynomial"}
+             use :py:class:`scipy.interpolate.interp1d`, unless conditions permit the use of :py:func:`numpy.interp`
+             (as in the case of `method='linear'` for 1D data).
+           - If `method='polynomial'`, the `order` keyword argument must also be provided.
+
+        3. **Special interpolants for interpolation along one dimension of N-dimensional data (N ≥ 1)**
+           - Depending on the `method`, the following interpolants from :py:mod:`scipy.interpolate` are used:
+             - `"pchip"`: :py:class:`scipy.interpolate.PchipInterpolator`
+             - `"barycentric"`: :py:class:`scipy.interpolate.BarycentricInterpolator`
+             - `"krogh"`: :py:class:`scipy.interpolate.KroghInterpolator`
+             - `"akima"` or `"makima"`: :py:class:`scipy.interpolate.Akima1DInterpolator`
+               (`makima` is handled by passing `makima` to `method`).
+
+        4. **Interpolation along multiple dimensions of multi-dimensional data**
+           - Uses :py:func:`scipy.interpolate.interpn` for methods {"linear", "nearest", "slinear",
+             "cubic", "quintic", "pchip"}.
         Parameters
         ----------
@@ -2381,23 +2425,19 @@ def interp_like(
             Object with an 'indexes' attribute giving a mapping from dimension
             names to an 1d array-like, which provides coordinates upon
             which to index the variables in this dataset. Missing values are skipped.
-        method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial"}, default: "linear"
-            The method used to interpolate. The method should be supported by
-            the scipy interpolator:
-
-            - {"linear", "nearest", "zero", "slinear", "quadratic", "cubic",
-              "polynomial"} when ``interp1d`` is called.
-            - {"linear", "nearest"} when ``interpn`` is called.
-
-            If ``"polynomial"`` is passed, the ``order`` keyword argument must
-            also be provided.
+        method : str
+            Interpolation method to use (see descriptions above).
         assume_sorted : bool, default: False
            If False, values of coordinates that are interpolated over can be
            in any order and they are sorted first. If True, interpolated
            coordinates are assumed to be an array of monotonically increasing
            values.
+        reduce : bool, default: True
+            If True, the interpolation is decomposed into independent interpolations along one dimension at a time,
+            where the interpolation coordinates are independent. Setting this to True alters the behavior of certain
+            multi-dimensional interpolants compared to the default SciPy output.
        kwargs : dict, optional
-            Additional keyword passed to scipy's interpolator.
+            Additional keyword arguments passed to the interpolant.
         Returns
         -------
@@ -2405,6 +2445,17 @@ def interp_like(
             Another dataarray by interpolating this dataarray's data along the
             coordinates of the other object.
+        Notes
+        -----
+        scipy is required.
+        If the dataarray has object-type coordinates, reindex is used for these
+        coordinates instead of the interpolation.
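To make the object-dtype note above concrete, a small hedged sketch (the variable names are illustrative, not from the patch): the numeric dimension is interpolated, while the string-labelled dimension is matched by reindexing.

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6.0).reshape(2, 3),
    dims=("station", "x"),
    coords={"station": ["a", "b"], "x": [0.0, 1.0, 2.0]},
)
target = xr.DataArray(
    np.zeros((2, 4)),
    dims=("station", "x"),
    coords={"station": ["a", "b"], "x": [0.0, 0.5, 1.0, 1.5]},
)

# "x" is numeric and is interpolated; the object-dtype "station" coordinate
# is handled by reindexing, as described in the Notes above
da.interp_like(target)
```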
+ + See Also + -------- + DataArray.interp + DataArray.reindex_like + Examples -------- >>> data = np.arange(12).reshape(4, 3) @@ -2460,24 +2511,18 @@ def interp_like( Coordinates: * x (x) int64 32B 10 20 30 40 * y (y) int64 24B 70 80 90 - - Notes - ----- - scipy is required. - If the dataarray has object-type coordinates, reindex is used for these - coordinates instead of the interpolation. - - See Also - -------- - DataArray.interp - DataArray.reindex_like """ + if self.dtype.kind not in "uifc": raise TypeError( f"interp only works for a numeric type array. Given {self.dtype}." ) ds = self._to_temp_dataset().interp_like( - other, method=method, kwargs=kwargs, assume_sorted=assume_sorted + other, + method=method, + kwargs=kwargs, + assume_sorted=assume_sorted, + reduce=reduce, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f8cf23d188c..a9bb2d26242 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3884,20 +3884,45 @@ def interp( coords: Mapping[Any, Any] | None = None, method: InterpOptions = "linear", assume_sorted: bool = False, + reduce: bool = True, kwargs: Mapping[str, Any] | None = None, method_non_numeric: str = "nearest", **coords_kwargs: Any, ) -> Self: - """Interpolate a Dataset onto new coordinates + """ + Interpolate a Dataset onto new coordinates. + + Performs univariate or multivariate interpolation of a Dataset onto new coordinates, + utilizing either NumPy or SciPy interpolation routines. + + When interpolating along multiple dimensions, the process attempts to decompose the + interpolation into independent interpolations along one dimension at a time, unless + `reduce=False` is passed. The specific interpolation method and dimensionality + determine which interpolant is used: + + 1. **Interpolation along one dimension of 1D data (`method='linear'`)** + - Uses :py:class:`numpy.interp`, unless `fill_value='extrapolate'` is provided via `kwargs`. + + 2. **Interpolation along one dimension of N-dimensional data (N ≥ 1)** + - Methods {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "quintic", "polynomial"} + use :py:class:`scipy.interpolate.interp1d`, unless conditions permit the use of :py:class:`numpy.interp` + (as in the case of `method='linear'` for 1D data). + - If `method='polynomial'`, the `order` keyword argument must also be provided. In this case, + :py:class:`scipy.interpolate.interp1d` is called with `kind=order`. + + 3. **Special interpolants for interpolation along one dimension of N-dimensional data (N ≥ 1)** + - Depending on the `method`, the following interpolants from :py:class:`scipy.interpolate` are used: + - `"pchip"`: :py:class:`scipy.interpolate.PchipInterpolator` + - `"barycentric"`: :py:class:`scipy.interpolate.BarycentricInterpolator` + - `"krogh"`: :py:class:`scipy.interpolate.KroghInterpolator` + - `"akima"` or `"makima"`: :py:class:`scipy.interpolate.Akima1dInterpolator` + (`makima` is handled by passing `makima` to `method`). - Performs univariate or multivariate interpolation of a Dataset onto - new coordinates using scipy's interpolation routines. If interpolating - along an existing dimension, either :py:class:`scipy.interpolate.interp1d` - or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`) - is called. When interpolating along multiple existing dimensions, an - attempt is made to decompose the interpolation into multiple - 1-dimensional interpolations. 
If this is possible, the 1-dimensional interpolator - is called. Otherwise, :py:func:`scipy.interpolate.interpn` is called. + 4. **Interpolation along multiple dimensions of multi-dimensional data** + - Uses :py:func:`scipy.interpolate.interpn` for methods {"linear", "nearest", "slinear", + "cubic", "quintic", "pchip"}. + + Out-of-range values are filled with NaN, unless specified otherwise via `kwargs` to the numpy/scipy interpolant. Parameters ---------- @@ -3906,28 +3931,20 @@ def interp( New coordinate can be a scalar, array-like or DataArray. If DataArrays are passed as new coordinates, their dimensions are used for the broadcasting. Missing values are skipped. - method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial", \ - "barycentric", "krogh", "pchip", "spline", "akima"}, default: "linear" - String indicating which method to use for interpolation: - - - 'linear': linear interpolation. Additional keyword - arguments are passed to :py:func:`numpy.interp` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': - are passed to :py:func:`scipy.interpolate.interp1d`. If - ``method='polynomial'``, the ``order`` keyword argument must also be - provided. - - 'barycentric', 'krogh', 'pchip', 'spline', 'akima': use their - respective :py:class:`scipy.interpolate` classes. - + method : str + Interpolation method to use (see descriptions above). assume_sorted : bool, default: False If False, values of coordinates that are interpolated over can be in any order and they are sorted first. If True, interpolated coordinates are assumed to be an array of monotonically increasing values. + reduce : bool, default: True + If True, the interpolation is decomposed into independent interpolations of minimal dimensionality such that + the interpolation coordinates are independent. Setting this to be True alters the behavior of certain + multi-dimensional interpolants compared to the default SciPy output. kwargs : dict, optional - Additional keyword arguments passed to scipy's interpolator. Valid - options and their behavior depend whether ``interp1d`` or - ``interpn`` is used. + Additional keyword arguments passed to the interpolator. Valid + options and their behavior depend which interpolant is used. method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. ``"nearest"`` is used by default. @@ -3935,6 +3952,7 @@ def interp( The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. + Returns ------- interpolated : Dataset @@ -3942,7 +3960,9 @@ def interp( Notes ----- - scipy is required. + - SciPy is required for certain interpolation methods. + - Allowing `reduce=True` (the default) may alter the behavior of interpolation along multiple dimensions + compared to the default behavior in SciPy. See Also -------- @@ -4105,7 +4125,9 @@ def _validate_interp_indexer(x, new_x): if dtype_kind in "uifc": # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} - variables[name] = missing.interp(var, var_indexers, method, **kwargs) + variables[name] = missing.interp( + var, var_indexers, method, reduce=reduce, **kwargs + ) elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims): # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. 
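Because the `reduce` flag wired through to `missing.interp` above changes the numerical result for inseparable methods, a hedged sketch of the distinction may help. It assumes the new keyword as introduced by this patch, and `method="cubic"` through `interpn` needs a reasonably recent SciPy:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
ds = xr.Dataset(
    {"a": (("x", "y"), rng.standard_normal((6, 6)))},
    coords={"x": np.linspace(0, 1, 6), "y": np.linspace(0, 1, 6)},
)
newx = np.linspace(0.1, 0.9, 5)
newy = np.linspace(0.1, 0.9, 5)

# default (reduce=True): cubic interpolation applied one dimension at a time
sep = ds.interp(x=newx, y=newy, method="cubic")

# reduce=False: a single scipy.interpolate.interpn call over both dimensions
full = ds.interp(x=newx, y=newy, method="cubic", reduce=False)

# for an inseparable method such as "cubic" the two results generally differ,
# which is what the reduce documentation and Notes above warn about
print(float(np.abs(sep["a"] - full["a"]).max()))
```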
@@ -4166,18 +4188,43 @@ def interp_like( other: T_Xarray, method: InterpOptions = "linear", assume_sorted: bool = False, + reduce: bool = True, kwargs: Mapping[str, Any] | None = None, method_non_numeric: str = "nearest", ) -> Self: - """Interpolate this object onto the coordinates of another object, - filling the out of range values with NaN. + """Interpolate this object onto the coordinates of another object. + + Performs univariate or multivariate interpolation of a Dataset onto new coordinates, + utilizing either NumPy or SciPy interpolation routines. + + When interpolating along multiple dimensions, the process attempts to decompose the + interpolation into independent interpolations along one dimension at a time, unless + `reduce=False` is passed. The specific interpolation method and dimensionality + determine which interpolant is used: + + 1. **Interpolation along one dimension of 1D data (`method='linear'`)** + - Uses :py:class:`numpy.interp`, unless `fill_value='extrapolate'` is provided via `kwargs`. + + 2. **Interpolation along one dimension of N-dimensional data (N ≥ 1)** + - Methods {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "quintic", "polynomial"} + use :py:class:`scipy.interpolate.interp1d`, unless conditions permit the use of :py:class:`numpy.interp` + (as in the case of `method='linear'` for 1D data). + - If `method='polynomial'`, the `order` keyword argument must also be provided. In this case, + :py:class:`scipy.interpolate.interp1d` is called with `kind=order`. - If interpolating along a single existing dimension, - :py:class:`scipy.interpolate.interp1d` is called. When interpolating - along multiple existing dimensions, an attempt is made to decompose the - interpolation into multiple 1-dimensional interpolations. If this is - possible, :py:class:`scipy.interpolate.interp1d` is called. Otherwise, - :py:func:`scipy.interpolate.interpn` is called. + 3. **Special interpolants for interpolation along one dimension of N-dimensional data (N ≥ 1)** + - Depending on the `method`, the following interpolants from :py:class:`scipy.interpolate` are used: + - `"pchip"`: :py:class:`scipy.interpolate.PchipInterpolator` + - `"barycentric"`: :py:class:`scipy.interpolate.BarycentricInterpolator` + - `"krogh"`: :py:class:`scipy.interpolate.KroghInterpolator` + - `"akima"` or `"makima"`: :py:class:`scipy.interpolate.Akima1dInterpolator` + (`makima` is handled by passing `makima` to `method`). + + 4. **Interpolation along multiple dimensions of multi-dimensional data** + - Uses :py:func:`scipy.interpolate.interpn` for methods {"linear", "nearest", "slinear", + "cubic", "quintic", "pchip"}. + + Out-of-range values are filled with NaN, unless specified otherwise via `kwargs` to the numpy/scipy interpolant. Parameters ---------- @@ -4185,26 +4232,20 @@ def interp_like( Object with an 'indexes' attribute giving a mapping from dimension names to an 1d array-like, which provides coordinates upon which to index the variables in this dataset. Missing values are skipped. - method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial", \ - "barycentric", "krogh", "pchip", "spline", "akima"}, default: "linear" - String indicating which method to use for interpolation: - - - 'linear': linear interpolation. Additional keyword - arguments are passed to :py:func:`numpy.interp` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': - are passed to :py:func:`scipy.interpolate.interp1d`. 
If - ``method='polynomial'``, the ``order`` keyword argument must also be - provided. - - 'barycentric', 'krogh', 'pchip', 'spline', 'akima': use their - respective :py:class:`scipy.interpolate` classes. - + method : str + Interpolation method to use (see descriptions above). assume_sorted : bool, default: False If False, values of coordinates that are interpolated over can be in any order and they are sorted first. If True, interpolated coordinates are assumed to be an array of monotonically increasing values. + reduce : bool, default: True + If True, the interpolation is decomposed into independent 1-dimensional interpolations such that + the interpolation coordinates are independent. Setting this to be True alters the behavior of certain + multi-dimensional interpolants compared to the default SciPy output. kwargs : dict, optional - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to the interpolator. Valid + options and their behavior depend which interpolant is use method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. ``"nearest"`` is used by default. diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 4523e4f8232..83fcee47a3b 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -20,7 +20,7 @@ timedelta_to_numeric, ) from xarray.core.options import _get_keep_attrs -from xarray.core.types import Interp1dOptions, InterpOptions +from xarray.core.types import Interp1dOptions, InterpnOptions, InterpOptions from xarray.core.utils import OrderedSet, is_scalar from xarray.core.variable import Variable, broadcast_variables from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -154,6 +154,9 @@ def __init__( raise ValueError("order is required when method=polynomial") method = order + if method == "quintic": + method = 5 + self.method = method self.cons_kwargs = kwargs @@ -503,6 +506,7 @@ def _get_interpolator( interp_class = _import_interpolant("KroghInterpolator", method) elif method == "pchip": kwargs.update(axis=-1) + kwargs.setdefault("extrapolate", False) interp_class = _import_interpolant("PchipInterpolator", method) elif method == "spline": utils.emit_user_level_warning( @@ -520,9 +524,6 @@ def _get_interpolator( elif method == "makima": kwargs.update(method="makima", axis=-1) interp_class = _import_interpolant("Akima1DInterpolator", method) - elif method == "makima": - kwargs.update(method="makima", axis=-1) - interp_class = _import_interpolant("Akima1DInterpolator", method) else: raise ValueError(f"{method} is not a valid scipy interpolator") else: @@ -536,8 +537,7 @@ def _get_interpolator_nd(method, **kwargs): returns interpolator class and keyword arguments for the class """ - valid_methods = ["linear", "nearest"] - + valid_methods = tuple(get_args(InterpnOptions)) if method in valid_methods: kwargs.update(method=method) kwargs.setdefault("bounds_error", False) @@ -601,7 +601,7 @@ def _floatize_x(x, new_x): return x, new_x -def interp(var, indexes_coords, method: InterpOptions, **kwargs): +def interp(var, indexes_coords, method: InterpOptions, reduce: bool = True, **kwargs): """Make an interpolation of Variable Parameters @@ -615,6 +615,10 @@ def interp(var, indexes_coords, method: InterpOptions, **kwargs): One of {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'}. For multidimensional interpolation, only {'linear', 'nearest'} can be used. 
+ reduce: + Decompose the interpolation along independent interpolation dimensions when + possible. This will likely improve performance over true multi-dimensional + interpolation but will alter the result for certain interpolators. **kwargs keyword arguments to be passed to scipy.interpolate @@ -631,8 +635,15 @@ def interp(var, indexes_coords, method: InterpOptions, **kwargs): return var.copy() result = var - # decompose the interpolation into a succession of independent interpolation - for indep_indexes_coords in decompose_interp(indexes_coords): + + if reduce: + # decompose the interpolation into a succession of independent interpolation. This may + # affect the mathematical behavior of certain nd interpolants. + indexes_coords = decompose_interp(indexes_coords) + else: + indexes_coords = [indexes_coords] + + for indep_indexes_coords in indexes_coords: var = result # target dimensions diff --git a/xarray/core/types.py b/xarray/core/types.py index 64acc2c4aa4..0c4849496a7 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -228,12 +228,20 @@ def copy( JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override"] Interp1dOptions = Literal[ - "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial" + "linear", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "quintic", + "polynomial", ] InterpolantOptions = Literal[ "barycentric", "krogh", "pchip", "spline", "akima", "makima" ] -InterpOptions = Union[Interp1dOptions, InterpolantOptions] +InterpnOptions = Literal["linear", "nearest", "slinear", "cubic", "quintic", "pchip"] +InterpOptions = Union[Interp1dOptions, InterpolantOptions, InterpnOptions] DatetimeUnitOptions = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 6722e8d9404..416d2320a29 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1,6 +1,6 @@ from __future__ import annotations -from itertools import combinations, permutations +from itertools import combinations, permutations, product from typing import cast import numpy as np @@ -9,7 +9,7 @@ import xarray as xr from xarray.coding.cftimeindex import _parse_array_of_cftime_strings -from xarray.core.types import InterpOptions +from xarray.core.types import InterpnOptions, InterpOptions from xarray.tests import ( assert_allclose, assert_equal, @@ -62,6 +62,43 @@ def get_example_data(case: int) -> xr.DataArray: raise ValueError("case must be 1-4") +@pytest.fixture +def nd_interp_coords(): + # interpolation indices for nd interpolation of da from case 3 of get_example_data + + da = get_example_data(case=3) + + coords = {} + # grid -> grid + coords["xdestnp"] = np.linspace(0.1, 1.0, 11) + coords["ydestnp"] = np.linspace(0.0, 0.2, 10) + coords["zdestnp"] = da.get_index("z") # type: ignore[assignment] + # list of the points defined by the above mesh in C order + mesh_x, mesh_y, mesh_z = np.meshgrid( + coords["xdestnp"], coords["ydestnp"], coords["zdestnp"], indexing="ij" + ) + coords["grid_grid_points"] = np.column_stack( + [mesh_x.ravel(), mesh_y.ravel(), mesh_z.ravel()] + ) + + # grid -> oned + coords["xdest"] = xr.DataArray(np.linspace(0.1, 1.0, 11), dims="y") # type: ignore[assignment] + coords["ydest"] = xr.DataArray(np.linspace(0.0, 0.2, 11), dims="y") # type: ignore[assignment] + coords["zdest"] = da.z + # grid of the points defined by the oned gridded with zdest in C order + coords["grid_oned_points"] = np.array( + [ + (a, b, 
c) + for (a, b), c in product( + zip(coords["xdest"].data, coords["ydest"].data, strict=False), + coords["zdest"].data, + ) + ] + ) + + return coords + + def test_keywargs(): if not has_scipy: pytest.skip("scipy is not installed.") @@ -245,38 +282,39 @@ def func(obj, dim, new_x, method): assert_allclose(actual, expected.transpose("z", "w", "y", transpose_coords=True)) +@requires_scipy +@pytest.mark.parametrize("method", ("linear", "nearest", "slinear", "pchip")) @pytest.mark.parametrize( "case", [pytest.param(3, id="no_chunk"), pytest.param(4, id="chunked")] ) -def test_interpolate_nd(case: int) -> None: - if not has_scipy: - pytest.skip("scipy is not installed.") - +def test_interpolate_nd_separable( + case: int, method: InterpnOptions, nd_interp_coords +) -> None: if not has_dask and case == 4: pytest.skip("dask is not installed in the environment.") da = get_example_data(case) # grid -> grid - xdestnp = np.linspace(0.1, 1.0, 11) - ydestnp = np.linspace(0.0, 0.2, 10) - actual = da.interp(x=xdestnp, y=ydestnp, method="linear") + xdestnp = nd_interp_coords["xdestnp"] + ydestnp = nd_interp_coords["ydestnp"] + actual = da.interp(x=xdestnp, y=ydestnp, method=method) - # linear interpolation is separateable - expected = da.interp(x=xdestnp, method="linear") - expected = expected.interp(y=ydestnp, method="linear") + # `method` is separable + expected = da.interp(x=xdestnp, method=method) + expected = expected.interp(y=ydestnp, method=method) assert_allclose(actual.transpose("x", "y", "z"), expected.transpose("x", "y", "z")) # grid -> 1d-sample - xdest = xr.DataArray(np.linspace(0.1, 1.0, 11), dims="y") - ydest = xr.DataArray(np.linspace(0.0, 0.2, 11), dims="y") - actual = da.interp(x=xdest, y=ydest, method="linear") + xdest = nd_interp_coords["xdest"] + ydest = nd_interp_coords["ydest"] + actual = da.interp(x=xdest, y=ydest, method=method) - # linear interpolation is separateable + # `method` is separable expected_data = scipy.interpolate.RegularGridInterpolator( (da["x"], da["y"]), da.transpose("x", "y", "z").values, - method="linear", + method=method, bounds_error=False, fill_value=np.nan, )(np.stack([xdest, ydest], axis=-1)) @@ -287,18 +325,90 @@ def test_interpolate_nd(case: int) -> None: "z": da["z"], "y": ydest, "x": ("y", xdest.values), - "x2": da["x2"].interp(x=xdest), + "x2": da["x2"].interp(x=xdest, method=method), }, ) assert_allclose(actual.transpose("y", "z"), expected) # reversed order - actual = da.interp(y=ydest, x=xdest, method="linear") + actual = da.interp(y=ydest, x=xdest, method=method) assert_allclose(actual.transpose("y", "z"), expected) @requires_scipy -def test_interpolate_nd_nd() -> None: +@pytest.mark.parametrize("method", ("cubic", "quintic")) +@pytest.mark.parametrize( + "case", [pytest.param(3, id="no_chunk"), pytest.param(4, id="chunked")] +) +def test_interpolate_nd_inseparable( + case: int, method: InterpnOptions, nd_interp_coords +) -> None: + if not has_dask and case == 4: + pytest.skip("dask is not installed in the environment.") + + da = get_example_data(case) + + # grid -> grid + xdestnp = nd_interp_coords["xdestnp"] + ydestnp = nd_interp_coords["ydestnp"] + zdestnp = nd_interp_coords["zdestnp"] + grid_grid_points = nd_interp_coords["grid_grid_points"] + # the presence/absence of z cordinate may affect nd interpolants, even when the + # coordinate is unchanged + actual = da.interp(x=xdestnp, y=ydestnp, z=zdestnp, method=method, reduce=False) + expected_data = scipy.interpolate.interpn( + points=(da.x, da.y, da.z), + values=da.data, + xi=grid_grid_points, + 
method=method, + bounds_error=False, + ).reshape((len(xdestnp), len(ydestnp), len(zdestnp))) + expected = xr.DataArray( + expected_data, + dims=["x", "y", "z"], + coords={ + "x": xdestnp, + "y": ydestnp, + "z": zdestnp, + "x2": da["x2"].interp(x=xdestnp, method=method), + }, + ) + assert_allclose(actual.transpose("x", "y", "z"), expected.transpose("x", "y", "z")) + + # grid -> 1d-sample + xdest = nd_interp_coords["xdest"] + ydest = nd_interp_coords["ydest"] + zdest = nd_interp_coords["zdest"] + grid_oned_points = nd_interp_coords["grid_oned_points"] + actual = da.interp(x=xdest, y=ydest, z=zdest, method=method, reduce=False) + expected_data = scipy.interpolate.interpn( + points=(da.x, da.y, da.z), + values=da.data, + xi=grid_oned_points, + method=method, + bounds_error=False, + ).reshape([len(xdest), len(zdest)]) + expected = xr.DataArray( + expected_data, + dims=["y", "z"], + coords={ + "y": ydest, + "z": zdest, + "x": ("y", xdest.values), + "x2": da["x2"].interp(x=xdest, method=method), + }, + ) + + assert_allclose(actual.transpose("y", "z"), expected) + + # reversed order + actual = da.interp(y=ydest, x=xdest, z=zdest, method=method, reduce=False) + assert_allclose(actual.transpose("y", "z"), expected) + + +@requires_scipy +@pytest.mark.parametrize("method", ("linear", "nearest", "quintic")) +def test_interpolate_nd_nd(method: InterpnOptions) -> None: """Interpolate nd array with an nd indexer sharing coordinates.""" # Create original array a = [0, 2] @@ -353,14 +463,12 @@ def test_interpolate_nd_with_nan() -> None: xr.testing.assert_allclose(out2.drop_vars("a"), expected_ds) +@requires_scipy @pytest.mark.parametrize("method", ["linear"]) @pytest.mark.parametrize( "case", [pytest.param(0, id="no_chunk"), pytest.param(1, id="chunk_y")] ) def test_interpolate_scalar(method: InterpOptions, case: int) -> None: - if not has_scipy: - pytest.skip("scipy is not installed.") - if not has_dask and case in [1]: pytest.skip("dask is not installed in the environment.") @@ -384,33 +492,37 @@ def func(obj, new_x): assert_allclose(actual, expected) -@pytest.mark.parametrize("method", ["linear"]) +@requires_scipy +@pytest.mark.parametrize("method", ["linear", "quintic"]) @pytest.mark.parametrize( "case", [pytest.param(3, id="no_chunk"), pytest.param(4, id="chunked")] ) def test_interpolate_nd_scalar(method: InterpOptions, case: int) -> None: - if not has_scipy: - pytest.skip("scipy is not installed.") - if not has_dask and case in [4]: pytest.skip("dask is not installed in the environment.") da = get_example_data(case) xdest = 0.4 ydest = 0.05 + zdest = da.get_index("z") - actual = da.interp(x=xdest, y=ydest, method=method) + actual = da.interp(x=xdest, y=ydest, z=zdest, method=method) # scipy interpolation for the reference expected_data = scipy.interpolate.RegularGridInterpolator( - (da["x"], da["y"]), + (da["x"], da["y"], da["z"]), da.transpose("x", "y", "z").values, - method="linear", + method=method, bounds_error=False, fill_value=np.nan, - )(np.stack([xdest, ydest], axis=-1)) - - coords = {"x": xdest, "y": ydest, "x2": da["x2"].interp(x=xdest), "z": da["z"]} - expected = xr.DataArray(expected_data[0], dims=["z"], coords=coords) + )(np.asarray([(xdest, ydest, z_val) for z_val in zdest])) + + coords = { + "x": xdest, + "y": ydest, + "x2": da["x2"].interp(x=xdest, method=method), + "z": da["z"], + } + expected = xr.DataArray(expected_data, dims=["z"], coords=coords) assert_allclose(actual, expected) @@ -840,6 +952,7 @@ def test_interpolate_chunk_1d( * np.exp(z), coords=[("x", x), ("y", y), ("z", z)], 
) + kwargs = {"fill_value": "extrapolate"} # choose the data dimensions @@ -888,7 +1001,7 @@ def test_interpolate_chunk_1d( @requires_scipy @requires_dask -@pytest.mark.parametrize("method", ["linear", "nearest"]) +@pytest.mark.parametrize("method", ["linear", "nearest", "cubic"]) @pytest.mark.filterwarnings("ignore:Increasing number of chunks") def test_interpolate_chunk_advanced(method: InterpOptions) -> None: """Interpolate nd array with an nd indexer sharing coordinates."""
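As a closing illustration of what the chunked tests above exercise, a hedged sketch (requires dask; the array sizes and values are made up): interpolating a dask-backed DataArray along an unchunked dimension should agree with the eager computation.

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.random.default_rng(0).random((8, 8)),
    dims=("x", "y"),
    coords={"x": np.arange(8.0), "y": np.arange(8.0)},
).chunk({"y": 4})  # chunk a dimension we do not interpolate along

new_x = np.linspace(0.5, 6.5, 13)

# lazy (dask) result vs. eager result should match to numerical precision
chunked = da.interp(x=new_x, method="cubic")
dense = da.compute().interp(x=new_x, method="cubic")
xr.testing.assert_allclose(chunked.compute(), dense)
```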