From 090b49baecd52b89338ce26e8f3fe384bcab267f Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 25 Dec 2021 15:56:26 +0100 Subject: [PATCH 01/11] quantile: rename interpolation arg to method --- xarray/core/dataarray.py | 25 +++++++--------- xarray/core/dataset.py | 55 +++++++++++++++++++--------------- xarray/core/groupby.py | 28 +++++++++-------- xarray/core/variable.py | 48 ++++++++++++++++++++--------- xarray/tests/test_dataarray.py | 34 ++++++++++++++++++++- xarray/tests/test_dataset.py | 32 ++++++++++++++++++-- xarray/tests/test_groupby.py | 44 +++++++++++++++++++++++++++ xarray/tests/test_variable.py | 44 +++++++++++++++++++++++++++ 8 files changed, 242 insertions(+), 68 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 05d06400f2e..09f4a0ee0dc 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3424,11 +3424,12 @@ def sortby( def quantile( self, - q: Any, + q: np.typing.ArrayLike, dim: Union[Hashable, Sequence[Hashable], None] = None, - interpolation: str = "linear", + method: str = "linear", keep_attrs: bool = None, skipna: bool = True, + interpolation: str = None, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -3440,18 +3441,13 @@ def quantile( Quantile to compute, which must be between 0 and 1 inclusive. dim : hashable or sequence of hashable, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" + method : str, default: "linear" This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - - linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - - lower: ``i``. - - higher: ``j``. - - nearest: ``i`` or ``j``, whichever is nearest. - - midpoint: ``(i + j) / 2``. 
+ use when the desired quantile lies between two data points. + See numpy.quantile for available methods. + + This argument was previously called "interpolation", renamed in accordance + with numpy version 1.22.0. keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -3509,8 +3505,9 @@ def quantile( q, dim=dim, keep_attrs=keep_attrs, - interpolation=interpolation, + method=method, skipna=skipna, + interpolation=interpolation, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 83c7b154658..c1fb0ee5dc5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6135,12 +6135,13 @@ def sortby(self, variables, ascending=True): def quantile( self, - q, + q: np.typing.ArrayLike, dim=None, - interpolation="linear", - numeric_only=False, - keep_attrs=None, - skipna=True, + method: str = "linear", + numeric_only: bool = False, + keep_attrs: bool = None, + skipna: bool = True, + interpolation: str = None, ): """Compute the qth quantile of the data along the specified dimension. @@ -6153,18 +6154,13 @@ def quantile( Quantile to compute, which must be between 0 and 1 inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" + method : str, default: "linear" This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + use when the desired quantile lies between two data points. + See numpy.quantile for available methods. 
+ + This argument was previously called "interpolation", renamed in accordance + with numpy version 1.22.0. keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -6225,15 +6221,30 @@ def quantile( a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9 """ + # interpolation renamed to method in version 0.21.0 + # check here and in variable to avoid repeated warnings + if interpolation is not None: + warnings.warn( + "The `interpolation` argument to quantile was renamed to `method`.", + FutureWarning, + ) + + if method != "linear": + raise TypeError("Cannot pass interpolation and method keywords!") + + method = interpolation + interpolation = None + if isinstance(dim, str): dims = {dim} elif dim in [None, ...]: - dims = set(self.dims) + dim = list(self.dims.keys()) + dims = set(dim) else: dims = set(dim) _assert_empty( - [d for d in dims if d not in self.dims], + tuple(d for d in dims if d not in self.dims), "Dataset does not contain the dimensions: %s", ) @@ -6249,17 +6260,13 @@ def quantile( or np.issubdtype(var.dtype, np.number) or var.dtype == np.bool_ ): - if len(reduce_dims) == var.ndim: - # prefer to aggregate over axis=None rather than - # axis=(0, 1) if they will be equivalent, because - # the former is often more efficient - reduce_dims = None variables[name] = var.quantile( q, dim=reduce_dims, - interpolation=interpolation, + method=method, keep_attrs=keep_attrs, skipna=skipna, + interpolation=interpolation, ) else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 185b4ae5bec..ae18355f384 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -549,7 +549,13 @@ def fillna(self, value): return ops.fillna(self, value) def quantile( - self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + self, + q, + dim=None, + method="linear", + keep_attrs=None, + skipna=True, + interpolation=None, ): 
"""Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -562,18 +568,13 @@ def quantile( dim : ..., str or sequence of str, optional Dimension(s) over which to apply quantile. Defaults to the grouped dimension. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" + method : str, default: "linear" This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + use when the desired quantile lies between two data points. + See numpy.quantile for available methods. + + This argument was previously called "interpolation", renamed in accordance + with numpy version 1.22.0. skipna : bool, optional Whether to skip missing values when aggregating. 
@@ -648,9 +649,10 @@ def quantile( shortcut=False, q=q, dim=dim, - interpolation=interpolation, + method=method, keep_attrs=keep_attrs, skipna=skipna, + interpolation=interpolation, ) return out diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e2d02b41a17..ec1a265b1b2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -21,6 +21,7 @@ import numpy as np import pandas as pd +from packaging.version import Version import xarray as xr # only for Dataset and DataArray @@ -1978,8 +1979,14 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): return self.broadcast_equals(other, equiv=equiv) def quantile( - self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True - ): + self, + q: np.typing.ArrayLike, + dim=None, + method: str = "linear", + keep_attrs: bool = None, + skipna: bool = True, + interpolation: str = None, + ) -> "Variable": """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1991,18 +1998,14 @@ def quantile( inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" + method : str, default: "linear" This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + use when the desired quantile lies between two data points. + See numpy.quantile for available methods. + + This argument was previously called "interpolation", renamed in accordance + with numpy version 1.22.0. 
+ keep_attrs : bool, optional If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -2025,6 +2028,17 @@ def quantile( from .computation import apply_ufunc + if interpolation is not None: + warnings.warn( + "The `interpolation` argument to quantile was renamed to `method`.", + FutureWarning, + ) + + if method != "linear": + raise TypeError("Cannot pass interpolation and method keywords!") + + method = interpolation + _quantile_func = np.nanquantile if skipna else np.quantile if keep_attrs is None: @@ -2044,6 +2058,12 @@ def _wrapper(npa, **kwargs): return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) + + if Version(np.__version__) >= Version("1.22.0"): + kwargs = {"q": q, "axis": axis, "method": method} + else: + kwargs = {"q": q, "axis": axis, "interpolation": method} + result = apply_ufunc( _wrapper, self, @@ -2053,7 +2073,7 @@ def _wrapper(npa, **kwargs): output_dtypes=[np.float64], dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}), dask="parallelized", - kwargs={"q": q, "axis": axis, "interpolation": interpolation}, + kwargs=kwargs, ) # for backward compatibility diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d2ce59cbced..5bda0e96d50 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2509,7 +2509,7 @@ def test_reduce_out(self): @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim, skipna): + def test_quantile(self, q, axis, dim, skipna) -> None: actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) _percentile_func = np.nanpercentile if skipna else np.percentile expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) @@ -2521,6 +2521,38 @@ def test_quantile(self, q, axis, dim, skipna): assert actual.attrs == self.attrs + 
@pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_method(self, method) -> None: + q = [0.25, 0.5, 0.75] + actual = DataArray(self.va).quantile(q, method=method) + + if Version(np.__version__) >= Version("1.22.0"): + expected = np.nanquantile(self.dv.values, np.array(q), method=method) # type: ignore[call-arg] + else: + expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) # type: ignore[call-arg] + + np.testing.assert_allclose(actual.values, expected) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecated(self, method) -> None: + + da = DataArray(self.va) + q = [0.25, 0.5, 0.75] + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = da.quantile(q, interpolation=method) + + expected = da.quantile(q, method=method) + + np.testing.assert_allclose(actual.values, expected.values) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + da.quantile(q, method=method, interpolation=method) + def test_reduce_keep_attrs(self): # Test dropped attrs vm = self.va.mean() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 16148c21b43..4d765ae58d8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4711,7 +4711,7 @@ def test_reduce_keepdims(self): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q, skipna): + def test_quantile(self, q, skipna) -> None: ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: @@ -4732,7 +4732,7 @@ def test_quantile(self, q, skipna): assert all(d not in ds_quantile.dims for d in dim) @pytest.mark.parametrize("skipna", [True, False]) - def test_quantile_skipna(self, skipna): + def test_quantile_skipna(self, skipna) -> None: q = 0.1 dim = "time" ds = 
Dataset({"a": ([dim], np.arange(0, 11))}) @@ -4744,6 +4744,34 @@ def test_quantile_skipna(self, skipna): expected = Dataset({"a": value}, coords={"quantile": q}) assert_identical(result, expected) + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_method(self, method) -> None: + + ds = create_test_data(seed=123) + q = [0.25, 0.5, 0.75] + + result = ds.quantile(q, method=method) + + assert_identical(result.var1, ds.var1.quantile(q, method=method)) + assert_identical(result.var2, ds.var2.quantile(q, method=method)) + assert_identical(result.var3, ds.var3.quantile(q, method=method)) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecated(self, method) -> None: + + ds = create_test_data(seed=123) + q = [0.25, 0.5, 0.75] + + with warnings.catch_warnings(record=True) as w: + ds.quantile(q, interpolation=method) + + # ensure the warning is only raised once + assert len(w) == 1 + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + ds.quantile(q, method=method, interpolation=method) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d48726e8304..73d1a16f4af 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1,3 +1,6 @@ +import warnings +from typing import Union + import numpy as np import pandas as pd import pytest @@ -273,6 +276,15 @@ def test_da_groupby_quantile() -> None: ) assert_identical(expected, actual) + # method keyword + array = xr.DataArray(data=[1, 2, 3, 4], coords={"x": [1, 1, 2, 2]}, dims="x") + + expected = xr.DataArray( + data=[1, 3], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) + actual = array.groupby("x").quantile(0.5, method="lower") + assert_identical(expected, actual) + def test_ds_groupby_quantile() -> None: ds = xr.Dataset( @@ -367,6 +379,38 @@ def 
test_ds_groupby_quantile() -> None: ) assert_identical(expected, actual) + ds = xr.Dataset(data_vars={"a": ("x", [1, 2, 3, 4])}, coords={"x": [1, 1, 2, 2]}) + + # method keyword + expected = xr.Dataset( + data_vars={"a": ("x", [1, 3])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5, method="lower") + assert_identical(expected, actual) + + +@pytest.mark.parametrize("as_dataset", [False, True]) +def test_groupby_quantile_interpolation_deprecated(as_dataset) -> None: + + array = xr.DataArray(data=[1, 2, 3, 4], coords={"x": [1, 1, 2, 2]}, dims="x") + + arr: Union[xr.DataArray, xr.Dataset] + arr = array.to_dataset(name="name") if as_dataset else array + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = arr.quantile(0.5, interpolation="lower") + + expected = arr.quantile(0.5, method="lower") + + assert_identical(actual, expected) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + arr.quantile(0.5, method="lower", interpolation="lower") + def test_da_groupby_assign_coords() -> None: actual = xr.DataArray( diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 3267af8b45b..67b8c357074 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -7,6 +7,7 @@ import pandas as pd import pytest import pytz +from packaging.version import Version from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing @@ -1720,6 +1721,49 @@ def test_quantile_dask(self, q, axis, dim): expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + @pytest.mark.parametrize( + "use_dask", [pytest.param(True, marks=requires_dask), False] + ) + def 
test_quantile_method(self, method, use_dask) -> None: + + v = Variable(["x", "y"], self.d) + if use_dask: + v = v.chunk({"x": 2}) + + q = np.array([0.25, 0.5, 0.75]) + actual = v.quantile(q, dim="y", method=method) + + if Version(np.__version__) >= Version("1.22"): + expected = np.nanquantile(self.d, q, axis=1, method=method) # type: ignore[call-arg] + else: + expected = np.nanquantile(self.d, q, axis=1, interpolation=method) # type: ignore[call-arg] + + if use_dask: + assert isinstance(actual.data, dask_array_type) + + np.testing.assert_allclose(actual.values, expected) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecation(self, method) -> None: + + v = Variable(["x", "y"], self.d) + q = np.array([0.25, 0.5, 0.75]) + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = v.quantile(q, dim="y", interpolation=method) + + expected = v.quantile(q, dim="y", method=method) + + np.testing.assert_allclose(actual.values, expected.values) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + v.quantile(q, dim="y", interpolation=method, method=method) + @requires_dask def test_quantile_chunked_dim_error(self): v = Variable(["x", "y"], self.d).chunk({"x": 2}) From ea5dcf24747d99511fcc29dcecd21f188b4e3ca6 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 25 Dec 2021 16:11:55 +0100 Subject: [PATCH 02/11] add whats new entry --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1c4b49097a3..13d5b2f67f7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,9 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) + to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`). + By `Mathias Hauser <https://github.com/mathause>`_.
Deprecations ~~~~~~~~~~~~ From cb8d24e88abf0bf5e385f87737a9b17d26f9880a Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 25 Dec 2021 16:17:48 +0100 Subject: [PATCH 03/11] Apply suggestions from code review --- xarray/core/dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c1fb0ee5dc5..295de8cfeb2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6233,7 +6233,6 @@ def quantile( raise TypeError("Cannot pass interpolation and method keywords!") method = interpolation - interpolation = None if isinstance(dim, str): dims = {dim} @@ -6266,7 +6265,6 @@ def quantile( method=method, keep_attrs=keep_attrs, skipna=skipna, - interpolation=interpolation, ) else: From bfe48579546b2124b0b69be295dab8f7d66229ef Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 29 Dec 2021 22:24:24 +0100 Subject: [PATCH 04/11] fix ArrayLike --- xarray/core/dataarray.py | 3 ++- xarray/core/dataset.py | 3 ++- xarray/core/variable.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 09f4a0ee0dc..5c34b4fa620 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -56,6 +56,7 @@ from .indexes import Index, Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords +from .npcompat import ArrayLike from .options import OPTIONS, _get_keep_attrs from .utils import ( Default, @@ -3424,7 +3425,7 @@ def sortby( def quantile( self, - q: np.typing.ArrayLike, + q: ArrayLike, dim: Union[Hashable, Sequence[Hashable], None] = None, method: str = "linear", keep_attrs: bool = None, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b9afa428f5b..ba065efa1f8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -79,6 +79,7 @@ merge_data_and_coords, ) from .missing import get_clean_interp_index +from 
.npcompat import ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array, sparse_array_type from .utils import ( @@ -6135,7 +6136,7 @@ def sortby(self, variables, ascending=True): def quantile( self, - q: np.typing.ArrayLike, + q: ArrayLike, dim=None, method: str = "linear", numeric_only: bool = False, diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ec1a265b1b2..f64fe041fad 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -36,6 +36,7 @@ VectorizedIndexer, as_indexable, ) +from .npcompat import ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import ( DuckArrayModule, @@ -1980,7 +1981,7 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): def quantile( self, - q: np.typing.ArrayLike, + q: ArrayLike, dim=None, method: str = "linear", keep_attrs: bool = None, From 7bea46f84bfe5dc630bb05aea820bd0f4702e1af Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 18 Jan 2022 22:26:51 +0100 Subject: [PATCH 05/11] type dim --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 5 +++-- xarray/core/variable.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fb30f124f92..7f4f22aeb88 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3435,7 +3435,7 @@ def sortby( def quantile( self, q: ArrayLike, - dim: Union[Hashable, Sequence[Hashable], None] = None, + dim: Union[str, Sequence[Hashable], None] = None, method: str = "linear", keep_attrs: bool = None, skipna: bool = True, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3cba692164c..af5caf43dcb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6154,7 +6154,7 @@ def sortby(self, variables, ascending=True): def quantile( self, q: ArrayLike, - dim=None, + dim: Union[str, Iterable[Hashable]] = None, method: str = "linear", numeric_only: bool = False, keep_attrs: 
bool = None, @@ -6252,9 +6252,10 @@ def quantile( method = interpolation + dims: set[Hashable] if isinstance(dim, str): dims = {dim} - elif dim in [None, ...]: + elif dim is None or dim is ...: dim = list(self.dims.keys()) dims = set(dim) else: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 64c36a3f583..1c11b27a5d2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1984,7 +1984,7 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): def quantile( self, q: ArrayLike, - dim=None, + dim: Union[str | Sequence[Hashable]] = None, method: str = "linear", keep_attrs: bool = None, skipna: bool = True, From a8bd471778889b06561585a635148b9cc23f7cbe Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 18 Jan 2022 22:36:44 +0100 Subject: [PATCH 06/11] cleanup --- xarray/core/dataset.py | 3 +-- xarray/core/variable.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index af5caf43dcb..d2cb5f321af 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6256,8 +6256,7 @@ def quantile( if isinstance(dim, str): dims = {dim} elif dim is None or dim is ...: - dim = list(self.dims.keys()) - dims = set(dim) + dims = set(self.dims) else: dims = set(dim) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1c11b27a5d2..74a170c830b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1984,7 +1984,7 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): def quantile( self, q: ArrayLike, - dim: Union[str | Sequence[Hashable]] = None, + dim: Union[str, Sequence[Hashable]] = None, method: str = "linear", keep_attrs: bool = None, skipna: bool = True, From 0e5e4d35227b14696956e08640d51775abd9b3ee Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 20 Jan 2022 17:45:06 +0100 Subject: [PATCH 07/11] update docstrings --- xarray/core/dataarray.py | 37 ++++++++++++++++++++++++++++++++----- 
xarray/core/dataset.py | 37 ++++++++++++++++++++++++++++++++----- xarray/core/groupby.py | 37 ++++++++++++++++++++++++++++++++----- xarray/core/variable.py | 38 ++++++++++++++++++++++++++++++++------ 4 files changed, 128 insertions(+), 21 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e22ee9a26a9..4e5d13623b7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3445,12 +3445,33 @@ def quantile( dim : hashable or sequence of hashable, optional Dimension(s) over which to apply quantile. method : str, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points. - See numpy.quantile for available methods. + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' (*) + 2. 'averaged_inverted_cdf' (*) + 3. 'closest_observation' (*) + 4. 'interpolated_inverted_cdf' (*) + 5. 'hazen' (*) + 6. 'weibull' (*) + 7. 'linear' (default) + 8. 'median_unbiased' (*) + 9. 'normal_unbiased' (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default 'linear' (7.) option are also available: + + * 'lower' + * 'higher' + * 'midpoint' + * 'nearest' + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. - This argument was previously called "interpolation", renamed in accordance - with numpy version 1.22.0. keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. 
If False (default), the new @@ -3502,6 +3523,12 @@ def quantile( Coordinates: * y (y) float64 1.0 1.5 2.0 2.5 * quantile (quantile) float64 0.0 0.5 1.0 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ ds = self._to_temp_dataset().quantile( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 700122b48ef..1006e4b57e5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6163,12 +6163,33 @@ def quantile( dim : str or sequence of str, optional Dimension(s) over which to apply quantile. method : str, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points. - See numpy.quantile for available methods. + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' (*) + 2. 'averaged_inverted_cdf' (*) + 3. 'closest_observation' (*) + 4. 'interpolated_inverted_cdf' (*) + 5. 'hazen' (*) + 6. 'weibull' (*) + 7. 'linear' (default) + 8. 'median_unbiased' (*) + 9. 'normal_unbiased' (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default 'linear' (7.) option are also available: + + * 'lower' + * 'higher' + * 'midpoint' + * 'nearest' + + See :py:func:`numpy.quantile` or [1]_ for a description. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. - This argument was previously called "interpolation", renamed in accordance - with numpy version 1.22.0. keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. 
If False (default), the new @@ -6227,6 +6248,12 @@ def quantile( * quantile (quantile) float64 0.0 0.5 1.0 Data variables: a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ # interpolation renamed to method in version 0.21.0 diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ae18355f384..335945bfb6c 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -569,12 +569,33 @@ def quantile( Dimension(s) over which to apply quantile. Defaults to the grouped dimension. method : str, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points. - See numpy.quantile for available methods. + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' (*) + 2. 'averaged_inverted_cdf' (*) + 3. 'closest_observation' (*) + 4. 'interpolated_inverted_cdf' (*) + 5. 'hazen' (*) + 6. 'weibull' (*) + 7. 'linear' (default) + 8. 'median_unbiased' (*) + 9. 'normal_unbiased' (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default 'linear' (7.) option are also available: + + * 'lower' + * 'higher' + * 'midpoint' + * 'nearest' + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. - This argument was previously called "interpolation", renamed in accordance - with numpy version 1.22.0. skipna : bool, optional Whether to skip missing values when aggregating. 
@@ -640,6 +661,12 @@ def quantile( * y (y) int64 1 2 Data variables: a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ if dim is None: dim = self._group_dim diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 076a7a07a9a..855a168ce2b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1993,12 +1993,32 @@ def quantile( dim : str or sequence of str, optional Dimension(s) over which to apply quantile. method : str, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points. - See numpy.quantile for available methods. - - This argument was previously called "interpolation", renamed in accordance - with numpy version 1.22.0. + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' (*) + 2. 'averaged_inverted_cdf' (*) + 3. 'closest_observation' (*) + 4. 'interpolated_inverted_cdf' (*) + 5. 'hazen' (*) + 6. 'weibull' (*) + 7. 'linear' (default) + 8. 'median_unbiased' (*) + 9. 'normal_unbiased' (*) + + The first three methods are discontinuous. The following discontinuous + variations of the default 'linear' (7.) option are also available: + + * 'lower' + * 'higher' + * 'midpoint' + * 'nearest' + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterisk require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0.
keep_attrs : bool, optional If True, the variable's attributes (`attrs`) will be copied from @@ -2018,6 +2038,12 @@ def quantile( -------- numpy.nanquantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ from .computation import apply_ufunc From 8fe04af6336ba29c002d7cfec3427dd40b6d4aea Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 20 Jan 2022 18:27:58 +0100 Subject: [PATCH 08/11] indentation and quotation marks --- xarray/core/dataarray.py | 28 ++++++++++++++-------------- xarray/core/dataset.py | 28 ++++++++++++++-------------- xarray/core/groupby.py | 28 ++++++++++++++-------------- xarray/core/variable.py | 28 ++++++++++++++-------------- 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4e5d13623b7..331dc5f2689 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3449,23 +3449,23 @@ def quantile( desired quantile lies between two data points. The options sorted by their R type as summarized in the H&F paper [1]_ are: - 1. 'inverted_cdf' (*) - 2. 'averaged_inverted_cdf' (*) - 3. 'closest_observation' (*) - 4. 'interpolated_inverted_cdf' (*) - 5. 'hazen' (*) - 6. 'weibull' (*) - 7. 'linear' (default) - 8. 'median_unbiased' (*) - 9. 'normal_unbiased' (*) + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) The first three methods are discontinuous. The following discontinuous - variations of the default 'linear' (7.) + variations of the default "linear" (7.)
option are also available: - * 'lower' - * 'higher' - * 'midpoint' - * 'nearest' + * "lower" + * "higher" + * "midpoint" + * "nearest" See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with an asterisk require numpy version 1.22 or newer. The "method" argument was diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1006e4b57e5..ebf836b3e2d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6167,23 +6167,23 @@ def quantile( desired quantile lies between two data points. The options sorted by their R type as summarized in the H&F paper [1]_ are: - 1. 'inverted_cdf' (*) - 2. 'averaged_inverted_cdf' (*) - 3. 'closest_observation' (*) - 4. 'interpolated_inverted_cdf' (*) - 5. 'hazen' (*) - 6. 'weibull' (*) - 7. 'linear' (default) - 8. 'median_unbiased' (*) - 9. 'normal_unbiased' (*) + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) The first three methods are discontinuous. The following discontinuous - variations of the default 'linear' (7.) option are also available: + variations of the default "linear" (7.) option are also available: - * 'lower' - * 'higher' - * 'midpoint' - * 'nearest' + * "lower" + * "higher" + * "midpoint" + * "nearest" See :py:func:`numpy.quantile` or [1]_ for a description. Methods marked with an asterisk require numpy version 1.22 or newer. The "method" argument was diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 335945bfb6c..daaaed3c0b2 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -573,23 +573,23 @@ def quantile( desired quantile lies between two data points. The options sorted by their R type as summarized in the H&F paper [1]_ are: - 1. 'inverted_cdf' (*) - 2. 'averaged_inverted_cdf' (*) - 3. 'closest_observation' (*) - 4. 'interpolated_inverted_cdf' (*) - 5.
'hazen' (*) - 6. 'weibull' (*) - 7. 'linear' (default) - 8. 'median_unbiased' (*) - 9. 'normal_unbiased' (*) + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) The first three methods are discontinuous. The following discontinuous - variations of the default 'linear' (7.) option are also available: + variations of the default "linear" (7.) option are also available: - * 'lower' - * 'higher' - * 'midpoint' - * 'nearest' + * "lower" + * "higher" + * "midpoint" + * "nearest" See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with an asterisk require numpy version 1.22 or newer. The "method" argument was diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 855a168ce2b..2e86dae9411 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1997,23 +1997,23 @@ def quantile( desired quantile lies between two data points. The options sorted by their R type as summarized in the H&F paper [1]_ are: - 1. 'inverted_cdf' (*) - 2. 'averaged_inverted_cdf' (*) - 3. 'closest_observation' (*) - 4. 'interpolated_inverted_cdf' (*) - 5. 'hazen' (*) - 6. 'weibull' (*) - 7. 'linear' (default) - 8. 'median_unbiased' (*) - 9. 'normal_unbiased' (*) + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) The first three methods are discontinuous. The following discontinuous - variations of the default 'linear' (7.) option are also available: + variations of the default "linear" (7.) option are also available: - * 'lower' - * 'higher' - * 'midpoint' - * 'nearest' + * "lower" + * "higher" + * "midpoint" + * "nearest" See :py:func:`numpy.quantile` or [1]_ for details.
Methods marked with an asterisk require numpy version 1.22 or newer. The "method" argument was From e4b3c3c9a22e32e95021be353bd3d34583d47ad5 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 3 Feb 2022 09:25:34 +0100 Subject: [PATCH 09/11] use Literal --- xarray/core/dataarray.py | 6 +++--- xarray/core/dataset.py | 6 +++--- xarray/core/npcompat.py | 28 +++++++++++++++++++++++++++- xarray/core/variable.py | 6 +++--- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 331dc5f2689..6fe865a9f64 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ from .indexes import Index, Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords -from .npcompat import ArrayLike +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .utils import ( Default, @@ -3429,10 +3429,10 @@ def quantile( self, q: ArrayLike, dim: str | Sequence[Hashable] | None = None, - method: str = "linear", + method: QUANTILE_METHODS = "linear", keep_attrs: bool = None, skipna: bool = True, - interpolation: str = None, + interpolation: QUANTILE_METHODS = None, ) -> DataArray: """Compute the qth quantile of the data along the specified dimension.
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 393521b1ec3..83126f157a4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -77,7 +77,7 @@ merge_data_and_coords, ) from .missing import get_clean_interp_index -from .npcompat import ArrayLike +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array, sparse_array_type from .utils import ( @@ -6140,11 +6140,11 @@ def quantile( self, q: ArrayLike, dim: str | Iterable[Hashable] | None = None, - method: str = "linear", + method: QUANTILE_METHODS = "linear", numeric_only: bool = False, keep_attrs: bool = None, skipna: bool = True, - interpolation: str = None, + interpolation: QUANTILE_METHODS = None, ): """Compute the qth quantile of the data along the specified dimension. diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 1eaa2728e8a..b5b98052fe9 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -28,7 +28,7 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from typing import TYPE_CHECKING, Any, Sequence, TypeVar, Union +from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, Union import numpy as np from packaging.version import Version @@ -169,3 +169,29 @@ def sliding_window_view( return as_strided( x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable ) + + +if Version(np.__version__) >= Version("1.22.0"): + QUANTILE_METHODS = Literal[ + "inverted_cdf", + "averaged_inverted_cdf", + "closest_observation", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "linear", + "median_unbiased", + "normal_unbiased", + "lower", + "higher", + "midpoint", + "nearest", + ] +else: + QUANTILE_METHODS = Literal[ # type: ignore[misc] + "linear", + "lower", + "higher", + "midpoint", + "nearest", + ] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2e86dae9411..6db795ce26f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -25,7 +25,7 @@ VectorizedIndexer, as_indexable, ) -from .npcompat import ArrayLike +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import ( DuckArrayModule, @@ -1976,10 +1976,10 @@ def quantile( self, q: ArrayLike, dim: str | Sequence[Hashable] | None = None, - method: str = "linear", + method: QUANTILE_METHODS = "linear", keep_attrs: bool = None, skipna: bool = True, - interpolation: str = None, + interpolation: QUANTILE_METHODS = None, ) -> Variable: """Compute the qth quantile of the data along the specified dimension. 
From a4881deb954f4e07328b4784610b69d9e08ec738 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 7 Feb 2022 09:33:44 +0100 Subject: [PATCH 10/11] update whats new --- doc/whats-new.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0c5944abdc3..35bed918711 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,9 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) + to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`). + By `Mathias Hauser <https://github.com/mathause>`_. Deprecations ~~~~~~~~~~~~ @@ -87,9 +90,7 @@ Breaking changes wrapping the text once the maximum display width has been exceeded. (:issue:`5546`, :pull:`5662`) By `Jimmy Westling <https://github.com/illviljan>`_. -- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) - to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`). - By `Mathias Hauser <https://github.com/mathause>`_. + Deprecations ~~~~~~~~~~~~ From cb929d0c46d07a99950923c1cc7dd04f1241b4b5 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 7 Feb 2022 09:34:46 +0100 Subject: [PATCH 11/11] remove newline --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 35bed918711..a8cd952609c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,7 +91,6 @@ Breaking changes By `Jimmy Westling <https://github.com/illviljan>`_. - Deprecations ~~~~~~~~~~~~ - Removed the lock kwarg from the zarr and pydap backends, completing the deprecation cycle started in :issue:`5256`.