Skip to content

Commit

Permalink
quantile: rename interpolation arg to method (#6108)
Browse files Browse the repository at this point in the history
* quantile: rename interpolation arg to method

* add whats new entry

* Apply suggestions from code review

* fix ArrayLike

* type dim

* cleanup

* update docstrings

* indentation and quotation marks

* use Literal

* update whats new

* remove newline
  • Loading branch information
mathause authored Feb 7, 2022
1 parent 52a051a commit d47cf0c
Show file tree
Hide file tree
Showing 10 changed files with 386 additions and 75 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~

- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`)
to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`).
By `Mathias Hauser <https://github.com/mathause>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
57 changes: 41 additions & 16 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from .indexes import Index, Indexes, default_indexes, propagate_indexes
from .indexing import is_fancy_indexer
from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords
from .npcompat import QUANTILE_METHODS, ArrayLike
from .options import OPTIONS, _get_keep_attrs
from .utils import (
Default,
Expand Down Expand Up @@ -3426,11 +3427,12 @@ def sortby(

def quantile(
self,
q: Any,
dim: Hashable | Sequence[Hashable] | None = None,
interpolation: str = "linear",
q: ArrayLike,
dim: str | Sequence[Hashable] | None = None,
method: QUANTILE_METHODS = "linear",
keep_attrs: bool = None,
skipna: bool = True,
interpolation: QUANTILE_METHODS = None,
) -> DataArray:
"""Compute the qth quantile of the data along the specified dimension.
Expand All @@ -3442,18 +3444,34 @@ def quantile(
Quantile to compute, which must be between 0 and 1 inclusive.
dim : hashable or sequence of hashable, optional
Dimension(s) over which to apply quantile.
interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear"
This optional parameter specifies the interpolation method to
use when the desired quantile lies between two data points
``i < j``:
- linear: ``i + (j - i) * fraction``, where ``fraction`` is
the fractional part of the index surrounded by ``i`` and
``j``.
- lower: ``i``.
- higher: ``j``.
- nearest: ``i`` or ``j``, whichever is nearest.
- midpoint: ``(i + j) / 2``.
method : str, default: "linear"
This optional parameter specifies the interpolation method to use when the
desired quantile lies between two data points. The options sorted by their R
type as summarized in the H&F paper [1]_ are:
1. "inverted_cdf" (*)
2. "averaged_inverted_cdf" (*)
3. "closest_observation" (*)
4. "interpolated_inverted_cdf" (*)
5. "hazen" (*)
6. "weibull" (*)
7. "linear" (default)
8. "median_unbiased" (*)
9. "normal_unbiased" (*)
The first three methods are discontinuous. The following discontinuous
variations of the default "linear" (7.) option are also available:
* "lower"
* "higher"
* "midpoint"
* "nearest"
See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with
an asterisk require numpy version 1.22 or newer. The "method" argument was
previously called "interpolation", renamed in accordance with numpy
version 1.22.0.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
Expand Down Expand Up @@ -3505,14 +3523,21 @@ def quantile(
Coordinates:
* y (y) float64 1.0 1.5 2.0 2.5
* quantile (quantile) float64 0.0 0.5 1.0
References
----------
.. [1] R. J. Hyndman and Y. Fan,
"Sample quantiles in statistical packages,"
The American Statistician, 50(4), pp. 361-365, 1996
"""

ds = self._to_temp_dataset().quantile(
q,
dim=dim,
keep_attrs=keep_attrs,
interpolation=interpolation,
method=method,
skipna=skipna,
interpolation=interpolation,
)
return self._from_temp_dataset(ds)

Expand Down
85 changes: 59 additions & 26 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
merge_data_and_coords,
)
from .missing import get_clean_interp_index
from .npcompat import QUANTILE_METHODS, ArrayLike
from .options import OPTIONS, _get_keep_attrs
from .pycompat import is_duck_dask_array, sparse_array_type
from .utils import (
Expand Down Expand Up @@ -6137,12 +6138,13 @@ def sortby(self, variables, ascending=True):

def quantile(
self,
q,
dim=None,
interpolation="linear",
numeric_only=False,
keep_attrs=None,
skipna=True,
q: ArrayLike,
dim: str | Iterable[Hashable] | None = None,
method: QUANTILE_METHODS = "linear",
numeric_only: bool = False,
keep_attrs: bool = None,
skipna: bool = True,
interpolation: QUANTILE_METHODS = None,
):
"""Compute the qth quantile of the data along the specified dimension.
Expand All @@ -6155,18 +6157,34 @@ def quantile(
Quantile to compute, which must be between 0 and 1 inclusive.
dim : str or sequence of str, optional
Dimension(s) over which to apply quantile.
interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear"
This optional parameter specifies the interpolation method to
use when the desired quantile lies between two data points
``i < j``:
* linear: ``i + (j - i) * fraction``, where ``fraction`` is
the fractional part of the index surrounded by ``i`` and
``j``.
* lower: ``i``.
* higher: ``j``.
* nearest: ``i`` or ``j``, whichever is nearest.
* midpoint: ``(i + j) / 2``.
method : str, default: "linear"
This optional parameter specifies the interpolation method to use when the
desired quantile lies between two data points. The options sorted by their R
type as summarized in the H&F paper [1]_ are:
1. "inverted_cdf" (*)
2. "averaged_inverted_cdf" (*)
3. "closest_observation" (*)
4. "interpolated_inverted_cdf" (*)
5. "hazen" (*)
6. "weibull" (*)
7. "linear" (default)
8. "median_unbiased" (*)
9. "normal_unbiased" (*)
The first three methods are discontinuous. The following discontinuous
variations of the default "linear" (7.) option are also available:
* "lower"
* "higher"
* "midpoint"
* "nearest"
See :py:func:`numpy.quantile` or [1]_ for a description. Methods marked with
an asterisk require numpy version 1.22 or newer. The "method" argument was
previously called "interpolation", renamed in accordance with numpy
version 1.22.0.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
Expand Down Expand Up @@ -6225,17 +6243,37 @@ def quantile(
* quantile (quantile) float64 0.0 0.5 1.0
Data variables:
a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9
References
----------
.. [1] R. J. Hyndman and Y. Fan,
"Sample quantiles in statistical packages,"
The American Statistician, 50(4), pp. 361-365, 1996
"""

# interpolation renamed to method in version 0.21.0
# check here and in variable to avoid repeated warnings
if interpolation is not None:
warnings.warn(
"The `interpolation` argument to quantile was renamed to `method`.",
FutureWarning,
)

if method != "linear":
raise TypeError("Cannot pass interpolation and method keywords!")

method = interpolation

dims: set[Hashable]
if isinstance(dim, str):
dims = {dim}
elif dim in [None, ...]:
elif dim is None or dim is ...:
dims = set(self.dims)
else:
dims = set(dim)

_assert_empty(
[d for d in dims if d not in self.dims],
tuple(d for d in dims if d not in self.dims),
"Dataset does not contain the dimensions: %s",
)

Expand All @@ -6251,15 +6289,10 @@ def quantile(
or np.issubdtype(var.dtype, np.number)
or var.dtype == np.bool_
):
if len(reduce_dims) == var.ndim:
# prefer to aggregate over axis=None rather than
# axis=(0, 1) if they will be equivalent, because
# the former is often more efficient
reduce_dims = None
variables[name] = var.quantile(
q,
dim=reduce_dims,
interpolation=interpolation,
method=method,
keep_attrs=keep_attrs,
skipna=skipna,
)
Expand Down
57 changes: 43 additions & 14 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,13 @@ def fillna(self, value):
return ops.fillna(self, value)

def quantile(
self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True
self,
q,
dim=None,
method="linear",
keep_attrs=None,
skipna=True,
interpolation=None,
):
"""Compute the qth quantile over each array in the groups and
concatenate them together into a new array.
Expand All @@ -562,18 +568,34 @@ def quantile(
dim : ..., str or sequence of str, optional
Dimension(s) over which to apply quantile.
Defaults to the grouped dimension.
interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear"
This optional parameter specifies the interpolation method to
use when the desired quantile lies between two data points
``i < j``:
* linear: ``i + (j - i) * fraction``, where ``fraction`` is
the fractional part of the index surrounded by ``i`` and
``j``.
* lower: ``i``.
* higher: ``j``.
* nearest: ``i`` or ``j``, whichever is nearest.
* midpoint: ``(i + j) / 2``.
method : str, default: "linear"
This optional parameter specifies the interpolation method to use when the
desired quantile lies between two data points. The options sorted by their R
type as summarized in the H&F paper [1]_ are:
1. "inverted_cdf" (*)
2. "averaged_inverted_cdf" (*)
3. "closest_observation" (*)
4. "interpolated_inverted_cdf" (*)
5. "hazen" (*)
6. "weibull" (*)
7. "linear" (default)
8. "median_unbiased" (*)
9. "normal_unbiased" (*)
The first three methods are discontinuous. The following discontinuous
variations of the default "linear" (7.) option are also available:
* "lower"
* "higher"
* "midpoint"
* "nearest"
See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with
an asterisk require numpy version 1.22 or newer. The "method" argument was
previously called "interpolation", renamed in accordance with numpy
version 1.22.0.
skipna : bool, optional
Whether to skip missing values when aggregating.
Expand Down Expand Up @@ -639,6 +661,12 @@ def quantile(
* y (y) int64 1 2
Data variables:
a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4
References
----------
.. [1] R. J. Hyndman and Y. Fan,
"Sample quantiles in statistical packages,"
The American Statistician, 50(4), pp. 361-365, 1996
"""
if dim is None:
dim = self._group_dim
Expand All @@ -648,9 +676,10 @@ def quantile(
shortcut=False,
q=q,
dim=dim,
interpolation=interpolation,
method=method,
keep_attrs=keep_attrs,
skipna=skipna,
interpolation=interpolation,
)
return out

Expand Down
28 changes: 27 additions & 1 deletion xarray/core/npcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import TYPE_CHECKING, Any, Sequence, TypeVar, Union
from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, Union

import numpy as np
from packaging.version import Version
Expand Down Expand Up @@ -169,3 +169,29 @@ def sliding_window_view(
return as_strided(
x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable
)


# Type alias listing the string values accepted for the ``method`` argument
# of the ``quantile`` methods. The installed numpy version is compared once,
# at import time, to decide which set of names the alias contains.
if Version(np.__version__) >= Version("1.22.0"):
    # numpy >= 1.22.0: the nine numbered methods listed in the quantile
    # docstrings plus the four variations of "linear".
    QUANTILE_METHODS = Literal[
        "inverted_cdf",
        "averaged_inverted_cdf",
        "closest_observation",
        "interpolated_inverted_cdf",
        "hazen",
        "weibull",
        "linear",
        "median_unbiased",
        "normal_unbiased",
        "lower",
        "higher",
        "midpoint",
        "nearest",
    ]
else:
    # Older numpy: only "linear" and its four variations are listed.
    # NOTE(review): the ``type: ignore`` presumably silences the type
    # checker's complaint about the alias being defined a second time --
    # confirm before removing it.
    QUANTILE_METHODS = Literal[  # type: ignore[misc]
        "linear",
        "lower",
        "higher",
        "midpoint",
        "nearest",
    ]
Loading

0 comments on commit d47cf0c

Please sign in to comment.