diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8c9b61a7364..6db093b2dbb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,12 +8,12 @@ repos:
       - id: check-yaml
   # isort should run before black as black sometimes tweaks the isort output
   - repo: https://github.com/PyCQA/isort
-    rev: 5.9.3
+    rev: 5.10.1
     hooks:
       - id: isort
   # https://github.com/python/black#version-control-integration
   - repo: https://github.com/psf/black
-    rev: 21.9b0
+    rev: 21.10b0
     hooks:
       - id: black
       - id: black-jupyter
@@ -22,8 +22,8 @@ repos:
     hooks:
       - id: blackdoc
         exclude: "generate_reductions.py"
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
     hooks:
       - id: flake8
   # - repo: https://github.com/Carreau/velin
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index 0d9ce0d51a3..26738e2d357 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -62,7 +62,7 @@
         "pandas": [""],
         "netcdf4": [""],
         "scipy": [""],
-        "bottleneck": ["", null],
+        "bottleneck": [""],
         "dask": [""],
         "distributed": [""],
         "flox": [""],
diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py
index f89fe7f8eb9..d786c04e852 100644
--- a/asv_bench/benchmarks/dataarray_missing.py
+++ b/asv_bench/benchmarks/dataarray_missing.py
@@ -16,13 +16,6 @@ def make_bench_data(shape, frac_nan, chunks):
     return da


-def requires_bottleneck():
-    try:
-        import bottleneck  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
-
-
 class DataArrayMissingInterpolateNA:
     def setup(self, shape, chunks, limit):
         if chunks is not None:
@@ -46,7 +39,6 @@ def time_interpolate_na(self, shape, chunks, limit):

 class DataArrayMissingBottleneck:
     def setup(self, shape, chunks, limit):
-        requires_bottleneck()
         if chunks is not None:
             requires_dask()
         self.da = make_bench_data(shape, 0.1, chunks)
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
index f0e18bf2153..1d3713f19bf 100644
--- a/asv_bench/benchmarks/rolling.py
+++ b/asv_bench/benchmarks/rolling.py
@@ -36,29 +36,45 @@ def setup(self, *args, **kwargs):
             randn_long, dims="x", coords={"x": np.arange(long_nx) * 0.1}
         )

-    @parameterized(["func", "center"], (["mean", "count"], [True, False]))
-    def time_rolling(self, func, center):
-        getattr(self.ds.rolling(x=window, center=center), func)().load()
-
-    @parameterized(["func", "pandas"], (["mean", "count"], [True, False]))
-    def time_rolling_long(self, func, pandas):
+    @parameterized(
+        ["func", "center", "use_bottleneck"],
+        (["mean", "count"], [True, False], [True, False]),
+    )
+    def time_rolling(self, func, center, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            getattr(self.ds.rolling(x=window, center=center), func)().load()
+
+    @parameterized(
+        ["func", "pandas", "use_bottleneck"],
+        (["mean", "count"], [True, False], [True, False]),
+    )
+    def time_rolling_long(self, func, pandas, use_bottleneck):
         if pandas:
             se = self.da_long.to_series()
             getattr(se.rolling(window=window, min_periods=window), func)()
         else:
-            getattr(self.da_long.rolling(x=window, min_periods=window), func)().load()
-
-    @parameterized(["window_", "min_periods"], ([20, 40], [5, 5]))
-    def time_rolling_np(self, window_, min_periods):
-        self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
-            getattr(np, "nansum")
-        ).load()
-
-    @parameterized(["center", "stride"], ([True, False], [1, 1]))
-    def time_rolling_construct(self, center, stride):
-        self.ds.rolling(x=window, center=center).construct(
-            "window_dim", stride=stride
-        ).sum(dim="window_dim").load()
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            getattr(
+                self.da_long.rolling(x=window, min_periods=window), func
+            )().load()
+
+    @parameterized(
+        ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False])
+    )
+    def time_rolling_np(self, window_, min_periods, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
+                getattr(np, "nansum")
+            ).load()
+
+    @parameterized(
+        ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False])
+    )
+    def time_rolling_construct(self, center, stride, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            self.ds.rolling(x=window, center=center).construct(
+                "window_dim", stride=stride
+            ).sum(dim="window_dim").load()


 class RollingDask(Rolling):
@@ -87,24 +103,28 @@ def setup(self, *args, **kwargs):


 class DataArrayRollingMemory(RollingMemory):
-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_ndrolling_reduce(self, func):
-        roll = self.ds.var1.rolling(x=10, y=4)
-        getattr(roll, func)()
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_ndrolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.var1.rolling(x=10, y=4)
+            getattr(roll, func)()

-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_1drolling_reduce(self, func):
-        roll = self.ds.var3.rolling(t=100)
-        getattr(roll, func)()
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_1drolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.var3.rolling(t=100)
+            getattr(roll, func)()


 class DatasetRollingMemory(RollingMemory):
-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_ndrolling_reduce(self, func):
-        roll = self.ds.rolling(x=10, y=4)
-        getattr(roll, func)()
-
-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_1drolling_reduce(self, func):
-        roll = self.ds.rolling(t=100)
-        getattr(roll, func)()
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_ndrolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.rolling(x=10, y=4)
+            getattr(roll, func)()
+
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_1drolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.rolling(t=100)
+            getattr(roll, func)()
diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
index fc3c457308f..a4ba606feeb 100644
--- a/doc/user-guide/computation.rst
+++ b/doc/user-guide/computation.rst
@@ -107,6 +107,8 @@ Xarray also provides the ``max_gap`` keyword argument to limit the interpolation
 data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na`
 for more.

+.. _agg:
+
 Aggregation
 ===========

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index f499cbe3d21..b66c99d0bcb 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,6 +36,8 @@ Bug fixes
 ~~~~~~~~~
 - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`).
   By `Sebastian Weigand `_.
+- Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`) + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ @@ -49,6 +51,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Use ``importlib`` to replace functionality of ``pkg_resources`` in + backend plugins tests. (:pull:`5959`). + By `Kai Mühlbauer `_. + .. _whats-new.0.20.1: diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 32013f1f298..0a9ffcbda22 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -23,15 +23,17 @@ def remove_duplicates(entrypoints): # check if there are multiple entrypoints for the same name unique_entrypoints = [] for name, matches in entrypoints_grouped: - matches = list(matches) + # remove equal entrypoints + matches = list(set(matches)) unique_entrypoints.append(matches[0]) matches_len = len(matches) if matches_len > 1: - selected_module_name = matches[0].module_name - all_module_names = [e.module_name for e in matches] + all_module_names = [e.value.split(":")[0] for e in matches] + selected_module_name = all_module_names[0] warnings.warn( f"Found {matches_len} entrypoints for the engine name {name}:" - f"\n {all_module_names}.\n It will be used: {selected_module_name}.", + f"\n {all_module_names}.\n " + f"The entrypoint {selected_module_name} will be used.", RuntimeWarning, ) return unique_entrypoints diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index ef27413fb5b..c56e76cf5d3 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,19 +1,15 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS -from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset try: import flox @@ -21,85 +17,27 @@ flox = None -class DatasetReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - - -class DatasetGroupByReduce(Protocol): - _obj: T_Dataset - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> T_Dataset: - ... - - -class DataArrayReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_DataArray: - ... 
- - -class DataArrayGroupByReduce(Protocol): - _obj: T_DataArray +class DatasetReductions: + __slots__ = () def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_DataArray: - ... - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> T_DataArray: - ... - - -class DatasetReductions: - __slots__ = () + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -166,11 +104,11 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -237,11 +175,11 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -308,12 +246,12 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -394,12 +332,12 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -480,12 +418,12 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -570,13 +508,13 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -676,13 +614,13 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -782,13 +720,13 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -885,13 +823,13 @@ def std( ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). 
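Note (not part of the patch): the recurring change above, `T_Dataset`/`T_DataArray` giving way to quoted `"Dataset"`/`"DataArray"` returns, works because quoted annotations are forward references that only type checkers resolve, via the `TYPE_CHECKING` import block added at the top of this file. A minimal, self-contained sketch of the pattern; `mypackage.dataset` is a made-up path:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers (mypy, pyright); at runtime
    # this import never executes, so no circular dependency arises.
    from mypackage.dataset import Dataset  # hypothetical module path


class DatasetReductions:
    __slots__ = ()

    def count(self, dim=None) -> "Dataset":
        # The quoted annotation stays a plain string at runtime and is
        # resolved against the guarded import only during type checking.
        raise NotImplementedError()


print(DatasetReductions.count.__annotations__)  # {'return': 'Dataset'}
```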
@@ -988,12 +926,12 @@ def var( ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -1081,12 +1019,24 @@ def median( class DataArrayReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -1147,11 +1097,11 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -1212,11 +1162,11 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -1277,12 +1227,12 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -1355,12 +1305,12 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -1433,12 +1383,12 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -1515,13 +1465,13 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -1611,13 +1561,13 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -1707,13 +1657,13 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). 
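As a reminder of what the `min_count` keyword threaded through `prod` and `sum` above actually does (its full docstring appears later in this diff), here is a small illustration; an example, not part of the patch:

```python
import numpy as np
import xarray as xr

da = xr.DataArray([np.nan, np.nan, 2.0], dims="x")

# skipna=True ignores the NaNs, so the lone valid value is summed.
print(da.sum(skipna=True).item())               # 2.0

# min_count demands at least that many valid values; with only one
# present, requiring two turns the result into NaN instead.
print(da.sum(skipna=True, min_count=2).item())  # nan
```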
@@ -1800,13 +1750,13 @@ def std( ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -1893,12 +1843,12 @@ def var( ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -1976,14 +1926,33 @@ def median( class DatasetGroupByReductions: - __slots__ = () + _obj: "Dataset" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -2067,11 +2036,11 @@ def count( ) def all( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -2155,11 +2124,11 @@ def all( ) def any( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -2243,12 +2212,12 @@ def any( ) def max( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -2349,12 +2318,12 @@ def max( ) def min( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -2455,12 +2424,12 @@ def min( ) def mean( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -2565,13 +2534,13 @@ def mean( ) def prod( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). 
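The `DatasetGroupByReductions` methods being rewritten here are the front-ends for calls such as the following usage sketch, which mirrors the `.groupby("labels")` preamble used by the generated doctests:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"da": ("time", np.array([1.0, 2.0, 3.0, 1.0, 2.0, np.nan]))},
    coords={"labels": ("time", ["a", "b", "c", "c", "b", "a"])},
)

# One aggregated value per unique label; this dispatches to flox when it
# is installed and enabled, and otherwise goes through the reduce() hook
# stubbed out above.
print(ds.groupby("labels").mean())
```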
@@ -2694,13 +2663,13 @@ def prod( ) def sum( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -2823,13 +2792,13 @@ def sum( ) def std( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -2949,13 +2918,13 @@ def std( ) def var( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -3075,12 +3044,12 @@ def var( ) def median( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -3170,14 +3139,33 @@ def median( class DatasetResampleReductions: - __slots__ = () + _obj: "Dataset" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -3261,11 +3249,11 @@ def count( ) def all( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -3349,11 +3337,11 @@ def all( ) def any( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -3437,12 +3425,12 @@ def any( ) def max( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -3543,12 +3531,12 @@ def max( ) def min( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -3649,12 +3637,12 @@ def min( ) def mean( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). 
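Likewise for `DatasetResampleReductions`, matching the `.resample(time="3M")` preamble of the generated doctests; an illustrative call, not part of the patch:

```python
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {"da": ("time", np.arange(12.0))},
    coords={"time": pd.date_range("2001-01-01", freq="M", periods=12)},
)

# Quarterly totals; min_count=1 leaves a quarter as NaN only when no
# valid values fall inside it.
print(ds.resample(time="3M").sum(skipna=True, min_count=1))
```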
@@ -3759,13 +3747,13 @@ def mean( ) def prod( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -3888,13 +3876,13 @@ def prod( ) def sum( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -4017,13 +4005,13 @@ def sum( ) def std( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -4143,13 +4131,13 @@ def std( ) def var( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -4269,12 +4257,12 @@ def var( ) def median( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -4364,14 +4352,33 @@ def median( class DataArrayGroupByReductions: - __slots__ = () + _obj: "DataArray" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -4448,11 +4455,11 @@ def count( ) def all( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -4529,11 +4536,11 @@ def all( ) def any( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -4610,12 +4617,12 @@ def any( ) def max( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). 
@@ -4707,12 +4714,12 @@ def max( ) def min( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -4804,12 +4811,12 @@ def min( ) def mean( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -4905,13 +4912,13 @@ def mean( ) def prod( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -5023,13 +5030,13 @@ def prod( ) def sum( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -5141,13 +5148,13 @@ def sum( ) def std( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -5256,13 +5263,13 @@ def std( ) def var( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -5371,12 +5378,12 @@ def var( ) def median( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -5458,14 +5465,33 @@ def median( class DataArrayResampleReductions: - __slots__ = () + _obj: "DataArray" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -5542,11 +5568,11 @@ def count( ) def all( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). 
@@ -5623,11 +5649,11 @@ def all( ) def any( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -5704,12 +5730,12 @@ def any( ) def max( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -5801,12 +5827,12 @@ def max( ) def min( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -5898,12 +5924,12 @@ def min( ) def mean( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -5999,13 +6025,13 @@ def mean( ) def prod( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -6117,13 +6143,13 @@ def prod( ) def sum( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -6235,13 +6261,13 @@ def sum( ) def std( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -6350,13 +6376,13 @@ def std( ) def var( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -6465,12 +6491,12 @@ def var( ) def median( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). 
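Stepping back, the net effect of the `_reductions.py` rewrite is that the `Protocol` classes are gone: each generated class is now an ordinary mixin whose `reduce` stub the concrete container overrides, and the `arithmetic.py`, `dataarray.py`, and `dataset.py` changes below wire those mixins in. A toy sketch of that shape, with illustrative names only:

```python
from typing import Any, Callable


class Reductions:
    """Mixin of generated methods; the container supplies reduce()."""

    __slots__ = ()

    def reduce(self, func: Callable[..., Any], dim=None, **kwargs: Any):
        raise NotImplementedError()

    def total(self, dim=None, **kwargs: Any):
        # Every generated method funnels through the single reduce() hook.
        return self.reduce(sum, dim=dim, **kwargs)


class Container(Reductions):
    __slots__ = ("data",)

    def __init__(self, data):
        self.data = data

    def reduce(self, func, dim=None, **kwargs):
        # The concrete type decides how a reduction is actually applied.
        return func(self.data)


print(Container([1, 2, 3]).total())  # 6
```

Mixing in a base class that raises `NotImplementedError`, rather than satisfying a `Protocol`, keeps one inheritable implementation surface and drops the `typing_extensions` fallback that the removed `sys.version_info` check needed on older Pythons.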
diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 814e9a59877..bf8d6ccaeb6 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -105,7 +105,6 @@ class VariableArithmetic( class DatasetArithmetic( ImplementsDatasetReduce, - IncludeReduceMethods, IncludeCumMethods, SupportsArithmetic, DatasetOpsMixin, @@ -116,7 +115,6 @@ class DatasetArithmetic( class DataArrayArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, diff --git a/xarray/core/common.py b/xarray/core/common.py index b5dc3bf0e20..2300f3dd8f5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -60,12 +60,14 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + return self.reduce( + func=func, dim=dim, axis=axis, skipna=skipna, **kwargs + ) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) + return self.reduce(func=func, dim=dim, axis=axis, **kwargs) return wrapped_func @@ -98,13 +100,19 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs + func=func, + dim=dim, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) + return self.reduce( + func=func, dim=dim, numeric_only=numeric_only, **kwargs + ) return wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 24e5f5736b0..1b96f22b744 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -35,6 +35,7 @@ utils, weighted, ) +from ._reductions import DataArrayReductions from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( @@ -215,7 +216,9 @@ def __setitem__(self, key, value) -> None: _THIS_ARRAY = ReprObject("") -class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): +class DataArray( + AbstractArray, DataWithCoords, DataArrayArithmetic, DataArrayReductions +): """N-dimensional array with labeled coordinates and dimensions. DataArray provides a wrapper around numpy ndarrays that uses @@ -2652,6 +2655,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e882495dce5..cf52fed6974 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -50,6 +50,7 @@ utils, weighted, ) +from ._reductions import DatasetReductions from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes @@ -574,7 +575,7 @@ def __setitem__(self, key, value) -> None: self.dataset[pos_indexers] = value -class Dataset(DataWithCoords, DatasetArithmetic, Mapping): +class Dataset(DataWithCoords, DatasetReductions, DatasetArithmetic, Mapping): """A multi-dimensional, in memory, array database. 
A dataset resembles an in-memory representation of a NetCDF file, @@ -4999,6 +5000,7 @@ def reduce( self, func: Callable, dim: Union[Hashable, Iterable[Hashable]] = None, + *, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, @@ -5034,7 +5036,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if "axis" in kwargs: + if kwargs.get("axis", None) is not None: raise ValueError( "passing 'axis' to Dataset reduce methods is ambiguous." " Please use 'dim' instead." diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7f8f9802b59..8c0bde3a4f9 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,6 @@ import datetime import warnings +from typing import Any, Callable, Hashable, Sequence, Union import numpy as np import pandas as pd @@ -932,7 +933,15 @@ def _combine(self, applied, shortcut=False): return combined def reduce( - self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, ): """Reduce the items in this group by applying `func` along some dimension(s). @@ -965,11 +974,15 @@ def reduce( if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_array(ar): - return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + return ar.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) @@ -1047,7 +1060,16 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along some dimension(s). 
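The bare `*` inserted into each `reduce` signature in this diff makes everything after `dim` keyword-only, so an `axis` argument can no longer slip in positionally and be misread. Sketched outside xarray:

```python
def reduce(func, dim=None, *, axis=None, keep_attrs=None, keepdims=False):
    return func.__name__, dim, axis


print(reduce(sum, "x", axis=0))  # ('sum', 'x', 0)

try:
    reduce(sum, "x", 0)  # axis passed positionally
except TypeError as err:
    # reduce() takes from 1 to 2 positional arguments but 3 were given
    print(err)
```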
@@ -1079,11 +1101,15 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_dataset(ds): - return ds.reduce(func, dim, keep_attrs, **kwargs) + return ds.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index e2f599e8b4e..ed665ad4048 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,4 +1,5 @@ import warnings +from typing import Any, Callable, Hashable, Sequence, Union from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase @@ -157,7 +158,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): +class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -248,7 +249,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): +class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): @@ -316,7 +317,16 @@ def apply(self, func, args=(), shortcut=None, **kwargs): ) return self.map(func=func, shortcut=shortcut, args=args, **kwargs) - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. @@ -341,4 +351,11 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ - return super().reduce(func, dim, keep_attrs, **kwargs) + return super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c032a781e47..392597f1bda 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -676,87 +676,6 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_docs(): - # with min_count - actual = DataArray.sum.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `sum` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `sum`. - axis : int or sequence of int, optional - Axis(es) over which to apply `sum`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `sum` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. New in version 0.10.8: Added with the default being - None. Changed in version 0.17.0: if specified on an integer array - and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `sum` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - # without min_count - actual = DataArray.std.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `std` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `std`. - axis : int or sequence of int, optional - Axis(es) over which to apply `std`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `std` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `std` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - def test_datetime_to_numeric_datetime64(): times = pd.date_range("2000", periods=5, freq="7D").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 7f77a677d6d..4d1eee6363d 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -1,10 +1,20 @@ +import sys from unittest import mock -import pkg_resources import pytest from xarray.backends import common, plugins +if sys.version_info >= (3, 8): + from importlib.metadata import EntryPoint + + importlib_metadata_mock = "importlib.metadata" +else: + # if the fallback library is missing, we are doomed. 
+ from importlib_metadata import EntryPoint + + importlib_metadata_mock = "importlib_metadata" + class DummyBackendEntrypointArgs(common.BackendEntrypoint): def open_dataset(filename_or_obj, *args): @@ -29,12 +39,12 @@ def open_dataset(self, filename_or_obj, *, decoder): @pytest.fixture def dummy_duplicated_entrypoints(): specs = [ - "engine1 = xarray.tests.test_plugins:backend_1", - "engine1 = xarray.tests.test_plugins:backend_2", - "engine2 = xarray.tests.test_plugins:backend_1", - "engine2 = xarray.tests.test_plugins:backend_2", + ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine1", "xarray.tests.test_plugins:backend_2", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - eps = [pkg_resources.EntryPoint.parse(spec) for spec in specs] + eps = [EntryPoint(name, value, group) for name, value, group in specs] return eps @@ -46,8 +56,10 @@ def test_remove_duplicates(dummy_duplicated_entrypoints) -> None: def test_broken_plugin() -> None: - broken_backend = pkg_resources.EntryPoint.parse( - "broken_backend = xarray.tests.test_plugins:backend_1" + broken_backend = EntryPoint( + "broken_backend", + "xarray.tests.test_plugins:backend_1", + "xarray.backends", ) with pytest.warns(RuntimeWarning) as record: _ = plugins.build_engines([broken_backend]) @@ -68,13 +80,15 @@ def test_remove_duplicates_warnings(dummy_duplicated_entrypoints) -> None: assert "entrypoints" in message1 -@mock.patch("pkg_resources.EntryPoint.load", mock.MagicMock(return_value=None)) +@mock.patch( + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=None) +) def test_backends_dict_from_pkg() -> None: specs = [ - "engine1 = xarray.tests.test_plugins:backend_1", - "engine2 = xarray.tests.test_plugins:backend_2", + ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - entrypoints = [pkg_resources.EntryPoint.parse(spec) for spec in specs] + entrypoints = [EntryPoint(name, value, group) for name, value, group in specs] engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 assert engines.keys() == set(("engine1", "engine2")) @@ -114,12 +128,12 @@ def test_set_missing_parameters_raise_error() -> None: @mock.patch( - "pkg_resources.EntryPoint.load", + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines() -> None: - dummy_pkg_entrypoint = pkg_resources.EntryPoint.parse( - "cfgrib = xarray.tests.test_plugins:backend_1" + dummy_pkg_entrypoint = EntryPoint( + "cfgrib", "xarray.tests.test_plugins:backend_1", "xarray_backends" ) backend_entrypoints = plugins.build_engines([dummy_pkg_entrypoint]) @@ -131,17 +145,13 @@ def test_build_engines() -> None: @mock.patch( - "pkg_resources.EntryPoint.load", + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines_sorted() -> None: dummy_pkg_entrypoints = [ - pkg_resources.EntryPoint.parse( - "dummy2 = xarray.tests.test_plugins:backend_1", - ), - pkg_resources.EntryPoint.parse( - "dummy1 = xarray.tests.test_plugins:backend_1", - ), + EntryPoint("dummy2", "xarray.tests.test_plugins:backend_1", "xarray.backends"), + EntryPoint("dummy1", "xarray.tests.test_plugins:backend_1", "xarray.backends"), ] backend_entrypoints = plugins.build_engines(dummy_pkg_entrypoints) 
backend_entrypoints = list(backend_entrypoints) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 5d4aa2145e1..db4157764ac 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -5,7 +5,7 @@ Usage: python xarray/util/generate_reductions.py > xarray/core/_reductions.py pytest --doctest-modules xarray/core/_reductions.py --accept || true - pytest --doctest-modules xarray/core/_reductions.py --accept + pytest --doctest-modules xarray/core/_reductions.py This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). The second run of pytest is deliberate, since the first will return an error @@ -24,7 +24,6 @@ from . import duck_array_ops from .options import OPTIONS -from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy if TYPE_CHECKING: @@ -36,48 +35,48 @@ except ImportError: flox = None''' -OBJ_PREAMBLE = """ +DEFAULT_PREAMBLE = """ + +class {obj}{cls}Reductions: + __slots__ = () -class {obj}Reductions(): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ... + raise NotImplementedError()""" +GROUPBY_PREAMBLE = """ -class {obj}GroupByReductions(): +class {obj}{cls}Reductions: _obj: "{obj}" def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ... + raise NotImplementedError() def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, ) -> "{obj}": - ...""" + raise NotImplementedError()""" -CLASS_PREAMBLE = """ - -class {obj}{cls}Reductions: - __slots__ = ()""" - TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, @@ -213,6 +212,7 @@ def __init__( docref, docref_description, example_call_preamble, + definition_preamble, see_also_obj=None, ): self.datastructure = datastructure @@ -221,7 +221,7 @@ def __init__( self.docref = docref self.docref_description = docref_description self.example_call_preamble = example_call_preamble - self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) if not see_also_obj: self.see_also_obj = self.datastructure.name else: @@ -245,7 +245,6 @@ def generate_method(self, method): yield TEMPLATE_REDUCTION_SIGNATURE.format( **template_kwargs, extra_kwargs=extra_kwargs, - self_type=self.self_type, ) for text in [ @@ -415,6 +414,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="DataArray", + definition_preamble=DEFAULT_PREAMBLE, ) DataArrayGenerator = GenericReductionGenerator( cls="", @@ -424,6 +424,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="Dataset", + definition_preamble=DEFAULT_PREAMBLE, ) DataArrayGroupByGenerator = GroupByReductionGenerator( @@ -433,6 +434,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + definition_preamble=GROUPBY_PREAMBLE, ) DataArrayResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -441,6 +443,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", 
example_call_preamble='.resample(time="3M")', + definition_preamble=GROUPBY_PREAMBLE, ) DatasetGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", @@ -449,6 +452,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + definition_preamble=GROUPBY_PREAMBLE, ) DatasetResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -457,6 +461,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', + definition_preamble=GROUPBY_PREAMBLE, )
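For reference on the test changes above: `importlib.metadata.EntryPoint` objects are constructed directly from `name`, `value`, and `group`, instead of being parsed out of `pkg_resources` spec strings. A sketch against the stdlib API (Python 3.8+; the `importlib_metadata` backport used on older interpreters mirrors it):

```python
from importlib.metadata import EntryPoint

ep = EntryPoint(
    name="engine1",
    value="xarray.tests.test_plugins:backend_1",
    group="xarray.backends",
)

# remove_duplicates() now derives the module name from the value string.
print(ep.value.split(":")[0])  # xarray.tests.test_plugins

# On the interpreter versions this diff targets, EntryPoint is a named
# tuple, so equal fields mean equal, hashable objects. That is what the
# list(set(matches)) de-duplication in plugins.py relies on.
dup = EntryPoint("engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends")
print(ep == dup, len({ep, dup}))  # True 1
```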