diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 884c3cef91c..ed1e79ce3d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,7 +28,9 @@ New Features - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` now work with dask Variables. By `Deepak Cherian `_. - +- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` + and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) + By `Deepak Cherian `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 17240faf007..f70e96217e8 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): """ if hasattr(axis, "__len__"): # if tuple or list raise ValueError( - "min_count is not available for reduction " "with more than one dimensions." + "min_count is not available for reduction with more than one dimensions." ) if axis is not None and getattr(result, "ndim", False): diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 78c4466faed..b789f93b4f1 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True): inject_reduce_methods(cls) inject_cum_methods(cls) - - -def inject_coarsen_methods(cls): - # standard numpy reduce methods - methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS] - for name, f in methods: - func = cls._reduce_method(f) - func.__name__ = name - func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__) - setattr(cls, name, func) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index a1864332f4d..ea6d72b2e03 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,12 +1,12 @@ import functools import warnings -from typing import Callable +from typing import Any, Callable, Dict import numpy as np from . import dtypes, duck_array_ops, utils from .dask_array_ops import dask_rolling_wrapper -from .ops import inject_coarsen_methods +from .ops import inject_reduce_methods from .pycompat import dask_array_type try: @@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func): self.side = side self.boundary = boundary + absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] + if absent_dims: + raise ValueError( + f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}." + ) if not utils.is_dict_like(coord_func): coord_func = {d: coord_func for d in self.obj.dims} for c in self.obj.coords: @@ -565,18 +570,23 @@ def __repr__(self): class DataArrayCoarsen(Coarsen): __slots__ = () + _reduce_extra_args_docstring = """""" + @classmethod - def _reduce_method(cls, func): + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): """ - Return a wrapped function for injecting numpy methods. - see ops.inject_coarsen_methods + Return a wrapped function for injecting reduction methods. + see ops.inject_reduce_methods """ + kwargs: Dict[str, Any] = {} + if include_skipna: + kwargs["skipna"] = None def wrapped_func(self, **kwargs): from .dataarray import DataArray reduced = self.obj.variable.coarsen( - self.windows, func, self.boundary, self.side + self.windows, func, self.boundary, self.side, **kwargs ) coords = {} for c, v in self.obj.coords.items(): @@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs): else: if any(d in self.windows for d in v.dims): coords[c] = v.variable.coarsen( - self.windows, self.coord_func[c], self.boundary, self.side + self.windows, + self.coord_func[c], + self.boundary, + self.side, + **kwargs, ) else: coords[c] = v @@ -597,12 +611,17 @@ def wrapped_func(self, **kwargs): class DatasetCoarsen(Coarsen): __slots__ = () + _reduce_extra_args_docstring = """""" + @classmethod - def _reduce_method(cls, func): + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): """ - Return a wrapped function for injecting numpy methods. - see ops.inject_coarsen_methods + Return a wrapped function for injecting reduction methods. + see ops.inject_reduce_methods """ + kwargs: Dict[str, Any] = {} + if include_skipna: + kwargs["skipna"] = None def wrapped_func(self, **kwargs): from .dataset import Dataset @@ -610,14 +629,18 @@ def wrapped_func(self, **kwargs): reduced = {} for key, da in self.obj.data_vars.items(): reduced[key] = da.variable.coarsen( - self.windows, func, self.boundary, self.side + self.windows, func, self.boundary, self.side, **kwargs ) coords = {} for c, v in self.obj.coords.items(): if any(d in self.windows for d in v.dims): coords[c] = v.variable.coarsen( - self.windows, self.coord_func[c], self.boundary, self.side + self.windows, + self.coord_func[c], + self.boundary, + self.side, + **kwargs, ) else: coords[c] = v.variable @@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs): return wrapped_func -inject_coarsen_methods(DataArrayCoarsen) -inject_coarsen_methods(DatasetCoarsen) +inject_reduce_methods(DataArrayCoarsen) +inject_reduce_methods(DatasetCoarsen) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 041c303dd3a..773dcef0aa1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1864,9 +1864,9 @@ def rolling_window( ), ) - def coarsen(self, windows, func, boundary="exact", side="left"): + def coarsen(self, windows, func, boundary="exact", side="left", **kwargs): """ - Apply + Apply reduction function. """ windows = {k: v for k, v in windows.items() if k in self.dims} if not windows: @@ -1878,11 +1878,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"): func = getattr(duck_array_ops, name, None) if func is None: raise NameError(f"{name} is not a valid method.") - return type(self)(self.dims, func(reshaped, axis=axes), self._attrs) + return self._replace(data=func(reshaped, axis=axes, **kwargs)) def _coarsen_reshape(self, windows, boundary, side): """ - Construct a reshaped-array for corsen + Construct a reshaped-array for coarsen """ if not utils.is_dict_like(boundary): boundary = {d: boundary for d in windows.keys()} diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d8282f58051..7db1911621b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5497,6 +5497,11 @@ def ds(request): ) +def test_coarsen_absent_dims_error(ds): + with raises_regex(ValueError, "not found in Dataset."): + ds.coarsen(foo=2) + + @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")]) def test_coarsen(ds, dask, boundary, side): @@ -5505,12 +5510,11 @@ def test_coarsen(ds, dask, boundary, side): actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() assert_equal( - actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max() + actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max() ) # coordinate should be mean by default assert_equal( - actual["time"], - ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(), + actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean() ) @@ -5521,8 +5525,8 @@ def test_coarsen_coords(ds, dask): # check if coord_func works actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max() - assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max()) - assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max()) + assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) + assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max()) # raise if exact with pytest.raises(ValueError): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 245dc1acc42..5b5aa1a523f 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1833,6 +1833,26 @@ def test_coarsen_2d(self): expected[1, 1] *= 12 / 11 assert_allclose(actual, expected) + v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4)) + actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") + expected = self.cls(("x", "y"), 4 * np.ones((2, 2))) + assert_equal(actual, expected) + + v[0, 0] = np.nan + v[-1, -1] = np.nan + expected[0, 0] = 3 + expected[-1, -1] = 3 + actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") + assert_equal(actual, expected) + + actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False) + expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]]) + assert_equal(actual, expected) + + actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True) + expected = self.cls(("x", "y"), [[10, 18], [42, 35]]) + assert_equal(actual, expected) + @requires_dask class TestVariableWithDask(VariableSubclassobjects):