Skip to content
This repository has been archived by the owner on Oct 7, 2024. It is now read-only.

Commit

Permalink
make coarsen reductions consistent with reductions on other classes (p…
Browse files Browse the repository at this point in the history
…ydata#3500)

* Coarsen now has the same reduction methods as groupby & rolling.

This brings in support for coarsen.count as well as passing skipna down to the
other reduction functions.

* test for count

* Test that dims passed to coarsen are present in dataset.

* Add whats-new

* fix tests.

* review comments.

* Update doc/whats-new.rst

Co-Authored-By: keewis <[email protected]>

* fix whats-new
  • Loading branch information
dcherian authored Dec 4, 2019
1 parent ed05f98 commit 308bb37
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 35 deletions.
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ New Features
- :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
now work with dask Variables.
By `Deepak Cherian <https://github.com/dcherian>`_.

- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
By `Deepak Cherian <https://github.com/dcherian>`_.

Bug fixes
~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
"""
if hasattr(axis, "__len__"): # if tuple or list
raise ValueError(
"min_count is not available for reduction " "with more than one dimensions."
"min_count is not available for reduction with more than one dimensions."
)

if axis is not None and getattr(result, "ndim", False):
Expand Down
10 changes: 0 additions & 10 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):

inject_reduce_methods(cls)
inject_cum_methods(cls)


def inject_coarsen_methods(cls):
    """Attach a reduction method to ``cls`` for each name in ``NAN_REDUCE_METHODS``.

    Every name is looked up on ``duck_array_ops``, wrapped with
    ``cls._reduce_method``, given a matching ``__name__`` and a docstring
    rendered from ``_COARSEN_REDUCE_DOCSTRING_TEMPLATE``, and then set as an
    attribute of ``cls`` under that name.
    """
    # Resolve all reduction callables first, so a failed lookup happens
    # before any attribute is set on ``cls``.
    pending = []
    for method_name in NAN_REDUCE_METHODS:
        pending.append((method_name, getattr(duck_array_ops, method_name)))

    for method_name, reduction in pending:
        wrapped = cls._reduce_method(reduction)
        wrapped.__name__ = method_name
        wrapped.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(
            name=wrapped.__name__
        )
        setattr(cls, method_name, wrapped)
51 changes: 37 additions & 14 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import functools
import warnings
from typing import Callable
from typing import Any, Callable, Dict

import numpy as np

from . import dtypes, duck_array_ops, utils
from .dask_array_ops import dask_rolling_wrapper
from .ops import inject_coarsen_methods
from .ops import inject_reduce_methods
from .pycompat import dask_array_type

try:
Expand Down Expand Up @@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func):
self.side = side
self.boundary = boundary

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
raise ValueError(
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
)
if not utils.is_dict_like(coord_func):
coord_func = {d: coord_func for d in self.obj.dims}
for c in self.obj.coords:
Expand All @@ -565,18 +570,23 @@ def __repr__(self):
class DataArrayCoarsen(Coarsen):
__slots__ = ()

_reduce_extra_args_docstring = """"""

@classmethod
def _reduce_method(cls, func):
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
"""
Return a wrapped function for injecting numpy methods.
see ops.inject_coarsen_methods
Return a wrapped function for injecting reduction methods.
see ops.inject_reduce_methods
"""
kwargs: Dict[str, Any] = {}
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
from .dataarray import DataArray

reduced = self.obj.variable.coarsen(
self.windows, func, self.boundary, self.side
self.windows, func, self.boundary, self.side, **kwargs
)
coords = {}
for c, v in self.obj.coords.items():
Expand All @@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs):
else:
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows, self.coord_func[c], self.boundary, self.side
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v
Expand All @@ -597,27 +611,36 @@ def wrapped_func(self, **kwargs):
class DatasetCoarsen(Coarsen):
__slots__ = ()

_reduce_extra_args_docstring = """"""

@classmethod
def _reduce_method(cls, func):
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
"""
Return a wrapped function for injecting numpy methods.
see ops.inject_coarsen_methods
Return a wrapped function for injecting reduction methods.
see ops.inject_reduce_methods
"""
kwargs: Dict[str, Any] = {}
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
from .dataset import Dataset

reduced = {}
for key, da in self.obj.data_vars.items():
reduced[key] = da.variable.coarsen(
self.windows, func, self.boundary, self.side
self.windows, func, self.boundary, self.side, **kwargs
)

coords = {}
for c, v in self.obj.coords.items():
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows, self.coord_func[c], self.boundary, self.side
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v.variable
Expand All @@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs):
return wrapped_func


inject_coarsen_methods(DataArrayCoarsen)
inject_coarsen_methods(DatasetCoarsen)
inject_reduce_methods(DataArrayCoarsen)
inject_reduce_methods(DatasetCoarsen)
8 changes: 4 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1864,9 +1864,9 @@ def rolling_window(
),
)

def coarsen(self, windows, func, boundary="exact", side="left"):
def coarsen(self, windows, func, boundary="exact", side="left", **kwargs):
"""
Apply
Apply reduction function.
"""
windows = {k: v for k, v in windows.items() if k in self.dims}
if not windows:
Expand All @@ -1878,11 +1878,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
func = getattr(duck_array_ops, name, None)
if func is None:
raise NameError(f"{name} is not a valid method.")
return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
return self._replace(data=func(reshaped, axis=axes, **kwargs))

def _coarsen_reshape(self, windows, boundary, side):
"""
Construct a reshaped-array for corsen
Construct a reshaped-array for coarsen
"""
if not utils.is_dict_like(boundary):
boundary = {d: boundary for d in windows.keys()}
Expand Down
14 changes: 9 additions & 5 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5497,6 +5497,11 @@ def ds(request):
)


def test_coarsen_absent_dims_error(ds):
    """Check that ``ds.coarsen(foo=2)`` raises a ``ValueError`` matching the pattern.

    ``foo`` is presumably not a dimension of the ``ds`` fixture; the fixture
    body is not fully visible here.
    """
    expected_message = "not found in Dataset."
    with raises_regex(ValueError, expected_message):
        ds.coarsen(foo=2)


@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
def test_coarsen(ds, dask, boundary, side):
Expand All @@ -5505,12 +5510,11 @@ def test_coarsen(ds, dask, boundary, side):

actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max()
assert_equal(
actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max()
actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max()
)
# coordinate should be mean by default
assert_equal(
actual["time"],
ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(),
actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean()
)


Expand All @@ -5521,8 +5525,8 @@ def test_coarsen_coords(ds, dask):

# check if coord_func works
actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max())
assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max())
assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())

# raise if exact
with pytest.raises(ValueError):
Expand Down
20 changes: 20 additions & 0 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1833,6 +1833,26 @@ def test_coarsen_2d(self):
expected[1, 1] *= 12 / 11
assert_allclose(actual, expected)

v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
assert_equal(actual, expected)

v[0, 0] = np.nan
v[-1, -1] = np.nan
expected[0, 0] = 3
expected[-1, -1] = 3
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
assert_equal(actual, expected)

actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False)
expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]])
assert_equal(actual, expected)

actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True)
expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
assert_equal(actual, expected)


@requires_dask
class TestVariableWithDask(VariableSubclassobjects):
Expand Down

0 comments on commit 308bb37

Please sign in to comment.