Merge branch 'main' into typedops
headtr1ck committed Sep 20, 2023
2 parents ed8d936 + 2b784f2 commit bc9cb86
Showing 18 changed files with 268 additions and 86 deletions.
13 changes: 11 additions & 2 deletions asv_bench/benchmarks/rolling.py
@@ -5,10 +5,10 @@

from . import parameterized, randn, requires_dask

-nx = 300
+nx = 3000
long_nx = 30000
ny = 200
-nt = 100
+nt = 1000
window = 20

randn_xy = randn((nx, ny), frac_nan=0.1)
@@ -115,6 +115,11 @@ def peakmem_1drolling_reduce(self, func, use_bottleneck):
            roll = self.ds.var3.rolling(t=100)
            getattr(roll, func)()

+    @parameterized(["stride"], ([None, 5, 50]))
+    def peakmem_1drolling_construct(self, stride):
+        self.ds.var2.rolling(t=100).construct("w", stride=stride)
+        self.ds.var3.rolling(t=100).construct("w", stride=stride)


class DatasetRollingMemory(RollingMemory):
@parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
@@ -128,3 +133,7 @@ def peakmem_1drolling_reduce(self, func, use_bottleneck):
        with xr.set_options(use_bottleneck=use_bottleneck):
            roll = self.ds.rolling(t=100)
            getattr(roll, func)()

+    @parameterized(["stride"], ([None, 5, 50]))
+    def peakmem_1drolling_construct(self, stride):
+        self.ds.rolling(t=100).construct("w", stride=stride)
5 changes: 1 addition & 4 deletions doc/user-guide/io.rst
@@ -115,10 +115,7 @@
you try to perform some sort of actual computation. For an example of how these
lazy arrays work, see the OPeNDAP section below.

There may be minor differences in the :py:class:`Dataset` object returned
-when reading a NetCDF file with different engines. For example,
-single-valued attributes are returned as scalars by the default
-``engine=netcdf4``, but as arrays of size ``(1,)`` when reading with
-``engine=h5netcdf``.
+when reading a NetCDF file with different engines.

It is important to note that when you modify values of a Dataset, even one
linked to files on disk, only the in-memory copy you are manipulating in xarray
2 changes: 1 addition & 1 deletion doc/user-guide/reshaping.rst
@@ -274,7 +274,7 @@ Sort
----

One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and
-:py:meth:`~xarray.DataArray.sortby`. The input can be an individual or list of
+:py:meth:`~xarray.Dataset.sortby`. The input can be an individual or list of
1D ``DataArray`` objects:

.. ipython:: python
12 changes: 11 additions & 1 deletion doc/whats-new.rst
@@ -74,16 +74,22 @@ Bug fixes
of :py:meth:`DataArray.__setitem__` lose dimension names.
(:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_.
- Return ``float64`` in presence of ``NaT`` in :py:class:`~core.accessor_dt.DatetimeAccessor` and
-  special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar()`
+  special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar`
(:issue:`7928`, :pull:`8084`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Fix :py:meth:`~core.rolling.DatasetRolling.construct` with stride on Datasets without indexes.
+  (:issue:`7021`, :pull:`7578`).
+  By `Amrest Chinkamol <https://github.com/p4perf4ce>`_ and `Michael Niklas <https://github.com/headtr1ck>`_.
- Calling plot with kwargs ``col``, ``row`` or ``hue`` no longer squeezes dimensions passed via these arguments
(:issue:`7552`, :pull:`8174`).
By `Wiktor Kraśnicki <https://github.com/wkrasnicki>`_.
- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying
issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`,
:issue:`1064`, :pull:`7827`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- ``.rolling_exp`` functions no longer mistakenly lose non-dimensioned coords
+  (:issue:`6528`, :pull:`8114`)
+  By `Maximilian Roos <https://github.com/max-sixty>`_.

Documentation
~~~~~~~~~~~~~
@@ -96,6 +102,10 @@ Internal Changes
By `András Gunyhó <https://github.com/mgunyho>`_.
- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution in arrays that contain missing values (:pull:`7827`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Transition ``.rolling_exp`` functions to use ``.apply_ufunc`` internally rather
+  than ``.reduce``, as the start of a broader effort to move non-reducing
+  functions away from ``.reduce`` (:pull:`8114`).
+  By `Maximilian Roos <https://github.com/max-sixty>`_.

.. _whats-new.2023.08.0:

11 changes: 10 additions & 1 deletion pyproject.toml
@@ -79,7 +79,7 @@ show_error_context = true
warn_redundant_casts = true
warn_unused_ignores = true

-# Most of the numerical computing stack doesn't have type annotations yet.
+# Much of the numerical computing stack doesn't have type annotations yet.
[[tool.mypy.overrides]]
ignore_missing_imports = true
module = [
@@ -118,6 +118,15 @@ module = [
"numpy.exceptions.*", # remove once support for `numpy<2.0` has been dropped
]

+# Gradually we want to add more modules to this list, ratcheting up our total
+# coverage. Once a module is here, functions require annotations in order to
+# pass mypy. It would be especially useful to have tests here, because without
+# annotating test functions, we don't have a great way of testing our type
+# annotations; even just `-> None` is sufficient for mypy to check them.
+[[tool.mypy.overrides]]
+disallow_untyped_defs = true
+module = ["xarray.core.rolling_exp"]

[tool.ruff]
builtins = ["ellipsis"]
exclude = [
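For orientation, a minimal sketch of what this override enforces (hypothetical test names, illustrative only): mypy rejects the unannotated definition and accepts the annotated one.

def test_mean_untyped():  # mypy: error: Function is missing a return type annotation
    assert 1 + 1 == 2


def test_mean() -> None:  # a bare `-> None` is enough; mypy then checks the body
    assert 1 + 1 == 2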
6 changes: 3 additions & 3 deletions xarray/core/alignment.py
@@ -26,7 +26,7 @@
if TYPE_CHECKING:
    from xarray.core.dataarray import DataArray
    from xarray.core.dataset import Dataset
-    from xarray.core.types import JoinOptions, T_DataArray, T_Dataset
+    from xarray.core.types import JoinOptions, T_DataArray, T_Dataset, T_DuckArray


def reindex_variables(
@@ -173,7 +173,7 @@ def __init__(

    def _normalize_indexes(
        self,
-        indexes: Mapping[Any, Any],
+        indexes: Mapping[Any, Any | T_DuckArray],
    ) -> tuple[NormalizedIndexes, NormalizedIndexVars]:
"""Normalize the indexes/indexers used for re-indexing or alignment.
@@ -194,7 +194,7 @@ def _normalize_indexes(
f"Indexer has dimensions {idx.dims} that are different "
f"from that to be indexed along '{k}'"
                )
-            data = as_compatible_data(idx)
+            data: T_DuckArray = as_compatible_data(idx)
            pd_idx = safe_cast_to_index(data)
            pd_idx.name = k
            if isinstance(pd_idx, pd.MultiIndex):
3 changes: 3 additions & 0 deletions xarray/core/common.py
@@ -1066,6 +1066,9 @@ def where(
    ) -> T_DataWithCoords:
        """Filter elements from this object according to a condition.

+        Returns elements from 'DataArray', where 'cond' is True,
+        otherwise fill in 'other'.

        This operation follows the normal broadcasting and alignment rules that
        xarray uses for binary arithmetic.
2 changes: 1 addition & 1 deletion xarray/core/dataset.py
@@ -7481,7 +7481,7 @@ def _unary_op(self: T_Dataset, f, *args, **kwargs) -> T_Dataset:
            else:
                variables[k] = f(v, *args, **kwargs)
                if keep_attrs:
-                    variables[k].attrs = v._attrs
+                    variables[k]._attrs = v._attrs
        attrs = self._attrs if keep_attrs else None
        return self._replace_with_new_dims(variables, attrs=attrs)
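A brief usage sketch of the path this touches (assuming default options; not part of the commit): with keep_attrs enabled, unary ops copy each variable's attrs, and writing to the private `_attrs` slot also tolerates variables whose attrs were never set (stored internally as None).

import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(3.0), {"units": "m"})})  # tuple form: (dims, data, attrs)
with xr.set_options(keep_attrs=True):
    negated = -ds
print(negated["a"].attrs)  # {'units': 'm'}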
4 changes: 2 additions & 2 deletions xarray/core/parallelcompat.py
@@ -25,7 +25,7 @@
T_ChunkedArray = TypeVar("T_ChunkedArray")

if TYPE_CHECKING:
-    from xarray.core.types import T_Chunks, T_NormalizedChunks
+    from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks


@functools.lru_cache(maxsize=1)
@@ -257,7 +257,7 @@

    @abstractmethod
    def from_array(
-        self, data: np.ndarray, chunks: T_Chunks, **kwargs
+        self, data: T_DuckArray | np.typing.ArrayLike, chunks: T_Chunks, **kwargs
    ) -> T_ChunkedArray:
"""
Create a chunked array from a non-chunked numpy-like array.
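As a rough sketch of what the widened signature admits (assuming dask is installed; the real dask-backed chunk manager adds more handling), an implementation can essentially defer to dask.array.from_array, which already accepts duck arrays and array-likes:

import dask.array as da
import numpy as np

def from_array(data, chunks, **kwargs):
    # Hand any numpy-like or array-like input straight to dask.
    return da.from_array(data, chunks=chunks, **kwargs)

chunked = from_array(np.arange(12).reshape(3, 4), chunks=(1, 2))
print(chunked.chunks)  # ((1, 1, 1), (2, 2))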
9 changes: 6 additions & 3 deletions xarray/core/rolling.py
@@ -790,11 +790,14 @@ def construct(
            if not keep_attrs:
                dataset[key].attrs = {}

+        # Need to stride coords as well. TODO: is there a better way?
+        coords = self.obj.isel(
+            {d: slice(None, None, s) for d, s in zip(self.dim, strides)}
+        ).coords

        attrs = self.obj.attrs if keep_attrs else {}

-        return Dataset(dataset, coords=self.obj.coords, attrs=attrs).isel(
-            {d: slice(None, None, s) for d, s in zip(self.dim, strides)}
-        )
+        return Dataset(dataset, coords=coords, attrs=attrs)


class Coarsen(CoarsenArithmetic, Generic[T_Xarray]):
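A small usage sketch of the fixed path (the setup reported in issue 7021, where the rolled dimension carries no index; illustrative only):

import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("t", np.arange(10.0))})  # no coordinate, hence no index, on "t"
windows = ds.rolling(t=3).construct("w", stride=2)  # coords are now strided up front
print(dict(windows.sizes))  # {'t': 5, 'w': 3}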
51 changes: 40 additions & 11 deletions xarray/core/rolling_exp.py
@@ -5,21 +5,27 @@

import numpy as np

+from xarray.core.computation import apply_ufunc
from xarray.core.options import _get_keep_attrs
from xarray.core.pdcompat import count_not_none
from xarray.core.pycompat import is_duck_dask_array
-from xarray.core.types import T_DataWithCoords
+from xarray.core.types import T_DataWithCoords, T_DuckArray


-def _get_alpha(com=None, span=None, halflife=None, alpha=None):
+def _get_alpha(
+    com: float | None = None,
+    span: float | None = None,
+    halflife: float | None = None,
+    alpha: float | None = None,
+) -> float:
    # pandas defines in terms of com (converting to alpha in the algo)
    # so use its function to get a com and then convert to alpha

    com = _get_center_of_mass(com, span, halflife, alpha)
    return 1 / (1 + com)


-def move_exp_nanmean(array, *, axis, alpha):
+def move_exp_nanmean(array: T_DuckArray, *, axis: int, alpha: float) -> np.ndarray:
    if is_duck_dask_array(array):
        raise TypeError("rolling_exp is not currently supported for dask-like arrays")
    import numbagg
@@ -31,15 +37,20 @@ def move_exp_nanmean(array, *, axis, alpha):
return numbagg.move_exp_nanmean(array, axis=axis, alpha=alpha)


-def move_exp_nansum(array, *, axis, alpha):
+def move_exp_nansum(array: T_DuckArray, *, axis: int, alpha: float) -> np.ndarray:
    if is_duck_dask_array(array):
        raise TypeError("rolling_exp is not currently supported for dask-like arrays")
    import numbagg

    return numbagg.move_exp_nansum(array, axis=axis, alpha=alpha)


-def _get_center_of_mass(comass, span, halflife, alpha):
+def _get_center_of_mass(
+    comass: float | None,
+    span: float | None,
+    halflife: float | None,
+    alpha: float | None,
+) -> float:
"""
Vendored from pandas.core.window.common._get_center_of_mass
@@ -128,9 +139,18 @@ def mean(self, keep_attrs: bool | None = None) -> T_DataWithCoords:
        if keep_attrs is None:
            keep_attrs = _get_keep_attrs(default=True)

-        return self.obj.reduce(
-            move_exp_nanmean, dim=self.dim, alpha=self.alpha, keep_attrs=keep_attrs
-        )
+        dim_order = self.obj.dims
+
+        return apply_ufunc(
+            move_exp_nanmean,
+            self.obj,
+            input_core_dims=[[self.dim]],
+            kwargs=dict(alpha=self.alpha, axis=-1),
+            output_core_dims=[[self.dim]],
+            exclude_dims={self.dim},
+            keep_attrs=keep_attrs,
+            on_missing_core_dim="copy",
+        ).transpose(*dim_order)

    def sum(self, keep_attrs: bool | None = None) -> T_DataWithCoords:
        """
@@ -155,6 +175,15 @@ def sum(self, keep_attrs: bool | None = None) -> T_DataWithCoords:
        if keep_attrs is None:
            keep_attrs = _get_keep_attrs(default=True)

-        return self.obj.reduce(
-            move_exp_nansum, dim=self.dim, alpha=self.alpha, keep_attrs=keep_attrs
-        )
+        dim_order = self.obj.dims
+
+        return apply_ufunc(
+            move_exp_nansum,
+            self.obj,
+            input_core_dims=[[self.dim]],
+            kwargs=dict(alpha=self.alpha, axis=-1),
+            output_core_dims=[[self.dim]],
+            exclude_dims={self.dim},
+            keep_attrs=keep_attrs,
+            on_missing_core_dim="copy",
+        ).transpose(*dim_order)
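A brief usage sketch of the rewritten path (requires numbagg; illustrative only), showing the behavior the related bug fix restores: non-dimension coordinates survive the exponential rolling because apply_ufunc carries them through.

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6.0),
    dims="t",
    coords={"t": np.arange(6), "label": ("t", list("abcdef"))},
)
smoothed = da.rolling_exp(t=3).mean()  # window_type defaults to "span"
assert "label" in smoothed.coords  # the non-dimension coord is preserved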
18 changes: 16 additions & 2 deletions xarray/core/types.py
@@ -153,13 +153,27 @@ def copy(
T_Array = TypeVar("T_Array", bound="AbstractArray")
T_Index = TypeVar("T_Index", bound="Index")

+# `T_Xarray` is a type variable that can be either "DataArray" or "Dataset". When used
+# in a function definition, all inputs and outputs annotated with `T_Xarray` must be of
+# the same concrete type, either "DataArray" or "Dataset". This is generally preferred
+# over `T_DataArrayOrSet`, given the type system can determine the exact type.
+T_Xarray = TypeVar("T_Xarray", "DataArray", "Dataset")
+
+# `T_DataArrayOrSet` is a type variable that is bounded to either "DataArray" or
+# "Dataset". Use it for functions that might return either type, but where the exact
+# type cannot be determined statically using the type system.
T_DataArrayOrSet = TypeVar("T_DataArrayOrSet", bound=Union["Dataset", "DataArray"])

-# Maybe we rename this to T_Data or something less Fortran-y?
-T_Xarray = TypeVar("T_Xarray", "DataArray", "Dataset")
+# For working directly with `DataWithCoords`. It will only allow using methods defined
+# on `DataWithCoords`.
T_DataWithCoords = TypeVar("T_DataWithCoords", bound="DataWithCoords")

T_Alignable = TypeVar("T_Alignable", bound="Alignable")

+# Temporary placeholder for indicating an array api compliant type.
+# hopefully in the future we can narrow this down more:
+T_DuckArray = TypeVar("T_DuckArray", bound=Any)

ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"]
VarCompatible = Union["Variable", "ScalarOrArray"]
DaCompatible = Union["DataArray", "VarCompatible"]
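A minimal sketch of the convention these comments describe (hypothetical helper, illustrative only): with T_Xarray, the checker resolves the concrete type per call site, so a DataArray in means a DataArray out.

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from xarray.core.types import T_Xarray


def roundtrip(obj: T_Xarray) -> T_Xarray:
    # mypy infers DataArray -> DataArray and Dataset -> Dataset here.
    return obj.copy()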
4 changes: 2 additions & 2 deletions xarray/core/utils.py
@@ -73,7 +73,7 @@
import pandas as pd

if TYPE_CHECKING:
-    from xarray.core.types import Dims, ErrorOptionsWithWarn, OrderedDims
+    from xarray.core.types import Dims, ErrorOptionsWithWarn, OrderedDims, T_DuckArray

K = TypeVar("K")
V = TypeVar("V")
@@ -253,7 +253,7 @@ def is_list_like(value: Any) -> TypeGuard[list | tuple]:
    return isinstance(value, (list, tuple))


-def is_duck_array(value: Any) -> bool:
+def is_duck_array(value: Any) -> TypeGuard[T_DuckArray]:
    if isinstance(value, np.ndarray):
        return True
    return (
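A short sketch of what the TypeGuard return type buys callers (hypothetical function, illustrative only): after a True check, the checker narrows the value, so duck-array attributes type-check without casts.

from typing import Any

import numpy as np

from xarray.core.utils import is_duck_array


def describe(value: Any) -> None:
    if is_duck_array(value):
        # mypy now treats `value` as T_DuckArray, so `.ndim`/`.dtype` pass.
        print(value.ndim, value.dtype)
    else:
        print("not an array")


describe(np.zeros((2, 3)))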