From 5f297a79244c93d3494f22cdd84dd0630e4bf4be Mon Sep 17 00:00:00 2001 From: p4perf4ce Date: Fri, 3 Mar 2023 04:03:26 +0700 Subject: [PATCH 1/7] Removed `.isel` for consistent rolling behavior. `.isel` causes `DatasetRolling.construct` to behavior to be inconsistent with `DataArrayRolling.construct` when `stride` > 1. --- xarray/core/rolling.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 8b9f31bfdfd..82245e0b516 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -780,9 +780,7 @@ def construct( attrs = self.obj.attrs if keep_attrs else {} - return Dataset(dataset, coords=self.obj.coords, attrs=attrs).isel( - {d: slice(None, None, s) for d, s in zip(self.dim, strides)} - ) + return Dataset(dataset, coords=self.obj.coords, attrs=attrs) class Coarsen(CoarsenArithmetic, Generic[T_Xarray]): From 4030eff39e687084a7ffaefb1691ab5f4f725d85 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Fri, 15 Sep 2023 22:31:18 +0200 Subject: [PATCH 2/7] new rolling construct strategy for coords --- xarray/core/rolling.py | 6 +++++- xarray/tests/test_rolling.py | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 68fdc209d94..4aa8eb72594 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -765,6 +765,7 @@ def construct( strides = self._mapping_to_list(stride, default=1) dataset = {} + coords = self.obj.coords.copy() for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on self.dim dims = [d for d in self.dim if d in da.dims] @@ -785,9 +786,12 @@ def construct( if not keep_attrs: dataset[key].attrs = {} + # If striding need to update coords as well + coords.update(dataset[key].coords) + attrs = self.obj.attrs if keep_attrs else {} - return Dataset(dataset, coords=self.obj.coords, attrs=attrs) + return Dataset(dataset, coords=coords, attrs=attrs) class Coarsen(CoarsenArithmetic, Generic[T_Xarray]): diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 0e3c0874a0a..607aac86fbd 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -175,7 +175,7 @@ def test_rolling_pandas_compat(self, center, window, min_periods) -> None: @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) - def test_rolling_construct(self, center, window) -> None: + def test_rolling_construct(self, center: bool, window: int) -> None: s = pd.Series(np.arange(10)) da = DataArray.from_series(s) @@ -610,7 +610,7 @@ def test_rolling_pandas_compat(self, center, window, min_periods) -> None: @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) - def test_rolling_construct(self, center, window) -> None: + def test_rolling_construct(self, center: bool, window: int) -> None: df = pd.DataFrame( { "x": np.random.randn(20), @@ -633,6 +633,7 @@ def test_rolling_construct(self, center, window) -> None: df_rolling["x"][::2].values, ds_rolling_mean["x"].values ) np.testing.assert_allclose(df_rolling.index[::2], ds_rolling_mean["index"]) + # with fill_value ds_rolling_mean = ds_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" @@ -640,6 +641,15 @@ def test_rolling_construct(self, center, window) -> None: assert (ds_rolling_mean.isnull().sum() == 0).to_array(dim="vars").all() assert (ds_rolling_mean["x"] == 0.0).sum() >= 0 + # with stride and no index (https://github.com/pydata/xarray/issues/7021) + ds_noidx_rolling = ds.drop_vars("index").rolling(index=window, center=center) + ds_noidx_rolling_mean = ds_noidx_rolling.construct("window", stride=2).mean( + "window" + ) + np.testing.assert_allclose( + df_rolling["x"][::2].values, ds_noidx_rolling_mean["x"].values + ) + @pytest.mark.slow @pytest.mark.parametrize("ds", (1, 2), indirect=True) @pytest.mark.parametrize("center", (True, False)) From be96f606db7217645bf8a54f6907ef61c434bbb1 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Fri, 15 Sep 2023 22:34:17 +0200 Subject: [PATCH 3/7] add whats-new --- doc/whats-new.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f60941033f4..701c09b0329 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,9 +67,12 @@ Bug fixes of :py:meth:`DataArray.__setitem__` lose dimension names. (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan `_. - Return ``float64`` in presence of ``NaT`` in :py:class:`~core.accessor_dt.DatetimeAccessor` and - special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar()` + special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar` (:issue:`7928`, :pull:`8084`). By `Kai Mühlbauer `_. +- Fix :py:meth:`~core.rolling.DatasetRolling.construct` with stride on Datasets without indexes. + (:issue:`7021`, :pull:`7578`). + By `Amrest Chinkamol `_ and `Michael Niklas `_. Documentation ~~~~~~~~~~~~~ From f2d4d822d510aa23d6b050a6d492614a907973fd Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 16 Sep 2023 21:03:54 +0200 Subject: [PATCH 4/7] add new tests with different coords --- xarray/tests/test_rolling.py | 53 ++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 607aac86fbd..58bec8b77d5 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -627,13 +627,6 @@ def test_rolling_construct(self, center: bool, window: int) -> None: np.testing.assert_allclose(df_rolling["x"].values, ds_rolling_mean["x"].values) np.testing.assert_allclose(df_rolling.index, ds_rolling_mean["index"]) - # with stride - ds_rolling_mean = ds_rolling.construct("window", stride=2).mean("window") - np.testing.assert_allclose( - df_rolling["x"][::2].values, ds_rolling_mean["x"].values - ) - np.testing.assert_allclose(df_rolling.index[::2], ds_rolling_mean["index"]) - # with fill_value ds_rolling_mean = ds_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" @@ -641,14 +634,50 @@ def test_rolling_construct(self, center: bool, window: int) -> None: assert (ds_rolling_mean.isnull().sum() == 0).to_array(dim="vars").all() assert (ds_rolling_mean["x"] == 0.0).sum() >= 0 - # with stride and no index (https://github.com/pydata/xarray/issues/7021) - ds_noidx_rolling = ds.drop_vars("index").rolling(index=window, center=center) - ds_noidx_rolling_mean = ds_noidx_rolling.construct("window", stride=2).mean( - "window" + @pytest.mark.parametrize("center", (True, False)) + @pytest.mark.parametrize("window", (1, 2, 3, 4)) + def test_rolling_construct_stride(self, center: bool, window: int) -> None: + df = pd.DataFrame( + { + "x": np.random.randn(20), + "y": np.random.randn(20), + "time": np.linspace(0, 1, 20), + } ) + ds = Dataset.from_dataframe(df) + df_rolling_mean = df.rolling(window, center=center, min_periods=1).mean() + + # With an index (dimension coordinate) + ds_rolling = ds.rolling(index=window, center=center) + ds_rolling_mean = ds_rolling.construct("w", stride=2).mean("w") np.testing.assert_allclose( - df_rolling["x"][::2].values, ds_noidx_rolling_mean["x"].values + df_rolling_mean["x"][::2].values, ds_rolling_mean["x"].values + ) + np.testing.assert_allclose(df_rolling_mean.index[::2], ds_rolling_mean["index"]) + + # Without index (https://github.com/pydata/xarray/issues/7021) + ds2 = ds.drop_vars("index") + ds2_rolling = ds2.rolling(index=window, center=center) + ds2_rolling_mean = ds2_rolling.construct("w", stride=2).mean("w") + np.testing.assert_allclose( + df_rolling_mean["x"][::2].values, ds2_rolling_mean["x"].values + ) + + # Mixed coordinates, indexes and 2D coordinates + ds3 = xr.Dataset( + {"x": ("t", range(20)), "x2": ("y", range(5))}, + { + "t": range(20), + "y": ("y", range(5)), + "t2": ("t", range(20)), + "y2": ("y", range(5)), + "yt": (["t", "y"], np.ones((20, 5))), + }, ) + ds3_rolling = ds3.rolling(t=window, center=center) + ds3_rolling_mean = ds3_rolling.construct("w", stride=2).mean("w") + for coord in ds3.coords: + assert coord in ds3_rolling_mean.coords @pytest.mark.slow @pytest.mark.parametrize("ds", (1, 2), indirect=True) From c2d7ca916916eda03abd6594a27fab9671b5e9c7 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sat, 16 Sep 2023 21:15:43 +0200 Subject: [PATCH 5/7] next try on aligning strided coords --- xarray/core/rolling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 4aa8eb72594..c6911cbe65b 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -765,7 +765,6 @@ def construct( strides = self._mapping_to_list(stride, default=1) dataset = {} - coords = self.obj.coords.copy() for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on self.dim dims = [d for d in self.dim if d in da.dims] @@ -786,8 +785,10 @@ def construct( if not keep_attrs: dataset[key].attrs = {} - # If striding need to update coords as well - coords.update(dataset[key].coords) + # Need to stride coords as well. TODO: is there a better way? + coords = self.obj.isel( + {d: slice(None, None, s) for d, s in zip(self.dim, strides)} + ).coords attrs = self.obj.attrs if keep_attrs else {} From 44a59abc616ae752649d1c099f760419f7bae7e4 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Mon, 18 Sep 2023 14:41:50 +0200 Subject: [PATCH 6/7] add peakmem test for rolling.construct --- asv_bench/benchmarks/rolling.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 1d3713f19bf..70a0ae8e221 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -115,6 +115,11 @@ def peakmem_1drolling_reduce(self, func, use_bottleneck): roll = self.ds.var3.rolling(t=100) getattr(roll, func)() + @parameterized(["stride"], ([None, 5, 50])) + def peakmem_1drolling_construct(self, stride): + self.ds.var2.rolling(t=100).construct("w", stride=stride) + self.ds.var3.rolling(t=100).construct("w", stride=stride) + class DatasetRollingMemory(RollingMemory): @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) @@ -128,3 +133,7 @@ def peakmem_1drolling_reduce(self, func, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): roll = self.ds.rolling(t=100) getattr(roll, func)() + + @parameterized(["stride"], ([None, 5, 50])) + def peakmem_1drolling_construct(self, stride): + self.ds.rolling(t=100).construct("w", stride=stride) From 6744b9cca673f4a4791f444c7567452db151f7da Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 19 Sep 2023 14:30:49 +0200 Subject: [PATCH 7/7] increase asv benchmark rolling sizes --- asv_bench/benchmarks/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 70a0ae8e221..579f4f00fbc 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -5,10 +5,10 @@ from . import parameterized, randn, requires_dask -nx = 300 +nx = 3000 long_nx = 30000 ny = 200 -nt = 100 +nt = 1000 window = 20 randn_xy = randn((nx, ny), frac_nan=0.1)