From 2962535c78a0c1ba5f5a46632883f819fdf51bf5 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Mon, 16 Jan 2023 13:23:49 -0500 Subject: [PATCH 01/13] [test-upstream] Preserve base and loffset arguments in resample While pandas is getting set to remove these, we have not had a chance to emit a deprecation warning yet for them in xarray. This should hopefully give users some extra time to adapt. --- xarray/core/common.py | 85 ++++++++++++++++++++++------------ xarray/core/groupby.py | 59 +++++++++++++++++------ xarray/core/resample_cftime.py | 8 ---- xarray/tests/test_groupby.py | 2 + 4 files changed, 103 insertions(+), 51 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 783847cd60d..db4fbcb59a4 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -20,10 +20,16 @@ import numpy as np import pandas as pd +from xarray.coding import cftime_offsets from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.pycompat import is_duck_dask_array -from xarray.core.utils import Frozen, either_dict_or_kwargs, is_scalar +from xarray.core.utils import ( + Frozen, + either_dict_or_kwargs, + emit_user_level_warning, + is_scalar, +) try: import cftime @@ -938,8 +944,8 @@ def _resample( """ # TODO support non-string indexer after removing the old API. - from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.dataarray import DataArray + from xarray.core.groupby import TimeResampleGrouper from xarray.core.resample import RESAMPLE_DIM if keep_attrs is not None: @@ -969,36 +975,36 @@ def _resample( dim_name: Hashable = dim dim_coord = self[dim] - # TODO: remove once pandas=1.1 is the minimum required version - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - r"'(base|loffset)' in .resample\(\) and in Grouper\(\) is deprecated.", - category=FutureWarning, + if loffset is not None: + emit_user_level_warning( + "Following pandas, the `loffset` argument to resample will be deprecated in a " + "future version of xarray. Switch to using time offset arithmetic.", + FutureWarning, ) - if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex): - from xarray.core.resample_cftime import CFTimeGrouper - - grouper = CFTimeGrouper( - freq=freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - origin=origin, - offset=offset, - ) - else: - grouper = pd.Grouper( - freq=freq, - closed=closed, - label=label, - base=base, - offset=offset, - origin=origin, - loffset=loffset, - ) + if base is None: + emit_user_level_warning( + "Following pandas, the `base` argument to resample will be deprecated in a " + "future version of xarray. Switch to using `origin` or `offset` instead.", + FutureWarning, + ) + + if base is not None and offset is not None: + raise ValueError("base and offset cannot be present at the same time") + + if base is not None: + index = self._indexes[dim_name].to_pandas_index() + offset = _convert_base_to_offset(base, freq, index) + + grouper = TimeResampleGrouper( + freq=freq, + closed=closed, + label=label, + origin=origin, + offset=offset, + loffset=loffset, + ) + group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM ) @@ -1818,3 +1824,22 @@ def _contains_datetime_like_objects(var) -> bool: np.datetime64, np.timedelta64, or cftime.datetime) """ return is_np_datetime_like(var.dtype) or contains_cftime_datetimes(var) + + +def _convert_base_to_offset(base, freq, index): + """Required until we officially deprecate the base argument to resample. This + translates a provided `base` argument to an `offset` argument, following logic + from pandas. + """ + from xarray.coding.cftimeindex import CFTimeIndex + + if isinstance(index, pd.DatetimeIndex): + freq = pd.tseries.frequencies.to_offset(freq) + if isinstance(freq, pd.offsets.Tick): + return pd.Timedelta(base * freq.nanos // freq.n) + elif isinstance(index, CFTimeIndex): + freq = cftime_offsets.to_offset(freq) + if isinstance(freq, cftime_offsets.Tick): + return base * freq.as_timedelta() // freq.n + else: + raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.") diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index a6516611efc..d9b4b4e5954 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -258,7 +258,7 @@ def _unique_and_monotonic(group: T_Group) -> bool: return index.is_unique and index.is_monotonic_increasing -def _apply_loffset(grouper, result): +def _apply_loffset(loffset, result): """ (copied from pandas) if loffset is set, offset the result index @@ -271,17 +271,19 @@ def _apply_loffset(grouper, result): result : Series or DataFrame the result of resample """ + # TODO: include some validation of the type of the loffset + # argument. + if isinstance(loffset, str): + loffset = pd.tseries.frequencies.to_offset(loffset) needs_offset = ( - isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta)) + isinstance(loffset, (pd.DateOffset, datetime.timedelta)) and isinstance(result.index, pd.DatetimeIndex) and len(result.index) > 0 ) if needs_offset: - result.index = result.index + grouper.loffset - - grouper.loffset = None + result.index = result.index + loffset class GroupBy(Generic[T_Xarray]): @@ -543,14 +545,7 @@ def __repr__(self) -> str: ) def _get_index_and_items(self, index, grouper): - from xarray.core.resample_cftime import CFTimeGrouper - - s = pd.Series(np.arange(index.size), index) - if isinstance(grouper, CFTimeGrouper): - first_items = grouper.first_items(index) - else: - first_items = s.groupby(grouper).first() - _apply_loffset(grouper, first_items) + first_items = grouper.first_items(index) full_index = first_items.index if first_items.isnull().any(): first_items = first_items.dropna() @@ -1379,3 +1374,41 @@ class DatasetGroupBy( # type: ignore[misc] ImplementsDatasetReduce, ): __slots__ = () + + +class TimeResampleGrouper: + def __init__(self, freq, closed, label, origin, offset, loffset): + self.freq = freq + self.closed = closed + self.label = label + self.origin = origin + self.offset = offset + self.loffset = loffset + + def first_items(self, index): + from xarray import CFTimeIndex + from xarray.core.resample_cftime import CFTimeGrouper + + if isinstance(index, CFTimeIndex): + grouper = CFTimeGrouper( + freq=self.freq, + closed=self.closed, + label=self.label, + origin=self.origin, + offset=self.offset, + loffset=self.loffset, + ) + return grouper.first_items(index) + else: + s = pd.Series(np.arange(index.size), index) + grouper = pd.Grouper( + freq=self.freq, + closed=self.closed, + label=self.label, + origin=self.origin, + offset=self.offset, + ) + + first_items = s.groupby(grouper).first() + _apply_loffset(self.loffset, first_items) + return first_items diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 7fdd372ec74..6f8727c9aef 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -71,7 +71,6 @@ def __init__( freq: str | BaseCFTimeOffset, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int | None = None, loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, origin: str | CFTimeDatetime = "start_day", offset: str | datetime.timedelta | None = None, @@ -79,10 +78,6 @@ def __init__( self.offset: datetime.timedelta | None self.closed: SideOptions self.label: SideOptions - - if base is not None and offset is not None: - raise ValueError("base and offset cannot be provided at the same time") - self.freq = to_offset(freq) self.loffset = loffset self.origin = origin @@ -122,9 +117,6 @@ def __init__( else: self.label = label - if base is not None and isinstance(self.freq, Tick): - offset = type(self.freq)(n=base % self.freq.n).as_timedelta() - if offset is not None: try: self.offset = _convert_offset_to_timedelta(offset) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 3b214f43d69..96454d16a2b 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -16,6 +16,7 @@ assert_equal, assert_identical, create_test_data, + has_pandas_version_two, requires_dask, requires_flox, requires_scipy, @@ -1809,6 +1810,7 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) + @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") def test_resample_base(self) -> None: times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) From 84b379e7c4f278787b23c36053749a9476dccfd5 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 4 Mar 2023 17:58:54 -0500 Subject: [PATCH 02/13] Emit warning when base is not None Co-authored-by: Deepak Cherian --- xarray/core/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index b64d8ba596c..3cc184fa8de 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -972,7 +972,7 @@ def _resample( FutureWarning, ) - if base is None: + if base is not None: emit_user_level_warning( "Following pandas, the `base` argument to resample will be deprecated in a " "future version of xarray. Switch to using `origin` or `offset` instead.", From 47e743c64d4a6bd47c924f97cbcef2393728b68d Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 07:13:49 -0500 Subject: [PATCH 03/13] Modify warnings to refer loffset and base as parameters; add tests --- xarray/core/common.py | 8 ++++---- xarray/tests/test_groupby.py | 7 +++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index db4fbcb59a4..57406de873d 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -977,15 +977,15 @@ def _resample( if loffset is not None: emit_user_level_warning( - "Following pandas, the `loffset` argument to resample will be deprecated in a " - "future version of xarray. Switch to using time offset arithmetic.", + "Following pandas, the `loffset` parameter to resample will be deprecated " + "in a future version of xarray. Switch to using time offset arithmetic.", FutureWarning, ) if base is None: emit_user_level_warning( - "Following pandas, the `base` argument to resample will be deprecated in a " - "future version of xarray. Switch to using `origin` or `offset` instead.", + "Following pandas, the `base` parameter to resample will be deprecated in " + "a future version of xarray. Switch to using `origin` or `offset` instead.", FutureWarning, ) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 96454d16a2b..7a2258d7ecf 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1485,7 +1485,8 @@ def test_resample(self): # Our use of `loffset` may change if we align our API with pandas' changes. # ref https://github.com/pydata/xarray/pull/4537 - actual = array.resample(time="24H", loffset="-12H").mean() + with pytest.warns(FutureWarning, match="`loffset` parameter"): + actual = array.resample(time="24H", loffset="-12H").mean() expected_ = array.to_series().resample("24H").mean() expected_.index += to_offset("-12H") expected = DataArray.from_series(expected_) @@ -1816,7 +1817,9 @@ def test_resample_base(self) -> None: array = DataArray(np.arange(10), [("time", times)]) base = 11 - actual = array.resample(time="24H", base=base).mean() + + with pytest.warns(FutureWarning, match="the `base` parameter to resample"): + actual = array.resample(time="24H", base=base).mean() expected = DataArray(array.to_series().resample("24H", base=base).mean()) assert_identical(expected, actual) From 09253e59c05d3659e64fffff93fcc2bcf21e59ab Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 08:09:24 -0500 Subject: [PATCH 04/13] Add type validation for loffset arguments --- xarray/core/groupby.py | 8 +++- xarray/core/resample_cftime.py | 10 +++++ xarray/tests/test_cftimeindex_resample.py | 47 +++++++++++++++++------ xarray/tests/test_groupby.py | 31 ++++++++++----- 4 files changed, 74 insertions(+), 22 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 855939a625f..640014308ff 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -258,8 +258,12 @@ def _apply_loffset(loffset, result): result : Series or DataFrame the result of resample """ - # TODO: include some validation of the type of the loffset - # argument. + if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)): + raise ValueError( + f"`loffset` must be a str, pd.DateOffset, or datetime.timedelta object. " + f"Got {loffset}." + ) + if isinstance(loffset, str): loffset = pd.tseries.frequencies.to_offset(loffset) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 6f8727c9aef..920a6873814 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -142,6 +142,16 @@ def first_items(self, index: CFTimeIndex): index, self.freq, self.closed, self.label, self.origin, self.offset ) if self.loffset is not None: + if not isinstance( + self.loffset, (str, datetime.timedelta, BaseCFTimeOffset) + ): + # BaseCFTimeOffset is not public API so we do not include it in + # the error message for now. + raise ValueError( + f"`loffset` must be a str or datetime.timedelta object. " + f"Got {self.loffset}." + ) + if isinstance(self.loffset, datetime.timedelta): labels = labels + self.loffset else: diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 5f818b7663d..a31c950d433 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -130,17 +130,18 @@ def test_resample(freqs, closed, label, base, offset) -> None: da_datetimeindex = da(datetime_index) da_cftimeindex = da(cftime_index) - compare_against_pandas( - da_datetimeindex, - da_cftimeindex, - resample_freq, - closed=closed, - label=label, - base=base, - offset=offset, - origin=origin, - loffset=loffset, - ) + with pytest.warns(FutureWarning, match="`loffset` parameter"): + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, + ) @pytest.mark.parametrize( @@ -245,3 +246,27 @@ def test_timedelta_offset() -> None: timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() string_result = da_cftime.resample(time="2D", offset=string).mean() xr.testing.assert_identical(timedelta_result, string_result) + + +@pytest.mark.parametrize("loffset", ["12H", datetime.timedelta(hours=-12)]) +def test_resample_loffset_cftimeindex(loffset): + datetimeindex = pd.date_range("2000-01-01", freq="6H", periods=10) + da_datetimeindex = xr.DataArray(np.arange(10), [("time", datetimeindex)]) + + cftimeindex = xr.cftime_range("2000-01-01", freq="6H", periods=10) + da_cftimeindex = xr.DataArray(np.arange(10), [("time", cftimeindex)]) + + with pytest.warns(FutureWarning, match="`loffset` parameter"): + result = da_cftimeindex.resample(time="24H", loffset=loffset).mean() + expected = da_datetimeindex.resample(time="24H", loffset=loffset).mean() + + result["time"] = result.xindexes["time"].to_pandas_index().to_datetimeindex() + xr.testing.assert_identical(result, expected) + + +def test_resample_invalid_loffset_cftimeindex(): + times = xr.cftime_range("2000-01-01", freq="6H", periods=10) + da = xr.DataArray(np.arange(10), [("time", times)]) + + with pytest.raises(ValueError): + da.resample(time="24H", loffset=1) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 7f51769d8f0..7a1d10f36f4 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime import warnings import numpy as np @@ -1476,15 +1477,6 @@ def test_resample(self): actual = array.resample(time="24H").reduce(np.mean) assert_identical(expected, actual) - # Our use of `loffset` may change if we align our API with pandas' changes. - # ref https://github.com/pydata/xarray/pull/4537 - with pytest.warns(FutureWarning, match="`loffset` parameter"): - actual = array.resample(time="24H", loffset="-12H").mean() - expected_ = array.to_series().resample("24H").mean() - expected_.index += to_offset("-12H") - expected = DataArray.from_series(expected_) - assert_identical(actual, expected) - with pytest.raises(ValueError, match=r"index must be monotonic"): array[[2, 0, 1]].resample(time="1D") @@ -1834,6 +1826,27 @@ def test_resample_origin(self) -> None: expected = DataArray(array.to_series().resample("24H", origin=origin).mean()) assert_identical(expected, actual) + @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") + @pytest.mark.parametrize( + "loffset", ["-12H", datetime.timedelta(hours=-12), pd.DateOffset(hours=-12)] + ) + def test_resample_loffset(self, loffset) -> None: + times = pd.date_range("2000-01-01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + with pytest.warns(FutureWarning, match="`loffset` parameter"): + actual = array.resample(time="24H", loffset=loffset).mean() + expected = DataArray(array.to_series().resample("24H", loffset=loffset).mean()) + assert_identical(actual, expected) + + @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") + def test_resample_invalid_loffset(self): + times = pd.date_range("2000-01-01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + with pytest.raises(ValueError, match="`loffset` must be"): + array.resample(time="24H", loffset=1).mean() + class TestDatasetResample: def test_resample_and_first(self): From 3431b664bb387c39af1b655e8d2f22ecd19a974d Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 08:31:37 -0500 Subject: [PATCH 05/13] Add typing and support for pd.Timedelta as an loffset --- xarray/core/groupby.py | 20 ++++++++++++++++---- xarray/tests/test_groupby.py | 8 +++++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 640014308ff..6c4902219ae 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -40,6 +40,7 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset + from xarray.core.types import DatetimeLike, SideOptions from xarray.core.utils import Frozen GroupKey = Any @@ -245,7 +246,10 @@ def _unique_and_monotonic(group: T_Group) -> bool: return index.is_unique and index.is_monotonic_increasing -def _apply_loffset(loffset, result): +def _apply_loffset( + loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta, + result: pd.Series | pd.DataFrame, +): """ (copied from pandas) if loffset is set, offset the result index @@ -258,9 +262,9 @@ def _apply_loffset(loffset, result): result : Series or DataFrame the result of resample """ - if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)): + if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta, pd.Timedelta)): raise ValueError( - f"`loffset` must be a str, pd.DateOffset, or datetime.timedelta object. " + f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. " f"Got {loffset}." ) @@ -1367,7 +1371,15 @@ class DatasetGroupBy( # type: ignore[misc] class TimeResampleGrouper: - def __init__(self, freq, closed, label, origin, offset, loffset): + def __init__( + self, + freq: str, + closed: SideOptions | None, + label: SideOptions | None, + origin: str | DatetimeLike, + offset: pd.Timedelta | datetime.timedelta | str | None, + loffset: datetime.timedelta | str | None, + ): self.freq = freq self.closed = closed self.label = label diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 7a1d10f36f4..af9c874665d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1828,7 +1828,13 @@ def test_resample_origin(self) -> None: @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") @pytest.mark.parametrize( - "loffset", ["-12H", datetime.timedelta(hours=-12), pd.DateOffset(hours=-12)] + "loffset", + [ + "-12H", + datetime.timedelta(hours=-12), + pd.Timedelta(hours=-12), + pd.DateOffset(hours=-12), + ], ) def test_resample_loffset(self, loffset) -> None: times = pd.date_range("2000-01-01", freq="6H", periods=10) From c06b6e70bff929e468c869f6566dfb46a70cf553 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 08:40:11 -0500 Subject: [PATCH 06/13] pd.Timedelta is a subclass of datetime.timedelta --- xarray/core/groupby.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 6c4902219ae..c6d750c8733 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -262,7 +262,9 @@ def _apply_loffset( result : Series or DataFrame the result of resample """ - if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta, pd.Timedelta)): + # pd.Timedelta is a subclass of datetime.timedelta so we do not need to + # include it in instance checks. + if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)): raise ValueError( f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. " f"Got {loffset}." From 61522250cc22adf20fd59874e0c539d16310f5c8 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 08:45:44 -0500 Subject: [PATCH 07/13] [test-upstream] Remove unneeded skipif --- xarray/tests/test_groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index af9c874665d..8588ee1156a 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1845,7 +1845,6 @@ def test_resample_loffset(self, loffset) -> None: expected = DataArray(array.to_series().resample("24H", loffset=loffset).mean()) assert_identical(actual, expected) - @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") def test_resample_invalid_loffset(self): times = pd.date_range("2000-01-01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) From 1bae848b2e26b0cc7b0c538ef5a9c52a07555d10 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 08:58:30 -0500 Subject: [PATCH 08/13] Fix failing tests --- xarray/core/groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c6d750c8733..15694b41219 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1414,5 +1414,6 @@ def first_items(self, index): ) first_items = s.groupby(grouper).first() - _apply_loffset(self.loffset, first_items) + if self.loffset is not None: + _apply_loffset(self.loffset, first_items) return first_items From 81e9c21a9bfe9bd4b747c5528d863b69d47d71d4 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 09:04:13 -0500 Subject: [PATCH 09/13] [test-upstream] Add return type to tests --- xarray/tests/test_cftimeindex_resample.py | 4 ++-- xarray/tests/test_groupby.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index a31c950d433..0cc20a8f4e7 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -249,7 +249,7 @@ def test_timedelta_offset() -> None: @pytest.mark.parametrize("loffset", ["12H", datetime.timedelta(hours=-12)]) -def test_resample_loffset_cftimeindex(loffset): +def test_resample_loffset_cftimeindex(loffset) -> None: datetimeindex = pd.date_range("2000-01-01", freq="6H", periods=10) da_datetimeindex = xr.DataArray(np.arange(10), [("time", datetimeindex)]) @@ -264,7 +264,7 @@ def test_resample_loffset_cftimeindex(loffset): xr.testing.assert_identical(result, expected) -def test_resample_invalid_loffset_cftimeindex(): +def test_resample_invalid_loffset_cftimeindex() -> None: times = xr.cftime_range("2000-01-01", freq="6H", periods=10) da = xr.DataArray(np.arange(10), [("time", times)]) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8588ee1156a..b3a0c70d707 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1845,7 +1845,7 @@ def test_resample_loffset(self, loffset) -> None: expected = DataArray(array.to_series().resample("24H", loffset=loffset).mean()) assert_identical(actual, expected) - def test_resample_invalid_loffset(self): + def test_resample_invalid_loffset(self) -> None: times = pd.date_range("2000-01-01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) From fa5bf61581d5fc5d95121d8560ca9da0d82df10c Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 09:15:41 -0500 Subject: [PATCH 10/13] [test-upstream] Update documentation --- doc/whats-new.rst | 7 +++++++ xarray/core/common.py | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b7e632bdfb7..13fe144f376 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,13 @@ Breaking changes Deprecations ~~~~~~~~~~~~ +- Following pandas, the ``base`` and ``loffset`` parameters of + :py:meth:`xr.DataArray.resample` and :py:meth:`xr.Dataset.resample` have been + deprecated and will be removed in a future version of xarray. Using the + ``origin`` or ``offset`` parameters is recommended as a replacement for using + the ``base`` parameter and using time offset arithmetic is recommended as a + replacement for using the ``loffset`` parameter (:pull:`8459`). By `Spencer + Clark `_. Bug fixes diff --git a/xarray/core/common.py b/xarray/core/common.py index 0574d0b1d44..f6160480345 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -849,6 +849,12 @@ def _resample( For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + + .. deprecated:: 2023.03.0 + Following pandas, the ``base`` parameter is deprecated in favor + of the ``origin`` and ``offset`` parameters, and will be removed + in a future version of xarray. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' The datetime on which to adjust the grouping. The timezone of origin must match the timezone of the index. @@ -864,6 +870,12 @@ def _resample( loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. + + .. deprecated:: 2023.03.0 + Following pandas, the ``loffset`` parameter is deprecated in favor + of using time offset arithmetic, and will be removed in a future + version of xarray. + restore_coord_dims : bool, optional If True, also restore the dimension order of multi-dimensional coordinates. From 31a423520b03d4f754835f2ebab8275226f038c1 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 5 Mar 2023 09:40:42 -0500 Subject: [PATCH 11/13] [test-upstream] Fix mypy errors in tests --- xarray/tests/test_cftimeindex_resample.py | 2 +- xarray/tests/test_groupby.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 0cc20a8f4e7..0f586d2d3c5 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -269,4 +269,4 @@ def test_resample_invalid_loffset_cftimeindex() -> None: da = xr.DataArray(np.arange(10), [("time", times)]) with pytest.raises(ValueError): - da.resample(time="24H", loffset=1) + da.resample(time="24H", loffset=1) # type: ignore diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b3a0c70d707..a7d98405017 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1850,7 +1850,7 @@ def test_resample_invalid_loffset(self) -> None: array = DataArray(np.arange(10), [("time", times)]) with pytest.raises(ValueError, match="`loffset` must be"): - array.resample(time="24H", loffset=1).mean() + array.resample(time="24H", loffset=1).mean() # type: ignore class TestDatasetResample: From e0d16734658806ccedf2c57afd5ed0c966192c50 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Tue, 7 Mar 2023 21:26:10 -0500 Subject: [PATCH 12/13] Move _convert_base_to_offset to pdcompat and add a few more tests --- xarray/core/common.py | 21 +-------------------- xarray/core/pdcompat.py | 23 +++++++++++++++++++++++ xarray/tests/test_cftimeindex_resample.py | 17 +++++++++++++++++ 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index f6160480345..cd90c741ac6 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -10,9 +10,9 @@ import numpy as np import pandas as pd -from xarray.coding import cftime_offsets from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.options import OPTIONS, _get_keep_attrs +from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import ( Frozen, @@ -1825,22 +1825,3 @@ def _contains_datetime_like_objects(var) -> bool: np.datetime64, np.timedelta64, or cftime.datetime) """ return is_np_datetime_like(var.dtype) or contains_cftime_datetimes(var) - - -def _convert_base_to_offset(base, freq, index): - """Required until we officially deprecate the base argument to resample. This - translates a provided `base` argument to an `offset` argument, following logic - from pandas. - """ - from xarray.coding.cftimeindex import CFTimeIndex - - if isinstance(index, pd.DatetimeIndex): - freq = pd.tseries.frequencies.to_offset(freq) - if isinstance(freq, pd.offsets.Tick): - return pd.Timedelta(base * freq.nanos // freq.n) - elif isinstance(index, CFTimeIndex): - freq = cftime_offsets.to_offset(freq) - if isinstance(freq, cftime_offsets.Tick): - return base * freq.as_timedelta() // freq.n - else: - raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.") diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index 018bb19b871..b20a96bb8d6 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -38,6 +38,10 @@ from enum import Enum from typing import Literal +import pandas as pd + +from xarray.coding import cftime_offsets + def count_not_none(*args) -> int: """Compute the number of non-None arguments. @@ -68,3 +72,22 @@ def __repr__(self) -> str: _NoDefault.no_default ) # Sentinel indicating the default value following pandas NoDefault = Literal[_NoDefault.no_default] # For typing following pandas + + +def _convert_base_to_offset(base, freq, index): + """Required until we officially deprecate the base argument to resample. This + translates a provided `base` argument to an `offset` argument, following logic + from pandas. + """ + from xarray.coding.cftimeindex import CFTimeIndex + + if isinstance(index, pd.DatetimeIndex): + freq = pd.tseries.frequencies.to_offset(freq) + if isinstance(freq, pd.offsets.Tick): + return pd.Timedelta(base * freq.nanos // freq.n) + elif isinstance(index, CFTimeIndex): + freq = cftime_offsets.to_offset(freq) + if isinstance(freq, cftime_offsets.Tick): + return base * freq.as_timedelta() // freq.n + else: + raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.") diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 0f586d2d3c5..07bc14f8983 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -8,6 +8,7 @@ import pytest import xarray as xr +from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.resample_cftime import CFTimeGrouper cftime = pytest.importorskip("cftime") @@ -270,3 +271,19 @@ def test_resample_invalid_loffset_cftimeindex() -> None: with pytest.raises(ValueError): da.resample(time="24H", loffset=1) # type: ignore + + +@pytest.mark.parametrize(("base", "freq"), [(1, "10S"), (17, "3H"), (15, "5U")]) +def test__convert_base_to_offset(base, freq): + # Verify that the cftime_offset adapted version of _convert_base_to_offset + # produces the same result as the pandas version. + datetimeindex = pd.date_range("2000", periods=2) + cftimeindex = xr.cftime_range("2000", periods=2) + pandas_result = _convert_base_to_offset(base, freq, datetimeindex) + cftime_result = _convert_base_to_offset(base, freq, cftimeindex) + assert pandas_result.to_pytimedelta() == cftime_result + + +def test__convert_base_to_offset_invalid_index(): + with pytest.raises(ValueError, match="Can only resample"): + _convert_base_to_offset(1, "12H", pd.Index([0])) From f782578ac5e8c5dfb597c7db2e96bcf9b33cd7b0 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Wed, 8 Mar 2023 09:09:03 -0500 Subject: [PATCH 13/13] Use offset instead of base in documentation --- doc/user-guide/weather-climate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 3c957978acf..793da9d1bdd 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -233,7 +233,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.resample(time="81T", closed="right", label="right", base=3).mean() + da.resample(time="81T", closed="right", label="right", offset="3T").mean() .. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations .. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601