Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve base and loffset arguments in resample #7444

Merged
merged 18 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/user-guide/weather-climate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:

.. ipython:: python

da.resample(time="81T", closed="right", label="right", base=3).mean()
da.resample(time="81T", closed="right", label="right", offset="3T").mean()

.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations
.. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601
Expand Down
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ Breaking changes

Deprecations
~~~~~~~~~~~~
- Following pandas, the ``base`` and ``loffset`` parameters of
:py:meth:`xr.DataArray.resample` and :py:meth:`xr.Dataset.resample` have been
deprecated and will be removed in a future version of xarray. Using the
``origin`` or ``offset`` parameters is recommended as a replacement for using
the ``base`` parameter and using time offset arithmetic is recommended as a
replacement for using the ``loffset`` parameter (:pull:`8459`). By `Spencer
Copy link
Contributor

@dcherian dcherian Mar 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to suggest adding an example of time offset arithmetic here. But then thought we could just link to docs. But turns out we have no docs on this AFAICT! I'll open an issue (see #7596)

Clark <https://github.com/spencerkclark>`_.


Bug fixes
Expand Down
70 changes: 48 additions & 22 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.pdcompat import _convert_base_to_offset
from xarray.core.pycompat import is_duck_dask_array
from xarray.core.utils import Frozen, either_dict_or_kwargs, is_scalar
from xarray.core.utils import (
Frozen,
either_dict_or_kwargs,
emit_user_level_warning,
is_scalar,
)

try:
import cftime
Expand Down Expand Up @@ -845,6 +851,12 @@ def _resample(
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.

.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.

origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -860,6 +872,12 @@ def _resample(
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.

.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.

restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -930,8 +948,8 @@ def _resample(
"""
# TODO support non-string indexer after removing the old API.

from xarray.coding.cftimeindex import CFTimeIndex
from xarray.core.dataarray import DataArray
from xarray.core.groupby import TimeResampleGrouper
from xarray.core.resample import RESAMPLE_DIM

if keep_attrs is not None:
Expand Down Expand Up @@ -961,28 +979,36 @@ def _resample(
dim_name: Hashable = dim
dim_coord = self[dim]

if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
from xarray.core.resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(
freq=freq,
closed=closed,
label=label,
base=base,
loffset=loffset,
origin=origin,
offset=offset,
if loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample will be deprecated "
"in a future version of xarray. Switch to using time offset arithmetic.",
FutureWarning,
)
else:
grouper = pd.Grouper(
freq=freq,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,

if base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if base is not None and offset is not None:
raise ValueError("base and offset cannot be present at the same time")

if base is not None:
spencerkclark marked this conversation as resolved.
Show resolved Hide resolved
index = self._indexes[dim_name].to_pandas_index()
offset = _convert_base_to_offset(base, freq, index)

grouper = TimeResampleGrouper(
freq=freq,
closed=closed,
label=label,
origin=origin,
offset=offset,
loffset=loffset,
)

group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)
Expand Down
78 changes: 65 additions & 13 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset
from xarray.core.types import DatetimeLike, SideOptions
from xarray.core.utils import Frozen

GroupKey = Any
Expand Down Expand Up @@ -245,7 +246,10 @@ def _unique_and_monotonic(group: T_Group) -> bool:
return index.is_unique and index.is_monotonic_increasing


def _apply_loffset(grouper, result):
def _apply_loffset(
loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta,
result: pd.Series | pd.DataFrame,
):
"""
(copied from pandas)
if loffset is set, offset the result index
Expand All @@ -258,17 +262,25 @@ def _apply_loffset(grouper, result):
result : Series or DataFrame
the result of resample
"""
# pd.Timedelta is a subclass of datetime.timedelta so we do not need to
# include it in instance checks.
if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)):
raise ValueError(
f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. "
f"Got {loffset}."
)

if isinstance(loffset, str):
loffset = pd.tseries.frequencies.to_offset(loffset)

needs_offset = (
isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
isinstance(loffset, (pd.DateOffset, datetime.timedelta))
and isinstance(result.index, pd.DatetimeIndex)
and len(result.index) > 0
)

if needs_offset:
result.index = result.index + grouper.loffset

grouper.loffset = None
result.index = result.index + loffset


class GroupBy(Generic[T_Xarray]):
Expand Down Expand Up @@ -530,14 +542,7 @@ def __repr__(self) -> str:
)

def _get_index_and_items(self, index, grouper):
from xarray.core.resample_cftime import CFTimeGrouper

s = pd.Series(np.arange(index.size), index)
if isinstance(grouper, CFTimeGrouper):
first_items = grouper.first_items(index)
else:
first_items = s.groupby(grouper).first()
_apply_loffset(grouper, first_items)
first_items = grouper.first_items(index)
full_index = first_items.index
if first_items.isnull().any():
first_items = first_items.dropna()
Expand Down Expand Up @@ -1365,3 +1370,50 @@ class DatasetGroupBy( # type: ignore[misc]
ImplementsDatasetReduce,
):
__slots__ = ()


class TimeResampleGrouper:
def __init__(
self,
freq: str,
closed: SideOptions | None,
label: SideOptions | None,
origin: str | DatetimeLike,
offset: pd.Timedelta | datetime.timedelta | str | None,
loffset: datetime.timedelta | str | None,
):
self.freq = freq
self.closed = closed
self.label = label
self.origin = origin
self.offset = offset
self.loffset = loffset

def first_items(self, index):
from xarray import CFTimeIndex
from xarray.core.resample_cftime import CFTimeGrouper

if isinstance(index, CFTimeIndex):
grouper = CFTimeGrouper(
freq=self.freq,
closed=self.closed,
label=self.label,
origin=self.origin,
offset=self.offset,
loffset=self.loffset,
)
return grouper.first_items(index)
else:
s = pd.Series(np.arange(index.size), index)
grouper = pd.Grouper(
freq=self.freq,
closed=self.closed,
label=self.label,
origin=self.origin,
offset=self.offset,
)

first_items = s.groupby(grouper).first()
if self.loffset is not None:
_apply_loffset(self.loffset, first_items)
return first_items
23 changes: 23 additions & 0 deletions xarray/core/pdcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
from enum import Enum
from typing import Literal

import pandas as pd

from xarray.coding import cftime_offsets


def count_not_none(*args) -> int:
"""Compute the number of non-None arguments.
Expand Down Expand Up @@ -68,3 +72,22 @@ def __repr__(self) -> str:
_NoDefault.no_default
) # Sentinel indicating the default value following pandas
NoDefault = Literal[_NoDefault.no_default] # For typing following pandas


def _convert_base_to_offset(base, freq, index):
"""Required until we officially deprecate the base argument to resample. This
translates a provided `base` argument to an `offset` argument, following logic
from pandas.
"""
from xarray.coding.cftimeindex import CFTimeIndex

if isinstance(index, pd.DatetimeIndex):
freq = pd.tseries.frequencies.to_offset(freq)
if isinstance(freq, pd.offsets.Tick):
return pd.Timedelta(base * freq.nanos // freq.n)
elif isinstance(index, CFTimeIndex):
freq = cftime_offsets.to_offset(freq)
if isinstance(freq, cftime_offsets.Tick):
return base * freq.as_timedelta() // freq.n
else:
raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.")
18 changes: 10 additions & 8 deletions xarray/core/resample_cftime.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,13 @@ def __init__(
freq: str | BaseCFTimeOffset,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int | None = None,
loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None,
origin: str | CFTimeDatetime = "start_day",
offset: str | datetime.timedelta | None = None,
):
self.offset: datetime.timedelta | None
self.closed: SideOptions
self.label: SideOptions

if base is not None and offset is not None:
raise ValueError("base and offset cannot be provided at the same time")

self.freq = to_offset(freq)
self.loffset = loffset
self.origin = origin
Expand Down Expand Up @@ -122,9 +117,6 @@ def __init__(
else:
self.label = label

if base is not None and isinstance(self.freq, Tick):
offset = type(self.freq)(n=base % self.freq.n).as_timedelta()

if offset is not None:
try:
self.offset = _convert_offset_to_timedelta(offset)
Expand All @@ -150,6 +142,16 @@ def first_items(self, index: CFTimeIndex):
index, self.freq, self.closed, self.label, self.origin, self.offset
)
if self.loffset is not None:
if not isinstance(
self.loffset, (str, datetime.timedelta, BaseCFTimeOffset)
):
# BaseCFTimeOffset is not public API so we do not include it in
# the error message for now.
raise ValueError(
f"`loffset` must be a str or datetime.timedelta object. "
f"Got {self.loffset}."
)

if isinstance(self.loffset, datetime.timedelta):
labels = labels + self.loffset
else:
Expand Down
Loading