Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable origin and offset arguments in resample #7284

Merged
merged 21 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ v2022.11.1 (unreleased)

New Features
~~~~~~~~~~~~

- Enable using ``offset`` and ``origin`` arguments in :py:meth:`DataArray.resample`
and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer
Clark <https://github.com/spencerkclark>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
42 changes: 38 additions & 4 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,13 @@
from .indexes import Index
from .resample import Resample
from .rolling_exp import RollingExp
from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords
from .types import (
DatetimeLike,
DTypeLikeSave,
ScalarOrArray,
SideOptions,
T_DataWithCoords,
)
from .variable import Variable

DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
Expand Down Expand Up @@ -817,7 +823,9 @@ def _resample(
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
base: int,
base: int | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
keep_attrs: bool | None,
loffset: datetime.timedelta | str | None,
restore_coord_dims: bool | None,
Expand Down Expand Up @@ -845,6 +853,18 @@ def _resample(
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

If a datetime is not used, these values are also supported:

- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is midnight of the first day of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the midnight that ends the last day (the ceiling midnight)
offset : pd.Timedelta, datetime.timedelta, or str, default None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -960,10 +980,24 @@ def _resample(
if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
from .resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(freq, closed, label, base, loffset)
grouper = CFTimeGrouper(
spencerkclark marked this conversation as resolved.
Show resolved Hide resolved
freq=freq,
closed=closed,
label=label,
base=base,
loffset=loffset,
origin=origin,
offset=offset,
)
else:
grouper = pd.Grouper(
freq=freq, closed=closed, label=label, base=base, loffset=loffset
freq=freq,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
)
group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
Expand Down
21 changes: 19 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from .rolling import DataArrayCoarsen, DataArrayRolling
from .types import (
CoarsenBoundaryOptions,
DatetimeLike,
DatetimeUnitOptions,
Dims,
ErrorOptions,
Expand Down Expand Up @@ -6531,7 +6532,9 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int = 0,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
keep_attrs: bool | None = None,
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
Expand All @@ -6555,10 +6558,22 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, default = 0
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

If a datetime is not used, these values are also supported:

- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is midnight of the first day of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the midnight that ends the last day (the ceiling midnight)
offset : pd.Timedelta, datetime.timedelta, or str, default None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -6640,6 +6655,8 @@ def resample(
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
keep_attrs=keep_attrs,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
Expand Down
21 changes: 19 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
CoarsenBoundaryOptions,
CombineAttrsOptions,
CompatOptions,
DatetimeLike,
DatetimeUnitOptions,
Dims,
ErrorOptions,
Expand Down Expand Up @@ -9114,7 +9115,9 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int = 0,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
keep_attrs: bool | None = None,
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
Expand All @@ -9138,10 +9141,22 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, default = 0
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

If a datetime is not used, these values are also supported:

- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is midnight of the first day of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the midnight that ends the last day (the ceiling midnight)
offset : pd.Timedelta, datetime.timedelta, or str, default None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -9176,6 +9191,8 @@ def resample(
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
keep_attrs=keep_attrs,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
Expand Down
Loading