Skip to content

Commit

Permalink
Enable origin and offset arguments in resample (pydata#7284)
Browse files Browse the repository at this point in the history
* Initial work toward enabling origin and offset arguments in resample

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix _convert_offset_to_timedelta

* Reduce number of tests

* Address initial review comments

* Add more typing information

* Make cftime import lazy

* Fix module_available import and test

* Remove old origin argument

* Add type annotations for resample_cftime.py

* Add None as a possibility for closed and label

* Add what's new entry

* Add missing type annotation

* Delete added line

* Fix typing errors

* Add comment and test for as_timedelta stub

* Remove old code

* [test-upstream]

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
3 people authored and headtr1ck committed Nov 29, 2022
1 parent 29673d5 commit 91fda37
Show file tree
Hide file tree
Showing 10 changed files with 456 additions and 107 deletions.
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ v2022.11.1 (unreleased)

New Features
~~~~~~~~~~~~

- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample`
and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`6538`). By `Spencer
Clark <https://github.com/spencerkclark>`_.
- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
By `Gregory Lee <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.

Expand Down
4 changes: 4 additions & 0 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ def __mul__(self, other):
return new_self * other
return type(self)(n=other * self.n)

def as_timedelta(self):
"""All Tick subclasses must implement an as_timedelta method."""
raise NotImplementedError


def _get_day_of_month(other, day_option):
"""Find the day in `other`'s month that satisfies a BaseCFTimeOffset's
Expand Down
42 changes: 38 additions & 4 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,13 @@
from .indexes import Index
from .resample import Resample
from .rolling_exp import RollingExp
from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords
from .types import (
DatetimeLike,
DTypeLikeSave,
ScalarOrArray,
SideOptions,
T_DataWithCoords,
)
from .variable import Variable

DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
Expand Down Expand Up @@ -817,7 +823,9 @@ def _resample(
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
base: int,
base: int | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
keep_attrs: bool | None,
loffset: datetime.timedelta | str | None,
restore_coord_dims: bool | None,
Expand Down Expand Up @@ -845,6 +853,18 @@ def _resample(
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -960,10 +980,24 @@ def _resample(
if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
from .resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(freq, closed, label, base, loffset)
grouper = CFTimeGrouper(
freq=freq,
closed=closed,
label=label,
base=base,
loffset=loffset,
origin=origin,
offset=offset,
)
else:
grouper = pd.Grouper(
freq=freq, closed=closed, label=label, base=base, loffset=loffset
freq=freq,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
)
group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
Expand Down
21 changes: 19 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from .rolling import DataArrayCoarsen, DataArrayRolling
from .types import (
CoarsenBoundaryOptions,
DatetimeLike,
DatetimeUnitOptions,
Dims,
ErrorOptions,
Expand Down Expand Up @@ -6531,7 +6532,9 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int = 0,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
keep_attrs: bool | None = None,
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
Expand All @@ -6555,10 +6558,22 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, default = 0
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -6640,6 +6655,8 @@ def resample(
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
keep_attrs=keep_attrs,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
Expand Down
21 changes: 19 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
CoarsenBoundaryOptions,
CombineAttrsOptions,
CompatOptions,
DatetimeLike,
DatetimeUnitOptions,
Dims,
ErrorOptions,
Expand Down Expand Up @@ -9128,7 +9129,9 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int = 0,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
keep_attrs: bool | None = None,
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
Expand All @@ -9152,10 +9155,22 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, default = 0
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -9190,6 +9205,8 @@ def resample(
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
keep_attrs=keep_attrs,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
Expand Down
Loading

0 comments on commit 91fda37

Please sign in to comment.