Enable origin and offset arguments in resample (pydata#7284)

* Initial work toward enabling origin and offset arguments in resample * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix _convert_offset_to_timedelta * Reduce number of tests * Address initial review comments * Add more typing information * Make cftime import lazy * Fix module_available import and test * Remove old origin argument * Add type annotations for resample_cftime.py * Add None as a possibility for closed and label * Add what's new entry * Add missing type annotation * Delete added line * Fix typing errors * Add comment and test for as_timedelta stub * Remove old code * [test-upstream] Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <[email protected]>
headtr1ck · Nov 29, 2022 · 91fda37 · 91fda37
1 parent 29673d5
commit 91fda37
Show file tree

Hide file tree

Showing 10 changed files with 456 additions and 107 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -21,7 +21,9 @@ v2022.11.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
-
+- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample`
+  and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`6538`).  By `Spencer
+  Clark <https://github.com/spencerkclark>`_.
 - Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
   By `Gregory Lee  <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.
 

diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
@@ -207,6 +207,10 @@ def __mul__(self, other):
             return new_self * other
         return type(self)(n=other * self.n)
 
+    def as_timedelta(self):
+        """All Tick subclasses must implement an as_timedelta method."""
+        raise NotImplementedError
+
 
 def _get_day_of_month(other, day_option):
     """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's

diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -44,7 +44,13 @@
     from .indexes import Index
     from .resample import Resample
     from .rolling_exp import RollingExp
-    from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords
+    from .types import (
+        DatetimeLike,
+        DTypeLikeSave,
+        ScalarOrArray,
+        SideOptions,
+        T_DataWithCoords,
+    )
     from .variable import Variable
 
     DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
@@ -817,7 +823,9 @@ def _resample(
         skipna: bool | None,
         closed: SideOptions | None,
         label: SideOptions | None,
-        base: int,
+        base: int | None,
+        offset: pd.Timedelta | datetime.timedelta | str | None,
+        origin: str | DatetimeLike,
         keep_attrs: bool | None,
         loffset: datetime.timedelta | str | None,
         restore_coord_dims: bool | None,
@@ -845,6 +853,18 @@ def _resample(
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for "24H" frequency, base could
             range from 0 through 23.
+        origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
+            The datetime on which to adjust the grouping. The timezone of origin
+            must match the timezone of the index.
+
+            If a datetime is not used, these values are also supported:
+            - 'epoch': `origin` is 1970-01-01
+            - 'start': `origin` is the first value of the timeseries
+            - 'start_day': `origin` is the first day at midnight of the timeseries
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+        offset : pd.Timedelta, datetime.timedelta, or str, default is None
+            An offset timedelta added to the origin.
         loffset : timedelta or str, optional
             Offset used to adjust the resampled time labels. Some pandas date
             offset strings are supported.
@@ -960,10 +980,24 @@ def _resample(
             if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
                 from .resample_cftime import CFTimeGrouper
 
-                grouper = CFTimeGrouper(freq, closed, label, base, loffset)
+                grouper = CFTimeGrouper(
+                    freq=freq,
+                    closed=closed,
+                    label=label,
+                    base=base,
+                    loffset=loffset,
+                    origin=origin,
+                    offset=offset,
+                )
             else:
                 grouper = pd.Grouper(
-                    freq=freq, closed=closed, label=label, base=base, loffset=loffset
+                    freq=freq,
+                    closed=closed,
+                    label=label,
+                    base=base,
+                    offset=offset,
+                    origin=origin,
+                    loffset=loffset,
                 )
         group = DataArray(
             dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -78,6 +78,7 @@
     from .rolling import DataArrayCoarsen, DataArrayRolling
     from .types import (
         CoarsenBoundaryOptions,
+        DatetimeLike,
         DatetimeUnitOptions,
         Dims,
         ErrorOptions,
@@ -6531,7 +6532,9 @@ def resample(
         skipna: bool | None = None,
         closed: SideOptions | None = None,
         label: SideOptions | None = None,
-        base: int = 0,
+        base: int | None = None,
+        offset: pd.Timedelta | datetime.timedelta | str | None = None,
+        origin: str | DatetimeLike = "start_day",
         keep_attrs: bool | None = None,
         loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
@@ -6555,10 +6558,22 @@ def resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, default = 0
+        base : int, optional
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for "24H" frequency, base could
             range from 0 through 23.
+        origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
+            The datetime on which to adjust the grouping. The timezone of origin
+            must match the timezone of the index.
+
+            If a datetime is not used, these values are also supported:
+            - 'epoch': `origin` is 1970-01-01
+            - 'start': `origin` is the first value of the timeseries
+            - 'start_day': `origin` is the first day at midnight of the timeseries
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+        offset : pd.Timedelta, datetime.timedelta, or str, default is None
+            An offset timedelta added to the origin.
         loffset : timedelta or str, optional
             Offset used to adjust the resampled time labels. Some pandas date
             offset strings are supported.
@@ -6640,6 +6655,8 @@ def resample(
             closed=closed,
             label=label,
             base=base,
+            offset=offset,
+            origin=origin,
             keep_attrs=keep_attrs,
             loffset=loffset,
             restore_coord_dims=restore_coord_dims,

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -107,6 +107,7 @@
         CoarsenBoundaryOptions,
         CombineAttrsOptions,
         CompatOptions,
+        DatetimeLike,
         DatetimeUnitOptions,
         Dims,
         ErrorOptions,
@@ -9128,7 +9129,9 @@ def resample(
         skipna: bool | None = None,
         closed: SideOptions | None = None,
         label: SideOptions | None = None,
-        base: int = 0,
+        base: int | None = None,
+        offset: pd.Timedelta | datetime.timedelta | str | None = None,
+        origin: str | DatetimeLike = "start_day",
         keep_attrs: bool | None = None,
         loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
@@ -9152,10 +9155,22 @@ def resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, default = 0
+        base : int, optional
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for "24H" frequency, base could
             range from 0 through 23.
+        origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
+            The datetime on which to adjust the grouping. The timezone of origin
+            must match the timezone of the index.
+
+            If a datetime is not used, these values are also supported:
+            - 'epoch': `origin` is 1970-01-01
+            - 'start': `origin` is the first value of the timeseries
+            - 'start_day': `origin` is the first day at midnight of the timeseries
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+        offset : pd.Timedelta, datetime.timedelta, or str, default is None
+            An offset timedelta added to the origin.
         loffset : timedelta or str, optional
             Offset used to adjust the resampled time labels. Some pandas date
             offset strings are supported.
@@ -9190,6 +9205,8 @@ def resample(
             closed=closed,
             label=label,
             base=base,
+            offset=offset,
+            origin=origin,
             keep_attrs=keep_attrs,
             loffset=loffset,
             restore_coord_dims=restore_coord_dims,