From 91fda3799a3c6d0efb30335cb6f854c36d0970b2 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Mon, 28 Nov 2022 18:38:52 -0500
Subject: [PATCH] Enable `origin` and `offset` arguments in `resample` (#7284)

* Initial work toward enabling origin and offset arguments in resample

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix _convert_offset_to_timedelta

* Reduce number of tests

* Address initial review comments

* Add more typing information

* Make cftime import lazy

* Fix module_available import and test

* Remove old origin argument

* Add type annotations for resample_cftime.py

* Add None as a possibility for closed and label

* Add what's new entry

* Add missing type annotation

* Delete added line

* Fix typing errors

* Add comment and test for as_timedelta stub

* Remove old code

* [test-upstream]

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 doc/whats-new.rst                         |   4 +-
 xarray/coding/cftime_offsets.py           |   4 +
 xarray/core/common.py                     |  42 +++-
 xarray/core/dataarray.py                  |  21 +-
 xarray/core/dataset.py                    |  21 +-
 xarray/core/resample_cftime.py            | 259 +++++++++++++++++-----
 xarray/core/types.py                      |   8 +-
 xarray/tests/test_cftime_offsets.py       |   6 +
 xarray/tests/test_cftimeindex_resample.py | 171 ++++++++++----
 xarray/tests/test_groupby.py              |  27 +++
 10 files changed, 456 insertions(+), 107 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index b8a2f47bcf8..48113862c67 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -21,7 +21,9 @@ v2022.11.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
-
+- Enable using ``offset`` and ``origin`` arguments in :py:meth:`DataArray.resample`
+  and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`).  By `Spencer
+  Clark <https://github.com/spencerkclark>`_.
 - Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
   By `Gregory Lee  <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.
 
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index a029f39c7b8..04b2d773e2e 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -207,6 +207,10 @@ def __mul__(self, other):
             return new_self * other
         return type(self)(n=other * self.n)
 
+    def as_timedelta(self):
+        """Stub: all Tick subclasses must implement an as_timedelta method."""
+        raise NotImplementedError  # this definition never returns a value
+
 
 def _get_day_of_month(other, day_option):
     """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's
diff --git a/xarray/core/common.py b/xarray/core/common.py
index b613db9926d..d1387d62e99 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -44,7 +44,13 @@
     from .indexes import Index
     from .resample import Resample
     from .rolling_exp import RollingExp
-    from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords
+    from .types import (
+        DatetimeLike,
+        DTypeLikeSave,
+        ScalarOrArray,
+        SideOptions,
+        T_DataWithCoords,
+    )
     from .variable import Variable
 
     DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
@@ -817,7 +823,9 @@ def _resample(
         skipna: bool | None,
         closed: SideOptions | None,
         label: SideOptions | None,
-        base: int,
+        base: int | None,
+        offset: pd.Timedelta | datetime.timedelta | str | None,
+        origin: str | DatetimeLike,
         keep_attrs: bool | None,
         loffset: datetime.timedelta | str | None,
         restore_coord_dims: bool | None,
@@ -845,6 +853,18 @@ def _resample(
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for "24H" frequency, base could
             range from 0 through 23.
+        origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
+            The datetime on which to adjust the grouping. The timezone of origin
+            must match the timezone of the index.
+
+            If a datetime is not used, these values are also supported:
+            - 'epoch': `origin` is 1970-01-01
+            - 'start': `origin` is the first value of the timeseries
+            - 'start_day': `origin` is the first day at midnight of the timeseries
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+        offset : pd.Timedelta, datetime.timedelta, or str, default is None
+            An offset timedelta added to the origin.
         loffset : timedelta or str, optional
             Offset used to adjust the resampled time labels. Some pandas date
             offset strings are supported.
@@ -960,10 +980,24 @@ def _resample(
             if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
                 from .resample_cftime import CFTimeGrouper
 
-                grouper = CFTimeGrouper(freq, closed, label, base, loffset)
+                grouper = CFTimeGrouper(
+                    freq=freq,
+                    closed=closed,
+                    label=label,
+                    base=base,
+                    loffset=loffset,
+                    origin=origin,
+                    offset=offset,
+                )
             else:
                 grouper = pd.Grouper(
-                    freq=freq, closed=closed, label=label, base=base, loffset=loffset
+                    freq=freq,
+                    closed=closed,
+                    label=label,
+                    base=base,
+                    offset=offset,
+                    origin=origin,
+                    loffset=loffset,
                 )
         group = DataArray(
             dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index caa68bfae5c..6eac634bfff 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -78,6 +78,7 @@
     from .rolling import DataArrayCoarsen, DataArrayRolling
     from .types import (
         CoarsenBoundaryOptions,
+        DatetimeLike,
         DatetimeUnitOptions,
         Dims,
         ErrorOptions,
@@ -6531,7 +6532,9 @@ def resample(
         skipna: bool | None = None,
         closed: SideOptions | None = None,
         label: SideOptions | None = None,
-        base: int = 0,
+        base: int | None = None,
+        offset: pd.Timedelta | datetime.timedelta | str | None = None,
+        origin: str | DatetimeLike = "start_day",
         keep_attrs: bool | None = None,
         loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
@@ -6555,10 +6558,22 @@ def resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, default = 0
+        base : int, optional
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for "24H" frequency, base could
             range from 0 through 23.
+        origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
+            The datetime on which to adjust the grouping. The timezone of origin
+            must match the timezone of the index.
+
+            If a datetime is not used, these values are also supported:
+            - 'epoch': `origin` is 1970-01-01
+            - 'start': `origin` is the first value of the timeseries
+            - 'start_day': `origin` is the first day at midnight of the timeseries
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+        offset : pd.Timedelta, datetime.timedelta, or str, default is None
+            An offset timedelta added to the origin.
         loffset : timedelta or str, optional
             Offset used to adjust the resampled time labels. Some pandas date
             offset strings are supported.
@@ -6640,6 +6655,8 @@ def resample(
             closed=closed,
             label=label,
             base=base,
+            offset=offset,
+            origin=origin,
             keep_attrs=keep_attrs,
             loffset=loffset,
             restore_coord_dims=restore_coord_dims,
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 4e7a2b5603b..4f376bdf811 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -107,6 +107,7 @@
         CoarsenBoundaryOptions,
         CombineAttrsOptions,
         CompatOptions,
+        DatetimeLike,
         DatetimeUnitOptions,
         Dims,
         ErrorOptions,
@@ -9128,7 +9129,9 @@ def resample(
         skipna: bool | None = None,
         closed: SideOptions | None = None,
         label: SideOptions | None = None,
-        base: int = 0,
+        base: int | None = None,
+        offset: pd.Timedelta | datetime.timedelta | str | None = None,
+        origin: str | DatetimeLike = "start_day",
         keep_attrs: bool | None = None,
         loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
@@ -9152,10 +9155,22 @@ def resample(
             Side of each interval to treat as closed.
         label : {"left", "right"}, optional
             Side of each interval to use for labeling.
-        base : int, default = 0
+        base : int, optional
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for "24H" frequency, base could
             range from 0 through 23.
+        origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
+            The datetime on which to adjust the grouping. The timezone of origin
+            must match the timezone of the index.
+
+            If a datetime is not used, these values are also supported:
+            - 'epoch': `origin` is 1970-01-01
+            - 'start': `origin` is the first value of the timeseries
+            - 'start_day': `origin` is the first day at midnight of the timeseries
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+        offset : pd.Timedelta, datetime.timedelta, or str, default is None
+            An offset timedelta added to the origin.
         loffset : timedelta or str, optional
             Offset used to adjust the resampled time labels. Some pandas date
             offset strings are supported.
@@ -9190,6 +9205,8 @@ def resample(
             closed=closed,
             label=label,
             base=base,
+            offset=offset,
+            origin=origin,
             keep_attrs=keep_attrs,
             loffset=loffset,
             restore_coord_dims=restore_coord_dims,
diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py
index 11eceda77ee..da21fdd17cf 100644
--- a/xarray/core/resample_cftime.py
+++ b/xarray/core/resample_cftime.py
@@ -38,21 +38,27 @@
 from __future__ import annotations
 
 import datetime
+import typing
 
 import numpy as np
 import pandas as pd
 
 from ..coding.cftime_offsets import (
-    CFTIME_TICKS,
+    BaseCFTimeOffset,
     Day,
     MonthEnd,
     QuarterEnd,
+    Tick,
     YearEnd,
     cftime_range,
     normalize_date,
     to_offset,
 )
 from ..coding.cftimeindex import CFTimeIndex
+from .types import SideOptions
+
+if typing.TYPE_CHECKING:
+    from .types import CFTimeDatetime
 
 
 class CFTimeGrouper:
@@ -60,25 +66,77 @@ class CFTimeGrouper:
     single method, the only one required for resampling in xarray.  It cannot
     be used in a call to groupby like a pandas.Grouper object can."""
 
-    def __init__(self, freq, closed=None, label=None, base=0, loffset=None):
+    def __init__(
+        self,
+        freq: str | BaseCFTimeOffset,
+        closed: SideOptions | None = None,
+        label: SideOptions | None = None,
+        base: int | None = None,
+        loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None,
+        origin: str | CFTimeDatetime = "start_day",
+        offset: str | datetime.timedelta | None = None,
+    ):
+        self.offset: datetime.timedelta | None
+        self.closed: SideOptions
+        self.label: SideOptions
+
+        if base is not None and offset is not None:
+            raise ValueError("base and offset cannot be provided at the same time")
+
         self.freq = to_offset(freq)
-        self.closed = closed
-        self.label = label
-        self.base = base
         self.loffset = loffset
+        self.origin = origin
 
         if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)):
-            if self.closed is None:
+            if closed is None:
                 self.closed = "right"
-            if self.label is None:
+            else:
+                self.closed = closed
+            if label is None:
                 self.label = "right"
+            else:
+                self.label = label
+        else:
+            # The backward resample sets ``closed`` to ``'right'`` by default
+            # since the last value should be considered as the edge point for
+            # the last bin. When origin is "end" or "end_day", the value for a
+            # specific ``cftime.datetime`` index stands for the resample result
+            # from the current ``cftime.datetime`` minus ``freq`` to the current
+            # ``cftime.datetime`` with a right close.
+            if self.origin in ["end", "end_day"]:
+                if closed is None:
+                    self.closed = "right"
+                else:
+                    self.closed = closed
+                if label is None:
+                    self.label = "right"
+                else:
+                    self.label = label
+            else:
+                if closed is None:
+                    self.closed = "left"
+                else:
+                    self.closed = closed
+                if label is None:
+                    self.label = "left"
+                else:
+                    self.label = label
+
+        if base is not None and isinstance(self.freq, Tick):
+            offset = type(self.freq)(n=base % self.freq.n).as_timedelta()
+
+        if offset is not None:
+            try:
+                self.offset = _convert_offset_to_timedelta(offset)
+            except (ValueError, AttributeError) as error:
+                raise ValueError(
+                    f"offset must be a datetime.timedelta object or an offset string "
+                    f"that can be converted to a timedelta.  Got {offset} instead."
+                ) from error
         else:
-            if self.closed is None:
-                self.closed = "left"
-            if self.label is None:
-                self.label = "left"
+            self.offset = None
 
-    def first_items(self, index):
+    def first_items(self, index: CFTimeIndex):
         """Meant to reproduce the results of the following
 
         grouper = pandas.Grouper(...)
@@ -89,7 +147,7 @@ def first_items(self, index):
         """
 
         datetime_bins, labels = _get_time_bins(
-            index, self.freq, self.closed, self.label, self.base
+            index, self.freq, self.closed, self.label, self.origin, self.offset
         )
         if self.loffset is not None:
             if isinstance(self.loffset, datetime.timedelta):
@@ -111,7 +169,14 @@ def first_items(self, index):
         return first_items.where(non_duplicate)
 
 
-def _get_time_bins(index, freq, closed, label, base):
+def _get_time_bins(
+    index: CFTimeIndex,
+    freq: BaseCFTimeOffset,
+    closed: SideOptions,
+    label: SideOptions,
+    origin: str | CFTimeDatetime,
+    offset: datetime.timedelta | None,
+):
     """Obtain the bins and their respective labels for resampling operations.
 
     Parameters
@@ -122,18 +187,26 @@ def _get_time_bins(index, freq, closed, label, base):
         The offset object representing target conversion a.k.a. resampling
         frequency (e.g., 'MS', '2D', 'H', or '3T' with
         coding.cftime_offsets.to_offset() applied to it).
-    closed : 'left' or 'right', optional
+    closed : 'left' or 'right'
         Which side of bin interval is closed.
         The default is 'left' for all frequency offsets except for 'M' and 'A',
         which have a default of 'right'.
-    label : 'left' or 'right', optional
+    label : 'left' or 'right'
         Which bin edge label to label bucket with.
         The default is 'left' for all frequency offsets except for 'M' and 'A',
         which have a default of 'right'.
-    base : int, optional
-        For frequencies that evenly subdivide 1 day, the "origin" of the
-        aggregated intervals. For example, for '5min' frequency, base could
-        range from 0 through 4. Defaults to 0.
+    origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day'
+        The datetime on which to adjust the grouping. The timezone of origin
+        must match the timezone of the index.
+
+        If a datetime is not used, these values are also supported:
+        - 'epoch': `origin` is 1970-01-01
+        - 'start': `origin` is the first value of the timeseries
+        - 'start_day': `origin` is the first day at midnight of the timeseries
+        - 'end': `origin` is the last value of the timeseries
+        - 'end_day': `origin` is the ceiling midnight of the last day
+    offset : datetime.timedelta, default is None
+        An offset timedelta added to the origin.
 
     Returns
     -------
@@ -154,7 +227,7 @@ def _get_time_bins(index, freq, closed, label, base):
         return datetime_bins, labels
 
     first, last = _get_range_edges(
-        index.min(), index.max(), freq, closed=closed, base=base
+        index.min(), index.max(), freq, closed=closed, origin=origin, offset=offset
     )
     datetime_bins = labels = cftime_range(
         freq=freq, start=first, end=last, name=index.name
@@ -172,7 +245,13 @@ def _get_time_bins(index, freq, closed, label, base):
     return datetime_bins, labels
 
 
-def _adjust_bin_edges(datetime_bins, offset, closed, index, labels):
+def _adjust_bin_edges(
+    datetime_bins: np.ndarray,
+    freq: BaseCFTimeOffset,
+    closed: SideOptions,
+    index: CFTimeIndex,
+    labels: np.ndarray,
+):
     """This is required for determining the bin edges resampling with
     daily frequencies greater than one day, month end, and year end
     frequencies.
@@ -207,8 +286,8 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels):
     This is also required for daily frequencies longer than one day and
     year-end frequencies.
     """
-    is_super_daily = isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)) or (
-        isinstance(offset, Day) and offset.n > 1
+    is_super_daily = isinstance(freq, (MonthEnd, QuarterEnd, YearEnd)) or (
+        isinstance(freq, Day) and freq.n > 1
     )
     if is_super_daily:
         if closed == "right":
@@ -220,7 +299,14 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels):
     return datetime_bins, labels
 
 
-def _get_range_edges(first, last, offset, closed="left", base=0):
+def _get_range_edges(
+    first: CFTimeDatetime,
+    last: CFTimeDatetime,
+    freq: BaseCFTimeOffset,
+    closed: SideOptions = "left",
+    origin: str | CFTimeDatetime = "start_day",
+    offset: datetime.timedelta | None = None,
+):
     """Get the correct starting and ending datetimes for the resampled
     CFTimeIndex range.
 
@@ -232,16 +318,24 @@ def _get_range_edges(first, last, offset, closed="left", base=0):
     last : cftime.datetime
         Uncorrected ending datetime object for resampled CFTimeIndex range.
         Usually the max of the original CFTimeIndex.
-    offset : xarray.coding.cftime_offsets.BaseCFTimeOffset
+    freq : xarray.coding.cftime_offsets.BaseCFTimeOffset
         The offset object representing target conversion a.k.a. resampling
         frequency. Contains information on offset type (e.g. Day or 'D') and
         offset magnitude (e.g., n = 3).
-    closed : 'left' or 'right', optional
+    closed : 'left' or 'right'
         Which side of bin interval is closed. Defaults to 'left'.
-    base : int, optional
-        For frequencies that evenly subdivide 1 day, the "origin" of the
-        aggregated intervals. For example, for '5min' frequency, base could
-        range from 0 through 4. Defaults to 0.
+    origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day'
+        The datetime on which to adjust the grouping. The timezone of origin
+        must match the timezone of the index.
+
+        If a datetime is not used, these values are also supported:
+        - 'epoch': `origin` is 1970-01-01
+        - 'start': `origin` is the first value of the timeseries
+        - 'start_day': `origin` is the first day at midnight of the timeseries
+        - 'end': `origin` is the last value of the timeseries
+        - 'end_day': `origin` is the ceiling midnight of the last day
+    offset : datetime.timedelta, default is None
+        An offset timedelta added to the origin.
 
     Returns
     -------
@@ -250,21 +344,28 @@ def _get_range_edges(first, last, offset, closed="left", base=0):
     last : cftime.datetime
         Corrected ending datetime object for resampled CFTimeIndex range.
     """
-    if isinstance(offset, CFTIME_TICKS):
+    if isinstance(freq, Tick):
         first, last = _adjust_dates_anchored(
-            first, last, offset, closed=closed, base=base
+            first, last, freq, closed=closed, origin=origin, offset=offset
         )
         return first, last
     else:
         first = normalize_date(first)
         last = normalize_date(last)
 
-    first = offset.rollback(first) if closed == "left" else first - offset
-    last = last + offset
+    first = freq.rollback(first) if closed == "left" else first - freq
+    last = last + freq
     return first, last
 
 
-def _adjust_dates_anchored(first, last, offset, closed="right", base=0):
+def _adjust_dates_anchored(
+    first: CFTimeDatetime,
+    last: CFTimeDatetime,
+    freq: Tick,
+    closed: SideOptions = "right",
+    origin: str | CFTimeDatetime = "start_day",
+    offset: datetime.timedelta | None = None,
+):
     """First and last offsets should be calculated from the start day to fix
     an error cause by resampling across multiple days when a one day period is
     not a multiple of the frequency.
@@ -276,16 +377,24 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0):
         A datetime object representing the start of a CFTimeIndex range.
     last : cftime.datetime
         A datetime object representing the end of a CFTimeIndex range.
-    offset : xarray.coding.cftime_offsets.BaseCFTimeOffset
+    freq : xarray.coding.cftime_offsets.BaseCFTimeOffset
         The offset object representing target conversion a.k.a. resampling
         frequency. Contains information on offset type (e.g. Day or 'D') and
         offset magnitude (e.g., n = 3).
-    closed : 'left' or 'right', optional
+    closed : 'left' or 'right'
         Which side of bin interval is closed. Defaults to 'right'.
-    base : int, optional
-        For frequencies that evenly subdivide 1 day, the "origin" of the
-        aggregated intervals. For example, for '5min' frequency, base could
-        range from 0 through 4. Defaults to 0.
+    origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day'
+        The datetime on which to adjust the grouping. The timezone of origin
+        must match the timezone of the index.
+
+        If a datetime is not used, these values are also supported:
+        - 'epoch': `origin` is 1970-01-01
+        - 'start': `origin` is the first value of the timeseries
+        - 'start_day': `origin` is the first day at midnight of the timeseries
+        - 'end': `origin` is the last value of the timeseries
+        - 'end_day': `origin` is the ceiling midnight of the last day
+    offset : datetime.timedelta, default is None
+        An offset timedelta added to the origin.
 
     Returns
     -------
@@ -296,33 +405,59 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0):
         A datetime object representing the end of a date range that has been
         adjusted to fix resampling errors.
     """
+    import cftime
+
+    if origin == "start_day":
+        origin_date = normalize_date(first)
+    elif origin == "start":
+        origin_date = first
+    elif origin == "epoch":
+        origin_date = type(first)(1970, 1, 1)
+    elif origin in ["end", "end_day"]:
+        origin_last = last if origin == "end" else _ceil_via_cftimeindex(last, "D")
+        sub_freq_times = (origin_last - first) // freq.as_timedelta()
+        if closed == "left":
+            sub_freq_times += 1
+        first = origin_last - sub_freq_times * freq
+        origin_date = first
+    elif isinstance(origin, cftime.datetime):
+        origin_date = origin
+    else:
+        raise ValueError(
+            f"origin must be one of {{'epoch', 'start_day', 'start', 'end', 'end_day'}} "
+            f"or a cftime.datetime object.  Got {origin}."
+        )
+
+    if offset is not None:
+        origin_date = origin_date + offset
+
+    foffset = (first - origin_date) % freq.as_timedelta()
+    loffset = (last - origin_date) % freq.as_timedelta()
 
-    base = base % offset.n
-    start_day = normalize_date(first)
-    base_td = type(offset)(n=base).as_timedelta()
-    start_day += base_td
-    foffset = exact_cftime_datetime_difference(start_day, first) % offset.as_timedelta()
-    loffset = exact_cftime_datetime_difference(start_day, last) % offset.as_timedelta()
     if closed == "right":
         if foffset.total_seconds() > 0:
             fresult = first - foffset
         else:
-            fresult = first - offset.as_timedelta()
+            fresult = first - freq.as_timedelta()
 
         if loffset.total_seconds() > 0:
-            lresult = last + (offset.as_timedelta() - loffset)
+            lresult = last + (freq.as_timedelta() - loffset)
         else:
             lresult = last
     else:
-        fresult = first - foffset if foffset.total_seconds() > 0 else first
+        if foffset.total_seconds() > 0:
+            fresult = first - foffset
+        else:
+            fresult = first
+
         if loffset.total_seconds() > 0:
-            lresult = last + (offset.as_timedelta() - loffset)
+            lresult = last + (freq.as_timedelta() - loffset)
         else:
-            lresult = last + offset.as_timedelta()
+            lresult = last + freq
     return fresult, lresult
 
 
-def exact_cftime_datetime_difference(a, b):
+def exact_cftime_datetime_difference(a: CFTimeDatetime, b: CFTimeDatetime):
     """Exact computation of b - a
 
     Assumes:
@@ -360,3 +495,19 @@ def exact_cftime_datetime_difference(a, b):
     seconds = int(round(seconds.total_seconds()))
     microseconds = b.microsecond - a.microsecond
     return datetime.timedelta(seconds=seconds, microseconds=microseconds)
+
+
+def _convert_offset_to_timedelta(
+    offset: datetime.timedelta | str | BaseCFTimeOffset,
+) -> datetime.timedelta:
+    if isinstance(offset, datetime.timedelta):
+        return offset  # already a timedelta: returned unchanged
+    elif isinstance(offset, (str, Tick)):
+        return to_offset(offset).as_timedelta()  # normalize via to_offset first
+    else:
+        raise ValueError  # no message here; CFTimeGrouper.__init__ re-raises with one
+
+
+def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: str | BaseCFTimeOffset):
+    index = CFTimeIndex([date])  # wrap the scalar so CFTimeIndex.ceil can be used
+    return index.ceil(freq).item()  # unwrap the single resulting element
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 7579148e4c2..adf046dabb2 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import datetime
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -17,6 +18,7 @@
 )
 
 import numpy as np
+import pandas as pd
 from packaging.version import Version
 
 if TYPE_CHECKING:
@@ -82,7 +84,11 @@ def dtype(self) -> np.dtype:
         # anything with a dtype attribute
         _SupportsDType,
     ]
-
+    try:
+        from cftime import datetime as CFTimeDatetime
+    except ImportError:
+        CFTimeDatetime = Any
+    DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime]
 else:
     Self: Any = None
     DTypeLikeSave: Any = None
diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index 075393e84e7..d28f4594559 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1385,3 +1385,9 @@ def test_date_range_like_errors():
         match="'source' must be a 1D array of datetime objects for inferring its range.",
     ):
         date_range_like(da, "noleap")
+
+
+def test_as_timedelta_not_implemented_error():
+    tick = Tick()  # base class, where as_timedelta is only a stub
+    with pytest.raises(NotImplementedError):
+        tick.as_timedelta()
diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py
index 35447a39f3c..e780421e09e 100644
--- a/xarray/tests/test_cftimeindex_resample.py
+++ b/xarray/tests/test_cftimeindex_resample.py
@@ -9,7 +9,7 @@
 import xarray as xr
 from xarray.core.resample_cftime import CFTimeGrouper
 
-pytest.importorskip("cftime")
+cftime = pytest.importorskip("cftime")
 
 
 # Create a list of pairs of similar-length initial and resample frequencies
@@ -50,7 +50,63 @@
 ]
 
 
-def da(index):
+def compare_against_pandas(
+    da_datetimeindex,
+    da_cftimeindex,
+    freq,
+    closed=None,
+    label=None,
+    base=None,
+    offset=None,
+    origin=None,
+    loffset=None,
+) -> None:
+    if isinstance(origin, tuple):  # date components: build one origin per index type
+        origin_pandas = pd.Timestamp(datetime.datetime(*origin))
+        origin_cftime = cftime.DatetimeGregorian(*origin)
+    else:
+        origin_pandas = origin
+        origin_cftime = origin
+
+    try:
+        result_datetimeindex = da_datetimeindex.resample(
+            time=freq,
+            closed=closed,
+            label=label,
+            base=base,
+            loffset=loffset,
+            offset=offset,
+            origin=origin_pandas,
+        ).mean()
+    except ValueError:  # DatetimeIndex path raised: the CFTimeIndex path must too
+        with pytest.raises(ValueError):
+            da_cftimeindex.resample(
+                time=freq,
+                closed=closed,
+                label=label,
+                base=base,
+                loffset=loffset,
+                origin=origin_cftime,
+                offset=offset,
+            ).mean()
+    else:  # results are only bound (and comparable) when neither path raised
+        result_cftimeindex = da_cftimeindex.resample(
+            time=freq,
+            closed=closed,
+            label=label,
+            base=base,
+            loffset=loffset,
+            origin=origin_cftime,
+            offset=offset,
+        ).mean()
+        # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
+        result_cftimeindex["time"] = (
+            result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex()
+        )
+        xr.testing.assert_identical(result_cftimeindex, result_datetimeindex)
+
+
+def da(index) -> xr.DataArray:
     return xr.DataArray(
         np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"]
     )
@@ -59,53 +115,31 @@ def da(index):
 @pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x))
 @pytest.mark.parametrize("closed", [None, "left", "right"])
 @pytest.mark.parametrize("label", [None, "left", "right"])
-@pytest.mark.parametrize("base", [24, 31])
-def test_resample(freqs, closed, label, base) -> None:
+@pytest.mark.parametrize(  # paired so base and offset are never both set (that raises)
+    ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}"
+)
+def test_resample(freqs, closed, label, base, offset) -> None:
     initial_freq, resample_freq = freqs
     start = "2000-01-01T12:07:01"
+    loffset = "12H"
+    origin = "start"
     index_kwargs = dict(start=start, periods=5, freq=initial_freq)
     datetime_index = pd.date_range(**index_kwargs)
     cftime_index = xr.cftime_range(**index_kwargs)
+    da_datetimeindex = da(datetime_index)
+    da_cftimeindex = da(cftime_index)
 
-    loffset = "12H"
-    try:
-        da_datetime = (
-            da(datetime_index)
-            .resample(
-                time=resample_freq,
-                closed=closed,
-                label=label,
-                base=base,
-                loffset=loffset,
-            )
-            .mean()
-        )
-    except ValueError:
-        with pytest.raises(ValueError):
-            da(cftime_index).resample(
-                time=resample_freq,
-                closed=closed,
-                label=label,
-                base=base,
-                loffset=loffset,
-            ).mean()
-    else:
-        da_cftime = (
-            da(cftime_index)
-            .resample(
-                time=resample_freq,
-                closed=closed,
-                label=label,
-                base=base,
-                loffset=loffset,
-            )
-            .mean()
-        )
-        # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
-        da_cftime["time"] = (
-            da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex()
-        )
-        xr.testing.assert_identical(da_cftime, da_datetime)
+    compare_against_pandas(
+        da_datetimeindex,
+        da_cftimeindex,
+        resample_freq,
+        closed=closed,
+        label=label,
+        base=base,
+        offset=offset,
+        origin=origin,
+        loffset=loffset,
+    )
 
 
 @pytest.mark.parametrize(
@@ -153,3 +187,54 @@ def test_calendars(calendar) -> None:
     # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
     da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex()
     xr.testing.assert_identical(da_cftime, da_datetime)
+
+
+@pytest.mark.parametrize("closed", ["left", "right"])
+@pytest.mark.parametrize(
+    "origin",  # string options plus one explicit datetime given as a tuple of components
+    ["start_day", "start", "end", "end_day", "epoch", (1970, 1, 1, 3, 2)],
+    ids=lambda x: f"{x}",
+)
+def test_origin(closed, origin) -> None:
+    initial_freq, resample_freq = ("3H", "9H")
+    start = "1969-12-31T12:07:01"  # 12 periods of 3H from here run past 1970-01-01T00:00
+    index_kwargs = dict(start=start, periods=12, freq=initial_freq)
+    datetime_index = pd.date_range(**index_kwargs)
+    cftime_index = xr.cftime_range(**index_kwargs)
+    da_datetimeindex = da(datetime_index)
+    da_cftimeindex = da(cftime_index)
+
+    compare_against_pandas(
+        da_datetimeindex,
+        da_cftimeindex,
+        resample_freq,
+        closed=closed,
+        origin=origin,
+    )
+
+
+def test_base_and_offset_error() -> None:
+    cftime_index = xr.cftime_range("2000", periods=5)
+    da_cftime = da(cftime_index)
+    with pytest.raises(ValueError, match="base and offset cannot"):  # both given at once
+        da_cftime.resample(time="2D", base=3, offset="5S")
+
+
+@pytest.mark.parametrize("offset", ["foo", "5MS", 10])
+def test_invalid_offset_error(offset) -> None:  # each value must raise "offset must be"
+    cftime_index = xr.cftime_range("2000", periods=5)
+    da_cftime = da(cftime_index)
+    with pytest.raises(ValueError, match="offset must be"):
+        da_cftime.resample(time="2D", offset=offset)
+
+
+def test_timedelta_offset() -> None:
+    timedelta = datetime.timedelta(seconds=5)
+    string = "5S"  # expected to resample identically to the timedelta above
+
+    cftime_index = xr.cftime_range("2000", periods=5)
+    da_cftime = da(cftime_index)
+
+    timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean()
+    string_result = da_cftime.resample(time="2D", offset=string).mean()
+    xr.testing.assert_identical(timedelta_result, string_result)
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index d647c82a76b..063dc22e633 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -1810,6 +1810,33 @@ def test_upsample_interpolate_dask(self, chunked_time):
             # done here due to floating point arithmetic
             assert_allclose(expected, actual, rtol=1e-16)
 
+    def test_resample_base(self) -> None:
+        times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10)
+        array = DataArray(np.arange(10), [("time", times)])
+
+        base = 11  # NOTE(review): pandas deprecated resample(base=...) in 1.1 — confirm support
+        actual = array.resample(time="24H", base=base).mean()  # must equal the pandas result
+        expected = DataArray(array.to_series().resample("24H", base=base).mean())
+        assert_identical(expected, actual)
+
+    def test_resample_offset(self) -> None:
+        times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10)
+        array = DataArray(np.arange(10), [("time", times)])
+
+        offset = pd.Timedelta("11H")
+        actual = array.resample(time="24H", offset=offset).mean()  # must equal the pandas result
+        expected = DataArray(array.to_series().resample("24H", offset=offset).mean())
+        assert_identical(expected, actual)
+
+    def test_resample_origin(self) -> None:
+        times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10)
+        array = DataArray(np.arange(10), [("time", times)])
+
+        origin = "start"
+        actual = array.resample(time="24H", origin=origin).mean()  # must equal the pandas result
+        expected = DataArray(array.to_series().resample("24H", origin=origin).mean())
+        assert_identical(expected, actual)
+
 
 class TestDatasetResample:
     def test_resample_and_first(self):