rapidsai · rapids-bot · Dec 12, 2023 · Dec 7, 2023 · Dec 7, 2023 · Dec 7, 2023
@@ -17,6 +17,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )
 
 import cupy
@@ -1428,14 +1429,19 @@ def __repr__(self):
         dtype_index = tmp_meta.rfind(" dtype=")
         prior_to_dtype = tmp_meta[:dtype_index]
         lines = lines[:-1]
-        lines.append(prior_to_dtype + " dtype='%s'" % self.dtype)
+        keywords = [f"dtype='{self.dtype}'"]
         if self.name is not None:
-            lines[-1] = lines[-1] + ", name='%s'" % self.name
+            keywords.append(f"name={self.name!r}")
         if "length" in tmp_meta:
-            lines[-1] = lines[-1] + ", length=%d)" % len(self)
-        else:
-            lines[-1] = lines[-1] + ")"
-
+            keywords.append(f"length={len(self)}")
+        if (
+            "freq" in tmp_meta
+            and isinstance(self, DatetimeIndex)
+            and self._freq is not None
+        ):
+            keywords.append(f"freq={self._freq}")
+        keywords = ", ".join(keywords)
+        lines.append(f"{prior_to_dtype} {keywords})")
         return "\n".join(lines)
 
     @_cudf_nvtx_annotate
@@ -2126,8 +2132,6 @@ def __init__(
         # pandas dtindex creation first which.  For now
         # just make sure we handle np.datetime64 arrays
         # and then just dispatch upstream
-        if freq is not None:
-            raise NotImplementedError("Freq is not yet supported")
         if tz is not None:
             raise NotImplementedError("tz is not yet supported")
         if normalize is not False:
@@ -2141,6 +2145,8 @@ def __init__(
         if yearfirst is not False:
             raise NotImplementedError("yearfirst == True is not yet supported")
 
+        self._freq = _validate_freq(freq)
+
         valid_dtypes = tuple(
             f"datetime64[{res}]" for res in ("s", "ms", "us", "ns")
         )
@@ -2158,6 +2164,30 @@ def __init__(
 
         super().__init__(data, **kwargs)
 
+        if self._freq is not None:
+            unique_vals = self.to_series().diff().unique()
+            if len(unique_vals) > 2 or (
+                len(unique_vals) == 2
+                and unique_vals[1] != self._freq._maybe_as_fast_pandas_offset()
+            ):
+                raise ValueError("No unique frequency found")
+
+    @_cudf_nvtx_annotate
+    def _copy_type_metadata(
+        self: DatetimeIndex, other: DatetimeIndex, *, override_dtypes=None
+    ) -> GenericIndex:
+        """
+        Copy type metadata from ``other`` onto this index, ``_freq`` included.
+
+        Parameters
+        ----------
+        other : DatetimeIndex
+            Index whose metadata (and ``_freq``) is copied.
+        override_dtypes : optional
+            Forwarded unchanged to the parent implementation.
+
+        Returns
+        -------
+        GenericIndex
+            ``self``, after its ``_freq`` has been replaced.
+        """
+        # The parent class handles everything except the frequency.
+        super()._copy_type_metadata(other, override_dtypes=override_dtypes)
+        # Re-validate rather than assign blindly so ``_freq`` always holds
+        # whatever ``_validate_freq`` accepts.
+        carried_freq = _validate_freq(other._freq)
+        self._freq = carried_freq
+        return self
+
+    @classmethod
+    def _from_data(
+        cls, data: MutableMapping, name: Any = no_default, freq: Any = None
+    ):
+        """
+        Build an index from ``data`` and attach a validated frequency.
+
+        Parameters
+        ----------
+        data : MutableMapping
+            Passed through to the parent ``_from_data``.
+        name : Any, default no_default
+            Passed through to the parent ``_from_data``.
+        freq : Any, default None
+            Frequency to store on the result; it is run through
+            ``_validate_freq`` before being assigned to ``_freq``.
+        """
+        # Construct first, then validate, so the parent constructor's errors
+        # surface before any frequency error does.
+        index = super()._from_data(data, name)
+        index._freq = _validate_freq(freq)
+        return index
+
     def __getitem__(self, index):
         value = super().__getitem__(index)
         if cudf.get_option("mode.pandas_compatible") and isinstance(
@@ -2166,6 +2196,11 @@ def __getitem__(self, index):
             return pd.Timestamp(value)
         return value
 
+    @_cudf_nvtx_annotate
+    def copy(self, name=None, deep=False, dtype=None, names=None):
+        """
+        Copy this index and carry its type metadata over to the copy.
+
+        All arguments are forwarded as keywords to the parent ``copy``; the
+        result then receives this index's metadata through
+        ``_copy_type_metadata`` and is returned.
+        """
+        return super().copy(
+            name=name, deep=deep, dtype=dtype, names=names
+        )._copy_type_metadata(self)
+
     def searchsorted(
         self,
         value,
@@ -2519,7 +2554,13 @@ def to_pandas(self, *, nullable: bool = False) -> pd.DatetimeIndex:
             )
         else:
             nanos = self._values.astype("datetime64[ns]")
-        return pd.DatetimeIndex(nanos.to_pandas(), name=self.name)
+
+        freq = (
+            self._freq._maybe_as_fast_pandas_offset()
+            if self._freq is not None
+            else None
+        )
+        return pd.DatetimeIndex(nanos.to_pandas(), name=self.name, freq=freq)
 
     @_cudf_nvtx_annotate
     def _get_dt_field(self, field):
@@ -3624,3 +3665,11 @@ def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]:
         old_s, s = s, old_s - quotient * s
         old_t, t = t, old_t - quotient * t
     return old_r, old_s, old_t
+
+
+def _validate_freq(freq: Any) -> Union[cudf.DateOffset, None]:
+    """
+    Normalize a frequency argument to a ``cudf.DateOffset`` or ``None``.
+
+    Parameters
+    ----------
+    freq : Any
+        A frequency string, a ``cudf.DateOffset``, or ``None``.
+
+    Returns
+    -------
+    cudf.DateOffset or None
+        The offset parsed from a string via ``DateOffset._from_freqstr``,
+        the ``DateOffset`` that was passed in, or ``None`` when ``freq`` is
+        ``None``.
+
+    Raises
+    ------
+    ValueError
+        If ``freq`` is not ``None``, not a string and not a
+        ``cudf.DateOffset``.
+    """
+    if isinstance(freq, str):
+        return cudf.DateOffset._from_freqstr(freq)
+    if freq is not None and not isinstance(freq, cudf.DateOffset):
+        raise ValueError(f"Invalid frequency: {freq}")
+    # Only ``None`` or a ``DateOffset`` can reach this point. The cast does
+    # nothing at runtime; it narrows ``Any`` for the type checker, and it
+    # must include ``None`` because callers test ``_freq is not None``.
+    return cast(Union[cudf.DateOffset, None], freq)
@@ -463,13 +463,19 @@ class DateOffset:
     }
 
     _CODES_TO_UNITS = {
+        "N": "nanoseconds",
         "ns": "nanoseconds",
+        "U": "microseconds",
         "us": "microseconds",
         "ms": "milliseconds",
         "L": "milliseconds",
         "s": "seconds",
+        "S": "seconds",
         "m": "minutes",
+        "min": "minutes",
+        "T": "minutes",
         "h": "hours",
+        "H": "hours",
         "D": "days",
         "W": "weeks",
         "M": "months",
@@ -487,7 +493,7 @@ class DateOffset:
         pd_offset.Nano: "nanoseconds",
     }
 
-    _FREQSTR_REGEX = re.compile("([0-9]*)([a-zA-Z]+)")
+    _FREQSTR_REGEX = re.compile("([-+]?[0-9]*)([a-zA-Z]+)")
 
     def __init__(self, n=1, normalize=False, **kwds):
         if normalize:
@@ -843,10 +849,6 @@ def date_range(
         arr = cp.linspace(start=start, stop=end, num=periods)
         result = cudf.core.column.as_column(arr).astype("datetime64[ns]")
         return cudf.DatetimeIndex._from_data({name: result})
-    elif cudf.get_option("mode.pandas_compatible"):
-        raise NotImplementedError(
-            "`DatetimeIndex` with `freq` cannot be constructed."
-        )
 
     # The code logic below assumes `freq` is defined. It is first normalized
     # into `DateOffset` for further computation with timestamps.
@@ -940,7 +942,7 @@ def date_range(
         arr = cp.arange(start=start, stop=stop, step=step, dtype="int64")
         res = cudf.core.column.as_column(arr).astype("datetime64[ns]")
 
-    return cudf.DatetimeIndex._from_data({name: res})
+    return cudf.DatetimeIndex._from_data({name: res}, freq=freq)
 
 
 def _has_fixed_frequency(freq: DateOffset) -> bool:

@@ -707,6 +707,14 @@ def Index__new__(cls, *args, **kwargs):
     "Resampler", cudf.core.resample._Resampler, pd_Resampler
 )
 
+# Intermediate proxy types for cudf's DataFrameResampler and SeriesResampler.
+# Both are paired with pd_Resampler, as the generic "Resampler" proxy is.
+DataFrameResampler = make_intermediate_proxy_type(
+    "DataFrameResampler", cudf.core.resample.DataFrameResampler, pd_Resampler
+)
+
+SeriesResampler = make_intermediate_proxy_type(
+    "SeriesResampler", cudf.core.resample.SeriesResampler, pd_Resampler
+)
+
 StataReader = make_intermediate_proxy_type(
     "StataReader",
     _Unusable,

@@ -12,7 +12,7 @@
 import cudf
 import cudf.testing.dataset_generator as dataset_generator
 from cudf import DataFrame, Series
-from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140
+from cudf.core._compat import PANDAS_GE_150, PANDAS_GE_200, PANDAS_LT_140
 from cudf.core.index import DatetimeIndex
 from cudf.testing._utils import (
     DATETIME_TYPES,
@@ -1571,6 +1571,44 @@ def test_date_range_start_end_freq(request, start, end, freq):
             reason="https://github.com/rapidsai/cudf/issues/12133",
         )
     )
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                isinstance(freq, dict)
+                and freq.get("hours", None) == 10
+                and freq.get("days", None) == 57
+                and freq.get("nanoseconds", None) == 3
+                and (
+                    (
+                        start == "1996-11-21 04:05:30"
+                        and end == "2000-02-13 08:41:06"
+                    )
+                    or (
+                        start == "1970-01-01 00:00:00"
+                        and end == "2000-02-13 08:41:06"
+                    )
+                    or (
+                        start == "1970-01-01 00:00:00"
+                        and end == "1996-11-21 04:05:30"
+                    )
+                    or (
+                        start == "1831-05-08 15:23:21"
+                        and end == "2000-02-13 08:41:06"
+                    )
+                    or (
+                        start == "1831-05-08 15:23:21"
+                        and end == "1996-11-21 04:05:30"
+                    )
+                    or (
+                        start == "1831-05-08 15:23:21"
+                        and end == "1970-01-01 00:00:00"
+                    )
+                )
+            ),
+            reason="Nanosecond offsets being dropped by pandas, which is "
+            "fixed in pandas-2.0+",
+        )
+    )
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
     else:
@@ -1586,7 +1624,29 @@ def test_date_range_start_end_freq(request, start, end, freq):
     )
 
 
-def test_date_range_start_freq_periods(start, freq, periods):
+def test_date_range_start_freq_periods(request, start, freq, periods):
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                isinstance(freq, dict)
+                and freq.get("hours", None) == 10
+                and freq.get("days", None) == 57
+                and freq.get("nanoseconds", None) == 3
+                and periods in (10, 100)
+                and (
+                    start
+                    in {
+                        "2000-02-13 08:41:06",
+                        "1996-11-21 04:05:30",
+                        "1970-01-01 00:00:00",
+                        "1831-05-08 15:23:21",
+                    }
+                )
+            ),
+            reason="Nanosecond offsets being dropped by pandas, which is "
+            "fixed in pandas-2.0+",
+        )
+    )
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
     else:
@@ -1613,6 +1673,28 @@ def test_date_range_end_freq_periods(request, end, freq, periods):
             reason="https://github.com/pandas-dev/pandas/issues/46877",
         )
     )
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                isinstance(freq, dict)
+                and freq.get("hours", None) == 10
+                and freq.get("days", None) == 57
+                and freq.get("nanoseconds", None) == 3
+                and periods in (10, 100)
+                and (
+                    end
+                    in {
+                        "2000-02-13 08:41:06",
+                        "1996-11-21 04:05:30",
+                        "1970-01-01 00:00:00",
+                        "1831-05-08 15:23:21",
+                    }
+                )
+            ),
+            reason="Nanosecond offsets being dropped by pandas, which is "
+            "fixed in pandas-2.0+",
+        )
+    )
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
     else:
@@ -2163,8 +2245,6 @@ def test_datetime_getitem_na():
 
 def test_daterange_pandas_compatibility():
     with cudf.option_context("mode.pandas_compatible", True):
-        with pytest.raises(NotImplementedError):
-            cudf.date_range("20010101", "20020215", freq="400h", name="times")
         expected = pd.date_range(
             "2010-01-01", "2010-02-01", periods=10, name="times"
         )
@@ -2174,6 +2254,46 @@ def test_daterange_pandas_compatibility():
     assert_eq(expected, actual)
 
 
+@pytest.mark.parametrize(
+    "data,dtype,freq",
+    [
+        ([10], "datetime64[ns]", "2N"),
+        ([10, 12, 14, 16], "datetime64[ns]", "2N"),
+        ([10, 11, 12, 13], "datetime64[ns]", "1N"),
+        ([100, 200, 300, 400], "datetime64[s]", "100s"),
+        ([101, 201, 301, 401], "datetime64[ms]", "100ms"),
+    ],
+)
+def test_datetime_index_with_freq(request, data, dtype, freq):
+    """cudf and pandas ``DatetimeIndex`` built with ``freq`` compare equal."""
+    # Expected to fail on pandas < 2.0 for any dtype other than
+    # datetime64[ns]; see the xfail reason below.
+    nonnano_on_old_pandas = not PANDAS_GE_200 and dtype != "datetime64[ns]"
+    xfail_marker = pytest.mark.xfail(
+        condition=nonnano_on_old_pandas,
+        reason="Pandas < 2.0 lacks non-nano-second dtype support.",
+    )
+    request.applymarker(xfail_marker)
+
+    index_kwargs = {"dtype": dtype, "freq": freq}
+    actual = cudf.DatetimeIndex(data, **index_kwargs)
+    expected = pd.DatetimeIndex(data, **index_kwargs)
+    assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize(
+    "data,dtype,freq",
+    [
+        ([10, 1232, 13244, 13426], "datetime64[ns]", "2N"),
+        ([10, 11, 12, 13], "datetime64[ns]", "1s"),
+        ([10000, 200, 300, 400], "datetime64[s]", "100s"),
+        ([107871, 201, 301, 401], "datetime64[ms]", "100ns"),
+    ],
+)
+def test_datetime_index_freq_error(data, dtype, freq):
+    """
+    Compare pandas and cudf when ``data`` does not follow ``freq``.
+
+    Every parametrized case has values whose spacing differs from the
+    requested frequency; both constructors are handed to
+    ``assert_exceptions_equal`` with identical arguments.
+    """
+    # Separate tuples (and dicts) per library, so neither call can observe
+    # a mutation made by the other.
+    pandas_call = ([data], {"dtype": dtype, "freq": freq})
+    cudf_call = ([data], {"dtype": dtype, "freq": freq})
+    assert_exceptions_equal(
+        pd.DatetimeIndex, cudf.DatetimeIndex, pandas_call, cudf_call
+    )
+
+
 def test_strings_with_utc_offset_not_implemented():
     with pytest.warns(DeprecationWarning, match="parsing timezone"):  # cupy
         with pytest.raises(NotImplementedError):