rapidsai · rapids-bot · Feb 6, 2024 · Feb 5, 2024 · Feb 5, 2024 · Feb 5, 2024
@@ -1,4 +1,6 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+
+import warnings
 
 from cudf.core.buffer import acquire_spill_lock
 
@@ -85,19 +87,34 @@ cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
     cdef libcudf_datetime.rounding_frequency freq_val
 
     # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html
+    old_to_new_freq_map = {
+        "H": "h",
+        "N": "ns",
+        "T": "min",
+        "L": "ms",
+        "U": "us",
+        "S": "s",
+    }
+    if freq in old_to_new_freq_map:
+        warnings.warn(
+            f"'{freq}' is deprecated and will be "
+            "removed in a future version, please use "
+            f"'{old_to_new_freq_map[freq]}' instead.",
+            FutureWarning
+        )
     if freq == "D":
         freq_val = libcudf_datetime.rounding_frequency.DAY
-    elif freq == "H":
+    elif freq in ("H", "h"):
         freq_val = libcudf_datetime.rounding_frequency.HOUR
     elif freq in ("T", "min"):
         freq_val = libcudf_datetime.rounding_frequency.MINUTE
-    elif freq == "S":
+    elif freq in ("S", "s"):
         freq_val = libcudf_datetime.rounding_frequency.SECOND
     elif freq in ("L", "ms"):
         freq_val = libcudf_datetime.rounding_frequency.MILLISECOND
     elif freq in ("U", "us"):
         freq_val = libcudf_datetime.rounding_frequency.MICROSECOND
-    elif freq == "N":
+    elif freq in ("N", "ns"):
         freq_val = libcudf_datetime.rounding_frequency.NANOSECOND
     else:
         raise ValueError(f"Invalid resolution: '{freq}'")

@@ -11,4 +11,4 @@
 PANDAS_GE_214 = PANDAS_VERSION >= version.parse("2.1.4")
 PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
 PANDAS_LT_203 = PANDAS_VERSION < version.parse("2.0.3")
 PANDAS_LT_300 = PANDAS_VERSION < version.parse("3.0.0")
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 import pandas as pd
 
 import cudf
@@ -17,7 +17,7 @@ def test_tz_localize():
 
 
 def test_tz_convert():
-    pidx = pd.date_range("2023-01-01", periods=3, freq="H")
+    pidx = pd.date_range("2023-01-01", periods=3, freq="h")
     idx = cudf.from_pandas(pidx)
     pidx = pidx.tz_localize("UTC")
     idx = idx.tz_localize("UTC")
@@ -27,6 +27,6 @@ def test_tz_convert():
 
 
 def test_delocalize_naive():
-    pidx = pd.date_range("2023-01-01", periods=3, freq="H")
+    pidx = pd.date_range("2023-01-01", periods=3, freq="h")
     idx = cudf.from_pandas(pidx)
     assert_eq(pidx.tz_localize(None), idx.tz_localize(None))
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 import os
 
@@ -130,7 +130,7 @@ def test_delocalize_naive():
     "to_tz", ["Europe/London", "America/Chicago", "UTC", None]
 )
 def test_convert(from_tz, to_tz):
-    ps = pd.Series(pd.date_range("2023-01-01", periods=3, freq="H"))
+    ps = pd.Series(pd.date_range("2023-01-01", periods=3, freq="h"))
     gs = cudf.from_pandas(ps)
     ps = ps.dt.tz_localize(from_tz)
     gs = gs.dt.tz_localize(from_tz)
@@ -140,7 +140,7 @@ def test_convert(from_tz, to_tz):
 
 
 def test_convert_from_naive():
-    gs = cudf.Series(cudf.date_range("2023-01-01", periods=3, freq="H"))
+    gs = cudf.Series(cudf.date_range("2023-01-01", periods=3, freq="h"))
     with pytest.raises(TypeError):
         gs.dt.tz_convert("America/New_York")
 

@@ -23,7 +23,7 @@ def test_dataset_timeseries():
     gdf = cudf.datasets.timeseries(
         "2000",
         "2010",
-        freq="2H",
+        freq="2h",
         dtypes={"value": float, "name": "category", "id": int},
         nulls_frequency=0.7,
         seed=1,

@@ -13,7 +13,12 @@
 import cudf
 import cudf.testing.dataset_generator as dataset_generator
 from cudf import DataFrame, Series
-from cudf.core._compat import PANDAS_EQ_200, PANDAS_GE_200, PANDAS_GE_210
+from cudf.core._compat import (
+    PANDAS_EQ_200,
+    PANDAS_GE_200,
+    PANDAS_GE_210,
+    PANDAS_GE_220,
+)
 from cudf.core.index import DatetimeIndex
 from cudf.testing._utils import (
     DATETIME_TYPES,
@@ -39,7 +44,7 @@ def data1():
 
 def data2():
     return pd.date_range(
-        "20010101", freq="243434324423423234N", name="times", periods=10
+        "20010101", freq="243434324423423234ns", name="times", periods=10
     )
 
 
@@ -1497,10 +1502,10 @@ def test_is_month_start(data, dtype):
     {"hours": 10, "days": 57, "nanoseconds": 3},
     "83D",
     "17h",
-    "-680T",
+    "-680min",
     "110546s",
-    "110546789L",
-    "110546789248U",
+    "110546789ms",
+    "110546789248us",
 ]
 
 
@@ -1540,7 +1545,7 @@ def test_date_range_start_end_freq(request, start, end, freq):
             condition=(
                 start == "1831-05-08 15:23:21"
                 and end == "1996-11-21 04:05:30"
-                and freq == "110546789L"
+                and freq == "110546789ms"
             ),
             reason="https://github.com/rapidsai/cudf/issues/12133",
         )
@@ -1653,7 +1658,8 @@ def test_date_range_end_freq_periods(request, end, freq, periods):
     request.applymarker(
         pytest.mark.xfail(
             condition=(
-                isinstance(freq, dict)
+                not PANDAS_GE_220
+                and isinstance(freq, dict)
                 and freq.get("hours", None) == 10
                 and freq.get("days", None) == 57
                 and freq.get("nanoseconds", None) == 3
@@ -1723,30 +1729,34 @@ def test_date_range_raise_overflow():
 @pytest.mark.parametrize(
     "freqstr_unsupported",
     [
-        "1M",
-        "2SM",
+        "1ME",
+        "2SME",
         "3MS",
-        "4BM",
-        "5CBM",
+        "4BME",
+        "5CBME",
         "6SMS",
         "7BMS",
         "8CBMS",
-        "Q",
-        "2BQ",
+        "QE",
+        "2BQE",
         "3BQS",
-        "10A",
-        "10Y",
-        "9BA",
-        "9BY",
-        "8AS",
+        "10YE",
+        "9BYE",
         "8YS",
-        "7BAS",
         "7BYS",
-        "BH",
+        "bh",
         "B",
     ],
 )
-def test_date_range_raise_unsupported(freqstr_unsupported):
+def test_date_range_raise_unsupported(request, freqstr_unsupported):
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                not PANDAS_GE_220 and freqstr_unsupported.endswith("E")
+            ),
+            reason="TODO: Remove this once pandas-2.2 support is added",
+        )
+    )
     s, e = "2001-01-01", "2008-01-31"
     pd.date_range(start=s, end=e, freq=freqstr_unsupported)
     with pytest.raises(ValueError, match="does not yet support"):
@@ -1757,9 +1767,9 @@ def test_date_range_raise_unsupported(freqstr_unsupported):
     # is a valid frequency for every 3 milliseconds.
     if freqstr_unsupported != "3MS":
         freqstr_unsupported = freqstr_unsupported.lower()
-        pd.date_range(start=s, end=e, freq=freqstr_unsupported)
         with pytest.raises(ValueError, match="does not yet support"):
-            cudf.date_range(start=s, end=e, freq=freqstr_unsupported)
+            with expect_warning_if(PANDAS_GE_220):
+                cudf.date_range(start=s, end=e, freq=freqstr_unsupported)
 
 
 ##################################################################
@@ -1957,7 +1967,7 @@ def test_error_values():
 )
 @pytest.mark.parametrize("time_type", DATETIME_TYPES)
 @pytest.mark.parametrize(
-    "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]
+    "resolution", ["D", "h", "min", "s", "ms", "us", "ns"]
 )
 def test_ceil(request, data, time_type, resolution):
     alias_map = {"L": "ms", "U": "us", "N": "ns"}
@@ -2002,7 +2012,7 @@ def test_ceil(request, data, time_type, resolution):
 )
 @pytest.mark.parametrize("time_type", DATETIME_TYPES)
 @pytest.mark.parametrize(
-    "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]
+    "resolution", ["D", "h", "min", "s", "ms", "us", "ns"]
 )
 def test_floor(request, data, time_type, resolution):
     alias_map = {"L": "ms", "U": "us", "N": "ns"}
@@ -2048,25 +2058,9 @@ def test_floor(request, data, time_type, resolution):
 )
 @pytest.mark.parametrize("time_type", DATETIME_TYPES)
 @pytest.mark.parametrize(
-    "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]
+    "resolution", ["D", "h", "min", "s", "ms", "us", "ns"]
 )
-def test_round(request, data, time_type, resolution):
-    alias_map = {"L": "ms", "U": "us", "N": "ns"}
-    request.applymarker(
-        pytest.mark.xfail(
-            condition=(
-                PANDAS_EQ_200
-                and resolution in {"L", "ms", "U", "us", "N"}
-                and np.dtype(
-                    f"datetime64[{alias_map.get(resolution, resolution)}]"
-                )
-                > np.dtype(time_type)
-            ),
-            reason="https://github.com/pandas-dev/pandas/issues/52761",
-            strict=True,
-        )
-    )
-
+def test_round(data, time_type, resolution):
     gs = cudf.Series(data, dtype=time_type)
     ps = gs.to_pandas()
 
@@ -2284,20 +2278,14 @@ def test_daterange_pandas_compatibility():
 @pytest.mark.parametrize(
     "data,dtype,freq",
     [
-        ([10], "datetime64[ns]", "2N"),
-        ([10, 12, 14, 16], "datetime64[ns]", "2N"),
-        ([10, 11, 12, 13], "datetime64[ns]", "1N"),
+        ([10], "datetime64[ns]", "2ns"),
+        ([10, 12, 14, 16], "datetime64[ns]", "2ns"),
+        ([10, 11, 12, 13], "datetime64[ns]", "1ns"),
         ([100, 200, 300, 400], "datetime64[s]", "100s"),
         ([101, 201, 301, 401], "datetime64[ms]", "100ms"),
     ],
 )
 def test_datetime_index_with_freq(request, data, dtype, freq):
-    request.applymarker(
-        pytest.mark.xfail(
-            condition=(not PANDAS_GE_200 and dtype != "datetime64[ns]"),
-            reason="Pandas < 2.0 lacks non-nano-second dtype support.",
-        )
-    )
     actual = cudf.DatetimeIndex(data, dtype=dtype, freq=freq)
     expected = pd.DatetimeIndex(data, dtype=dtype, freq=freq)
     assert_eq(actual, expected)
@@ -2306,7 +2300,7 @@ def test_datetime_index_with_freq(request, data, dtype, freq):
 @pytest.mark.parametrize(
     "data,dtype,freq",
     [
-        ([10, 1232, 13244, 13426], "datetime64[ns]", "2N"),
+        ([10, 1232, 13244, 13426], "datetime64[ns]", "2ns"),
         ([10, 11, 12, 13], "datetime64[ns]", "1s"),
         ([10000, 200, 300, 400], "datetime64[s]", "100s"),
         ([107871, 201, 301, 401], "datetime64[ms]", "100ns"),
@@ -2454,3 +2448,23 @@ def test_dateimeindex_from_noniso_string():
 def test_to_datetime_errors_non_scalar_not_implemented(errors):
     with pytest.raises(NotImplementedError):
         cudf.to_datetime([1, ""], unit="s", errors=errors)
+
+
+@pytest.mark.parametrize(
+    "freqstr",
+    [
+        "H",
+        "N",
+        "T",
+        "L",
+        "U",
+        "S",
+    ],
+)
+def test_datetime_raise_warning(freqstr):
+    t = cudf.Series(
+        ["2001-01-01 00:04:45", "2001-01-01 00:04:58", "2001-01-01 00:05:04"],
+        dtype="datetime64[ns]",
+    )
+    with pytest.warns(FutureWarning):
+        t.dt.ceil(freqstr)
@@ -2422,7 +2422,7 @@ def test_index_type_methods(data, func):
 
 
 @pytest.mark.parametrize(
-    "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]
+    "resolution", ["D", "h", "min", "s", "ms", "us", "ns"]
 )
 def test_index_datetime_ceil(resolution):
     cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000])
@@ -2435,7 +2435,7 @@ def test_index_datetime_ceil(resolution):
 
 
 @pytest.mark.parametrize(
-    "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]
+    "resolution", ["D", "h", "min", "s", "ms", "us", "ns"]
 )
 def test_index_datetime_floor(resolution):
     cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000])
@@ -2448,7 +2448,7 @@ def test_index_datetime_floor(resolution):
 
 
 @pytest.mark.parametrize(
-    "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]
+    "resolution", ["D", "h", "min", "s", "ms", "us", "ns"]
 )
 def test_index_datetime_round(resolution):
     cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000])
@@ -2490,7 +2490,7 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null):
         pd.Series(
             range(25),
             index=pd.date_range(
-                start="2019-01-01", end="2019-01-02", freq="H"
+                start="2019-01-01", end="2019-01-02", freq="h"
             ),
         ),
     ],

@@ -6,6 +6,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_220
 from cudf.testing._utils import assert_eq
 
 
@@ -166,13 +167,17 @@ def test_interval_index_unique():
     assert_eq(expected, actual)
 
 
+@pytest.mark.xfail(
+    condition=not PANDAS_GE_220,
+    reason="TODO: Remove this once pandas-2.2 support is added",
+)
 @pytest.mark.parametrize("box", [pd.Series, pd.IntervalIndex])
 @pytest.mark.parametrize("tz", ["US/Eastern", None])
 def test_interval_with_datetime(tz, box):
     dti = pd.date_range(
         start=pd.Timestamp("20180101", tz=tz),
         end=pd.Timestamp("20181231", tz=tz),
-        freq="M",
+        freq="ME",
     )
     pobj = box(pd.IntervalIndex.from_breaks(dti))
     if tz is None: