From cf32049b36578570419f458e436aec1a51dcc640 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 6 Feb 2024 11:31:53 -0600 Subject: [PATCH] Deprecate certain frequency strings (#14967) This PR deprecates "H", "N", "T", "L", "U" and "S" as frequencies in all datetime APIs. This PR prepares `branch-24.04` for `pandas-2.2` support. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/14967 --- python/cudf/cudf/_lib/datetime.pyx | 32 ++++- python/cudf/cudf/core/_compat.py | 1 + .../indexes/datetime/test_time_specific.py | 6 +- .../cudf/tests/series/test_datetimelike.py | 6 +- python/cudf/cudf/tests/test_datasets.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 120 ++++++++++-------- python/cudf/cudf/tests/test_index.py | 8 +- python/cudf/cudf/tests/test_interval.py | 7 +- python/cudf/cudf/tests/test_replace.py | 18 ++- python/cudf/cudf/tests/test_resampling.py | 49 +++---- python/cudf/cudf/tests/test_serialize.py | 4 +- python/cudf/cudf/tests/test_sorting.py | 6 + 12 files changed, 158 insertions(+), 101 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3d96f59c4d6..c777a3ff766 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -1,4 +1,6 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. + +import warnings from cudf.core.buffer import acquire_spill_lock @@ -85,19 +87,35 @@ cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq): cdef libcudf_datetime.rounding_frequency freq_val # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html + old_to_new_freq_map = { + "H": "h", + "N": "ns", + "T": "min", + "L": "ms", + "U": "us", + "S": "s", + } + if freq in old_to_new_freq_map: + warnings.warn( + f"FutureWarning: {freq} is deprecated and will be " + "removed in a future version, please use " + f"{old_to_new_freq_map[freq]} instead.", + FutureWarning + ) + freq = old_to_new_freq_map.get(freq) if freq == "D": freq_val = libcudf_datetime.rounding_frequency.DAY - elif freq == "H": + elif freq == "h": freq_val = libcudf_datetime.rounding_frequency.HOUR - elif freq in ("T", "min"): + elif freq == "min": freq_val = libcudf_datetime.rounding_frequency.MINUTE - elif freq == "S": + elif freq == "s": freq_val = libcudf_datetime.rounding_frequency.SECOND - elif freq in ("L", "ms"): + elif freq == "ms": freq_val = libcudf_datetime.rounding_frequency.MILLISECOND - elif freq in ("U", "us"): + elif freq == "us": freq_val = libcudf_datetime.rounding_frequency.MICROSECOND - elif freq == "N": + elif freq == "ns": freq_val = libcudf_datetime.rounding_frequency.NANOSECOND else: raise ValueError(f"Invalid resolution: '{freq}'") diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 5aa685560c8..3e2890e2ac4 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -11,4 +11,5 @@ PANDAS_GE_214 = PANDAS_VERSION >= version.parse("2.1.4") PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0") PANDAS_LT_203 = PANDAS_VERSION < version.parse("2.0.3") +PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0") PANDAS_LT_300 = PANDAS_VERSION < version.parse("3.0.0") diff --git a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py b/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py index 1ed1e23f1ab..b28ef131025 100644 --- a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py +++ b/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. import pandas as pd import cudf @@ -17,7 +17,7 @@ def test_tz_localize(): def test_tz_convert(): - pidx = pd.date_range("2023-01-01", periods=3, freq="H") + pidx = pd.date_range("2023-01-01", periods=3, freq="h") idx = cudf.from_pandas(pidx) pidx = pidx.tz_localize("UTC") idx = idx.tz_localize("UTC") @@ -27,6 +27,6 @@ def test_tz_convert(): def test_delocalize_naive(): - pidx = pd.date_range("2023-01-01", periods=3, freq="H") + pidx = pd.date_range("2023-01-01", periods=3, freq="h") idx = cudf.from_pandas(pidx) assert_eq(pidx.tz_localize(None), idx.tz_localize(None)) diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py index df68eaca399..98be7045923 100644 --- a/python/cudf/cudf/tests/series/test_datetimelike.py +++ b/python/cudf/cudf/tests/series/test_datetimelike.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import os @@ -130,7 +130,7 @@ def test_delocalize_naive(): "to_tz", ["Europe/London", "America/Chicago", "UTC", None] ) def test_convert(from_tz, to_tz): - ps = pd.Series(pd.date_range("2023-01-01", periods=3, freq="H")) + ps = pd.Series(pd.date_range("2023-01-01", periods=3, freq="h")) gs = cudf.from_pandas(ps) ps = ps.dt.tz_localize(from_tz) gs = gs.dt.tz_localize(from_tz) @@ -140,7 +140,7 @@ def test_convert(from_tz, to_tz): def test_convert_from_naive(): - gs = cudf.Series(cudf.date_range("2023-01-01", periods=3, freq="H")) + gs = cudf.Series(cudf.date_range("2023-01-01", periods=3, freq="h")) with pytest.raises(TypeError): gs.dt.tz_convert("America/New_York") diff --git a/python/cudf/cudf/tests/test_datasets.py b/python/cudf/cudf/tests/test_datasets.py index 45629868ccc..8e5e5ab66c4 100644 --- a/python/cudf/cudf/tests/test_datasets.py +++ b/python/cudf/cudf/tests/test_datasets.py @@ -23,7 +23,7 @@ def test_dataset_timeseries(): gdf = cudf.datasets.timeseries( "2000", "2010", - freq="2H", + freq="2h", dtypes={"value": float, "name": "category", "id": int}, nulls_frequency=0.7, seed=1, diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 24d8aa052e8..1f24337d28b 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -13,7 +13,12 @@ import cudf import cudf.testing.dataset_generator as dataset_generator from cudf import DataFrame, Series -from cudf.core._compat import PANDAS_EQ_200, PANDAS_GE_200, PANDAS_GE_210 +from cudf.core._compat import ( + PANDAS_EQ_200, + PANDAS_GE_200, + PANDAS_GE_210, + PANDAS_GE_220, +) from cudf.core.index import DatetimeIndex from cudf.testing._utils import ( DATETIME_TYPES, @@ -39,7 +44,7 @@ def data1(): def data2(): return pd.date_range( - "20010101", freq="243434324423423234N", name="times", periods=10 + "20010101", freq="243434324423423234ns", name="times", periods=10 ) @@ -1497,10 +1502,10 @@ def test_is_month_start(data, dtype): {"hours": 10, "days": 57, "nanoseconds": 3}, "83D", "17h", - "-680T", + "-680min", "110546s", - "110546789L", - "110546789248U", + "110546789ms", + "110546789248us", ] @@ -1540,7 +1545,7 @@ def test_date_range_start_end_freq(request, start, end, freq): condition=( start == "1831-05-08 15:23:21" and end == "1996-11-21 04:05:30" - and freq == "110546789L" + and freq == "110546789ms" ), reason="https://github.com/rapidsai/cudf/issues/12133", ) @@ -1653,7 +1658,8 @@ def test_date_range_end_freq_periods(request, end, freq, periods): request.applymarker( pytest.mark.xfail( condition=( - isinstance(freq, dict) + not PANDAS_GE_220 + and isinstance(freq, dict) and freq.get("hours", None) == 10 and freq.get("days", None) == 57 and freq.get("nanoseconds", None) == 3 @@ -1723,30 +1729,34 @@ def test_date_range_raise_overflow(): @pytest.mark.parametrize( "freqstr_unsupported", [ - "1M", - "2SM", + "1ME", + "2SME", "3MS", - "4BM", - "5CBM", + "4BME", + "5CBME", "6SMS", "7BMS", "8CBMS", - "Q", - "2BQ", + "QE", + "2BQE", "3BQS", - "10A", - "10Y", - "9BA", - "9BY", - "8AS", + "10YE", + "9BYE", "8YS", - "7BAS", "7BYS", - "BH", + "bh", "B", ], ) -def test_date_range_raise_unsupported(freqstr_unsupported): +def test_date_range_raise_unsupported(request, freqstr_unsupported): + request.applymarker( + pytest.mark.xfail( + condition=( + not PANDAS_GE_220 and freqstr_unsupported.endswith("E") + ), + reason="TODO: Remove this once pandas-2.2 support is added", + ) + ) s, e = "2001-01-01", "2008-01-31" pd.date_range(start=s, end=e, freq=freqstr_unsupported) with pytest.raises(ValueError, match="does not yet support"): @@ -1757,9 +1767,9 @@ def test_date_range_raise_unsupported(freqstr_unsupported): # is a valid frequency for every 3 milliseconds. if freqstr_unsupported != "3MS": freqstr_unsupported = freqstr_unsupported.lower() - pd.date_range(start=s, end=e, freq=freqstr_unsupported) with pytest.raises(ValueError, match="does not yet support"): - cudf.date_range(start=s, end=e, freq=freqstr_unsupported) + with expect_warning_if(PANDAS_GE_220): + cudf.date_range(start=s, end=e, freq=freqstr_unsupported) ################################################################## @@ -1957,7 +1967,7 @@ def test_error_values(): ) @pytest.mark.parametrize("time_type", DATETIME_TYPES) @pytest.mark.parametrize( - "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] + "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] ) def test_ceil(request, data, time_type, resolution): alias_map = {"L": "ms", "U": "us", "N": "ns"} @@ -2002,7 +2012,7 @@ def test_ceil(request, data, time_type, resolution): ) @pytest.mark.parametrize("time_type", DATETIME_TYPES) @pytest.mark.parametrize( - "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] + "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] ) def test_floor(request, data, time_type, resolution): alias_map = {"L": "ms", "U": "us", "N": "ns"} @@ -2048,25 +2058,9 @@ def test_floor(request, data, time_type, resolution): ) @pytest.mark.parametrize("time_type", DATETIME_TYPES) @pytest.mark.parametrize( - "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] + "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] ) -def test_round(request, data, time_type, resolution): - alias_map = {"L": "ms", "U": "us", "N": "ns"} - request.applymarker( - pytest.mark.xfail( - condition=( - PANDAS_EQ_200 - and resolution in {"L", "ms", "U", "us", "N"} - and np.dtype( - f"datetime64[{alias_map.get(resolution, resolution)}]" - ) - > np.dtype(time_type) - ), - reason="https://github.com/pandas-dev/pandas/issues/52761", - strict=True, - ) - ) - +def test_round(data, time_type, resolution): gs = cudf.Series(data, dtype=time_type) ps = gs.to_pandas() @@ -2284,20 +2278,20 @@ def test_daterange_pandas_compatibility(): @pytest.mark.parametrize( "data,dtype,freq", [ - ([10], "datetime64[ns]", "2N"), - ([10, 12, 14, 16], "datetime64[ns]", "2N"), - ([10, 11, 12, 13], "datetime64[ns]", "1N"), + ([10], "datetime64[ns]", "2ns"), + ([10, 12, 14, 16], "datetime64[ns]", "2ns"), + ([10, 11, 12, 13], "datetime64[ns]", "1ns"), ([100, 200, 300, 400], "datetime64[s]", "100s"), ([101, 201, 301, 401], "datetime64[ms]", "100ms"), ], ) def test_datetime_index_with_freq(request, data, dtype, freq): - request.applymarker( - pytest.mark.xfail( - condition=(not PANDAS_GE_200 and dtype != "datetime64[ns]"), - reason="Pandas < 2.0 lacks non-nano-second dtype support.", - ) - ) + # request.applymarker( + # pytest.mark.xfail( + # condition=(not PANDAS_GE_200 and dtype != "datetime64[ns]"), + # reason="Pandas < 2.0 lacks non-nano-second dtype support.", + # ) + # ) actual = cudf.DatetimeIndex(data, dtype=dtype, freq=freq) expected = pd.DatetimeIndex(data, dtype=dtype, freq=freq) assert_eq(actual, expected) @@ -2306,7 +2300,7 @@ def test_datetime_index_with_freq(request, data, dtype, freq): @pytest.mark.parametrize( "data,dtype,freq", [ - ([10, 1232, 13244, 13426], "datetime64[ns]", "2N"), + ([10, 1232, 13244, 13426], "datetime64[ns]", "2ns"), ([10, 11, 12, 13], "datetime64[ns]", "1s"), ([10000, 200, 300, 400], "datetime64[s]", "100s"), ([107871, 201, 301, 401], "datetime64[ms]", "100ns"), @@ -2454,3 +2448,23 @@ def test_dateimeindex_from_noniso_string(): def test_to_datetime_errors_non_scalar_not_implemented(errors): with pytest.raises(NotImplementedError): cudf.to_datetime([1, ""], unit="s", errors=errors) + + +@pytest.mark.parametrize( + "freqstr", + [ + "H", + "N", + "T", + "L", + "U", + "S", + ], +) +def test_datetime_raise_warning(freqstr): + t = cudf.Series( + ["2001-01-01 00:04:45", "2001-01-01 00:04:58", "2001-01-01 00:05:04"], + dtype="datetime64[ns]", + ) + with pytest.warns(FutureWarning): + t.dt.ceil(freqstr) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 996b651b9fe..7a190fb428a 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2422,7 +2422,7 @@ def test_index_type_methods(data, func): @pytest.mark.parametrize( - "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] + "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] ) def test_index_datetime_ceil(resolution): cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) @@ -2435,7 +2435,7 @@ def test_index_datetime_ceil(resolution): @pytest.mark.parametrize( - "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] + "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] ) def test_index_datetime_floor(resolution): cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) @@ -2448,7 +2448,7 @@ def test_index_datetime_floor(resolution): @pytest.mark.parametrize( - "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] + "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] ) def test_index_datetime_round(resolution): cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) @@ -2490,7 +2490,7 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null): pd.Series( range(25), index=pd.date_range( - start="2019-01-01", end="2019-01-02", freq="H" + start="2019-01-01", end="2019-01-02", freq="h" ), ), ], diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py index ef853a23004..1c61b378d68 100644 --- a/python/cudf/cudf/tests/test_interval.py +++ b/python/cudf/cudf/tests/test_interval.py @@ -6,6 +6,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_220 from cudf.testing._utils import assert_eq @@ -166,13 +167,17 @@ def test_interval_index_unique(): assert_eq(expected, actual) +@pytest.mark.xfail( + condition=not PANDAS_GE_220, + reason="TODO: Remove this once pandas-2.2 support is added", +) @pytest.mark.parametrize("box", [pd.Series, pd.IntervalIndex]) @pytest.mark.parametrize("tz", ["US/Eastern", None]) def test_interval_with_datetime(tz, box): dti = pd.date_range( start=pd.Timestamp("20180101", tz=tz), end=pd.Timestamp("20181231", tz=tz), - freq="M", + freq="ME", ) pobj = box(pd.IntervalIndex.from_breaks(dti)) if tz is None: diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 6db1c97b9fd..0f8f8de36a1 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210 +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_GE_220 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import ( INTEGER_TYPES, @@ -484,7 +484,13 @@ def test_fillna_categorical(psr_data, fill_value, inplace): @pytest.mark.parametrize( "psr_data", [ - pd.Series(pd.date_range("2010-01-01", "2020-01-10", freq="1y")), + pd.Series( + pd.date_range( + "2010-01-01", + "2020-01-10", + freq="1YE" if PANDAS_GE_220 else "1y", + ) + ), pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), pd.Series( [ @@ -525,7 +531,13 @@ def test_fillna_categorical(psr_data, fill_value, inplace): "fill_value", [ pd.Timestamp("2010-01-02"), - pd.Series(pd.date_range("2010-01-01", "2020-01-10", freq="1y")) + pd.Series( + pd.date_range( + "2010-01-01", + "2020-01-10", + freq="1YE" if PANDAS_GE_220 else "1y", + ) + ) + pd.Timedelta("1d"), pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), pd.Series( diff --git a/python/cudf/cudf/tests/test_resampling.py b/python/cudf/cudf/tests/test_resampling.py index 6281d54aa60..ce0fbbfada8 100644 --- a/python/cudf/cudf/tests/test_resampling.py +++ b/python/cudf/cudf/tests/test_resampling.py @@ -24,31 +24,31 @@ def assert_resample_results_equal(lhs, rhs, **kwargs): def test_series_downsample_simple(ts_resolution): # Series with and index of 5min intervals: - index = pd.date_range(start="2001-01-01", periods=10, freq="1T") + index = pd.date_range(start="2001-01-01", periods=10, freq="1min") psr = pd.Series(range(10), index=index) gsr = cudf.from_pandas(psr) gsr.index = gsr.index.astype(f"datetime64[{ts_resolution}]") assert_resample_results_equal( - psr.resample("3T").sum(), - gsr.resample("3T").sum(), + psr.resample("3min").sum(), + gsr.resample("3min").sum(), ) def test_series_upsample_simple(): # Series with and index of 5min intervals: - index = pd.date_range(start="2001-01-01", periods=10, freq="1T") + index = pd.date_range(start="2001-01-01", periods=10, freq="1min") psr = pd.Series(range(10), index=index) gsr = cudf.from_pandas(psr) assert_resample_results_equal( - psr.resample("3T").sum(), - gsr.resample("3T").sum(), + psr.resample("3min").sum(), + gsr.resample("3min").sum(), ) -@pytest.mark.parametrize("rule", ["2S", "10S"]) +@pytest.mark.parametrize("rule", ["2s", "10s"]) def test_series_resample_ffill(rule): - rng = pd.date_range("1/1/2012", periods=10, freq="5S") + rng = pd.date_range("1/1/2012", periods=10, freq="5s") ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) gts = cudf.from_pandas(ts) assert_resample_results_equal( @@ -56,9 +56,9 @@ def test_series_resample_ffill(rule): ) -@pytest.mark.parametrize("rule", ["2S", "10S"]) +@pytest.mark.parametrize("rule", ["2s", "10s"]) def test_series_resample_bfill(rule): - rng = pd.date_range("1/1/2012", periods=10, freq="5S") + rng = pd.date_range("1/1/2012", periods=10, freq="5s") ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) gts = cudf.from_pandas(ts) assert_resample_results_equal( @@ -66,9 +66,9 @@ def test_series_resample_bfill(rule): ) -@pytest.mark.parametrize("rule", ["2S", "10S"]) +@pytest.mark.parametrize("rule", ["2s", "10s"]) def test_series_resample_asfreq(rule): - rng = pd.date_range("1/1/2012", periods=100, freq="5S") + rng = pd.date_range("1/1/2012", periods=100, freq="5s") ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) gts = cudf.from_pandas(ts) assert_resample_results_equal( @@ -79,25 +79,25 @@ def test_series_resample_asfreq(rule): def test_dataframe_resample_aggregation_simple(): pdf = pd.DataFrame( np.random.randn(1000, 3), - index=pd.date_range("1/1/2012", freq="S", periods=1000), + index=pd.date_range("1/1/2012", freq="s", periods=1000), columns=["A", "B", "C"], ) gdf = cudf.from_pandas(pdf) assert_resample_results_equal( - pdf.resample("3T").mean(), gdf.resample("3T").mean() + pdf.resample("3min").mean(), gdf.resample("3min").mean() ) def test_dataframe_resample_multiagg(): pdf = pd.DataFrame( np.random.randn(1000, 3), - index=pd.date_range("1/1/2012", freq="S", periods=1000), + index=pd.date_range("1/1/2012", freq="s", periods=1000), columns=["A", "B", "C"], ) gdf = cudf.from_pandas(pdf) assert_resample_results_equal( - pdf.resample("3T").agg(["sum", "mean", "std"]), - gdf.resample("3T").agg(["sum", "mean", "std"]), + pdf.resample("3min").agg(["sum", "mean", "std"]), + gdf.resample("3min").agg(["sum", "mean", "std"]), ) @@ -106,12 +106,13 @@ def test_dataframe_resample_on(): pdf = pd.DataFrame( { "x": np.random.randn(1000), - "y": pd.date_range("1/1/2012", freq="S", periods=1000), + "y": pd.date_range("1/1/2012", freq="s", periods=1000), } ) gdf = cudf.from_pandas(pdf) assert_resample_results_equal( - pdf.resample("3T", on="y").mean(), gdf.resample("3T", on="y").mean() + pdf.resample("3min", on="y").mean(), + gdf.resample("3min", on="y").mean(), ) @@ -120,15 +121,15 @@ def test_dataframe_resample_level(): pdf = pd.DataFrame( { "x": np.random.randn(1000), - "y": pd.date_range("1/1/2012", freq="S", periods=1000), + "y": pd.date_range("1/1/2012", freq="s", periods=1000), } ) pdi = pd.MultiIndex.from_frame(pdf) pdf = pd.DataFrame({"a": np.random.randn(1000)}, index=pdi) gdf = cudf.from_pandas(pdf) assert_resample_results_equal( - pdf.resample("3T", level="y").mean(), - gdf.resample("3T", level="y").mean(), + pdf.resample("3min", level="y").mean(), + gdf.resample("3min", level="y").mean(), ) @@ -139,8 +140,8 @@ def test_dataframe_resample_level(): ("1us", "10us", "us"), ("ms", "100us", "us"), ("ms", "1s", "s"), - ("s", "1T", "s"), - ("1T", "30s", "s"), + ("s", "1min", "s"), + ("1min", "30s", "s"), ("1D", "10D", "s"), ("10D", "1D", "s"), ], diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 4e2a9f581c3..f26d78e7783 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -352,9 +352,9 @@ def test_serialize_seriesgroupby(): def test_serialize_seriesresampler(): - index = cudf.date_range(start="2001-01-01", periods=10, freq="1T") + index = cudf.date_range(start="2001-01-01", periods=10, freq="1min") sr = cudf.Series(range(10), index=index) - re_sampler = sr.resample("3T") + re_sampler = sr.resample("3min") actual = re_sampler.sum() recreated = re_sampler.__class__.deserialize(*re_sampler.serialize()) expected = recreated.sum() diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index f30c14373bf..dd545da4243 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -8,6 +8,7 @@ import pytest from cudf import DataFrame, Series +from cudf.core._compat import PANDAS_GE_220 from cudf.core.column import NumericalColumn from cudf.testing._utils import ( DATETIME_TYPES, @@ -48,6 +49,11 @@ def test_dataframe_sort_values(nelem, dtype): @pytest.mark.parametrize("ignore_index", [True, False]) @pytest.mark.parametrize("index", ["a", "b", ["a", "b"]]) def test_dataframe_sort_values_ignore_index(index, ignore_index): + if not PANDAS_GE_220 and isinstance(index, list) and not ignore_index: + pytest.skip( + reason="TODO: Remove this once pandas-2.2 support is added", + ) + gdf = DataFrame( {"a": [1, 3, 5, 2, 4], "b": [1, 1, 2, 2, 3], "c": [9, 7, 7, 7, 1]} )