From 5192b608eeed4bda9317c657253c3a5630aa4c5d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:11:37 -1000 Subject: [PATCH] Align date_range defaults with pandas, support tz (#15139) Precursor to https://github.com/rapidsai/cudf/issues/15116 * Aligns `date_range` signature with pandas, _technically_ an API breakage with `closed` changing defaults even though it still isn't supported * Copies pandas behavior of allowing `date_range` with just two of `start/end/periods` * Supports `tz` arg now Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/15139 --- python/cudf/cudf/core/tools/datetimes.py | 49 +++++++++++++----------- python/cudf/cudf/tests/test_datetime.py | 16 ++++++++ 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 65f97c99934..ed8fca88acd 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -799,9 +799,11 @@ def date_range( periods=None, freq=None, tz=None, - normalize=False, + normalize: bool = False, name=None, - closed=None, + closed: Literal["left", "right", "both", "neither"] = "both", + *, + unit: Optional[str] = None, ): """Return a fixed frequency DatetimeIndex. @@ -837,8 +839,13 @@ def date_range( name : str, default None Name of the resulting DatetimeIndex - closed : {None, 'left', 'right'}, optional - Not Supported + closed : {"left", "right", "both", "neither"}, default "both" + Whether to set each bound as closed or open. + Currently only "both" is supported + + unit : str, default None + Specify the desired resolution of the result. Currently + not supported. Returns ------- @@ -875,11 +882,15 @@ def date_range( '2026-04-23 08:00:00'], dtype='datetime64[ns]') """ - if tz is not None: - raise NotImplementedError("tz is currently unsupported.") + if closed != "both": + raise NotImplementedError(f"{closed=} is currently unsupported.") + if unit is not None: + raise NotImplementedError(f"{unit=} is currently unsupported.") + if normalize is not False: + raise NotImplementedError(f"{normalize=} is currently unsupported.") - if closed is not None: - raise NotImplementedError("closed is currently unsupported.") + if freq is None and any(arg is None for arg in (start, end, periods)): + freq = "D" if (start, end, periods, freq).count(None) > 1: raise ValueError( @@ -894,7 +905,7 @@ def date_range( FutureWarning, ) - dtype = np.dtype(" bool: @@ -1026,14 +1039,6 @@ def _has_non_fixed_frequency(freq: DateOffset) -> bool: return len(freq.kwds.keys() & non_fixed_frequencies) > 0 -def _has_mixed_freqeuency(freq: DateOffset) -> bool: - """Utility to determine if `freq` contains mixed fixed and non-fixed - frequency offset. e.g. {months=1, days=5} - """ - - return _has_fixed_frequency(freq) and _has_non_fixed_frequency(freq) - - def _offset_to_nanoseconds_lower_bound(offset: DateOffset) -> int: """Given a DateOffset, which can consist of either fixed frequency or non-fixed frequency offset, convert to the smallest possible fixed diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 7c209078fd2..37ba7acf044 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2357,3 +2357,19 @@ def test_timezone_array_notimplemented(): def test_to_datetime_errors_ignore_deprecated(): with pytest.warns(FutureWarning): cudf.to_datetime("2001-01-01 00:04:45", errors="ignore") + + +def test_date_range_freq_default(): + result = pd.date_range("2020-01-01", periods=2, name="foo") + expected = cudf.date_range("2020-01-01", periods=2, name="foo") + assert_eq(result, expected) + + +def test_date_range_tz(): + result = pd.date_range("2020-01-01", periods=2, tz="UTC") + expected = cudf.date_range("2020-01-01", periods=2, tz="UTC") + assert_eq(result, expected) + + result = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC") + expected = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC") + assert_eq(result, expected)