Skip to content

Commit

Permalink
Align date_range defaults with pandas, support tz (#15139)
Browse files Browse the repository at this point in the history
Precursor to #15116

* Aligns the `date_range` signature with pandas. This is _technically_ an API break because the default of `closed` changes from `None` to `"both"`, even though no other value of `closed` is supported yet
* Copies the pandas behavior of allowing `date_range` to be called with just two of `start`/`end`/`periods`; in that case `freq` defaults to `"D"`
* Supports `tz` arg now

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15139
  • Loading branch information
mroeschke authored Apr 3, 2024
1 parent 082f6c9 commit 5192b60
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 22 deletions.
49 changes: 27 additions & 22 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -799,9 +799,11 @@ def date_range(
periods=None,
freq=None,
tz=None,
normalize=False,
normalize: bool = False,
name=None,
closed=None,
closed: Literal["left", "right", "both", "neither"] = "both",
*,
unit: Optional[str] = None,
):
"""Return a fixed frequency DatetimeIndex.
Expand Down Expand Up @@ -837,8 +839,13 @@ def date_range(
name : str, default None
Name of the resulting DatetimeIndex
closed : {None, 'left', 'right'}, optional
Not Supported
closed : {"left", "right", "both", "neither"}, default "both"
Whether to set each bound as closed or open.
Currently only "both" is supported
unit : str, default None
Specify the desired resolution of the result. Currently
not supported.
Returns
-------
Expand Down Expand Up @@ -875,11 +882,15 @@ def date_range(
'2026-04-23 08:00:00'],
dtype='datetime64[ns]')
"""
if tz is not None:
raise NotImplementedError("tz is currently unsupported.")
if closed != "both":
raise NotImplementedError(f"{closed=} is currently unsupported.")
if unit is not None:
raise NotImplementedError(f"{unit=} is currently unsupported.")
if normalize is not False:
raise NotImplementedError(f"{normalize=} is currently unsupported.")

if closed is not None:
raise NotImplementedError("closed is currently unsupported.")
if freq is None and any(arg is None for arg in (start, end, periods)):
freq = "D"

if (start, end, periods, freq).count(None) > 1:
raise ValueError(
Expand All @@ -894,7 +905,7 @@ def date_range(
FutureWarning,
)

dtype = np.dtype("<M8[ns]")
dtype = np.dtype("datetime64[ns]")

if freq is None:
# `start`, `end`, `periods` is specified, we treat the timestamps as
Expand All @@ -903,7 +914,7 @@ def date_range(
end = cudf.Scalar(end, dtype=dtype).value.astype("int64")
arr = cp.linspace(start=start, stop=end, num=periods)
result = cudf.core.column.as_column(arr).astype("datetime64[ns]")
return cudf.DatetimeIndex._from_data({name: result})
return cudf.DatetimeIndex._from_data({name: result}).tz_localize(tz)

# The code logic below assumes `freq` is defined. It is first normalized
# into `DateOffset` for further computation with timestamps.
Expand All @@ -912,8 +923,8 @@ def date_range(
offset = freq
elif isinstance(freq, str):
offset = pd.tseries.frequencies.to_offset(freq)
if not isinstance(offset, pd.tseries.offsets.Tick) and not isinstance(
offset, pd.tseries.offsets.Week
if not isinstance(
offset, (pd.tseries.offsets.Tick, pd.tseries.offsets.Week)
):
raise ValueError(
f"Unrecognized frequency string {freq}. cuDF does "
Expand All @@ -923,7 +934,7 @@ def date_range(
else:
raise TypeError("`freq` must be a `str` or cudf.DateOffset object.")

if _has_mixed_freqeuency(offset):
if _has_fixed_frequency(offset) and _has_non_fixed_frequency(offset):
raise NotImplementedError(
"Mixing fixed and non-fixed frequency offset is unsupported."
)
Expand Down Expand Up @@ -1001,7 +1012,9 @@ def date_range(
arr = cp.arange(start=start, stop=stop, step=step, dtype="int64")
res = cudf.core.column.as_column(arr).astype("datetime64[ns]")

return cudf.DatetimeIndex._from_data({name: res}, freq=freq)
return cudf.DatetimeIndex._from_data({name: res}, freq=freq).tz_localize(
tz
)


def _has_fixed_frequency(freq: DateOffset) -> bool:
Expand All @@ -1026,14 +1039,6 @@ def _has_non_fixed_frequency(freq: DateOffset) -> bool:
return len(freq.kwds.keys() & non_fixed_frequencies) > 0


def _has_mixed_freqeuency(freq: DateOffset) -> bool:
    """Report whether `freq` combines fixed and non-fixed frequency
    components, e.g. {months=1, days=5}.
    """
    # Only consult the non-fixed check once a fixed component is present,
    # mirroring the short-circuit of a plain `and`.
    if not _has_fixed_frequency(freq):
        return False
    return _has_non_fixed_frequency(freq)


def _offset_to_nanoseconds_lower_bound(offset: DateOffset) -> int:
"""Given a DateOffset, which can consist of either fixed frequency or
non-fixed frequency offset, convert to the smallest possible fixed
Expand Down
16 changes: 16 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2357,3 +2357,19 @@ def test_timezone_array_notimplemented():
def test_to_datetime_errors_ignore_deprecated():
    """`cudf.to_datetime` with ``errors="ignore"`` must emit a FutureWarning."""
    timestamp = "2001-01-01 00:04:45"
    with pytest.warns(FutureWarning):
        cudf.to_datetime(timestamp, errors="ignore")


def test_date_range_freq_default():
    """`cudf.date_range` without `freq` should match pandas when only
    `start` and `periods` are given.
    """
    # pandas is the reference implementation; cuDF is the code under test.
    expected = pd.date_range("2020-01-01", periods=2, name="foo")
    result = cudf.date_range("2020-01-01", periods=2, name="foo")
    assert_eq(expected, result)


def test_date_range_tz():
    """`cudf.date_range` should honor `tz` and match pandas."""
    # pandas is the reference implementation; cuDF is the code under test.

    # Case 1: `start` + `periods` only, so `freq` is left unspecified.
    expected = pd.date_range("2020-01-01", periods=2, tz="UTC")
    result = cudf.date_range("2020-01-01", periods=2, tz="UTC")
    assert_eq(expected, result)

    # Case 2: `start`, `end` and `periods` are all given and `freq` is
    # omitted.
    expected = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
    result = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
    assert_eq(expected, result)

0 comments on commit 5192b60

Please sign in to comment.