Skip to content

Commit

Permalink
Raise NotImplementedError for datetime strings with UTC offset (#14070)
Browse files Browse the repository at this point in the history
Prevents e.g. DatetimeIndex(["2022-07-22 00:00:00+02:00"]) from silently dropping the +02:00 UTC offset; because timezone-aware datetimes are not yet supported, a NotImplementedError is raised instead.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #14070
  • Loading branch information
mroeschke authored Sep 14, 2023
1 parent 89557bb commit 1bfeee7
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
18 changes: 16 additions & 2 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2519,11 +2519,11 @@ def _construct_array(
arbitrary = cupy.asarray(arbitrary, dtype=dtype)
except (TypeError, ValueError):
native_dtype = dtype
inferred_dtype = None
inferred_dtype = infer_dtype(arbitrary, skipna=False)
if (
dtype is None
and not cudf._lib.scalar._is_null_host_scalar(arbitrary)
and (inferred_dtype := infer_dtype(arbitrary, skipna=False))
and inferred_dtype
in (
"mixed",
"mixed-integer",
Expand All @@ -2533,6 +2533,20 @@ def _construct_array(
if inferred_dtype == "interval":
# Only way to construct an Interval column.
return pd.array(arbitrary)
elif (
inferred_dtype == "string" and getattr(dtype, "kind", None) == "M"
):
# We may have date-like strings with timezones
try:
pd_arbitrary = pd.to_datetime(arbitrary)
if isinstance(pd_arbitrary.dtype, pd.DatetimeTZDtype):
raise NotImplementedError(
"cuDF does not yet support timezone-aware datetimes"
)
except pd.errors.OutOfBoundsDatetime:
# https://github.com/pandas-dev/pandas/issues/55096
pass

arbitrary = np.asarray(
arbitrary,
dtype=native_dtype
Expand Down
6 changes: 6 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2141,6 +2141,12 @@ def test_daterange_pandas_compatibility():
assert_eq(expected, actual)


def test_strings_with_utc_offset_not_implemented():
    """Check that datetime strings carrying a UTC offset are rejected.

    Building a ``DatetimeIndex`` from a string such as
    ``"2022-07-22 00:00:00+02:00"`` must raise ``NotImplementedError``
    while a ``DeprecationWarning`` matching "parsing timezone" is emitted.
    """
    offset_strings = ["2022-07-22 00:00:00+02:00"]
    # NOTE(review): the original test attributes this warning to cupy;
    # confirm the emitting library if the match ever stops firing.
    tz_warning = pytest.warns(DeprecationWarning, match="parsing timezone")
    # Entered left to right, exited right to left: the raise is checked
    # first, then the recorded warnings are validated.
    with tz_warning, pytest.raises(NotImplementedError):
        DatetimeIndex(offset_strings)


@pytest.mark.parametrize("code", ["z", "Z"])
def test_format_timezone_not_implemented(code):
with pytest.raises(NotImplementedError):
Expand Down

0 comments on commit 1bfeee7

Please sign in to comment.