From 1bfeee7575e137bc75741cb2caf015e55ecab2cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Sep 2023 14:23:14 -1000 Subject: [PATCH] Raise NotImplementedError for datetime strings with UTC offset (#14070) Prevents e.g. DatetimeIndex(["2022-07-22 00:00:00+02:00"]) from silently dropping the +02:00 offset, since timezone-aware datetimes are not supported Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14070 --- python/cudf/cudf/core/column/column.py | 18 ++++++++++++++++-- python/cudf/cudf/tests/test_datetime.py | 6 ++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 59ab3569814..d2e2f11a12e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2519,11 +2519,11 @@ def _construct_array( arbitrary = cupy.asarray(arbitrary, dtype=dtype) except (TypeError, ValueError): native_dtype = dtype - inferred_dtype = None + inferred_dtype = infer_dtype(arbitrary, skipna=False) if ( dtype is None and not cudf._lib.scalar._is_null_host_scalar(arbitrary) - and (inferred_dtype := infer_dtype(arbitrary, skipna=False)) + and inferred_dtype in ( "mixed", "mixed-integer", @@ -2533,6 +2533,20 @@ def _construct_array( if inferred_dtype == "interval": # Only way to construct an Interval column. 
return pd.array(arbitrary) + elif ( + inferred_dtype == "string" and getattr(dtype, "kind", None) == "M" + ): + # We may have date-like strings with timezones + try: + pd_arbitrary = pd.to_datetime(arbitrary) + if isinstance(pd_arbitrary.dtype, pd.DatetimeTZDtype): + raise NotImplementedError( + "cuDF does not yet support timezone-aware datetimes" + ) + except pd.errors.OutOfBoundsDatetime: + # https://github.com/pandas-dev/pandas/issues/55096 + pass + arbitrary = np.asarray( arbitrary, dtype=native_dtype diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 5cab19eedc6..0cc7112454c 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2141,6 +2141,12 @@ def test_daterange_pandas_compatibility(): assert_eq(expected, actual) +def test_strings_with_utc_offset_not_implemented(): + with pytest.warns(DeprecationWarning, match="parsing timezone"): # cupy + with pytest.raises(NotImplementedError): + DatetimeIndex(["2022-07-22 00:00:00+02:00"]) + + @pytest.mark.parametrize("code", ["z", "Z"]) def test_format_timezone_not_implemented(code): with pytest.raises(NotImplementedError):