From 9a5a4fec5c80620ca906d28d7e8f662eb21e8198 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 5 Jan 2024 15:25:40 -0800
Subject: [PATCH 01/10] default timedelta format the same for all types

---
 python/cudf/cudf/core/column/timedelta.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 572b3b894dc..7fead6182ee 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -18,13 +18,6 @@
 from cudf.utils.dtypes import np_to_pa_dtype
 from cudf.utils.utils import _all_bools_with_nulls
 
-_dtype_to_format_conversion = {
-    "timedelta64[ns]": "%D days %H:%M:%S",
-    "timedelta64[us]": "%D days %H:%M:%S",
-    "timedelta64[ms]": "%D days %H:%M:%S",
-    "timedelta64[s]": "%D days %H:%M:%S",
-}
-
 _unit_to_nanoseconds_conversion = {
     "ns": 1,
     "us": 1_000,
@@ -318,9 +311,7 @@ def as_string_column(
         self, dtype: Dtype, format=None, **kwargs
     ) -> "cudf.core.column.StringColumn":
         if format is None:
-            format = _dtype_to_format_conversion.get(
-                self.dtype.name, "%D days %H:%M:%S"
-            )
+            format = "%D days %H:%M:%S"
         if len(self) > 0:
             return string._timedelta_to_str_typecast_functions[
                 cudf.dtype(self.dtype)

From 2f624085b9c0918aef5414f061c9c73931bbde98 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 5 Jan 2024 16:00:23 -0800
Subject: [PATCH 02/10] Simplify some timedelta logic

---
 python/cudf/cudf/core/column/timedelta.py | 25 ++++++++---------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 7fead6182ee..79ab0f32a4b 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -79,6 +79,8 @@ def __init__(
         null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
+        if self.dtype.kind != "m":
+            raise TypeError(f"{self.dtype} is not a supported duration type")
 
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
@@ -94,14 +96,9 @@ def __init__(
             null_count=null_count,
         )
 
-        if self.dtype.type is not np.timedelta64:
-            raise TypeError(f"{self.dtype} is not a supported duration type")
-
-        self._time_unit, _ = np.datetime_data(self.dtype)
-
     def __contains__(self, item: DatetimeLikeScalar) -> bool:
         try:
-            item = np.timedelta64(item, self._time_unit)
+            item = np.timedelta64(item, self.time_unit)
         except ValueError:
             # If item cannot be converted to duration type
             # np.timedelta64 raises ValueError, hence `item`
@@ -228,16 +225,12 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand:
             "Cannot perform binary operation on timezone-naive columns"
             " and timezone-aware timestamps."
         )
-        if isinstance(other, pd.Timestamp):
-            if other.tz is not None:
+        if isinstance(other, datetime.datetime):
+            if other.tzinfo is not None:
                 raise NotImplementedError(tz_error_msg)
-            other = other.to_datetime64()
-        elif isinstance(other, pd.Timedelta):
-            other = other.to_timedelta64()
+            other = pd.Timestamp(other).to_datetime64()
         elif isinstance(other, datetime.timedelta):
-            other = np.timedelta64(other)
-        elif isinstance(other, datetime.datetime) and other.tzinfo is not None:
-            raise NotImplementedError(tz_error_msg)
+            other = pd.Timedelta(other).to_timedelta64()
 
         if isinstance(other, np.timedelta64):
             other_time_unit = cudf.utils.dtypes.get_time_unit(other)
@@ -249,7 +242,7 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand:
             else:
                 common_dtype = determine_out_dtype(self.dtype, other.dtype)
             return cudf.Scalar(other.astype(common_dtype))
-        elif np.isscalar(other):
+        elif is_scalar(other):
             return cudf.Scalar(other)
         return NotImplemented
 
@@ -268,7 +261,7 @@ def as_numerical(self) -> "cudf.core.column.NumericalColumn":
 
     @property
     def time_unit(self) -> str:
-        return self._time_unit
+        return np.datetime_data(self.dtype)[0]
 
     def fillna(
         self,

From 1be1f785e80ee61bc4852741397c796381238c32 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 5 Jan 2024 16:11:03 -0800
Subject: [PATCH 03/10] Remove private _time_unit attribute

---
 python/cudf/cudf/core/_internals/timezones.py |  8 ++++----
 python/cudf/cudf/core/column/datetime.py      | 16 +++++++---------
 python/cudf/cudf/core/column/timedelta.py     |  3 ++-
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py
index 67043d3fbb3..552fe877fb4 100644
--- a/python/cudf/cudf/core/_internals/timezones.py
+++ b/python/cudf/cudf/core/_internals/timezones.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 import os
 import zoneinfo
@@ -111,7 +111,7 @@ def _find_ambiguous_and_nonexistent(
     tz_data_for_zone = get_tz_data(zone_name)
     transition_times = tz_data_for_zone["transition_times"]
     offsets = tz_data_for_zone["offsets"].astype(
-        f"timedelta64[{data._time_unit}]"
+        f"timedelta64[{data.time_unit}]"
     )
 
     if len(offsets) == 1:  # no transitions
@@ -180,7 +180,7 @@ def localize(
             "Already localized. "
             "Use `tz_convert` to convert between time zones."
         )
-    dtype = pd.DatetimeTZDtype(data._time_unit, zone_name)
+    dtype = pd.DatetimeTZDtype(data.time_unit, zone_name)
     ambiguous, nonexistent = _find_ambiguous_and_nonexistent(data, zone_name)
     localized = cast(
         DatetimeColumn,
@@ -227,7 +227,7 @@ def convert(data: DatetimeTZColumn, zone_name: str) -> DatetimeTZColumn:
         DatetimeTZColumn,
         build_column(
             data=utc_time.base_data,
-            dtype=pd.DatetimeTZDtype(data._time_unit, zone_name),
+            dtype=pd.DatetimeTZDtype(data.time_unit, zone_name),
             mask=utc_time.base_mask,
             size=utc_time.size,
             offset=utc_time.offset,
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 7980b58ab8b..7d96b300d8f 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -1,8 +1,9 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
 import datetime
+import functools
 import locale
 import re
 from locale import nl_langinfo
@@ -236,6 +237,8 @@ def __init__(
         null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
+        if self.dtype.kind != "M":
+            raise TypeError(f"{self.dtype} is not a supported datetime type")
 
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
@@ -251,14 +254,9 @@ def __init__(
             null_count=null_count,
         )
 
-        if self.dtype.type is not np.datetime64:
-            raise TypeError(f"{self.dtype} is not a supported datetime type")
-
-        self._time_unit, _ = np.datetime_data(self.dtype)
-
     def __contains__(self, item: ScalarLike) -> bool:
         try:
-            item_as_dt64 = np.datetime64(item, self._time_unit)
+            item_as_dt64 = np.datetime64(item, self.time_unit)
         except ValueError:
             # If item cannot be converted to datetime type
             # np.datetime64 raises ValueError, hence `item`
@@ -266,9 +264,9 @@ def __contains__(self, item: ScalarLike) -> bool:
             return False
         return item_as_dt64.astype("int64") in self.as_numerical
 
-    @property
+    @functools.cached_property
     def time_unit(self) -> str:
-        return self._time_unit
+        return np.datetime_data(self.dtype)[0]
 
     @property
     def year(self) -> ColumnBase:
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 79ab0f32a4b..f604cfedc88 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import datetime
+import functools
 from typing import Any, Optional, Sequence, cast
 
 import numpy as np
@@ -259,7 +260,7 @@ def as_numerical(self) -> "cudf.core.column.NumericalColumn":
             ),
         )
 
-    @property
+    @functools.cached_property
     def time_unit(self) -> str:
         return np.datetime_data(self.dtype)[0]
 

From 433ff6e8986f7a0910d5acf46ab11a5c90d0f7b8 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 8 Jan 2024 10:12:22 -0800
Subject: [PATCH 04/10] Check the dtype argument instead of self.dtype in __init__

---
 python/cudf/cudf/core/column/datetime.py  | 2 +-
 python/cudf/cudf/core/column/timedelta.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index dfeac2860a5..c1f1d7d44c9 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -240,7 +240,7 @@ def __init__(
         null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
-        if self.dtype.kind != "M":
+        if dtype.kind != "M":
             raise TypeError(f"{self.dtype} is not a supported datetime type")
 
         if data.size % dtype.itemsize:
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index f604cfedc88..11278048bb1 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -80,7 +80,7 @@ def __init__(
         null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
-        if self.dtype.kind != "m":
+        if dtype.kind != "m":
             raise TypeError(f"{self.dtype} is not a supported duration type")
 
         if data.size % dtype.itemsize:

From 8365eee62d2f5b99cc96b783cd07eed36c824e01 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 8 Jan 2024 14:18:49 -0800
Subject: [PATCH 05/10] Fix time_unit for DatetimeTZDtype columns

---
 python/cudf/cudf/core/column/datetime.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index c1f1d7d44c9..5e23f4e8c10 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -269,6 +269,8 @@ def __contains__(self, item: ScalarLike) -> bool:
 
     @functools.cached_property
     def time_unit(self) -> str:
+        if isinstance(self.dtype, pd.DatetimeTZDtype):
+            return self.dtype.unit
         return np.datetime_data(self.dtype)[0]
 
     @property

From f0373d894fc8f8c98b013a02cfac705430858d93 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 9 Jan 2024 11:41:17 -0800
Subject: [PATCH 06/10] Fix contains for tz aware types

---
 python/cudf/cudf/core/column/datetime.py         | 16 ++++++++++------
 .../cudf/cudf/tests/series/test_datetimelike.py  | 15 +++++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 5e23f4e8c10..0e24a706e03 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -259,13 +259,17 @@ def __init__(
 
     def __contains__(self, item: ScalarLike) -> bool:
         try:
-            item_as_dt64 = np.datetime64(item, self.time_unit)
-        except ValueError:
-            # If item cannot be converted to datetime type
-            # np.datetime64 raises ValueError, hence `item`
-            # cannot exist in `self`.
+            # TODO(pandas2.0): Change _as_unit to as_unit
+            ts = pd.Timestamp(item)._as_unit(self.time_unit)
+        except Exception:
+            # pandas can raise a variety of errors
+            # item cannot exist in self.
             return False
-        return item_as_dt64.astype("int64") in self.as_numerical
+        if ts.tzinfo is None and isinstance(self.dtype, pd.DatetimeTZDtype):
+            return False
+        elif ts.tzinfo is not None:
+            ts = ts.tz_convert(None)
+        return ts.to_numpy().astype("int64") in self.as_numerical
 
     @functools.cached_property
     def time_unit(self) -> str:
diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py
index df68eaca399..1e1c80b11bb 100644
--- a/python/cudf/cudf/tests/series/test_datetimelike.py
+++ b/python/cudf/cudf/tests/series/test_datetimelike.py
@@ -203,3 +203,18 @@ def test_tz_aware_attributes_local():
     result = dti.hour
     expected = cudf.Index([9, 9, 9], dtype="int16")
     assert_eq(result, expected)
+
+
+@pytest.mark.parametrize(
+    "item, expected",
+    [
+        ["2020-01-01", False],
+        ["2020-01-01T00:00:00+00:00", True],
+        ["2020-01-01T00:00:00-08:00", False],
+        ["2019-12-31T16:00:00-08:00", True],
+    ],
+)
+def test_contains_tz_aware(item, expected):
+    dti = cudf.date_range("2020", periods=2, freq="D").tz_localize("UTC")
+    result = item in dti
+    assert result == expected

From f8397bb949015e8bdb9299109f02b201fdd5f77c Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 9 Jan 2024 14:25:58 -0800
Subject: [PATCH 07/10] Update copyright year in test_datetimelike.py

---
 python/cudf/cudf/tests/series/test_datetimelike.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py
index 1e1c80b11bb..3352da443dd 100644
--- a/python/cudf/cudf/tests/series/test_datetimelike.py
+++ b/python/cudf/cudf/tests/series/test_datetimelike.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 import os
 

From 2e95199c536d87e5b3870582b433a7dfc29e5554 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 19 Jan 2024 14:28:33 -0800
Subject: [PATCH 08/10] Push element indexing logic to subclass

---
 python/cudf/cudf/core/column/column.py    | 8 +-------
 python/cudf/cudf/core/column/datetime.py  | 6 ++++++
 python/cudf/cudf/core/column/timedelta.py | 6 ++++++
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index df5d1c3879a..fe4fd29111f 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -529,13 +529,7 @@ def element_indexing(self, index: int):
             idx = len(self) + idx
         if idx > len(self) - 1 or idx < 0:
             raise IndexError("single positional indexer is out-of-bounds")
-        result = libcudf.copying.get_element(self, idx).value
-        if cudf.get_option("mode.pandas_compatible"):
-            if isinstance(result, np.datetime64):
-                return pd.Timestamp(result)
-            elif isinstance(result, np.timedelta64):
-                return pd.Timedelta(result)
-        return result
+        return libcudf.copying.get_element(self, idx).value
 
     def slice(
         self, start: int, stop: int, stride: Optional[int] = None
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 7bfc2b2e0f1..34bee25f31e 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -346,6 +346,12 @@ def values(self):
             "DateTime Arrays is not yet implemented in cudf"
         )
 
+    def element_indexing(self, index: int):
+        result = super().element_indexing(index)
+        if cudf.get_option("mode.pandas_compatible"):
+            return pd.Timestamp(result)
+        return result
+
     def get_dt_field(self, field: str) -> ColumnBase:
         return libcudf.datetime.extract_datetime_component(self, field)
 
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 4415f5ad481..2e540280c9a 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -117,6 +117,12 @@ def values(self):
             "TimeDelta Arrays is not yet implemented in cudf"
         )
 
+    def element_indexing(self, index: int):
+        result = super().element_indexing(index)
+        if cudf.get_option("mode.pandas_compatible"):
+            return pd.Timedelta(result)
+        return result
+
     @acquire_spill_lock()
     def to_arrow(self) -> pa.Array:
         mask = None

From 512a0232bea1a7fc961f969d47746ed9429c9200 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 31 Jan 2024 13:54:40 -0800
Subject: [PATCH 09/10] Replace remaining _time_unit uses with time_unit

---
 python/cudf/cudf/core/column/timedelta.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 8f8c8f96d64..c543499fb2a 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -496,7 +496,7 @@ def components(self, index=None) -> "cudf.DataFrame":
                     _unit_to_nanoseconds_conversion[value[1]], "ns"
                 ).astype(self.dtype)
             )
-            if self._time_unit == value[1]:
+            if self.time_unit == value[1]:
                 break
 
         for name in keys_list:
@@ -588,7 +588,7 @@ def nanoseconds(self) -> "cudf.core.column.NumericalColumn":
         # performing division operation to extract the number
         # of nanoseconds.
 
-        if self._time_unit != "ns":
+        if self.time_unit != "ns":
             res_col = cudf.core.column.full(len(self), 0, dtype="int64")
             if self.nullable:
                 res_col = res_col.set_mask(self.mask)

From 2f88a3ce3555177f0a672649270e41028a03cbc2 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:37:09 -0800
Subject: [PATCH 10/10] Use Timestamp.as_unit now that pandas 2.0 is in

---
 python/cudf/cudf/core/column/datetime.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 7de4101a2b2..bd529b6936e 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -266,8 +266,7 @@ def __init__(
 
     def __contains__(self, item: ScalarLike) -> bool:
         try:
-            # TODO(pandas2.0): Change _as_unit to as_unit
-            ts = pd.Timestamp(item)._as_unit(self.time_unit)
+            ts = pd.Timestamp(item).as_unit(self.time_unit)
         except Exception:
             # pandas can raise a variety of errors
             # item cannot exist in self.