Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tz_convert method to convert between timestamps #13328

Merged
merged 18 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ cdef class Column:
object null_count=None,
object children=()
):

self._size = size
self._distinct_count = {}
self._dtype = dtype
Expand Down
27 changes: 25 additions & 2 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,38 @@ def localize(
return cast(
DatetimeTZColumn,
build_column(
data=gmt_data.data,
data=gmt_data.base_data,
dtype=dtype,
mask=localized.mask,
mask=localized.base_mask,
shwina marked this conversation as resolved.
Show resolved Hide resolved
size=gmt_data.size,
offset=gmt_data.offset,
),
)


def convert(data: DatetimeTZColumn, zone_name: str) -> DatetimeTZColumn:
    """Convert a timezone-aware column to the time zone ``zone_name``.

    Parameters
    ----------
    data : DatetimeTZColumn
        Timezone-aware column to convert.
    zone_name : str
        Name of the target time zone, e.g. ``"America/New_York"``.

    Returns
    -------
    DatetimeTZColumn
        A column representing the same instants in time, with its dtype's
        time zone set to ``zone_name``.

    Raises
    ------
    TypeError
        If ``data`` is timezone-naive (not a ``DatetimeTZColumn``).
    """
    if not isinstance(data, DatetimeTZColumn):
        raise TypeError(
            "Cannot convert from timezone-naive timestamps to "
            "timezone-aware timestamps. For that, "
            # Fixed: the backtick pair around the method name was
            # previously unterminated ("`tz_localize instead.").
            "use `tz_localize` instead."
        )
    # Converting to the zone we are already in is a no-op; return a copy
    # so callers never share the input column's buffers unexpectedly.
    if zone_name == str(data.dtype.tz):
        return data.copy()
    # The stored data is already UTC; "conversion" only changes the dtype's
    # time zone, reusing the same base buffers (zero-copy).
    utc_time = data._utc_time
    out = cast(
        DatetimeTZColumn,
        build_column(
            data=utc_time.base_data,
            dtype=pd.DatetimeTZDtype(data._time_unit, zone_name),
            mask=utc_time.base_mask,
            size=utc_time.size,
            offset=utc_time.offset,
        ),
    )
    return out


def utc_to_local(data: DatetimeColumn, zone_name: str) -> DatetimeColumn:
tz_data_for_zone = get_tz_data(zone_name)
transition_times, offsets = tz_data_for_zone._columns
Expand Down
24 changes: 24 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,18 @@ def to_arrow(self):
self._local_time.to_arrow(), str(self.dtype.tz)
)

@property
def _utc_time(self):
    """The stored UTC instants, exposed as a timezone-naive column.

    The underlying buffers are shared with this column; only the dtype
    is swapped for its timezone-free base dtype.
    """
    naive_dtype = _get_base_dtype(self.dtype)
    return DatetimeColumn(
        data=self.base_data,
        dtype=naive_dtype,
        mask=self.base_mask,
        size=self.size,
        offset=self.offset,
        null_count=self.null_count,
    )

@property
def _local_time(self):
"""Return the local time as naive timestamps."""
Expand All @@ -589,6 +601,18 @@ def as_string_column(
) -> "cudf.core.column.StringColumn":
return self._local_time.as_string_column(dtype, format, **kwargs)

def __repr__(self):
    """Debug repr that shows *local* wall-clock timestamps.

    Arrow renders tz-aware arrays as UTC instants, whereas pandas shows
    local times; we follow pandas, so the tz is attached to the
    local-time values before stringifying.
    """
    # Arrow prints the UTC timestamps, but we want to print the
    # local timestamps:
    tz_type = pa.timestamp(self.dtype.unit, str(self.dtype.tz))
    arr = self._local_time.to_arrow().cast(tz_type)
    parts = [
        object.__repr__(self),
        arr.to_string(),
        f"dtype: {self.dtype}",
    ]
    return "\n".join(parts)


def infer_format(element: str, **kwargs) -> str:
"""
Expand Down
41 changes: 41 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2544,6 +2544,47 @@ def tz_localize(self, tz, ambiguous="NaT", nonexistent="NaT"):
result_col = localize(self._column, tz, ambiguous, nonexistent)
return DatetimeIndex._from_data({self.name: result_col})

def tz_convert(self, tz):
    """
    Convert tz-aware datetimes from one time zone to another.

    Parameters
    ----------
    tz : str
        Time zone for time. Corresponding timestamps would be converted
        to this time zone of the Datetime Array/Index.
        A `tz` of None will convert to UTC and remove the timezone
        information.

    Returns
    -------
    DatetimeIndex containing timestamps corresponding to the timezone
    `tz`.

    Examples
    --------
    >>> import cudf
    >>> dti = cudf.date_range('2018-03-01 09:00', periods=3, freq='D')
    >>> dti = dti.tz_localize("America/New_York")
    >>> dti
    DatetimeIndex(['2018-03-01 09:00:00-05:00',
                   '2018-03-02 09:00:00-05:00',
                   '2018-03-03 09:00:00-05:00'],
                  dtype='datetime64[ns, America/New_York]')
    >>> dti.tz_convert("Europe/London")
    DatetimeIndex(['2018-03-01 14:00:00+00:00',
                   '2018-03-02 14:00:00+00:00',
                   '2018-03-03 14:00:00+00:00'],
                  dtype='datetime64[ns, Europe/London]')
    """
    # Deferred import: importing timezones at module top creates a
    # circular import.
    from cudf.core._internals.timezones import convert, localize

    # tz=None drops the time zone entirely: expose the UTC instants and
    # delocalize them. Otherwise, retag the column with the new zone.
    result_col = (
        localize(self._column._utc_time, None)
        if tz is None
        else convert(self._column, tz)
    )
    return DatetimeIndex._from_data({self.name: result_col})


class TimedeltaIndex(GenericIndex):
"""
Expand Down
21 changes: 21 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4609,6 +4609,27 @@ def tz_localize(self, tz, ambiguous="NaT", nonexistent="NaT"):
index=self.series._index,
)

@copy_docstring(DatetimeIndex.tz_convert)
def tz_convert(self, tz):
    """
    Parameters
    ----------
    tz : str
        Time zone for time. Corresponding timestamps would be converted
        to this time zone of the Datetime Array/Index.
        A `tz` of None will convert to UTC and remove the
        timezone information.
    """
    # Deferred import to avoid a circular dependency with the
    # timezones internals module.
    from cudf.core._internals.timezones import convert

    column = self.series._column
    # tz=None strips the zone (UTC instants, naive dtype); otherwise
    # retag the column with the requested zone.
    result_col = column._utc_time if tz is None else convert(column, tz)
    return Series._from_data(
        {self.series.name: result_col}, index=self.series._index
    )


class TimedeltaProperties:
"""
Expand Down
19 changes: 19 additions & 0 deletions python/cudf/cudf/tests/indexes/datetime/test_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) 2023, NVIDIA CORPORATION.

import pandas as pd

import cudf
from cudf.testing._utils import assert_eq


def test_slice_datetimetz_index():
    # Slicing a tz-aware index (including null entries) must match pandas.
    data = ["2001-01-01", "2001-01-02", None, None, "2001-01-03"]
    pidx = pd.DatetimeIndex(data, dtype="datetime64[ns]")
    idx = cudf.DatetimeIndex(data, dtype="datetime64[ns]")
    expected = pidx.tz_localize("US/Eastern")[1:4]
    got = idx.tz_localize("US/Eastern")[1:4]
    assert_eq(expected, got)
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/indexes/datetime/test_time_specific.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,13 @@ def test_tz_localize():
pidx.tz_localize("America/New_York"),
idx.tz_localize("America/New_York"),
)


def test_tz_convert():
    # Converting a UTC-localized index to another zone must match pandas.
    naive_pidx = pd.date_range("2023-01-01", periods=3, freq="H")
    naive_idx = cudf.from_pandas(naive_pidx)
    pidx = naive_pidx.tz_localize("UTC")
    idx = naive_idx.tz_localize("UTC")
    expect = pidx.tz_convert("America/New_York")
    got = idx.tz_convert("America/New_York")
    assert_eq(expect, got)
57 changes: 57 additions & 0 deletions python/cudf/cudf/tests/series/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,60 @@ def test_delocalize(unit, tz):
expect = psr.dt.tz_localize(tz).dt.tz_localize(None)
got = sr.dt.tz_localize(tz).dt.tz_localize(None)
assert_eq(expect, got)


@pytest.mark.parametrize(
    "from_tz", ["Europe/London", "America/Chicago", "UTC"]
)
@pytest.mark.parametrize(
    "to_tz", ["Europe/London", "America/Chicago", "UTC", None]
)
def test_convert(from_tz, to_tz):
    # Localize identical pandas/cudf series to ``from_tz`` and verify
    # that converting both to ``to_tz`` (including tz=None) agrees.
    naive = pd.Series(pd.date_range("2023-01-01", periods=3, freq="H"))
    gs = cudf.from_pandas(naive)
    ps = naive.dt.tz_localize(from_tz)
    gs = gs.dt.tz_localize(from_tz)
    expect = ps.dt.tz_convert(to_tz)
    got = gs.dt.tz_convert(to_tz)
    assert_eq(expect, got)


def test_convert_from_naive():
    # ``tz_convert`` is only valid on tz-aware data; naive timestamps
    # must go through ``tz_localize`` first.
    gs = cudf.Series(cudf.date_range("2023-01-01", periods=3, freq="H"))
    with pytest.raises(TypeError):
        gs.dt.tz_convert("America/New_York")


@pytest.mark.parametrize(
    "data,original_timezone,target_timezone",
    [
        # DST transition:
        (["2023-03-12 01:30:00"], "America/New_York", "America/Los_Angeles"),
        # crossing the international date line:
        (["2023-05-17 23:30:00"], "Pacific/Auckland", "America/Los_Angeles"),
        # timezone with non-integer offset:
        (["2023-05-17 12:00:00"], "Asia/Kolkata", "Australia/Eucla"),
        # timezone with negative offset:
        (["2023-05-17 09:00:00"], "America/Los_Angeles", "Pacific/Auckland"),
        # conversion across multiple days:
        (["2023-05-16 23:30:00"], "America/New_York", "Asia/Kolkata"),
        # timezone with half-hour offset:
        (["2023-05-17 12:00:00"], "Asia/Kolkata", "Australia/Adelaide"),
        # timezone conversion with a timestamp in the future
        (["2025-01-01 00:00:00"], "America/New_York", "Europe/London"),
        # timezone conversion with a timestamp in the past
        (["2000-01-01 12:00:00"], "Europe/Paris", "America/Los_Angeles"),
        # timezone conversion with a timestamp at midnight
        (["2023-05-17 00:00:00"], "Asia/Tokyo", "Europe/Paris"),
    ],
)
def test_convert_edge_cases(data, original_timezone, target_timezone):
    # Known tricky conversions (DST boundaries, fractional offsets,
    # date-line crossings) must agree with pandas.
    expect = (
        pd.Series(data, dtype="datetime64[s]")
        .dt.tz_localize(original_timezone)
        .dt.tz_convert(target_timezone)
    )
    got = (
        cudf.Series(data, dtype="datetime64[s]")
        .dt.tz_localize(original_timezone)
        .dt.tz_convert(target_timezone)
    )
    assert_eq(expect, got)