rapidsai · rapids-bot · Aug 14, 2023 · Aug 12, 2023 · Aug 12, 2023 · Aug 12, 2023
@@ -58,7 +58,7 @@
     UInt64Index,
     interval_range,
 )
-from cudf.core.missing import NA
+from cudf.core.missing import NA, NaT
 from cudf.core.multiindex import MultiIndex
 from cudf.core.reshape import (
     concat,
@@ -90,7 +90,6 @@
     option_context,
     set_option,
 )
-from cudf.utils.dtypes import _NA_REP
 from cudf.utils.utils import clear_cache
 
 cuda.set_memory_manager(RMMNumbaManager)
@@ -125,6 +124,7 @@
     "ListDtype",
     "MultiIndex",
     "NA",
+    "NaT",
     "RangeIndex",
     "Scalar",
     "Series",

@@ -31,7 +31,7 @@ from cudf._lib.types import (
     duration_unit_map,
 )
 from cudf.core.dtypes import ListDtype, StructDtype
-from cudf.core.missing import NA
+from cudf.core.missing import NA, NaT
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -178,7 +178,7 @@ cdef class DeviceScalar:
         return self.get_raw_ptr()[0].is_valid()
 
     def __repr__(self):
-        if self.value is NA:
+        if cudf.utils.utils.is_na_like(self.value):
             return (
                 f"{self.__class__.__name__}"
                 f"({self.value}, {repr(self.dtype)})"
@@ -495,7 +495,7 @@ cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s):
     cdef scalar* s_ptr = s.get()
 
     if not s_ptr[0].is_valid():
-        return NA
+        return NaT
 
     cdef libcudf_types.data_type cdtype = s_ptr[0].type()
 
@@ -536,7 +536,7 @@ cdef _get_np_scalar_from_timedelta64(unique_ptr[scalar]& s):
     cdef scalar* s_ptr = s.get()
 
     if not s_ptr[0].is_valid():
-        return NA
+        return NaT
 
     cdef libcudf_types.data_type cdtype = s_ptr[0].type()
 
@@ -586,7 +586,7 @@ def as_device_scalar(val, dtype=None):
 
 
 def _is_null_host_scalar(slr):
-    if slr is None or slr is NA:
+    if cudf.utils.utils.is_na_like(slr):
         return True
     elif isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr):
         return True

@@ -186,7 +186,7 @@ def localize(
         DatetimeColumn,
         data._scatter_by_column(
             data.isnull() | (ambiguous | nonexistent),
-            cudf.Scalar(cudf.NA, dtype=data.dtype),
+            cudf.Scalar(cudf.NaT, dtype=data.dtype),
         ),
     )
     gmt_data = local_to_utc(localized, zone_name)

@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
 
 import warnings
 from typing import Tuple, Union
@@ -13,7 +13,6 @@
     is_scalar,
 )
 from cudf.core.column import ColumnBase
-from cudf.core.missing import NA
 from cudf.utils.dtypes import (
     _can_cast,
     _dtype_can_hold_element,
@@ -59,7 +58,7 @@ def _check_and_cast_columns_with_other(
                 f"{type(other).__name__} to {source_dtype.name}"
             )
 
-        if other in {None, NA}:
+        if cudf.utils.utils.is_na_like(other):
             return _normalize_categorical(
                 source_col, cudf.Scalar(other, dtype=source_dtype)
             )

@@ -81,7 +81,6 @@
     ListDtype,
     StructDtype,
 )
-from cudf.core.missing import NA
 from cudf.core.mixins import BinaryOperand, Reducible
 from cudf.errors import MixedTypeError
 from cudf.utils.dtypes import (
@@ -605,7 +604,7 @@ def __setitem__(self, key: Any, value: Any):
             self._mimic_inplace(out, inplace=True)
 
     def _wrap_binop_normalization(self, other):
-        if other is NA or other is None:
+        if cudf.utils.utils.is_na_like(other):
             return cudf.Scalar(other, dtype=self.dtype)
         if isinstance(other, np.ndarray) and other.ndim == 0:
             # Try and maintain the dtype

@@ -1694,7 +1694,19 @@ def _clean_nulls_from_dataframe(self, df):
                 # TODO we need to handle this
                 pass
             elif df._data[col].has_nulls():
-                df[col] = df._data[col].astype("str").fillna(cudf._NA_REP)
+                fill_value = (
+                    str(cudf.NaT)
+                    if isinstance(
+                        df._data[col],
+                        (
+                            cudf.core.column.DatetimeColumn,
+                            cudf.core.column.TimeDeltaColumn,
+                        ),
+                    )
+                    else str(cudf.NA)
+                )
+
+                df[col] = df._data[col].astype("str").fillna(fill_value)
             else:
                 df[col] = df._data[col]
 

@@ -1347,7 +1347,7 @@ def __repr__(self):
             else:
                 output = repr(preprocess.to_pandas())
 
-            output = output.replace("nan", cudf._NA_REP)
+            output = output.replace("nan", str(cudf.NA))
         elif preprocess._values.nullable:
             output = repr(self._clean_nulls_from_index().to_pandas())
 
@@ -1499,8 +1499,14 @@ def __contains__(self, item):
 
     def _clean_nulls_from_index(self):
         if self._values.has_nulls():
+            fill_value = (
+                str(cudf.NaT)  # datetime/timedelta nulls render as "NaT"
+                if isinstance(self, (DatetimeIndex, TimedeltaIndex))
+                else str(cudf.NA)  # any other index type renders as "<NA>"
+            )
             return cudf.Index(
-                self._values.astype("str").fillna(cudf._NA_REP), name=self.name
+                self._values.astype("str").fillna(fill_value),
+                name=self.name,
             )
 
         return self
@@ -2611,7 +2617,7 @@ def tz_localize(self, tz, ambiguous="NaT", nonexistent="NaT"):
         ...                                   '2018-10-28 03:46:00']))
         >>> s.dt.tz_localize("CET")
         0    2018-10-28 01:20:00.000000000
-        1                             <NA>
+        1                              NaT
         2    2018-10-28 03:46:00.000000000
         dtype: datetime64[ns, CET]
 
@@ -3254,7 +3260,7 @@ def str(self):
 
     def _clean_nulls_from_index(self):
         if self._values.has_nulls():
-            return self.fillna(cudf._NA_REP)
+            return self.fillna(str(cudf.NA))  # nulls display as "<NA>"
         else:
             return self
 

@@ -1,9 +1,9 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
 
 # Pandas NAType enforces a single instance exists at a time
 # instantiating this class will yield the existing instance
 # of pandas._libs.missing.NAType, id(cudf.NA) == id(pd.NA).
-from pandas import NA
+from pandas import NA, NaT
 
-__all__ = ["NA"]
+__all__ = ["NA", "NaT"]
@@ -504,7 +504,7 @@ def __repr__(self):
                     ),
                 ):
                     preprocess_df[name] = col.astype("str").fillna(
-                        cudf._NA_REP
+                        str(cudf.NaT)
                     )
 
             tuples_list = list(

@@ -8,9 +8,9 @@
 import pyarrow as pa
 
 import cudf
-from cudf.api.types import is_scalar
+from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
 from cudf.core.dtypes import ListDtype, StructDtype
-from cudf.core.missing import NA
+from cudf.core.missing import NA, NaT
 from cudf.core.mixins import BinaryOperand
 from cudf.utils.dtypes import (
     get_allowed_combinations_for_operator,
@@ -243,7 +243,11 @@ def _preprocess_host_value(self, value, dtype):
             dtype = cudf.dtype(dtype)
 
         if not valid:
-            value = NA
+            value = (
+                NaT
+                if is_datetime64_dtype(dtype) or is_timedelta64_dtype(dtype)
+                else NA
+            )
 
         return value, dtype
 

@@ -1403,8 +1403,19 @@ def __repr__(self):
             preprocess._column,
             cudf.core.column.timedelta.TimeDeltaColumn,
         ):
+            fill_value = (
+                str(cudf.NaT)
+                if isinstance(
+                    preprocess._column,
+                    (
+                        cudf.core.column.TimeDeltaColumn,
+                        cudf.core.column.DatetimeColumn,
+                    ),
+                )
+                else str(cudf.NA)
+            )
             output = repr(
-                preprocess.astype("O").fillna(cudf._NA_REP).to_pandas()
+                preprocess.astype("str").fillna(fill_value).to_pandas()
             )
         elif isinstance(
             preprocess._column, cudf.core.column.CategoricalColumn
@@ -1436,7 +1447,7 @@ def __repr__(self):
                 min_rows=min_rows,
                 max_rows=max_rows,
                 length=show_dimensions,
-                na_rep=cudf._NA_REP,
+                na_rep=str(cudf.NA),
             )
         else:
             output = repr(preprocess.to_pandas())

@@ -19,7 +19,7 @@
     is_string_dtype,
     is_struct_dtype,
 )
-from cudf.core.missing import NA
+from cudf.core.missing import NA, NaT
 
 
 def dtype_can_compare_equal_to_other(dtype):
@@ -290,7 +290,7 @@ def assert_column_equal(
 
 
 def null_safe_scalar_equals(left, right):
-    if left in {NA, np.nan} or right in {NA, np.nan}:
+    if left in {NA, NaT, np.nan} or right in {NA, NaT, np.nan}:  # null-like
         return left is right
     return left == right
 

@@ -1700,7 +1700,12 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
     rhs = cudf.Scalar(cudf.NA, dtype=dtype_r)
 
     result = op(lhs, rhs)
-    assert result.value is cudf.NA
+    assert result.value is (
+        cudf.NaT
+        if cudf.api.types.is_datetime64_dtype(result.dtype)
+        or cudf.api.types.is_timedelta64_dtype(result.dtype)
+        else cudf.NA
+    )
 
     # make sure dtype is the same as had there been a valid scalar
     valid_lhs = cudf.Scalar(1, dtype=dtype_l)

@@ -2107,3 +2107,8 @@ def test_datetime_binop_tz_timestamp(op):
     date_scalar = datetime.datetime.now(datetime.timezone.utc)
     with pytest.raises(NotImplementedError):
         op(s, date_scalar)
+
+
+def test_datetime_getitem_na():
+    s = cudf.Series([1, 2, None, 3], dtype="datetime64[ns]")
+    assert s[2] is cudf.NaT  # null datetime element is NaT, not NA
@@ -697,7 +697,12 @@ def test_list_scalar_host_construction_null(elem_type, nesting_level):
         dtype = cudf.ListDtype(dtype)
 
     slr = cudf.Scalar(None, dtype=dtype)
-    assert slr.value is cudf.NA
+    assert slr.value is (
+        cudf.NaT
+        if cudf.api.types.is_datetime64_dtype(slr.dtype)
+        or cudf.api.types.is_timedelta64_dtype(slr.dtype)
+        else cudf.NA
+    )
 
 
 @pytest.mark.parametrize(

@@ -2264,7 +2264,7 @@ def test_parquet_writer_statistics(tmpdir, pdf, add_nulls):
         pdf = pdf.drop(columns=["col_category", "col_bool"])
 
     if not add_nulls:
-        # Timedelta types convert NA to None when reading from parquet into
+        # Timedelta types convert NaT to None when reading from parquet into
         # pandas which interferes with series.max()/min()
         for t in TIMEDELTA_TYPES:
             pdf["col_" + t] = pd.Series(np.arange(len(pdf.index))).astype(t)