diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index a98052ce906..a3a8b0c91d1 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1652,6 +1652,27 @@ def build_struct_column( return cast("cudf.core.column.StructColumn", result) +def _make_copy_replacing_NaT_with_null(column): + """Return a copy with NaT values replaced with nulls.""" + if np.issubdtype(column.dtype, np.timedelta64): + na_value = np.timedelta64("NaT", column.time_unit) + elif np.issubdtype(column.dtype, np.datetime64): + na_value = np.datetime64("NaT", column.time_unit) + else: + raise ValueError("This type does not support replacing NaT with null.") + + null = column_empty_like(column, masked=True, newsize=1) + out_col = cudf._lib.replace.replace( + column, + build_column( + Buffer(np.array([na_value], dtype=column.dtype).view("|u1")), + dtype=column.dtype, + ), + null, + ) + return out_col + + def as_column( arbitrary: Any, nan_as_null: bool = None, @@ -1753,9 +1774,7 @@ def as_column( col = col.set_mask(mask) elif np.issubdtype(col.dtype, np.datetime64): if nan_as_null or (mask is None and nan_as_null is None): - # Ignore typing error since this method is only defined for - # DatetimeColumn, not the ColumnBase class. - col = col._make_copy_with_na_as_null() # type: ignore + col = _make_copy_replacing_NaT_with_null(col) return col elif isinstance(arbitrary, (pa.Array, pa.ChunkedArray)): @@ -1886,7 +1905,7 @@ def as_column( mask = None if nan_as_null is None or nan_as_null is True: data = build_column(buffer, dtype=arbitrary.dtype) - data = data._make_copy_with_na_as_null() + data = _make_copy_replacing_NaT_with_null(data) mask = data.mask data = cudf.core.column.datetime.DatetimeColumn( @@ -1904,7 +1923,7 @@ def as_column( mask = None if nan_as_null is None or nan_as_null is True: data = build_column(buffer, dtype=arbitrary.dtype) - data = data._make_copy_with_na_as_null() + data = _make_copy_replacing_NaT_with_null(data) mask = data.mask data = cudf.core.column.timedelta.TimeDeltaColumn( diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 24ec25acbbb..b763790986a 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -20,13 +20,7 @@ from cudf.api.types import is_scalar from cudf.core._compat import PANDAS_GE_120 from cudf.core.buffer import Buffer -from cudf.core.column import ( - ColumnBase, - as_column, - column, - column_empty_like, - string, -) +from cudf.core.column import ColumnBase, as_column, column, string from cudf.utils.utils import _fillna_natwise if PANDAS_GE_120: @@ -493,20 +487,6 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool: else: return False - def _make_copy_with_na_as_null(self): - """Return a copy with NaN values replaced with nulls.""" - null = column_empty_like(self, masked=True, newsize=1) - na_value = np.datetime64("nat", self.time_unit) - out_col = cudf._lib.replace.replace( - self, - column.build_column( - Buffer(np.array([na_value], dtype=self.dtype).view("|u1")), - dtype=self.dtype, - ), - null, - ) - return out_col - def binop_offset(lhs, rhs, op): if rhs._is_no_op: diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 36a49aa4b33..8c7fdfa5c39 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1406,3 +1406,13 @@ def test_error_values(): match="TimeDelta Arrays is not yet implemented in cudf", ): s.values + + +@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES) +@pytest.mark.parametrize("name", [None, "delta-index"]) +def test_create_TimedeltaIndex(dtype, name): + gdi = cudf.TimedeltaIndex( + [1132223, 2023232, 342234324, 4234324], dtype=dtype, name=name + ) + pdi = gdi.to_pandas() + assert_eq(pdi, gdi)