Skip to content

Commit

Permalink
TimedeltaIndex constructor raises an AttributeError. (#9884)
Browse files Browse the repository at this point in the history
Fixes: #9829

This PR fixes the `AttributeError` raised when invoking the `TimedeltaIndex` constructor by replacing `NaT` values with nulls through a shared helper that handles both datetime and timedelta columns.

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #9884
  • Loading branch information
skirui-source authored Dec 17, 2021
1 parent 8c5a85a commit 23cafcf
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
29 changes: 24 additions & 5 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1652,6 +1652,27 @@ def build_struct_column(
return cast("cudf.core.column.StructColumn", result)


def _make_copy_replacing_NaT_with_null(column):
    """Build a copy of ``column`` in which NaT entries are replaced by nulls.

    Only columns whose dtype is a NumPy ``timedelta64`` or ``datetime64``
    subtype are accepted.

    Raises
    ------
    ValueError
        If ``column.dtype`` is neither a timedelta64 nor a datetime64 dtype.
    """
    # Pick the NumPy scalar constructor that matches the column's dtype.
    # timedelta64 is tried first, then datetime64.
    for scalar_type in (np.timedelta64, np.datetime64):
        if np.issubdtype(column.dtype, scalar_type):
            nat_scalar = scalar_type("NaT", column.time_unit)
            break
    else:
        raise ValueError("This type does not support replacing NaT with null.")

    # One-row masked column used as the replacement value.
    null_replacement = column_empty_like(column, masked=True, newsize=1)

    # One-row column holding the NaT value to search for, built from the
    # raw bytes of a single-element NumPy array of the same dtype.
    nat_bytes = np.array([nat_scalar], dtype=column.dtype).view("|u1")
    nat_to_find = build_column(Buffer(nat_bytes), dtype=column.dtype)

    return cudf._lib.replace.replace(column, nat_to_find, null_replacement)


def as_column(
arbitrary: Any,
nan_as_null: bool = None,
Expand Down Expand Up @@ -1753,9 +1774,7 @@ def as_column(
col = col.set_mask(mask)
elif np.issubdtype(col.dtype, np.datetime64):
if nan_as_null or (mask is None and nan_as_null is None):
# Ignore typing error since this method is only defined for
# DatetimeColumn, not the ColumnBase class.
col = col._make_copy_with_na_as_null() # type: ignore
col = _make_copy_replacing_NaT_with_null(col)
return col

elif isinstance(arbitrary, (pa.Array, pa.ChunkedArray)):
Expand Down Expand Up @@ -1886,7 +1905,7 @@ def as_column(
mask = None
if nan_as_null is None or nan_as_null is True:
data = build_column(buffer, dtype=arbitrary.dtype)
data = data._make_copy_with_na_as_null()
data = _make_copy_replacing_NaT_with_null(data)
mask = data.mask

data = cudf.core.column.datetime.DatetimeColumn(
Expand All @@ -1904,7 +1923,7 @@ def as_column(
mask = None
if nan_as_null is None or nan_as_null is True:
data = build_column(buffer, dtype=arbitrary.dtype)
data = data._make_copy_with_na_as_null()
data = _make_copy_replacing_NaT_with_null(data)
mask = data.mask

data = cudf.core.column.timedelta.TimeDeltaColumn(
Expand Down
22 changes: 1 addition & 21 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,7 @@
from cudf.api.types import is_scalar
from cudf.core._compat import PANDAS_GE_120
from cudf.core.buffer import Buffer
from cudf.core.column import (
ColumnBase,
as_column,
column,
column_empty_like,
string,
)
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.utils.utils import _fillna_natwise

if PANDAS_GE_120:
Expand Down Expand Up @@ -493,20 +487,6 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
else:
return False

def _make_copy_with_na_as_null(self):
    """Return a copy of this column with NaT entries replaced by nulls."""
    # One-row masked column used as the replacement value.
    null_replacement = column_empty_like(self, masked=True, newsize=1)

    # One-row column holding the datetime NaT value to search for, built
    # from the raw bytes of a single-element NumPy array of this dtype.
    nat_scalar = np.datetime64("nat", self.time_unit)
    nat_bytes = np.array([nat_scalar], dtype=self.dtype).view("|u1")
    nat_to_find = column.build_column(Buffer(nat_bytes), dtype=self.dtype)

    return cudf._lib.replace.replace(self, nat_to_find, null_replacement)


def binop_offset(lhs, rhs, op):
if rhs._is_no_op:
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,3 +1406,13 @@ def test_error_values():
match="TimeDelta Arrays is not yet implemented in cudf",
):
s.values


@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
@pytest.mark.parametrize("name", [None, "delta-index"])
def test_create_TimedeltaIndex(dtype, name):
    """Construct a ``TimedeltaIndex`` from integers and compare with pandas.

    Runs once per entry of ``utils.TIMEDELTA_TYPES``, both with and without
    an index name, and checks the resulting index against its own pandas
    conversion.
    """
    raw_values = [1132223, 2023232, 342234324, 4234324]
    cudf_index = cudf.TimedeltaIndex(raw_values, dtype=dtype, name=name)
    assert_eq(cudf_index.to_pandas(), cudf_index)

0 comments on commit 23cafcf

Please sign in to comment.