Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG FIX] TimedeltaIndex constructor raises an AttributeError. #9884

Merged
merged 14 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1652,6 +1652,27 @@ def build_struct_column(
return cast("cudf.core.column.StructColumn", result)


def _make_copy_replacing_NaT_with_null(column):
    """Return a copy of ``column`` whose NaT entries are replaced by nulls.

    Parameters
    ----------
    column
        A column whose ``dtype`` is a NumPy ``timedelta64`` or
        ``datetime64`` dtype and which exposes a ``time_unit`` attribute.

    Returns
    -------
    The result of ``cudf._lib.replace.replace`` applied to ``column``,
    mapping the dtype's NaT value onto a masked one-element column.

    Raises
    ------
    ValueError
        If ``column.dtype`` is neither a timedelta64 nor a datetime64 dtype.
    """
    # Pick the NaT constructor matching the column's dtype family;
    # timedelta64 is checked first, then datetime64.
    for nat_type in (np.timedelta64, np.datetime64):
        if np.issubdtype(column.dtype, nat_type):
            nat_scalar = nat_type("NaT", column.time_unit)
            break
    else:
        raise ValueError("This type does not support replacing NaT with null.")

    # One-element masked column used as the replacement value.
    replacement = column_empty_like(column, masked=True, newsize=1)

    # One-element column holding the NaT bit pattern to search for; the
    # scalar is reinterpreted as raw bytes so it can back a Buffer.
    nat_bytes = np.array([nat_scalar], dtype=column.dtype).view("|u1")
    to_replace = build_column(Buffer(nat_bytes), dtype=column.dtype)

    return cudf._lib.replace.replace(column, to_replace, replacement)


def as_column(
arbitrary: Any,
nan_as_null: bool = None,
Expand Down Expand Up @@ -1753,9 +1774,7 @@ def as_column(
col = col.set_mask(mask)
elif np.issubdtype(col.dtype, np.datetime64):
if nan_as_null or (mask is None and nan_as_null is None):
# Ignore typing error since this method is only defined for
# DatetimeColumn, not the ColumnBase class.
col = col._make_copy_with_na_as_null() # type: ignore
col = _make_copy_replacing_NaT_with_null(col)
return col

elif isinstance(arbitrary, (pa.Array, pa.ChunkedArray)):
Expand Down Expand Up @@ -1886,7 +1905,7 @@ def as_column(
mask = None
if nan_as_null is None or nan_as_null is True:
data = build_column(buffer, dtype=arbitrary.dtype)
data = data._make_copy_with_na_as_null()
data = _make_copy_replacing_NaT_with_null(data)
mask = data.mask

data = cudf.core.column.datetime.DatetimeColumn(
Expand All @@ -1904,7 +1923,7 @@ def as_column(
mask = None
if nan_as_null is None or nan_as_null is True:
data = build_column(buffer, dtype=arbitrary.dtype)
data = data._make_copy_with_na_as_null()
data = _make_copy_replacing_NaT_with_null(data)
mask = data.mask

data = cudf.core.column.timedelta.TimeDeltaColumn(
Expand Down
22 changes: 1 addition & 21 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,7 @@
from cudf.api.types import is_scalar
from cudf.core._compat import PANDAS_GE_120
from cudf.core.buffer import Buffer
from cudf.core.column import (
ColumnBase,
as_column,
column,
column_empty_like,
string,
)
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.utils.utils import _fillna_natwise

if PANDAS_GE_120:
Expand Down Expand Up @@ -493,20 +487,6 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
else:
return False

def _make_copy_with_na_as_null(self):
    """Return a copy of this column with NaT values replaced by nulls.

    The NaT value is built as ``np.datetime64("nat", self.time_unit)`` and
    every occurrence of it is mapped, via ``cudf._lib.replace.replace``,
    onto a masked one-element column.
    """
    # One-element masked column used as the replacement value.
    replacement = column_empty_like(self, masked=True, newsize=1)

    # One-element column holding the NaT bit pattern to search for; the
    # scalar is reinterpreted as raw bytes so it can back a Buffer.
    nat_scalar = np.datetime64("nat", self.time_unit)
    nat_bytes = np.array([nat_scalar], dtype=self.dtype).view("|u1")
    to_replace = column.build_column(Buffer(nat_bytes), dtype=self.dtype)

    return cudf._lib.replace.replace(self, to_replace, replacement)


def binop_offset(lhs, rhs, op):
if rhs._is_no_op:
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,3 +1406,13 @@ def test_error_values():
match="TimeDelta Arrays is not yet implemented in cudf",
):
s.values


@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
@pytest.mark.parametrize("name", [None, "delta-index"])
def test_create_TimedeltaIndex(dtype, name):
    """Check that a TimedeltaIndex built from integers matches its pandas form.

    Builds ``cudf.TimedeltaIndex`` from a list of integers for each
    timedelta dtype, with and without a name, and compares it with the
    result of its own ``to_pandas()`` conversion.
    """
    raw_values = [1132223, 2023232, 342234324, 4234324]
    gdi = cudf.TimedeltaIndex(raw_values, dtype=dtype, name=name)
    assert_eq(gdi.to_pandas(), gdi)