Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG FIX] TimedeltaIndex constructor raises an AttributeError. #9884

Merged
merged 14 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,25 @@ def _get_mask_as_column(self) -> ColumnBase:
self.base_mask, self.offset, self.offset + len(self)
)

def _make_copy_replacing_NaT_with_null(self):
    """Return a copy with NaT values replaced with nulls.

    Only datetime64 and timedelta64 dtypes have a NaT sentinel, so any
    other dtype is rejected before any column is allocated.

    Raises
    ------
    TypeError
        If ``self.dtype`` is neither a timedelta64 nor a datetime64 dtype.
    """
    if np.issubdtype(self.dtype, np.timedelta64):
        na_value = np.timedelta64("NaT", self.time_unit)
    elif np.issubdtype(self.dtype, np.datetime64):
        na_value = np.datetime64("NaT", self.time_unit)
    else:
        # Without this branch ``na_value`` would be unbound and the call
        # would fail further down with an opaque UnboundLocalError.
        raise TypeError(
            f"Cannot replace NaT with null for dtype {self.dtype}; "
            "expected a datetime64 or timedelta64 column."
        )

    # One-element masked column of the same dtype: the replacement value.
    null = column_empty_like(self, masked=True, newsize=1)
    # One-element column built from the bytes of the NaT scalar: the value
    # to search for.
    nat_column = build_column(
        Buffer(np.array([na_value], dtype=self.dtype).view("|u1")),
        dtype=self.dtype,
    )
    return cudf._lib.replace.replace(self, nat_column, null)

def memory_usage(self) -> int:
n = 0
if self.data is not None:
Expand Down Expand Up @@ -1755,7 +1774,7 @@ def as_column(
if nan_as_null or (mask is None and nan_as_null is None):
# NOTE(review): `_make_copy_replacing_NaT_with_null` is added to
# column.py in this change (apparently on ColumnBase), so the
# `type: ignore` below may no longer be needed -- confirm with mypy.
skirui-source marked this conversation as resolved.
Show resolved Hide resolved
col = col._make_copy_with_na_as_null() # type: ignore
col = col._make_copy_replacing_NaT_with_null() # type: ignore
return col

elif isinstance(arbitrary, (pa.Array, pa.ChunkedArray)):
Expand Down Expand Up @@ -1878,7 +1897,7 @@ def as_column(
mask = None
if nan_as_null is None or nan_as_null is True:
data = build_column(buffer, dtype=arbitrary.dtype)
data = data._make_copy_with_na_as_null()
data = data._make_copy_replacing_NaT_with_null()
mask = data.mask

data = cudf.core.column.datetime.DatetimeColumn(
Expand All @@ -1896,7 +1915,7 @@ def as_column(
mask = None
if nan_as_null is None or nan_as_null is True:
data = build_column(buffer, dtype=arbitrary.dtype)
data = data._make_copy_with_na_as_null()
data = data._make_copy_replacing_NaT_with_null()
mask = data.mask

data = cudf.core.column.timedelta.TimeDeltaColumn(
Expand Down
22 changes: 1 addition & 21 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,7 @@
from cudf.api.types import is_scalar
from cudf.core._compat import PANDAS_GE_120
from cudf.core.buffer import Buffer
from cudf.core.column import (
ColumnBase,
as_column,
column,
column_empty_like,
string,
)
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.utils.utils import _fillna_natwise

if PANDAS_GE_120:
Expand Down Expand Up @@ -493,20 +487,6 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
else:
return False

def _make_copy_with_na_as_null(self):
    """Return a copy of this column in which NaT entries become nulls."""
    # Single-element masked column of the same dtype, used as the
    # replacement value.
    replacement = column_empty_like(self, masked=True, newsize=1)

    # Single-element column built from the bytes of the NaT scalar: the
    # value to search for.
    nat = np.datetime64("nat", self.time_unit)
    nat_bytes = np.array([nat], dtype=self.dtype).view("|u1")
    to_replace = column.build_column(Buffer(nat_bytes), dtype=self.dtype)

    return cudf._lib.replace.replace(self, to_replace, replacement)


def binop_offset(lhs, rhs, op):
if rhs._is_no_op:
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,3 +1406,13 @@ def test_error_values():
match="TimeDelta Arrays is not yet implemented in cudf",
):
s.values


@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
@pytest.mark.parametrize("name", [None, "delta-index"])
def test_create_TimedeltaIndex(dtype, name):
    """Build a TimedeltaIndex from integers and compare it with its pandas form."""
    raw_values = [1132223, 2023232, 342234324, 4234324]
    gpu_index = cudf.TimedeltaIndex(raw_values, dtype=dtype, name=name)
    # The pandas conversion goes first, matching the original argument order.
    assert_eq(gpu_index.to_pandas(), gpu_index)