Skip to content

Commit

Permalink
Preserve sub-second data for time scalars in column construction (#15655)
Browse files Browse the repository at this point in the history

Fixes: #15654 

This PR ensures that sub-second data in timestamp and timedelta scalars is no longer dropped during column construction.

Forked out of #14534

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #15655
  • Loading branch information
galipremsagar authored May 7, 2024
1 parent 8d9c06a commit 5d244df
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 2 deletions.
3 changes: 2 additions & 1 deletion python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ def as_device_scalar(val, dtype=None):
def _is_null_host_scalar(slr):
# Reports whether the host scalar `slr` counts as null. Returns True when
# `cudf.utils.utils.is_na_like` accepts it or when one of the NaT checks
# below matches; returns False otherwise.
#
# NOTE: this is a diff hunk rendered without +/- markers or indentation.
# The hunk summary reads "2 additions & 1 deletion", so the first `elif`
# below is the deleted line and the two-line `elif` after it is its
# replacement. Only one of them exists in either version of the file.
if cudf.utils.utils.is_na_like(slr):
return True
# Pre-commit condition (deleted line): only NumPy datetime64/timedelta64
# values for which `np.isnat` is true were caught here.
elif isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr):
# Post-commit condition (added lines): the same NumPy NaT check, plus an
# identity test against the `pd.NaT` singleton.
elif (isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr)) or \
slr is pd.NaT:
return True
else:
return False
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2163,6 +2163,19 @@ def as_column(
nan_as_null=nan_as_null,
length=length,
)
elif (
isinstance(element, (pd.Timestamp, pd.Timedelta))
or element is pd.NaT
):
# TODO: Remove this after
# https://github.com/apache/arrow/issues/26492
# is fixed.
return as_column(
pd.Series(arbitrary),
dtype=dtype,
nan_as_null=nan_as_null,
length=length,
)
elif not any(element is na for na in (None, pd.NA, np.nan)):
# Might have NA + element like above, but short-circuit if
# an element pyarrow/pandas might be able to parse
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ def dtypes(self):
>>> df.dtypes
float float64
int int64
datetime datetime64[us]
datetime datetime64[ns]
string object
dtype: object
"""
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ def _preprocess_host_value(self, value, dtype):

if dtype is None:
if not valid:
if value is NaT:
value = value.to_numpy()

if isinstance(value, (np.datetime64, np.timedelta64)):
unit, _ = np.datetime_data(value)
if unit == "generic":
Expand Down
28 changes: 28 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2786,3 +2786,31 @@ def test_squeeze(axis, data):
def test_squeeze_invalid_axis(axis):
with pytest.raises(ValueError):
cudf.Series([1]).squeeze(axis=axis)


@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0])
def test_timestamp_series_init(data):
    """Check that a cudf Series built from a ``pd.Timestamp`` matches pandas.

    The comparison is made twice: once with the scalar wrapped in a
    one-element list, and once with the bare scalar passed directly.
    """
    ts = pd.Timestamp(data)

    # Same order as before: list-wrapped input first, then the bare scalar.
    for payload in ([ts], ts):
        assert_eq(pd.Series(payload), cudf.Series(payload))


@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0])
def test_timedelta_series_init(data):
    """Check that a cudf Series built from a ``pd.Timedelta`` matches pandas.

    The comparison is made twice: once with the scalar wrapped in a
    one-element list, and once with the bare scalar passed directly.
    """
    delta = pd.Timedelta(data)

    # Same order as before: list-wrapped input first, then the bare scalar.
    for payload in ([delta], delta):
        assert_eq(pd.Series(payload), cudf.Series(payload))

0 comments on commit 5d244df

Please sign in to comment.