diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index d8cee514fb7..e5c78fca893 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -58,7 +58,7 @@ UInt64Index, interval_range, ) -from cudf.core.missing import NA +from cudf.core.missing import NA, NaT from cudf.core.multiindex import MultiIndex from cudf.core.reshape import ( concat, @@ -90,7 +90,6 @@ option_context, set_option, ) -from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache cuda.set_memory_manager(RMMNumbaManager) @@ -125,6 +124,7 @@ "ListDtype", "MultiIndex", "NA", + "NaT", "RangeIndex", "Scalar", "Series", diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 0ff736b9204..39a1b0609cf 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -31,7 +31,7 @@ from cudf._lib.types import ( duration_unit_map, ) from cudf.core.dtypes import ListDtype, StructDtype -from cudf.core.missing import NA +from cudf.core.missing import NA, NaT from cudf._lib.column cimport Column from cudf._lib.cpp.column.column_view cimport column_view @@ -178,7 +178,7 @@ cdef class DeviceScalar: return self.get_raw_ptr()[0].is_valid() def __repr__(self): - if self.value is NA: + if cudf.utils.utils.is_na_like(self.value): return ( f"{self.__class__.__name__}" f"({self.value}, {repr(self.dtype)})" @@ -495,7 +495,7 @@ cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s): cdef scalar* s_ptr = s.get() if not s_ptr[0].is_valid(): - return NA + return NaT cdef libcudf_types.data_type cdtype = s_ptr[0].type() @@ -536,7 +536,7 @@ cdef _get_np_scalar_from_timedelta64(unique_ptr[scalar]& s): cdef scalar* s_ptr = s.get() if not s_ptr[0].is_valid(): - return NA + return NaT cdef libcudf_types.data_type cdtype = s_ptr[0].type() @@ -586,7 +586,7 @@ def as_device_scalar(val, dtype=None): def _is_null_host_scalar(slr): - if slr is None or slr is NA: + if cudf.utils.utils.is_na_like(slr): return True elif isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr): return True diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py index 2895fd3476c..67043d3fbb3 100644 --- a/python/cudf/cudf/core/_internals/timezones.py +++ b/python/cudf/cudf/core/_internals/timezones.py @@ -186,7 +186,7 @@ def localize( DatetimeColumn, data._scatter_by_column( data.isnull() | (ambiguous | nonexistent), - cudf.Scalar(cudf.NA, dtype=data.dtype), + cudf.Scalar(cudf.NaT, dtype=data.dtype), ), ) gmt_data = local_to_utc(localized, zone_name) diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index 6d4a2990e34..0f65861dc72 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import warnings from typing import Tuple, Union @@ -13,7 +13,6 @@ is_scalar, ) from cudf.core.column import ColumnBase -from cudf.core.missing import NA from cudf.utils.dtypes import ( _can_cast, _dtype_can_hold_element, @@ -59,7 +58,7 @@ def _check_and_cast_columns_with_other( f"{type(other).__name__} to {source_dtype.name}" ) - if other in {None, NA}: + if cudf.utils.utils.is_na_like(other): return _normalize_categorical( source_col, cudf.Scalar(other, dtype=source_dtype) ) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 0b2b9fda2fd..263da9b350e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -81,7 +81,6 @@ ListDtype, StructDtype, ) -from cudf.core.missing import NA from cudf.core.mixins import BinaryOperand, Reducible from cudf.errors import MixedTypeError from cudf.utils.dtypes import ( @@ -605,7 +604,7 @@ def __setitem__(self, key: Any, value: Any): self._mimic_inplace(out, inplace=True) def _wrap_binop_normalization(self, other): - if other is NA or other is None: + if cudf.utils.utils.is_na_like(other): return cudf.Scalar(other, dtype=self.dtype) if isinstance(other, np.ndarray) and other.ndim == 0: # Try and maintain the dtype diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index e4b944a88af..fc624c0b8eb 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1694,7 +1694,19 @@ def _clean_nulls_from_dataframe(self, df): # TODO we need to handle this pass elif df._data[col].has_nulls(): - df[col] = df._data[col].astype("str").fillna(cudf._NA_REP) + fill_value = ( + str(cudf.NaT) + if isinstance( + df._data[col], + ( + cudf.core.column.DatetimeColumn, + cudf.core.column.TimeDeltaColumn, + ), + ) + else str(cudf.NA) + ) + + df[col] = df._data[col].astype("str").fillna(fill_value) else: df[col] = df._data[col] diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c3d468e5656..b7ee85758b9 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1347,7 +1347,7 @@ def __repr__(self): else: output = repr(preprocess.to_pandas()) - output = output.replace("nan", cudf._NA_REP) + output = output.replace("nan", str(cudf.NA)) elif preprocess._values.nullable: output = repr(self._clean_nulls_from_index().to_pandas()) @@ -1499,8 +1499,14 @@ def __contains__(self, item): def _clean_nulls_from_index(self): if self._values.has_nulls(): + fill_value = ( + str(cudf.NaT) + if isinstance(self, (DatetimeIndex, TimedeltaIndex)) + else str(cudf.NA) + ) return cudf.Index( - self._values.astype("str").fillna(cudf._NA_REP), name=self.name + self._values.astype("str").fillna(fill_value), + name=self.name, ) return self @@ -2611,7 +2617,7 @@ def tz_localize(self, tz, ambiguous="NaT", nonexistent="NaT"): ... '2018-10-28 03:46:00'])) >>> s.dt.tz_localize("CET") 0 2018-10-28 01:20:00.000000000 - 1 + 1 NaT 2 2018-10-28 03:46:00.000000000 dtype: datetime64[ns, CET] @@ -3254,7 +3260,7 @@ def str(self): def _clean_nulls_from_index(self): if self._values.has_nulls(): - return self.fillna(cudf._NA_REP) + return self.fillna(str(cudf.NA)) else: return self diff --git a/python/cudf/cudf/core/missing.py b/python/cudf/cudf/core/missing.py index 02bcb7636f4..0d48a1d4136 100644 --- a/python/cudf/cudf/core/missing.py +++ b/python/cudf/cudf/core/missing.py @@ -1,9 +1,9 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. # Pandas NAType enforces a single instance exists at a time # instantiating this class will yield the existing instance # of pandas._libs.missing.NAType, id(cudf.NA) == id(pd.NA). -from pandas import NA +from pandas import NA, NaT -__all__ = ["NA"] +__all__ = ["NA", "NaT"] diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 6e9d068ef50..54c67458b55 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -504,7 +504,7 @@ def __repr__(self): ), ): preprocess_df[name] = col.astype("str").fillna( - cudf._NA_REP + str(cudf.NaT) ) tuples_list = list( diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 438e3c7477d..a20628f6601 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -8,9 +8,9 @@ import pyarrow as pa import cudf -from cudf.api.types import is_scalar +from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype from cudf.core.dtypes import ListDtype, StructDtype -from cudf.core.missing import NA +from cudf.core.missing import NA, NaT from cudf.core.mixins import BinaryOperand from cudf.utils.dtypes import ( get_allowed_combinations_for_operator, @@ -243,7 +243,11 @@ def _preprocess_host_value(self, value, dtype): dtype = cudf.dtype(dtype) if not valid: - value = NA + value = ( + NaT + if is_datetime64_dtype(dtype) or is_timedelta64_dtype(dtype) + else NA + ) return value, dtype diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index a0cec69eca9..b63261ef840 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1403,8 +1403,19 @@ def __repr__(self): preprocess._column, cudf.core.column.timedelta.TimeDeltaColumn, ): + fill_value = ( + str(cudf.NaT) + if isinstance( + preprocess._column, + ( + cudf.core.column.TimeDeltaColumn, + cudf.core.column.DatetimeColumn, + ), + ) + else str(cudf.NA) + ) output = repr( - preprocess.astype("O").fillna(cudf._NA_REP).to_pandas() + preprocess.astype("str").fillna(fill_value).to_pandas() ) elif isinstance( preprocess._column, cudf.core.column.CategoricalColumn @@ -1436,7 +1447,7 @@ def __repr__(self): min_rows=min_rows, max_rows=max_rows, length=show_dimensions, - na_rep=cudf._NA_REP, + na_rep=str(cudf.NA), ) else: output = repr(preprocess.to_pandas()) diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 484c013f774..a9c54ddcaa1 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -19,7 +19,7 @@ is_string_dtype, is_struct_dtype, ) -from cudf.core.missing import NA +from cudf.core.missing import NA, NaT def dtype_can_compare_equal_to_other(dtype): @@ -290,7 +290,7 @@ def assert_column_equal( def null_safe_scalar_equals(left, right): - if left in {NA, np.nan} or right in {NA, np.nan}: + if left in {NA, NaT, np.nan} or right in {NA, NaT, np.nan}: return left is right return left == right diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 65a15d7c567..549cd8da78e 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1700,7 +1700,12 @@ def test_scalar_null_binops(op, dtype_l, dtype_r): rhs = cudf.Scalar(cudf.NA, dtype=dtype_r) result = op(lhs, rhs) - assert result.value is cudf.NA + assert result.value is ( + cudf.NaT + if cudf.api.types.is_datetime64_dtype(result.dtype) + or cudf.api.types.is_timedelta64_dtype(result.dtype) + else cudf.NA + ) # make sure dtype is the same as had there been a valid scalar valid_lhs = cudf.Scalar(1, dtype=dtype_l) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index dcb8781e712..a59f8b62e7e 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2107,3 +2107,8 @@ def test_datetime_binop_tz_timestamp(op): date_scalar = datetime.datetime.now(datetime.timezone.utc) with pytest.raises(NotImplementedError): op(s, date_scalar) + + +def test_datetime_getitem_na(): + s = cudf.Series([1, 2, None, 3], dtype="datetime64[ns]") + assert s[2] is cudf.NaT diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 87a0424998f..5dd58d8a875 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -697,7 +697,12 @@ def test_list_scalar_host_construction_null(elem_type, nesting_level): dtype = cudf.ListDtype(dtype) slr = cudf.Scalar(None, dtype=dtype) - assert slr.value is cudf.NA + assert slr.value is ( + cudf.NaT + if cudf.api.types.is_datetime64_dtype(slr.dtype) + or cudf.api.types.is_timedelta64_dtype(slr.dtype) + else cudf.NA + ) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index ff4c2e2a14d..a08ab211b8e 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2264,7 +2264,7 @@ def test_parquet_writer_statistics(tmpdir, pdf, add_nulls): pdf = pdf.drop(columns=["col_category", "col_bool"]) if not add_nulls: - # Timedelta types convert NA to None when reading from parquet into + # Timedelta types convert NaT to None when reading from parquet into # pandas which interferes with series.max()/min() for t in TIMEDELTA_TYPES: pdf["col_" + t] = pd.Series(np.arange(len(pdf.index))).astype(t) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index e7fa401f1ec..b944e0483d0 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -40,10 +40,7 @@ def test_null_series(nrows, dtype): ps = sr.to_pandas() pd.options.display.max_rows = int(nrows) - psrepr = repr(ps) - psrepr = psrepr.replace("NaN", "") - psrepr = psrepr.replace("NaT", "") - psrepr = psrepr.replace("None", "") + psrepr = repr(ps).replace("NaN", "").replace("None", "") if "UInt" in psrepr: psrepr = psrepr.replace("UInt", "uint") elif "Int" in psrepr: @@ -71,12 +68,7 @@ def test_null_dataframe(ncols): gdf[dtype] = sr pdf = gdf.to_pandas() pd.options.display.max_columns = int(ncols) - pdf_repr = ( - repr(pdf) - .replace("NaN", "") - .replace("NaT", "") - .replace("None", "") - ) + pdf_repr = repr(pdf).replace("NaN", "").replace("None", "") assert pdf_repr.split() == repr(gdf).split() pd.reset_option("display.max_columns") @@ -359,33 +351,33 @@ def test_dataframe_sliced(gdf, slice, max_seq_items, max_rows): cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[ns]")), "DatetimeIndex([1970-01-01 00:00:00.000000010, " "1970-01-01 00:00:00.000000020," - "\n 1970-01-01 00:00:00.000000030, ],\n " + "\n 1970-01-01 00:00:00.000000030, NaT],\n " "dtype='datetime64[ns]')", ), ( cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[s]")), "DatetimeIndex([1970-01-01 00:00:10, " "1970-01-01 00:00:20, 1970-01-01 00:00:30,\n" - " ],\n dtype='datetime64[s]')", + " NaT],\n dtype='datetime64[s]')", ), ( cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[us]")), "DatetimeIndex([1970-01-01 00:00:00.000010, " "1970-01-01 00:00:00.000020,\n " - "1970-01-01 00:00:00.000030, ],\n " + "1970-01-01 00:00:00.000030, NaT],\n " "dtype='datetime64[us]')", ), ( cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[ms]")), "DatetimeIndex([1970-01-01 00:00:00.010, " "1970-01-01 00:00:00.020,\n " - "1970-01-01 00:00:00.030, ],\n " + "1970-01-01 00:00:00.030, NaT],\n " "dtype='datetime64[ms]')", ), ( cudf.Index(np.array([None] * 10, dtype="datetime64[ms]")), - "DatetimeIndex([, , , , , , , , " - ",\n ],\n dtype='datetime64[ms]')", + "DatetimeIndex([NaT, NaT, NaT, NaT, NaT, NaT, NaT, NaT, " + "NaT, NaT], dtype='datetime64[ms]')", ), ], ) @@ -473,12 +465,7 @@ def test_dataframe_null_index_repr(df, pandas_special_case): pdf = df gdf = cudf.from_pandas(pdf) - expected_repr = ( - repr(pdf) - .replace("NaN", "") - .replace("NaT", "") - .replace("None", "") - ) + expected_repr = repr(pdf).replace("NaN", "").replace("None", "") actual_repr = repr(gdf) if pandas_special_case: @@ -552,12 +539,7 @@ def test_series_null_index_repr(sr, pandas_special_case): psr = sr gsr = cudf.from_pandas(psr) - expected_repr = ( - repr(psr) - .replace("NaN", "") - .replace("NaT", "") - .replace("None", "") - ) + expected_repr = repr(psr).replace("NaN", "").replace("None", "") actual_repr = repr(gsr) if pandas_special_case: @@ -603,9 +585,7 @@ def test_timedelta_series_s_us_repr(data, dtype): sr = cudf.Series(data, dtype=dtype) psr = sr.to_pandas() - expected = ( - repr(psr).replace("timedelta64[ns]", dtype).replace("NaT", "") - ) + expected = repr(psr).replace("timedelta64[ns]", dtype) actual = repr(sr) assert expected.split() == actual.split() @@ -658,7 +638,7 @@ def test_timedelta_series_s_us_repr(data, dtype): """ 0 0 days 00:00:00.001000000 1 0 days 00:00:00.000200000 - 2 + 2 NaT dtype: timedelta64[ns] """ ), @@ -669,7 +649,7 @@ def test_timedelta_series_s_us_repr(data, dtype): """ 0 0 days 00:16:40 1 0 days 00:03:20 - 2 + 2 NaT dtype: timedelta64[ms] """ ), @@ -680,11 +660,11 @@ def test_timedelta_series_s_us_repr(data, dtype): ), textwrap.dedent( """ - 0 - 1 - 2 - 3 - 4 + 0 NaT + 1 NaT + 2 NaT + 3 NaT + 4 NaT dtype: timedelta64[ns] """ ), @@ -695,11 +675,11 @@ def test_timedelta_series_s_us_repr(data, dtype): ), textwrap.dedent( """ - 0 - 1 - 2 - 3 - 4 + 0 NaT + 1 NaT + 2 NaT + 3 NaT + 4 NaT dtype: timedelta64[ms] """ ), @@ -930,10 +910,10 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): """ a b 0 1579 days 08:54:14 10 - 1 11 + 1 NaT 11 2 2839 days 15:29:05 22 3 2586 days 00:33:31 33 - 4 44 + 4 NaT 44 5 42066 days 12:52:14 55 6 0 days 06:27:14 66 """ @@ -961,10 +941,10 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): """ a a 1579 days 08:54:14 - b + b NaT c 2839 days 15:29:05 d 2586 days 00:33:31 - e + e NaT f 42066 days 12:52:14 g 0 days 06:27:14 """ @@ -994,10 +974,10 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): """ a 1 days 13:54:17.654 1 - 2 + NaT 2 2 days 20:09:05.345 3 2 days 14:03:52.411 4 - 5 + NaT 5 42 days 01:35:48.734 6 0 days 00:00:23.234 7 """ @@ -1027,10 +1007,10 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): """ a 0 days 00:00:00.136457654 a - f + NaT f 0 days 00:00:00.245345345 q 0 days 00:00:00.223432411 e - w + NaT w 0 days 00:00:03.634548734 e 0 days 00:00:00.000023234 t """ @@ -1057,7 +1037,7 @@ def test_timedelta_dataframe_repr(df, expected_repr): cudf.Index( [None, None, None, None, None], dtype="timedelta64[us]" ), - "TimedeltaIndex([, , , , ], " + "TimedeltaIndex([NaT, NaT, NaT, NaT, NaT], " "dtype='timedelta64[us]')", ), ( @@ -1073,9 +1053,9 @@ def test_timedelta_dataframe_repr(df, expected_repr): ], dtype="timedelta64[us]", ), - "TimedeltaIndex([0 days 00:02:16.457654, , " + "TimedeltaIndex([0 days 00:02:16.457654, NaT, " "0 days 00:04:05.345345, " - "0 days 00:03:43.432411, ," + "0 days 00:03:43.432411, NaT," " 0 days 01:00:34.548734, 0 days 00:00:00.023234]," " dtype='timedelta64[us]')", ), @@ -1092,8 +1072,8 @@ def test_timedelta_dataframe_repr(df, expected_repr): ], dtype="timedelta64[s]", ), - "TimedeltaIndex([1579 days 08:54:14, , 2839 days 15:29:05," - " 2586 days 00:33:31, , 42066 days 12:52:14, " + "TimedeltaIndex([1579 days 08:54:14, NaT, 2839 days 15:29:05," + " 2586 days 00:33:31, NaT, 42066 days 12:52:14, " "0 days 06:27:14]," " dtype='timedelta64[s]')", ), @@ -1190,7 +1170,7 @@ def test_multiindex_repr(pmi, max_seq_items): .index, textwrap.dedent( """ - MultiIndex([( '', 'abc'), + MultiIndex([( 'NaT', 'abc'), ('1970-01-01 00:00:00.000000001', ), ('1970-01-01 00:00:00.000000002', 'xyz'), ('1970-01-01 00:00:00.000000003', )], @@ -1210,7 +1190,7 @@ def test_multiindex_repr(pmi, max_seq_items): .index, textwrap.dedent( """ - MultiIndex([( '', 'abc', 0.345), + MultiIndex([( 'NaT', 'abc', 0.345), ('1970-01-01 00:00:00.000000001', , ), ('1970-01-01 00:00:00.000000002', 'xyz', 100.0), ('1970-01-01 00:00:00.000000003', , 10.0)], @@ -1230,7 +1210,7 @@ def test_multiindex_repr(pmi, max_seq_items): .index, textwrap.dedent( """ - MultiIndex([('abc', '', 0.345), + MultiIndex([('abc', 'NaT', 0.345), ( , '0 days 00:00:00.000000001', ), ('xyz', '0 days 00:00:00.000000002', 100.0), ( , '0 days 00:00:00.000000003', 10.0)], @@ -1272,10 +1252,10 @@ def test_multiindex_repr(pmi, max_seq_items): .index, textwrap.dedent( """ - MultiIndex([('', ), - ('', ), - ('', ), - ('', )], + MultiIndex([('NaT', ), + ('NaT', ), + ('NaT', ), + ('NaT', )], names=['b', 'a']) """ ), diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index c1aeb987eff..d73a1d40aaa 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -212,7 +212,12 @@ def test_scalar_roundtrip(value): ) def test_null_scalar(dtype): s = cudf.Scalar(None, dtype=dtype) - assert s.value is cudf.NA + if cudf.api.types.is_datetime64_dtype( + dtype + ) or cudf.api.types.is_timedelta64_dtype(dtype): + assert s.value is cudf.NaT + else: + assert s.value is cudf.NA assert s.dtype == ( cudf.dtype(dtype) if not isinstance(dtype, cudf.core.dtypes.DecimalDtype) @@ -236,7 +241,7 @@ def test_null_scalar(dtype): ) def test_nat_to_null_scalar_succeeds(value): s = cudf.Scalar(value) - assert s.value is cudf.NA + assert s.value is cudf.NaT assert not s.is_valid() assert s.dtype == value.dtype @@ -349,7 +354,12 @@ def test_scalar_implicit_int_conversion(value): def test_scalar_invalid_implicit_conversion(cls, dtype): try: - cls(pd.NA) + cls( + pd.NaT + if cudf.api.types.is_datetime64_dtype(dtype) + or cudf.api.types.is_timedelta64_dtype(dtype) + else pd.NA + ) except TypeError as e: with pytest.raises(TypeError, match=re.escape(str(e))): slr = cudf.Scalar(None, dtype=dtype) diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 82ef309d116..681b42fd3bd 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1441,4 +1441,4 @@ def test_timdelta_binop_tz_timestamp(op): def test_timedelta_getitem_na(): s = cudf.Series([1, 2, None, 3], dtype="timedelta64[ns]") - assert s[2] is cudf.NA + assert s[2] is cudf.NaT diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index d5e9e5854df..ea96a0859ce 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -13,9 +13,6 @@ import cudf from cudf._typing import DtypeObj from cudf.api.types import is_bool, is_float, is_integer -from cudf.core.missing import NA - -_NA_REP = "" """Map numpy dtype to pyarrow types. Note that np.bool_ bitwidth (8) is different from pa.bool_ (1). Special @@ -639,7 +636,7 @@ def _can_cast(from_dtype, to_dtype): `np.can_cast` but with some special handling around cudf specific dtypes. """ - if from_dtype in {None, NA}: + if cudf.utils.utils.is_na_like(from_dtype): return True if isinstance(from_dtype, type): from_dtype = cudf.dtype(from_dtype) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index dadd9aa2cc4..e2cb3f145a1 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -345,6 +345,14 @@ def search_range(x: int, ri: range, *, side: str) -> int: return max(min(len(ri), offset), 0) +def is_na_like(obj): + """ + Check if `obj` is a cudf NA value, + i.e., None, cudf.NA or cudf.NaT + """ + return obj is None or obj is cudf.NA or obj is cudf.NaT + + def _get_color_for_nvtx(name): m = hashlib.sha256() m.update(name.encode())