diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index c13ec33c51c..79ec25c1843 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1866,7 +1866,7 @@ def as_memoryview(arbitrary: Any) -> Optional[memoryview]: def as_column( arbitrary: Any, - nan_as_null: Optional[bool] = None, + nan_as_null: Optional[bool] = False, dtype: Optional[Dtype] = None, length: Optional[int] = None, ): @@ -1918,20 +1918,18 @@ def as_column( if dtype is not None: column = column.astype(dtype) return column - elif isinstance(arbitrary, ColumnBase): - if dtype is not None: - return arbitrary.astype(dtype) + elif isinstance(arbitrary, (ColumnBase, cudf.Series, cudf.BaseIndex)): + if isinstance(arbitrary, cudf.Series): + column = arbitrary._column + elif isinstance(arbitrary, cudf.BaseIndex): + column = arbitrary._values else: - return arbitrary - elif isinstance(arbitrary, cudf.Series): - data = arbitrary._column - if dtype is not None: - data = data.astype(dtype) - elif isinstance(arbitrary, cudf.BaseIndex): - data = arbitrary._values + column = arbitrary + if column.dtype.kind == "f" and (nan_as_null is None or nan_as_null): + column = column.nans_to_nulls() if dtype is not None: - data = data.astype(dtype) - + column = column.astype(dtype) + return column elif hasattr(arbitrary, "__cuda_array_interface__"): desc = arbitrary.__cuda_array_interface__ shape = desc["shape"] diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 3cbe58ed39c..a7b2027f9f0 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. import itertools import warnings @@ -770,7 +770,7 @@ def get_dummies( result_data.update(col_enc_data) return cudf.DataFrame._from_data(result_data, index=df._index) else: - ser = cudf.Series(df) + ser = cudf.Series(df, nan_as_null=False) unique = _get_unique(column=ser._column, dummy_na=dummy_na) data = _one_hot_encode_column( column=ser._column, diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index bc1eaef86db..1208b9382fd 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1705,7 +1705,7 @@ def _concat(cls, objs, axis=0, index=True): if len(objs): col = col._with_type_metadata(objs[0].dtype) - return cls(data=col, index=index, name=name) + return cls._from_data({name: col}, index=index) @property # type: ignore @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index 6e34817c4fd..1a2890f09a1 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import itertools from contextlib import ExitStack as does_not_raise diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 36033a72479..848da861acc 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -2616,6 +2616,14 @@ def test_series_error_nan_non_float_dtypes(): s[0] = np.nan +@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series]) +def test_nan_as_null_from_cudf_objects(klass): + data = klass(pa.array([float("nan")])) + result = klass(data, nan_as_null=True) + expected = klass(pa.array([None], type=pa.float64())) + assert_eq(result, expected) + + @pytest.mark.parametrize( "dtype", [