Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create objects from iterables that contain cudf.NA #8442

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pyarrow as pa
from pandas._libs.missing import NAType as pd_NAType

from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar
from cudf.core.column.column import ColumnBase
Expand Down Expand Up @@ -354,15 +355,11 @@ def astype(self, dtype):
return Scalar(self.value, dtype)


class _NAType:
    """Type of the ``cudf.NA`` missing-value sentinel.

    Instances carry no state. They render as ``<NA>`` and refuse to be
    coerced to a boolean.
    """

    def __init__(self):
        # Nothing to set up: the sentinel has no attributes.
        return None

    def __bool__(self):
        # A missing value is neither true nor false, so implicit truth
        # testing is rejected instead of silently picking one.
        raise TypeError("boolean value of cudf.NA is ambiguous")

    def __repr__(self):
        return "<NA>"
class _NAType(pd_NAType):
    """cudf's NA type, a thin subclass of ``pandas._libs.missing.NAType``.

    Pandas' NAType enforces that a single instance exists at a time, so
    instantiating this class yields the existing pandas instance:
    ``id(cudf.NA) == id(pd.NA)``.
    """


NA = _NAType()
16 changes: 15 additions & 1 deletion python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import cudf
from cudf.core._compat import PANDAS_GE_110
from cudf.tests.utils import assert_eq, assert_exceptions_equal
from cudf.tests.utils import NUMERIC_TYPES, assert_eq, assert_exceptions_equal


@pytest.fixture
Expand Down Expand Up @@ -790,3 +790,17 @@ def test_categorical_setitem_with_nan():
[1, np.nan, np.nan, np.nan, np.nan, None], nan_as_null=False
).astype(gs.dtype)
assert_eq(gs, expected_series)


@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "object"])
@pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]])
def test_series_construction_with_nulls(input_obj, dtype):
    """Categorical series built from values mixed with cudf.NA match pandas.

    Every non-null element is first cast to the scalar type of ``dtype``;
    ``cudf.NA`` entries are passed through unchanged.
    """
    scalar_type = np.dtype(dtype).type
    values = [
        cudf.NA if item is cudf.NA else scalar_type(item)
        for item in input_obj
    ]

    expected = pd.Series(values, dtype="category")
    actual = cudf.Series(values, dtype="category").to_pandas()

    assert_eq(expected, actual)
11 changes: 11 additions & 0 deletions python/cudf/cudf/tests/test_decimal.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

import decimal
from decimal import Decimal

import numpy as np
Expand Down Expand Up @@ -286,3 +287,13 @@ def test_series_setitem_decimal(args):
expect = _decimal_series(expect, dtype)
data[to] = item
assert_eq(data, expect)


@pytest.mark.parametrize(
    "input_obj",
    [
        [decimal.Decimal(1), cudf.NA, decimal.Decimal(3)],
    ],
)
def test_series_construction_with_nulls(input_obj):
    """A Series built from Decimals mixed with cudf.NA matches pyarrow."""
    expected = pa.array(input_obj, from_pandas=True)
    actual = cudf.Series(input_obj).to_arrow()

    assert expected == actual
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,13 @@ def test_list_getitem(indata, expect):
list_sr = cudf.Series([indata])
# __getitem__ shall fill None with cudf.NA
assert list_sr[0] == expect


@pytest.mark.parametrize(
    "input_obj",
    [
        [[1, cudf.NA, 3]],
        [[1, cudf.NA, 3], [4, 5, cudf.NA]],
    ],
)
def test_construction_series_with_nulls(input_obj):
    """A list Series built from nested lists holding cudf.NA matches pyarrow."""
    expected = pa.array(input_obj, from_pandas=True)
    actual = cudf.Series(input_obj).to_arrow()

    assert expected == actual
24 changes: 23 additions & 1 deletion python/cudf/cudf/tests/test_numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import cudf
from cudf.core._compat import PANDAS_GE_100
from cudf.tests.utils import assert_eq
from cudf.tests.utils import NUMERIC_TYPES, assert_eq
from cudf.utils.dtypes import cudf_dtypes_to_pandas_dtypes


def test_can_cast_safely_same_kind():
Expand Down Expand Up @@ -384,3 +385,24 @@ def test_to_numeric_error(data, errors):
got = cudf.to_numeric(data, errors=errors)

assert_eq(expect, got)


@pytest.mark.parametrize("dtype", NUMERIC_TYPES)
@pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]])
def test_series_construction_with_nulls(dtype, input_obj):
    """Numeric series built from data containing cudf.NA match pandas.

    Two input containers are checked against the equivalent pandas
    nullable dtype: a plain Python list, and a numpy array holding numpy
    scalars of ``dtype`` alongside ``cudf.NA``.
    """
    dtype = np.dtype(dtype)
    pandas_dtype = cudf_dtypes_to_pandas_dtypes[dtype]

    # Plain list of Python scalars
    expect = pd.Series(input_obj, dtype=pandas_dtype)
    got = cudf.Series(input_obj, dtype=dtype).to_pandas(nullable=True)
    assert_eq(expect, got)

    # numpy array of objects: numpy scalars of the target dtype plus NA
    np_data = np.array(
        [dtype.type(v) if v is not cudf.NA else cudf.NA for v in input_obj]
    )

    # Construct from np_data, not input_obj; otherwise this section just
    # repeats the list case above and the array path is never exercised.
    expect = pd.Series(np_data, dtype=pandas_dtype)
    got = cudf.Series(np_data, dtype=dtype).to_pandas(nullable=True)
    assert_eq(expect, got)
6 changes: 1 addition & 5 deletions python/cudf/cudf/tests/test_scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,7 @@ def test_scalar_invalid_implicit_conversion(cls, dtype):
try:
cls(pd.NA)
except TypeError as e:

error = (
str(e).replace("NAType", "_NAType").replace(" NA ", " cudf.NA ")
)
with pytest.raises(TypeError, match=re.escape(str(error))):
with pytest.raises(TypeError, match=re.escape(str(e))):
slr = pycudf_scalar(None, dtype=dtype)
cls(slr)

Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_struct.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020, NVIDIA CORPORATION.

import pandas as pd
import pyarrow as pa
import pytest

import cudf
Expand Down Expand Up @@ -44,3 +45,11 @@ def test_struct_for_field(key, expect):
expect = cudf.Series(expect)
got = sr.struct.field(key)
assert_eq(expect, got)


@pytest.mark.parametrize(
    "input_obj",
    [
        [{"a": 1, "b": cudf.NA, "c": 3}],
    ],
)
def test_series_construction_with_nulls(input_obj):
    """A struct Series built from dicts holding cudf.NA matches pyarrow."""
    expected = pa.array(input_obj, from_pandas=True)
    actual = cudf.Series(input_obj).to_arrow()

    assert expected == actual