From d47e25330d194140d0e19c7ee8b369cafbd3447b Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Mon, 2 Aug 2021 12:44:14 -0400 Subject: [PATCH 1/3] handle cudf.NA --- python/cudf/cudf/core/dataframe.py | 2 ++ python/cudf/cudf/tests/test_dataframe.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0355b677337..0ed4553f9f8 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3079,6 +3079,8 @@ def insert(self, loc, name, value): ) if _is_scalar_or_zero_d_array(value): + if value is cudf.NA: + value = None value = utils.scalar_broadcast_to(value, len(self)) if len(self) == 0: diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 9acf6783095..b63739a0915 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -5054,6 +5054,18 @@ def test_insert(data): assert_eq(pdf, gdf) +@pytest.mark.parametrize( + "data", [{"A": [1, 2, 3], "B": ["a", "b", "c"]}], +) +def test_insert_NA(data): + pdf = pd.DataFrame.from_dict(data) + gdf = cudf.DataFrame.from_pandas(pdf) + + pdf["C"] = pd.NA + gdf["C"] = cudf.NA + assert_eq(pdf, gdf) + + def test_cov(): gdf = cudf.datasets.randomdata(10) pdf = gdf.to_pandas() From 7665bc1e7a696de67cab3f658b43e79b383237f0 Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Mon, 2 Aug 2021 13:39:58 -0400 Subject: [PATCH 2/3] move fix to scalar_broadcast_to --- python/cudf/cudf/core/dataframe.py | 2 -- python/cudf/cudf/utils/utils.py | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0ed4553f9f8..0355b677337 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3079,8 +3079,6 @@ def insert(self, loc, name, value): ) if _is_scalar_or_zero_d_array(value): - if value is cudf.NA: - value = None value = utils.scalar_broadcast_to(value, len(self)) if len(self) == 0: diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 209f61ad399..8a806ed7652 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -42,9 +42,13 @@ def scalar_broadcast_to(scalar, size, dtype=None): if isinstance(size, (tuple, list)): size = size[0] - if scalar is None or ( - isinstance(scalar, (np.datetime64, np.timedelta64)) - and np.isnat(scalar) + if ( + scalar is None + or scalar is cudf.NA + or ( + isinstance(scalar, (np.datetime64, np.timedelta64)) + and np.isnat(scalar) + ) ): if dtype is None: dtype = "object" From 80a5b1525997224359af8ada0724d13f2252617e Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Mon, 2 Aug 2021 14:02:02 -0400 Subject: [PATCH 3/3] use existing method --- python/cudf/cudf/utils/utils.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 8a806ed7652..1293122c31a 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -42,14 +42,7 @@ def scalar_broadcast_to(scalar, size, dtype=None): if isinstance(size, (tuple, list)): size = size[0] - if ( - scalar is None - or scalar is cudf.NA - or ( - isinstance(scalar, (np.datetime64, np.timedelta64)) - and np.isnat(scalar) - ) - ): + if cudf._lib.scalar._is_null_host_scalar(scalar): if dtype is None: dtype = "object" return column.column_empty(size, dtype=dtype, masked=True)