From b3eb2aae310b2c01a2df589d1fdac56e3e72290d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 May 2024 03:33:21 +0000 Subject: [PATCH 1/2] Allow fillna to validate for Categories.fillna --- python/cudf/cudf/core/column/categorical.py | 6 +++--- python/cudf/cudf/core/frame.py | 15 +++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index e3e73035046..c4964edf32d 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1045,9 +1045,6 @@ def fillna( """ Fill null values with *fill_value* """ - if not self.nullable: - return self - if fill_value is not None: fill_is_scalar = np.isscalar(fill_value) @@ -1079,6 +1076,9 @@ def fillna( self.codes.dtype ) + if not self.nullable: + return self.copy() + return super().fillna(fill_value, method=method) def indices_of( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 017190ab5b4..58932db2bda 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -762,10 +762,17 @@ def fillna( else: replace_val = None should_fill = ( - col_name in value - and col.has_nulls(include_nan=True) - and not libcudf.scalar._is_null_host_scalar(replace_val) - ) or method is not None + ( + col_name in value + and col.has_nulls(include_nan=True) + and not libcudf.scalar._is_null_host_scalar(replace_val) + ) + or method is not None + or ( + isinstance(col, cudf.core.column.CategoricalColumn) + and not libcudf.scalar._is_null_host_scalar(replace_val) + ) + ) if should_fill: filled_data[col_name] = col.fillna(replace_val, method) else: From d7b0a97a11791f9fef2e9a42b567804b4f04e012 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 May 2024 11:32:45 +0000 Subject: [PATCH 2/2] add test --- python/cudf/cudf/core/column/categorical.py | 4 +++- python/cudf/cudf/tests/test_categorical.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index c4964edf32d..dc51cd4f28f 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1076,8 +1076,10 @@ def fillna( self.codes.dtype ) + # Validation of `fill_value` will have to be performed + # before returning self. if not self.nullable: - return self.copy() + return self return super().fillna(fill_value, method=method) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 7aba2e45532..07ce81e3c39 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -859,3 +859,19 @@ def test_cat_from_scalar(scalar): gs = cudf.Series(scalar, dtype="category") assert_eq(ps, gs) + + +def test_cat_groupby_fillna(): + ps = pd.Series(["a", "b", "c"], dtype="category") + gs = cudf.from_pandas(ps) + + with pytest.warns(FutureWarning): + pg = ps.groupby(ps) + gg = gs.groupby(gs) + + assert_exceptions_equal( + lfunc=pg.fillna, + rfunc=gg.fillna, + lfunc_args_and_kwargs=(("d",), {}), + rfunc_args_and_kwargs=(("d",), {}), + )