[REVIEW] Raise error when numeric_only=True for non-numeric Series (#12843)

This PR raises a TypeError when numeric_only=True is passed to rank and the Series has a non-numeric dtype.
rapidsai · Feb 24, 2023 · d1b1ea8 · d1b1ea8
1 parent 7d62d4e
commit d1b1ea8
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 10 deletions.
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
@@ -4771,7 +4771,7 @@ def rank(
         self,
         axis=0,
         method="average",
-        numeric_only=None,
+        numeric_only=False,
         na_option="keep",
         ascending=True,
         pct=False,
@@ -4794,7 +4794,7 @@ def rank(
             * max: highest rank in the group
             * first: ranks assigned in order they appear in the array
             * dense: like 'min', but rank always increases by 1 between groups.
-        numeric_only : bool, optional
+        numeric_only : bool, default False
             For DataFrame objects, rank only numeric columns if set to True.
         na_option : {'keep', 'top', 'bottom'}, default 'keep'
             How to rank NaN values:
@@ -4829,6 +4829,13 @@ def rank(
 
         source = self
         if numeric_only:
+            if isinstance(
+                source, cudf.Series
+            ) and not _is_non_decimal_numeric_dtype(self.dtype):
+                raise TypeError(
+                    "Series.rank does not allow numeric_only=True with "
+                    "non-numeric dtype."
+                )
             numeric_cols = (
                 name
                 for name in self._data.names

diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from itertools import chain, combinations_with_replacement, product
 
@@ -55,13 +55,18 @@ def test_rank_all_arguments(
     assert_eq(gdf["col1"].rank(**kwargs), pdf["col1"].rank(**kwargs))
     assert_eq(gdf["col2"].rank(**kwargs), pdf["col2"].rank(**kwargs))
     if numeric_only:
-        with pytest.warns(FutureWarning):
-            expect = pdf["str"].rank(**kwargs)
-        got = gdf["str"].rank(**kwargs)
-        assert expect.empty == got.empty
-        expected = pdf.select_dtypes(include=np.number)
-    else:
-        expected = pdf.copy(deep=True)
+        assert_exceptions_equal(
+            lfunc=pdf["str"].rank,
+            rfunc=gdf["str"].rank,
+            lfunc_args_and_kwargs=(
+                [],
+                kwargs,
+            ),
+            rfunc_args_and_kwargs=(
+                [],
+                kwargs,
+            ),
+        )
 
     actual = gdf.rank(**kwargs)
     expected = pdf.rank(**kwargs)