Skip to content

Commit

Permalink
[REVIEW] Raise error when numeric_only=True for non-numeric Series (#12843)
Browse files Browse the repository at this point in the history

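This PR makes `Series.rank` raise a `TypeError` when `numeric_only=True` is passed and the Series has a non-numeric dtype. It also changes the default value of `numeric_only` in `rank` from `None` to `False`.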
  • Loading branch information
galipremsagar authored Feb 24, 2023
1 parent 7d62d4e commit d1b1ea8
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
11 changes: 9 additions & 2 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4771,7 +4771,7 @@ def rank(
self,
axis=0,
method="average",
numeric_only=None,
numeric_only=False,
na_option="keep",
ascending=True,
pct=False,
Expand All @@ -4794,7 +4794,7 @@ def rank(
* max: highest rank in the group
* first: ranks assigned in order they appear in the array
* dense: like 'min', but rank always increases by 1 between groups.
numeric_only : bool, optional
numeric_only : bool, default False
For DataFrame objects, rank only numeric columns if set to True.
na_option : {'keep', 'top', 'bottom'}, default 'keep'
How to rank NaN values:
Expand Down Expand Up @@ -4829,6 +4829,13 @@ def rank(

source = self
if numeric_only:
if isinstance(
source, cudf.Series
) and not _is_non_decimal_numeric_dtype(self.dtype):
raise TypeError(
"Series.rank does not allow numeric_only=True with "
"non-numeric dtype."
)
numeric_cols = (
name
for name in self._data.names
Expand Down
21 changes: 13 additions & 8 deletions python/cudf/cudf/tests/test_rank.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from itertools import chain, combinations_with_replacement, product

Expand Down Expand Up @@ -55,13 +55,18 @@ def test_rank_all_arguments(
assert_eq(gdf["col1"].rank(**kwargs), pdf["col1"].rank(**kwargs))
assert_eq(gdf["col2"].rank(**kwargs), pdf["col2"].rank(**kwargs))
if numeric_only:
with pytest.warns(FutureWarning):
expect = pdf["str"].rank(**kwargs)
got = gdf["str"].rank(**kwargs)
assert expect.empty == got.empty
expected = pdf.select_dtypes(include=np.number)
else:
expected = pdf.copy(deep=True)
assert_exceptions_equal(
lfunc=pdf["str"].rank,
rfunc=gdf["str"].rank,
lfunc_args_and_kwargs=(
[],
kwargs,
),
rfunc_args_and_kwargs=(
[],
kwargs,
),
)

actual = gdf.rank(**kwargs)
expected = pdf.rank(**kwargs)
Expand Down

0 comments on commit d1b1ea8

Please sign in to comment.