From d1b1ea80a88053fae05f07f7805c96614902b70b Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 24 Feb 2023 17:10:07 -0600 Subject: [PATCH] [REVIEW] Raise error when `numeric_only=True` for non-numeric Series (#12843) This PR raises an error when numeric_only=True for rank if the Series is of non-numeric dtype. --- python/cudf/cudf/core/indexed_frame.py | 11 +++++++++-- python/cudf/cudf/tests/test_rank.py | 21 +++++++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 2992cb005e5..159cc318789 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4771,7 +4771,7 @@ def rank( self, axis=0, method="average", - numeric_only=None, + numeric_only=False, na_option="keep", ascending=True, pct=False, @@ -4794,7 +4794,7 @@ def rank( * max: highest rank in the group * first: ranks assigned in order they appear in the array * dense: like 'min', but rank always increases by 1 between groups. - numeric_only : bool, optional + numeric_only : bool, default False For DataFrame objects, rank only numeric columns if set to True. na_option : {'keep', 'top', 'bottom'}, default 'keep' How to rank NaN values: @@ -4829,6 +4829,13 @@ def rank( source = self if numeric_only: + if isinstance( + source, cudf.Series + ) and not _is_non_decimal_numeric_dtype(self.dtype): + raise TypeError( + "Series.rank does not allow numeric_only=True with " + "non-numeric dtype." + ) numeric_cols = ( name for name in self._data.names diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 9bd67309ece..0aa3d53f962 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from itertools import chain, combinations_with_replacement, product @@ -55,13 +55,18 @@ def test_rank_all_arguments( assert_eq(gdf["col1"].rank(**kwargs), pdf["col1"].rank(**kwargs)) assert_eq(gdf["col2"].rank(**kwargs), pdf["col2"].rank(**kwargs)) if numeric_only: - with pytest.warns(FutureWarning): - expect = pdf["str"].rank(**kwargs) - got = gdf["str"].rank(**kwargs) - assert expect.empty == got.empty - expected = pdf.select_dtypes(include=np.number) - else: - expected = pdf.copy(deep=True) + assert_exceptions_equal( + lfunc=pdf["str"].rank, + rfunc=gdf["str"].rank, + lfunc_args_and_kwargs=( + [], + kwargs, + ), + rfunc_args_and_kwargs=( + [], + kwargs, + ), + ) actual = gdf.rank(**kwargs) expected = pdf.rank(**kwargs)