From 338b933a898a92fb4f05cbf454790aee09cac59d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 24 Feb 2023 08:29:10 -0800 Subject: [PATCH 1/2] Raise error for numeric_only series on non-numeric data --- python/cudf/cudf/core/indexed_frame.py | 7 +++++++ python/cudf/cudf/tests/test_rank.py | 21 +++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 2992cb005e5..8414a5cead4 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4829,6 +4829,13 @@ def rank( source = self if numeric_only: + if isinstance( + source, cudf.Series + ) and not _is_non_decimal_numeric_dtype(self.dtype): + raise TypeError( + "Series.rank does not allow numeric_only=True with " + "non-numeric dtype." + ) numeric_cols = ( name for name in self._data.names diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 9bd67309ece..0aa3d53f962 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from itertools import chain, combinations_with_replacement, product @@ -55,13 +55,18 @@ def test_rank_all_arguments( assert_eq(gdf["col1"].rank(**kwargs), pdf["col1"].rank(**kwargs)) assert_eq(gdf["col2"].rank(**kwargs), pdf["col2"].rank(**kwargs)) if numeric_only: - with pytest.warns(FutureWarning): - expect = pdf["str"].rank(**kwargs) - got = gdf["str"].rank(**kwargs) - assert expect.empty == got.empty - expected = pdf.select_dtypes(include=np.number) - else: - expected = pdf.copy(deep=True) + assert_exceptions_equal( + lfunc=pdf["str"].rank, + rfunc=gdf["str"].rank, + lfunc_args_and_kwargs=( + [], + kwargs, + ), + rfunc_args_and_kwargs=( + [], + kwargs, + ), + ) actual = gdf.rank(**kwargs) expected = pdf.rank(**kwargs) From e658a586d113720cb45fc7e9f79c0bebcd63555b Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 24 Feb 2023 09:20:24 -0800 Subject: [PATCH 2/2] Change default of numeric_only in rank from None to False --- python/cudf/cudf/core/indexed_frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 8414a5cead4..159cc318789 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4771,7 +4771,7 @@ def rank( self, axis=0, method="average", - numeric_only=None, + numeric_only=False, na_option="keep", ascending=True, pct=False, @@ -4794,7 +4794,7 @@ def rank( * max: highest rank in the group * first: ranks assigned in order they appear in the array * dense: like 'min', but rank always increases by 1 between groups. - numeric_only : bool, optional + numeric_only : bool, default False For DataFrame objects, rank only numeric columns if set to True. na_option : {'keep', 'top', 'bottom'}, default 'keep' How to rank NaN values: