From 3f175ced5728f8e5a20d8cfc5dda21286d62d3df Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 25 Feb 2022 08:49:46 -0600 Subject: [PATCH] Fix warnings in test_categorical.py. (#10354) This PR catches or silences warnings in `test_categorical.py`. (I am working through one test file at a time so we can enable `-Werr` in the future.) Most of the warnings come from deprecated `inplace` arguments to pandas' categorical functions. The `inplace` argument will be removed in pandas 2.0. Until then, we should just hide the warning. Additionally, I refactored some `inplace` behavior to make the expected behavior of the test clearer. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/10354 --- python/cudf/cudf/core/_compat.py | 3 +- python/cudf/cudf/tests/test_categorical.py | 199 ++++++++++++--------- 2 files changed, 119 insertions(+), 83 deletions(-) diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 2cf579ce3f1..70162c7afc6 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import pandas as pd from packaging import version @@ -9,4 +9,5 @@ PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2") PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2") PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0") +PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4") PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0") diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index bc3ae721554..19a5cd4a49d 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -1,14 +1,17 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import operator import string +import warnings +from contextlib import contextmanager +from textwrap import dedent import numpy as np import pandas as pd import pytest import cudf -from cudf.core._compat import PANDAS_GE_110 +from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_134 from cudf.testing._utils import ( NUMERIC_TYPES, assert_eq, @@ -16,6 +19,30 @@ ) +@contextmanager +def _hide_deprecated_pandas_categorical_inplace_warnings(function_name): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ( + "The `inplace` parameter in " + f"pandas.Categorical.{function_name} is deprecated and will " + "be removed in a future version." + ), + category=FutureWarning, + ) + yield + + +@contextmanager +def _hide_cudf_safe_casting_warning(): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", "Can't safely cast column", category=UserWarning, + ) + yield + + @pytest.fixture def pd_str_cat(): categories = list("abc") @@ -51,9 +78,8 @@ def test_categorical_basic(): assert_eq(cat.codes, cudf_cat.codes.to_numpy()) +@pytest.mark.skipif(not PANDAS_GE_110, reason="requires pandas>=1.1.0") def test_categorical_integer(): - if not PANDAS_GE_110: - pytest.xfail(reason="pandas >=1.1 required") cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) sr = cudf.Series(cat) @@ -67,17 +93,17 @@ def test_categorical_integer(): sr.cat.codes.astype(pdsr.cat.codes.dtype).fillna(-1).to_numpy(), ) - string = str(sr) - expect_str = """ -0 a -1 -2 -3 c -4 a -dtype: category -Categories (3, object): ['a', 'b', 'c'] -""" - assert string.split() == expect_str.split() + expect_str = dedent( + """\ + 0 a + 1 + 2 + 3 c + 4 a + dtype: category + Categories (3, object): ['a', 'b', 'c']""" + ) + assert str(sr) == expect_str def test_categorical_compare_unordered(): @@ -152,23 +178,9 @@ def test_categorical_binary_add(): rfunc=operator.add, lfunc_args_and_kwargs=([pdsr, pdsr],), rfunc_args_and_kwargs=([sr, sr],), - expected_error_message="Series of dtype `category` cannot perform " - "the operation: add", - ) - - -def test_categorical_unary_ceil(): - cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - pdsr = pd.Series(cat) - sr = cudf.Series(cat) - - assert_exceptions_equal( - lfunc=getattr, - rfunc=sr.ceil, - lfunc_args_and_kwargs=([pdsr, "ceil"],), - check_exception_type=False, - expected_error_message="Series of dtype `category` cannot " - "perform the operation: ceil", + expected_error_message=( + "Series of dtype `category` cannot perform the operation: add" + ), ) @@ -238,26 +250,25 @@ def test_cat_series_binop_error(): df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) df["b"] = np.arange(len(df)) - dfa = df["a"] - dfb = df["b"] + pdf = df.to_pandas() - # lhs is a categorical + # lhs is categorical assert_exceptions_equal( lfunc=operator.add, rfunc=operator.add, - lfunc_args_and_kwargs=([dfa, dfb],), - rfunc_args_and_kwargs=([dfa, dfb],), - check_exception_type=False, - expected_error_message="Series of dtype `category` cannot " - "perform the operation: add", + lfunc_args_and_kwargs=([pdf["a"], pdf["b"]],), + rfunc_args_and_kwargs=([df["a"], df["b"]],), + expected_error_message=( + "Series of dtype `category` cannot perform the operation: add" + ), ) - # if lhs is a numerical + + # lhs is numerical assert_exceptions_equal( lfunc=operator.add, rfunc=operator.add, - lfunc_args_and_kwargs=([dfb, dfa],), - rfunc_args_and_kwargs=([dfb, dfa],), - check_exception_type=False, + lfunc_args_and_kwargs=([pdf["b"], pdf["a"]],), + rfunc_args_and_kwargs=([df["b"], df["a"]],), expected_error_message="'add' operator not supported", ) @@ -367,8 +378,9 @@ def test_categorical_as_ordered(pd_str_cat, inplace): pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace) cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace) - pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1 - cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1 + if inplace: + pd_sr_1 = pd_sr + cd_sr_1 = cd_sr assert cd_sr_1.cat.ordered is True assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered @@ -386,8 +398,9 @@ def test_categorical_as_unordered(pd_str_cat, inplace): pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace) cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace) - pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1 - cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1 + if inplace: + pd_sr_1 = pd_sr + cd_sr_1 = cd_sr assert cd_sr_1.cat.ordered is False assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered @@ -401,8 +414,9 @@ def test_categorical_as_unordered(pd_str_cat, inplace): [ pytest.param( True, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/43232" + marks=pytest.mark.skipif( + not PANDAS_GE_134, + reason="https://github.com/pandas-dev/pandas/issues/43232", ), ), False, @@ -421,10 +435,14 @@ def test_categorical_reorder_categories( kwargs = dict(ordered=to_ordered, inplace=inplace) - pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs) + with _hide_deprecated_pandas_categorical_inplace_warnings( + "reorder_categories" + ): + pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs) cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs) - pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1 - cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1 + if inplace: + pd_sr_1 = pd_sr + cd_sr_1 = cd_sr assert_eq(pd_sr_1, cd_sr_1) @@ -436,8 +454,9 @@ def test_categorical_reorder_categories( [ pytest.param( True, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/43232" + marks=pytest.mark.skipif( + not PANDAS_GE_134, + reason="https://github.com/pandas-dev/pandas/issues/43232", ), ), False, @@ -452,10 +471,14 @@ def test_categorical_add_categories(pd_str_cat, inplace): assert str(pd_sr) == str(cd_sr) - pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace) + with _hide_deprecated_pandas_categorical_inplace_warnings( + "add_categories" + ): + pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace) cd_sr_1 = cd_sr.cat.add_categories(["d"], inplace=inplace) - pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1 - cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1 + if inplace: + pd_sr_1 = pd_sr + cd_sr_1 = cd_sr assert "d" in pd_sr_1.cat.categories.to_list() assert "d" in cd_sr_1.cat.categories.to_pandas().to_list() @@ -468,8 +491,9 @@ def test_categorical_add_categories(pd_str_cat, inplace): [ pytest.param( True, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/43232" + marks=pytest.mark.skipif( + not PANDAS_GE_134, + reason="https://github.com/pandas-dev/pandas/issues/43232", ), ), False, @@ -484,10 +508,14 @@ def test_categorical_remove_categories(pd_str_cat, inplace): assert str(pd_sr) == str(cd_sr) - pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace) + with _hide_deprecated_pandas_categorical_inplace_warnings( + "remove_categories" + ): + pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace) cd_sr_1 = cd_sr.cat.remove_categories(["a"], inplace=inplace) - pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1 - cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1 + if inplace: + pd_sr_1 = pd_sr + cd_sr_1 = cd_sr assert "a" not in pd_sr_1.cat.categories.to_list() assert "a" not in cd_sr_1.cat.categories.to_pandas().to_list() @@ -495,13 +523,16 @@ def test_categorical_remove_categories(pd_str_cat, inplace): assert_eq(pd_sr_1, cd_sr_1) # test using ordered operators - assert_exceptions_equal( - lfunc=cd_sr.to_pandas().cat.remove_categories, - rfunc=cd_sr.cat.remove_categories, - lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}), - rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}), - expected_error_message="removals must all be in old categories", - ) + with _hide_deprecated_pandas_categorical_inplace_warnings( + "remove_categories" + ): + assert_exceptions_equal( + lfunc=cd_sr.to_pandas().cat.remove_categories, + rfunc=cd_sr.cat.remove_categories, + lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}), + rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}), + expected_error_message="removals must all be in old categories", + ) def test_categorical_dataframe_slice_copy(): @@ -583,19 +614,21 @@ def test_categorical_set_categories_categoricals(data, new_categories): pd_data = data.copy().astype("category") gd_data = cudf.from_pandas(pd_data) - assert_eq( - pd_data.cat.set_categories(new_categories=new_categories), - gd_data.cat.set_categories(new_categories=new_categories), - ) + expected = pd_data.cat.set_categories(new_categories=new_categories) + with _hide_cudf_safe_casting_warning(): + actual = gd_data.cat.set_categories(new_categories=new_categories) - assert_eq( - pd_data.cat.set_categories( - new_categories=pd.Series(new_categories, dtype="category") - ), - gd_data.cat.set_categories( - new_categories=cudf.Series(new_categories, dtype="category") - ), + assert_eq(expected, actual) + + expected = pd_data.cat.set_categories( + new_categories=pd.Series(new_categories, dtype="category") ) + with _hide_cudf_safe_casting_warning(): + actual = gd_data.cat.set_categories( + new_categories=cudf.Series(new_categories, dtype="category") + ) + + assert_eq(expected, actual) @pytest.mark.parametrize( @@ -703,7 +736,9 @@ def test_add_categories(data, add): gds = cudf.Series(data, dtype="category") expected = pds.cat.add_categories(add) - actual = gds.cat.add_categories(add) + with _hide_cudf_safe_casting_warning(): + actual = gds.cat.add_categories(add) + assert_eq( expected.cat.codes, actual.cat.codes.astype(expected.cat.codes.dtype) )