Skip to content

Commit

Permalink
Fix warnings in test_categorical.py. (NVIDIA#10354)
Browse files Browse the repository at this point in the history
This PR catches or silences warnings in `test_categorical.py`. (I am working through one test file at a time so we can enable `-Werr` in the future.) Most of the warnings come from deprecated `inplace` arguments to pandas' categorical functions. The `inplace` argument will be removed in pandas 2.0. Until then, we should just hide the warning.

Additionally, I refactored some `inplace` behavior to make the expected behavior of the test clearer.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: rapidsai/cudf#10354
  • Loading branch information
bdice authored Feb 25, 2022
1 parent 044922d commit 3f175ce
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 83 deletions.
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import pandas as pd
from packaging import version
Expand All @@ -9,4 +9,5 @@
PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0")
PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4")
PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
199 changes: 117 additions & 82 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,48 @@
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

import operator
import string
import warnings
from contextlib import contextmanager
from textwrap import dedent

import numpy as np
import pandas as pd
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_110
from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_134
from cudf.testing._utils import (
NUMERIC_TYPES,
assert_eq,
assert_exceptions_equal,
)


@contextmanager
def _hide_deprecated_pandas_categorical_inplace_warnings(function_name):
    """Silence pandas' ``inplace`` deprecation warning for one method.

    While the context is active, a ``FutureWarning`` whose text starts with
    the deprecation notice for ``pandas.Categorical.<function_name>`` is
    ignored. All other warnings are left alone, and the previous warning
    filters are restored when the context exits.

    Parameters
    ----------
    function_name : str
        Name of the ``pandas.Categorical`` method whose ``inplace`` warning
        should be hidden, e.g. ``"add_categories"``.
    """
    # Function-scope import keeps this helper self-contained.
    import re

    message = (
        "The `inplace` parameter in "
        f"pandas.Categorical.{function_name} is deprecated and will "
        "be removed in a future version."
    )
    with warnings.catch_warnings():
        # ``filterwarnings`` interprets ``message`` as a regular expression
        # matched against the start of the warning text. Escape it so that
        # "." (and anything in ``function_name``) is matched literally
        # instead of acting as a wildcard.
        warnings.filterwarnings(
            "ignore", re.escape(message), category=FutureWarning
        )
        yield


@contextmanager
def _hide_cudf_safe_casting_warning():
    """Silence the "Can't safely cast column" ``UserWarning``.

    While the context is active, any ``UserWarning`` whose text starts with
    ``"Can't safely cast column"`` is ignored. The warning filters that were
    in effect beforehand are restored when the context exits.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings(
            action="ignore",
            message="Can't safely cast column",
            category=UserWarning,
        )
        yield


@pytest.fixture
def pd_str_cat():
categories = list("abc")
Expand Down Expand Up @@ -51,9 +78,8 @@ def test_categorical_basic():
assert_eq(cat.codes, cudf_cat.codes.to_numpy())


@pytest.mark.skipif(not PANDAS_GE_110, reason="requires pandas>=1.1.0")
def test_categorical_integer():
if not PANDAS_GE_110:
pytest.xfail(reason="pandas >=1.1 required")
cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"])
pdsr = pd.Series(cat)
sr = cudf.Series(cat)
Expand All @@ -67,17 +93,17 @@ def test_categorical_integer():
sr.cat.codes.astype(pdsr.cat.codes.dtype).fillna(-1).to_numpy(),
)

string = str(sr)
expect_str = """
0 a
1 <NA>
2 <NA>
3 c
4 a
dtype: category
Categories (3, object): ['a', 'b', 'c']
"""
assert string.split() == expect_str.split()
expect_str = dedent(
"""\
0 a
1 <NA>
2 <NA>
3 c
4 a
dtype: category
Categories (3, object): ['a', 'b', 'c']"""
)
assert str(sr) == expect_str


def test_categorical_compare_unordered():
Expand Down Expand Up @@ -152,23 +178,9 @@ def test_categorical_binary_add():
rfunc=operator.add,
lfunc_args_and_kwargs=([pdsr, pdsr],),
rfunc_args_and_kwargs=([sr, sr],),
expected_error_message="Series of dtype `category` cannot perform "
"the operation: add",
)


def test_categorical_unary_ceil():
cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"])
pdsr = pd.Series(cat)
sr = cudf.Series(cat)

assert_exceptions_equal(
lfunc=getattr,
rfunc=sr.ceil,
lfunc_args_and_kwargs=([pdsr, "ceil"],),
check_exception_type=False,
expected_error_message="Series of dtype `category` cannot "
"perform the operation: ceil",
expected_error_message=(
"Series of dtype `category` cannot perform the operation: add"
),
)


Expand Down Expand Up @@ -238,26 +250,25 @@ def test_cat_series_binop_error():
df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc"))
df["b"] = np.arange(len(df))

dfa = df["a"]
dfb = df["b"]
pdf = df.to_pandas()

# lhs is a categorical
# lhs is categorical
assert_exceptions_equal(
lfunc=operator.add,
rfunc=operator.add,
lfunc_args_and_kwargs=([dfa, dfb],),
rfunc_args_and_kwargs=([dfa, dfb],),
check_exception_type=False,
expected_error_message="Series of dtype `category` cannot "
"perform the operation: add",
lfunc_args_and_kwargs=([pdf["a"], pdf["b"]],),
rfunc_args_and_kwargs=([df["a"], df["b"]],),
expected_error_message=(
"Series of dtype `category` cannot perform the operation: add"
),
)
# if lhs is a numerical

# lhs is numerical
assert_exceptions_equal(
lfunc=operator.add,
rfunc=operator.add,
lfunc_args_and_kwargs=([dfb, dfa],),
rfunc_args_and_kwargs=([dfb, dfa],),
check_exception_type=False,
lfunc_args_and_kwargs=([pdf["b"], pdf["a"]],),
rfunc_args_and_kwargs=([df["b"], df["a"]],),
expected_error_message="'add' operator not supported",
)

Expand Down Expand Up @@ -367,8 +378,9 @@ def test_categorical_as_ordered(pd_str_cat, inplace):

pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace)
cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert cd_sr_1.cat.ordered is True
assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered
Expand All @@ -386,8 +398,9 @@ def test_categorical_as_unordered(pd_str_cat, inplace):

pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace)
cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert cd_sr_1.cat.ordered is False
assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered
Expand All @@ -401,8 +414,9 @@ def test_categorical_as_unordered(pd_str_cat, inplace):
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/43232"
marks=pytest.mark.skipif(
not PANDAS_GE_134,
reason="https://github.com/pandas-dev/pandas/issues/43232",
),
),
False,
Expand All @@ -421,10 +435,14 @@ def test_categorical_reorder_categories(

kwargs = dict(ordered=to_ordered, inplace=inplace)

pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"reorder_categories"
):
pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs)
cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert_eq(pd_sr_1, cd_sr_1)

Expand All @@ -436,8 +454,9 @@ def test_categorical_reorder_categories(
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/43232"
marks=pytest.mark.skipif(
not PANDAS_GE_134,
reason="https://github.com/pandas-dev/pandas/issues/43232",
),
),
False,
Expand All @@ -452,10 +471,14 @@ def test_categorical_add_categories(pd_str_cat, inplace):

assert str(pd_sr) == str(cd_sr)

pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"add_categories"
):
pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace)
cd_sr_1 = cd_sr.cat.add_categories(["d"], inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert "d" in pd_sr_1.cat.categories.to_list()
assert "d" in cd_sr_1.cat.categories.to_pandas().to_list()
Expand All @@ -468,8 +491,9 @@ def test_categorical_add_categories(pd_str_cat, inplace):
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/43232"
marks=pytest.mark.skipif(
not PANDAS_GE_134,
reason="https://github.com/pandas-dev/pandas/issues/43232",
),
),
False,
Expand All @@ -484,24 +508,31 @@ def test_categorical_remove_categories(pd_str_cat, inplace):

assert str(pd_sr) == str(cd_sr)

pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"remove_categories"
):
pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace)
cd_sr_1 = cd_sr.cat.remove_categories(["a"], inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert "a" not in pd_sr_1.cat.categories.to_list()
assert "a" not in cd_sr_1.cat.categories.to_pandas().to_list()

assert_eq(pd_sr_1, cd_sr_1)

# test using ordered operators
assert_exceptions_equal(
lfunc=cd_sr.to_pandas().cat.remove_categories,
rfunc=cd_sr.cat.remove_categories,
lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
expected_error_message="removals must all be in old categories",
)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"remove_categories"
):
assert_exceptions_equal(
lfunc=cd_sr.to_pandas().cat.remove_categories,
rfunc=cd_sr.cat.remove_categories,
lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
expected_error_message="removals must all be in old categories",
)


def test_categorical_dataframe_slice_copy():
Expand Down Expand Up @@ -583,19 +614,21 @@ def test_categorical_set_categories_categoricals(data, new_categories):
pd_data = data.copy().astype("category")
gd_data = cudf.from_pandas(pd_data)

assert_eq(
pd_data.cat.set_categories(new_categories=new_categories),
gd_data.cat.set_categories(new_categories=new_categories),
)
expected = pd_data.cat.set_categories(new_categories=new_categories)
with _hide_cudf_safe_casting_warning():
actual = gd_data.cat.set_categories(new_categories=new_categories)

assert_eq(
pd_data.cat.set_categories(
new_categories=pd.Series(new_categories, dtype="category")
),
gd_data.cat.set_categories(
new_categories=cudf.Series(new_categories, dtype="category")
),
assert_eq(expected, actual)

expected = pd_data.cat.set_categories(
new_categories=pd.Series(new_categories, dtype="category")
)
with _hide_cudf_safe_casting_warning():
actual = gd_data.cat.set_categories(
new_categories=cudf.Series(new_categories, dtype="category")
)

assert_eq(expected, actual)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -703,7 +736,9 @@ def test_add_categories(data, add):
gds = cudf.Series(data, dtype="category")

expected = pds.cat.add_categories(add)
actual = gds.cat.add_categories(add)
with _hide_cudf_safe_casting_warning():
actual = gds.cat.add_categories(add)

assert_eq(
expected.cat.codes, actual.cat.codes.astype(expected.cat.codes.dtype)
)
Expand Down

0 comments on commit 3f175ce

Please sign in to comment.