Skip to content

Commit

Permalink
Fix warnings in test modules up to test_dataframe.py (#12355)
Browse files Browse the repository at this point in the history
I realized that my previous warning-reduction PRs were causing some circular work: I would add a new warning to cudf to match pandas, which would cause new warnings to appear in test modules that I had previously declared free of warnings. To prevent this, I've changed my approach to go through the test modules in alphabetical order and ensure that every module up to the current one is warning-free. This PR removes warnings from all test modules up to test_dataframe.py.

Contributes to #9999 and #10363.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #12355
  • Loading branch information
vyasr authored Dec 13, 2022
1 parent 865ee1d commit 8eb6f22
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 23 deletions.
15 changes: 15 additions & 0 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import warnings
from collections import abc
from functools import cached_property
from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Tuple, cast
Expand Down Expand Up @@ -181,6 +182,13 @@ def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1 < 2 < 10]
"""
if inplace:
warnings.warn(
"The inplace parameter is deprecated and will be removed in a "
"future release. set_ordered will always return a new Series "
"in the future.",
FutureWarning,
)
return self._return_or_inplace(
self._column.as_ordered(), inplace=inplace
)
Expand Down Expand Up @@ -248,6 +256,13 @@ def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1, 2, 10]
"""
if inplace:
warnings.warn(
"The inplace parameter is deprecated and will be removed in a "
"future release. set_ordered will always return a new Series "
"in the future.",
FutureWarning,
)
return self._return_or_inplace(
self._column.as_unordered(), inplace=inplace
)
Expand Down
11 changes: 10 additions & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import pandas as pd
import pyarrow as pa
from nvtx import annotate
from packaging.version import Version
from pandas._config import get_option
from pandas.core.dtypes.common import is_float, is_integer
from pandas.io.formats import console
Expand Down Expand Up @@ -1162,7 +1163,15 @@ def __getitem__(self, arg):
elif can_convert_to_column(arg):
mask = arg
if is_list_like(mask):
mask = pd.Series(mask)
# An explicit dtype is needed to avoid pandas warnings from
# empty sets of columns. This shouldn't be needed in pandas
# 2.0: we don't need to specify a dtype when we know we're not
# trying to match any columns, so the default is fine.
dtype = None
if len(mask) == 0:
assert Version(pd.__version__) < Version("2.0.0")
dtype = "float64"
mask = pd.Series(mask, dtype=dtype)
if mask.dtype == "bool":
return self._apply_boolean_mask(mask)
else:
Expand Down
7 changes: 6 additions & 1 deletion python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,12 @@ def _validate_merge_params(
if (
isinstance(lhs, cudf.DataFrame)
and isinstance(rhs, cudf.DataFrame)
and lhs._data.nlevels != rhs._data.nlevels
# An empty column is considered to have 1 level by pandas (can be
# seen by using lhs.columns.nlevels, but we don't want to use
# columns internally because it's expensive).
# TODO: Investigate whether ColumnAccessor.nlevels should be
# modified in the size 0 case.
and max(lhs._data.nlevels, 1) != max(rhs._data.nlevels, 1)
):
warnings.warn(
"merging between different levels is deprecated and will be "
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_array_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ def test_array_func_cudf_series(np_ar, func):
@pytest.mark.parametrize(
"func",
[
lambda x: np.mean(x),
lambda x: np.sum(x),
lambda x: np.mean(x, axis=0),
lambda x: np.sum(x, axis=0),
lambda x: np.var(x, ddof=1),
lambda x: np.dot(x, x.transpose()),
],
Expand Down
7 changes: 4 additions & 3 deletions python/cudf/cudf/tests/test_array_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def test_ufunc_index(ufunc):
pytest.xfail(reason="Operation not supported by cupy")
raise

expect = ufunc(*(arg.to_pandas() for arg in pandas_args))
with _hide_ufunc_warnings(ufunc):
expect = ufunc(*(arg.to_pandas() for arg in pandas_args))

try:
if ufunc.nout > 1:
Expand Down Expand Up @@ -313,8 +314,8 @@ def test_ufunc_dataframe(ufunc, has_nulls, indexed):
"pandas does not currently support misaligned indexes in "
"DataFrames, but we do. Until this is fixed we will skip these "
"tests. See the error here: "
"https://github.com/pandas-dev/pandas/blob/main/pandas/core/arraylike.py#L212, " # noqa: E501
"called from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arraylike.py#L258" # noqa: E501
"https://github.com/pandas-dev/pandas/blob/1.5.x/pandas/core/arraylike.py#L212, " # noqa: E501
"called from https://github.com/pandas-dev/pandas/blob/1.5.x/pandas/core/arraylike.py#L258" # noqa: E501
)
# TODO: Enable the check below when we remove the check above.
# if indexed and fname in (
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1624,8 +1624,8 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
assert result.value is cudf.NA

# make sure dtype is the same as had there been a valid scalar
valid_lhs = cudf.Scalar(0, dtype=dtype_l)
valid_rhs = cudf.Scalar(0, dtype=dtype_r)
valid_lhs = cudf.Scalar(1, dtype=dtype_l)
valid_rhs = cudf.Scalar(1, dtype=dtype_r)

valid_result = op(valid_lhs, valid_rhs)
assert result.dtype == valid_result.dtype
Expand Down
17 changes: 13 additions & 4 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
NUMERIC_TYPES,
assert_eq,
assert_exceptions_equal,
expect_warning_if,
)


Expand Down Expand Up @@ -375,8 +376,12 @@ def test_categorical_as_ordered(pd_str_cat, inplace):
assert cd_sr.cat.ordered is False
assert cd_sr.cat.ordered == pd_sr.cat.ordered

pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace)
cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace)
# pandas internally uses a deprecated call to set_ordered(inplace=inplace)
# inside as_ordered.
with pytest.warns(FutureWarning):
pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace)
with expect_warning_if(inplace, FutureWarning):
cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace)
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr
Expand All @@ -395,8 +400,12 @@ def test_categorical_as_unordered(pd_str_cat, inplace):
assert cd_sr.cat.ordered is True
assert cd_sr.cat.ordered == pd_sr.cat.ordered

pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace)
cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace)
# pandas internally uses a deprecated call to set_ordered(inplace=inplace)
# inside as_unordered.
with pytest.warns(FutureWarning):
pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace)
with expect_warning_if(inplace, FutureWarning):
cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace)
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr
Expand Down
17 changes: 9 additions & 8 deletions python/cudf/cudf/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def make_frames(index=None, nulls="none"):
mask = np.arange(10)
np.random.shuffle(mask)
mask = mask[:5]
df.y.loc[mask] = np.nan
df2.y.loc[mask] = np.nan
df.loc[mask, "y"] = np.nan
df2.loc[mask, "y"] = np.nan
gdf = gd.DataFrame.from_pandas(df)
gdf2 = gd.DataFrame.from_pandas(df2)
if index:
Expand Down Expand Up @@ -376,12 +376,13 @@ def test_pandas_concat_compatibility_axis1_eq_index():
ps1 = s1.to_pandas()
ps2 = s2.to_pandas()

assert_exceptions_equal(
lfunc=pd.concat,
rfunc=gd.concat,
lfunc_args_and_kwargs=([], {"objs": [ps1, ps2], "axis": 1}),
rfunc_args_and_kwargs=([], {"objs": [s1, s2], "axis": 1}),
)
with pytest.warns(FutureWarning):
assert_exceptions_equal(
lfunc=pd.concat,
rfunc=gd.concat,
lfunc_args_and_kwargs=([], {"objs": [ps1, ps2], "axis": 1}),
rfunc_args_and_kwargs=([], {"objs": [s1, s2], "axis": 1}),
)


@pytest.mark.parametrize("name", [None, "a"])
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1646,7 +1646,7 @@ def test_csv_writer_numeric_data(dtype, nelem, tmpdir):

df = make_numeric_dataframe(nelem, dtype)
gdf = cudf.from_pandas(df)
df.to_csv(path_or_buf=pdf_df_fname, index=False, line_terminator="\n")
df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n")
gdf.to_csv(path_or_buf=gdf_df_fname, index=False)

assert os.path.exists(pdf_df_fname)
Expand All @@ -1663,7 +1663,7 @@ def test_csv_writer_datetime_data(tmpdir):

df = make_datetime_dataframe()
gdf = cudf.from_pandas(df)
df.to_csv(path_or_buf=pdf_df_fname, index=False, line_terminator="\n")
df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n")
gdf.to_csv(path_or_buf=gdf_df_fname, index=False)

assert os.path.exists(pdf_df_fname)
Expand Down

0 comments on commit 8eb6f22

Please sign in to comment.