Fix warnings in test modules up to test_dataframe.py (#12355)

I realized that my previous warning-reduction PRs were causing some circular work: I would add a new warning to cudf to match pandas, which would cause those new warnings to appear in modules that I had previously declared free of warnings. To prevent this, I've changed my approach to instead go through the test modules in alphabetical order and ensure that they are all warning-free up to that point. This PR removes warnings from all test modules up to test_dataframe.py.

Contributes to #9999 and #10363.

Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Bradley Dice (https://github.com/bdice)

URL: #12355
rapidsai · Dec 13, 2022 · 8eb6f22 · 8eb6f22
1 parent 865ee1d
commit 8eb6f22
Show file tree

Hide file tree

Showing 9 changed files with 63 additions and 23 deletions.
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import warnings
 from collections import abc
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Tuple, cast
@@ -181,6 +182,13 @@ def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]:
         dtype: category
         Categories (3, int64): [1 < 2 < 10]
         """
+        if inplace:
+            warnings.warn(
+                "The inplace parameter is deprecated and will be removed in a "
+                "future release. set_ordered will always return a new Series "
+                "in the future.",
+                FutureWarning,
+            )
         return self._return_or_inplace(
             self._column.as_ordered(), inplace=inplace
         )
@@ -248,6 +256,13 @@ def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]:
         dtype: category
         Categories (3, int64): [1, 2, 10]
         """
+        if inplace:
+            warnings.warn(
+                "The inplace parameter is deprecated and will be removed in a "
+                "future release. set_ordered will always return a new Series "
+                "in the future.",
+                FutureWarning,
+            )
         return self._return_or_inplace(
             self._column.as_unordered(), inplace=inplace
         )

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -31,6 +31,7 @@
 import pandas as pd
 import pyarrow as pa
 from nvtx import annotate
+from packaging.version import Version
 from pandas._config import get_option
 from pandas.core.dtypes.common import is_float, is_integer
 from pandas.io.formats import console
@@ -1162,7 +1163,15 @@ def __getitem__(self, arg):
         elif can_convert_to_column(arg):
             mask = arg
             if is_list_like(mask):
-                mask = pd.Series(mask)
+                # An explicit dtype is needed to avoid pandas warnings from
+                # empty sets of columns. This shouldn't be needed in pandas
+                # 2.0, we don't need to specify a dtype when we know we're not
+                # trying to match any columns so the default is fine.
+                dtype = None
+                if len(mask) == 0:
+                    assert Version(pd.__version__) < Version("2.0.0")
+                    dtype = "float64"
+                mask = pd.Series(mask, dtype=dtype)
             if mask.dtype == "bool":
                 return self._apply_boolean_mask(mask)
             else:

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
@@ -415,7 +415,12 @@ def _validate_merge_params(
         if (
             isinstance(lhs, cudf.DataFrame)
             and isinstance(rhs, cudf.DataFrame)
-            and lhs._data.nlevels != rhs._data.nlevels
+            # An empty column is considered to have 1 level by pandas (can be
+            # seen by using lhs.columns.nlevels, but we don't want to use
+            # columns internally because it's expensive).
+            # TODO: Investigate whether ColumnAccessor.nlevels should be
+            # modified in the size 0 case.
+            and max(lhs._data.nlevels, 1) != max(rhs._data.nlevels, 1)
         ):
             warnings.warn(
                 "merging between different levels is deprecated and will be "

diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py
@@ -63,8 +63,8 @@ def test_array_func_cudf_series(np_ar, func):
 @pytest.mark.parametrize(
     "func",
     [
-        lambda x: np.mean(x),
-        lambda x: np.sum(x),
+        lambda x: np.mean(x, axis=0),
+        lambda x: np.sum(x, axis=0),
         lambda x: np.var(x, ddof=1),
         lambda x: np.dot(x, x.transpose()),
     ],

diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py
@@ -77,7 +77,8 @@ def test_ufunc_index(ufunc):
             pytest.xfail(reason="Operation not supported by cupy")
         raise
 
-    expect = ufunc(*(arg.to_pandas() for arg in pandas_args))
+    with _hide_ufunc_warnings(ufunc):
+        expect = ufunc(*(arg.to_pandas() for arg in pandas_args))
 
     try:
         if ufunc.nout > 1:
@@ -313,8 +314,8 @@ def test_ufunc_dataframe(ufunc, has_nulls, indexed):
             "pandas does not currently support misaligned indexes in "
             "DataFrames, but we do. Until this is fixed we will skip these "
             "tests. See the error here: "
-            "https://github.com/pandas-dev/pandas/blob/main/pandas/core/arraylike.py#L212, "  # noqa: E501
-            "called from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arraylike.py#L258"  # noqa: E501
+            "https://github.com/pandas-dev/pandas/blob/1.5.x/pandas/core/arraylike.py#L212, "  # noqa: E501
+            "called from https://github.com/pandas-dev/pandas/blob/1.5.x/pandas/core/arraylike.py#L258"  # noqa: E501
         )
     # TODO: Enable the check below when we remove the check above.
     # if indexed and fname in (

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
@@ -1624,8 +1624,8 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
     assert result.value is cudf.NA
 
     # make sure dtype is the same as had there been a valid scalar
-    valid_lhs = cudf.Scalar(0, dtype=dtype_l)
-    valid_rhs = cudf.Scalar(0, dtype=dtype_r)
+    valid_lhs = cudf.Scalar(1, dtype=dtype_l)
+    valid_rhs = cudf.Scalar(1, dtype=dtype_r)
 
     valid_result = op(valid_lhs, valid_rhs)
     assert result.dtype == valid_result.dtype

diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
@@ -16,6 +16,7 @@
     NUMERIC_TYPES,
     assert_eq,
     assert_exceptions_equal,
+    expect_warning_if,
 )
 
 
@@ -375,8 +376,12 @@ def test_categorical_as_ordered(pd_str_cat, inplace):
     assert cd_sr.cat.ordered is False
     assert cd_sr.cat.ordered == pd_sr.cat.ordered
 
-    pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace)
-    cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace)
+    # pandas internally uses a deprecated call to set_ordered(inplace=inplace)
+    # inside as_ordered.
+    with pytest.warns(FutureWarning):
+        pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace)
+    with expect_warning_if(inplace, FutureWarning):
+        cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace)
     if inplace:
         pd_sr_1 = pd_sr
         cd_sr_1 = cd_sr
@@ -395,8 +400,12 @@ def test_categorical_as_unordered(pd_str_cat, inplace):
     assert cd_sr.cat.ordered is True
     assert cd_sr.cat.ordered == pd_sr.cat.ordered
 
-    pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace)
-    cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace)
+    # pandas internally uses a deprecated call to set_ordered(inplace=inplace)
+    # inside as_unordered.
+    with pytest.warns(FutureWarning):
+        pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace)
+    with expect_warning_if(inplace, FutureWarning):
+        cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace)
     if inplace:
         pd_sr_1 = pd_sr
         cd_sr_1 = cd_sr

diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
@@ -38,8 +38,8 @@ def make_frames(index=None, nulls="none"):
         mask = np.arange(10)
         np.random.shuffle(mask)
         mask = mask[:5]
-        df.y.loc[mask] = np.nan
-        df2.y.loc[mask] = np.nan
+        df.loc[mask, "y"] = np.nan
+        df2.loc[mask, "y"] = np.nan
     gdf = gd.DataFrame.from_pandas(df)
     gdf2 = gd.DataFrame.from_pandas(df2)
     if index:
@@ -376,12 +376,13 @@ def test_pandas_concat_compatibility_axis1_eq_index():
     ps1 = s1.to_pandas()
     ps2 = s2.to_pandas()
 
-    assert_exceptions_equal(
-        lfunc=pd.concat,
-        rfunc=gd.concat,
-        lfunc_args_and_kwargs=([], {"objs": [ps1, ps2], "axis": 1}),
-        rfunc_args_and_kwargs=([], {"objs": [s1, s2], "axis": 1}),
-    )
+    with pytest.warns(FutureWarning):
+        assert_exceptions_equal(
+            lfunc=pd.concat,
+            rfunc=gd.concat,
+            lfunc_args_and_kwargs=([], {"objs": [ps1, ps2], "axis": 1}),
+            rfunc_args_and_kwargs=([], {"objs": [s1, s2], "axis": 1}),
+        )
 
 
 @pytest.mark.parametrize("name", [None, "a"])

diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
@@ -1646,7 +1646,7 @@ def test_csv_writer_numeric_data(dtype, nelem, tmpdir):
 
     df = make_numeric_dataframe(nelem, dtype)
     gdf = cudf.from_pandas(df)
-    df.to_csv(path_or_buf=pdf_df_fname, index=False, line_terminator="\n")
+    df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n")
     gdf.to_csv(path_or_buf=gdf_df_fname, index=False)
 
     assert os.path.exists(pdf_df_fname)
@@ -1663,7 +1663,7 @@ def test_csv_writer_datetime_data(tmpdir):
 
     df = make_datetime_dataframe()
     gdf = cudf.from_pandas(df)
-    df.to_csv(path_or_buf=pdf_df_fname, index=False, line_terminator="\n")
+    df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n")
     gdf.to_csv(path_or_buf=gdf_df_fname, index=False)
 
     assert os.path.exists(pdf_df_fname)