Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce warnings in pytest output #10168

Merged
merged 12 commits into from
Feb 4, 2022
18 changes: 13 additions & 5 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2096,16 +2096,24 @@ def as_column(
dtype = "bool"
np_type = np.dtype(dtype).type
pa_type = np_to_pa_dtype(np.dtype(dtype))
data = as_column(
pa.array(
# TODO: A warning is emitted from pyarrow 5.0.0's function
# pyarrow.lib._sequence_to_array:
# "DeprecationWarning: an integer is required (got type float).
# Implicit conversion to integers using __int__ is deprecated,
# and may be removed in a future version of Python."
# This warning does not appear in pyarrow 6.0.1 and will be
# resolved by https://github.com/rapidsai/cudf/pull/9686/.
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
pa_array = pa.array(
arbitrary,
type=pa_type,
from_pandas=True
if nan_as_null is None
else nan_as_null,
),
dtype=dtype,
nan_as_null=nan_as_null,
)
data = as_column(
pa_array, dtype=dtype, nan_as_null=nan_as_null,
)
except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
if is_categorical_dtype(dtype):
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3200,9 +3200,9 @@ def ceil(self):
"""

warnings.warn(
"Series.ceil and DataFrame.ceil are deprecated and will be \
removed in the future",
DeprecationWarning,
"Series.ceil and DataFrame.ceil are deprecated and will be "
"removed in the future",
FutureWarning,
vyasr marked this conversation as resolved.
Show resolved Hide resolved
)

return self._unaryop("ceil")
Expand Down Expand Up @@ -3239,9 +3239,9 @@ def floor(self):
"""

warnings.warn(
"Series.ceil and DataFrame.ceil are deprecated and will be \
removed in the future",
DeprecationWarning,
"Series.floor and DataFrame.floor are deprecated and will be "
"removed in the future.",
FutureWarning,
)

return self._unaryop("floor")
Expand Down
11 changes: 10 additions & 1 deletion python/cudf/cudf/testing/_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.

import re
import warnings
from collections.abc import Mapping, Sequence
from contextlib import contextmanager
from decimal import Decimal
Expand Down Expand Up @@ -109,7 +110,15 @@ def assert_eq(left, right, **kwargs):
if isinstance(left, pd.DataFrame):
tm.assert_frame_equal(left, right, **kwargs)
elif isinstance(left, pd.Series):
tm.assert_series_equal(left, right, **kwargs)
# TODO: A warning is emitted from the function
# pandas.testing.assert_series_equal for some inputs:
# "DeprecationWarning: elementwise comparison failed; this will raise
# an error in the future."
# This warning comes from a call from pandas to numpy. It is ignored
# here because it cannot be fixed within cudf.
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
tm.assert_series_equal(left, right, **kwargs)
elif isinstance(left, pd.Index):
tm.assert_index_equal(left, right, **kwargs)
elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
Expand Down
61 changes: 41 additions & 20 deletions python/cudf/cudf/tests/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,36 @@

@pytest.fixture(params=dtypes, ids=dtypes)
def pandas_input(request):
Copy link
Contributor Author

@bdice bdice Jan 31, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fixture was emitting a lot of warnings about unsafe casting -- in particular, from things like creating a Series with dtype int8 from data that could exceed 127 (the random values were drawn from the range [0, 1000)). To avoid that, I replaced the fixed-range data with a helper function that generates random values appropriate to each dtype.

data = np.random.randint(0, 1000, 100)
return pd.Series(data, dtype=request.param)
rng = np.random.default_rng()
dtype = request.param
size = 100

def random_ints(dtype, size):
dtype_min = np.iinfo(dtype).min
dtype_max = np.iinfo(dtype).max
return rng.integers(dtype_min, dtype_max, size=size, dtype=dtype)

try:
dtype = np.dtype(request.param)
except TypeError:
if dtype == "category":
data = random_ints(np.int64, size)
raise
else:
if dtype.kind == "b":
data = rng.choice([False, True], size=size)
elif dtype.kind in ("m", "M"):
# datetime or timedelta
data = random_ints(np.int64, size)
elif dtype.kind == "U":
# Unicode strings of integers like "12345"
data = random_ints(np.int64, size).astype(dtype.str)
elif dtype.kind == "f":
# floats in [0.0, 1.0)
data = rng.random(size=size, dtype=dtype)
else:
data = random_ints(dtype, size)
return pd.Series(data, dtype=dtype)


def str_host_view(list_of_str, to_dtype):
Expand Down Expand Up @@ -379,29 +407,22 @@ def test_as_column_buffer(data, expected):


@pytest.mark.parametrize(
"data,expected",
"data,pyarrow_kwargs,cudf_kwargs",
[
(
pa.array([100, 200, 300], type=pa.decimal128(3)),
cudf.core.column.as_column(
[100, 200, 300], dtype=cudf.core.dtypes.Decimal128Dtype(3, 0)
),
),
(
pa.array([{"a": 1, "b": 3}, {"c": 2, "d": 4}]),
cudf.core.column.as_column([{"a": 1, "b": 3}, {"c": 2, "d": 4}]),
),
(
pa.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]),
cudf.core.column.as_column(
[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]
),
[100, 200, 300],
{"type": pa.decimal128(3)},
{"dtype": cudf.core.dtypes.Decimal128Dtype(3, 0)},
),
([{"a": 1, "b": 3}, {"c": 2, "d": 4}], {}, {},),
([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], {}, {},),
],
)
def test_as_column_arrow_array(data, expected):
actual_column = cudf.core.column.as_column(data)
assert_eq(cudf.Series(actual_column), cudf.Series(expected))
def test_as_column_arrow_array(data, pyarrow_kwargs, cudf_kwargs):
pyarrow_data = pa.array(data, **pyarrow_kwargs)
cudf_from_pyarrow = as_column(pyarrow_data)
expected = as_column(data, **cudf_kwargs)
assert_eq(cudf.Series(cudf_from_pyarrow), cudf.Series(expected))


@pytest.mark.parametrize(
Expand Down
17 changes: 14 additions & 3 deletions python/cudf/cudf/tests/test_unaops.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,21 @@ def test_series_neg():
def test_series_ceil():
arr = np.random.random(100) * 100
sr = Series(arr)
np.testing.assert_equal(sr.ceil().to_numpy(), np.ceil(arr))
with pytest.warns(
FutureWarning, match="Series.ceil and DataFrame.ceil are deprecated"
):
sr = sr.ceil()
np.testing.assert_equal(sr.to_numpy(), np.ceil(arr))


def test_series_floor():
arr = np.random.random(100) * 100
sr = Series(arr)
np.testing.assert_equal(sr.floor().to_numpy(), np.floor(arr))
with pytest.warns(
FutureWarning, match="Series.floor and DataFrame.floor are deprecated"
):
sr = sr.floor()
np.testing.assert_equal(sr.to_numpy(), np.floor(arr))


@pytest.mark.parametrize("nelem", [1, 7, 8, 9, 32, 64, 128])
Expand All @@ -74,7 +82,10 @@ def test_validity_ceil(nelem):
sr = Series.from_masked_array(data, mask)

# Result
res = sr.ceil()
with pytest.warns(
FutureWarning, match="Series.ceil and DataFrame.ceil are deprecated"
):
res = sr.ceil()

na_value = -100000
got = res.fillna(na_value).to_numpy()
Expand Down