diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index ad95c23e395..2c3951c0e5e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2093,16 +2093,24 @@ def as_column( dtype = "bool" np_type = np.dtype(dtype).type pa_type = np_to_pa_dtype(np.dtype(dtype)) - data = as_column( - pa.array( + # TODO: A warning is emitted from pyarrow 5.0.0's function + # pyarrow.lib._sequence_to_array: + # "DeprecationWarning: an integer is required (got type float). + # Implicit conversion to integers using __int__ is deprecated, + # and may be removed in a future version of Python." + # This warning does not appear in pyarrow 6.0.1 and will be + # resolved by https://github.com/rapidsai/cudf/pull/9686/. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + pa_array = pa.array( arbitrary, type=pa_type, from_pandas=True if nan_as_null is None else nan_as_null, - ), - dtype=dtype, - nan_as_null=nan_as_null, + ) + data = as_column( + pa_array, dtype=dtype, nan_as_null=nan_as_null, ) except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError): if is_categorical_dtype(dtype): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index fb82517a160..6b83f927727 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3220,9 +3220,9 @@ def ceil(self): """ warnings.warn( - "Series.ceil and DataFrame.ceil are deprecated and will be \ - removed in the future", - DeprecationWarning, + "Series.ceil and DataFrame.ceil are deprecated and will be " + "removed in the future", + FutureWarning, ) return self._unaryop("ceil") @@ -3259,9 +3259,9 @@ def floor(self): """ warnings.warn( - "Series.ceil and DataFrame.ceil are deprecated and will be \ - removed in the future", - DeprecationWarning, + "Series.floor and DataFrame.floor are deprecated and will be " + "removed in the future.", + FutureWarning, ) return self._unaryop("floor") diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 41dac26edf8..6c602d321eb 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. import re +import warnings from collections.abc import Mapping, Sequence from contextlib import contextmanager from decimal import Decimal @@ -109,7 +110,15 @@ def assert_eq(left, right, **kwargs): if isinstance(left, pd.DataFrame): tm.assert_frame_equal(left, right, **kwargs) elif isinstance(left, pd.Series): - tm.assert_series_equal(left, right, **kwargs) + # TODO: A warning is emitted from the function + # pandas.testing.assert_series_equal for some inputs: + # "DeprecationWarning: elementwise comparison failed; this will raise + # an error in the future." + # This warning comes from a call from pandas to numpy. It is ignored + # here because it cannot be fixed within cudf. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + tm.assert_series_equal(left, right, **kwargs) elif isinstance(left, pd.Index): tm.assert_index_equal(left, right, **kwargs) elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray): diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index 748cf958ac3..365b351061d 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -29,8 +29,37 @@ @pytest.fixture(params=dtypes, ids=dtypes) def pandas_input(request): - data = np.random.randint(0, 1000, 100) - return pd.Series(data, dtype=request.param) + dtype = request.param + rng = np.random.default_rng() + size = 100 + + def random_ints(dtype, size): + dtype_min = np.iinfo(dtype).min + dtype_max = np.iinfo(dtype).max + return rng.integers(dtype_min, dtype_max, size=size, dtype=dtype) + + try: + dtype = np.dtype(dtype) + except TypeError: + if dtype == "category": + data = random_ints(np.int64, size) + else: + raise + else: + if dtype.kind == "b": + data = rng.choice([False, True], size=size) + elif dtype.kind in ("m", "M"): + # datetime or timedelta + data = random_ints(np.int64, size) + elif dtype.kind == "U": + # Unicode strings of integers like "12345" + data = random_ints(np.int64, size).astype(dtype.str) + elif dtype.kind == "f": + # floats in [0.0, 1.0) + data = rng.random(size=size, dtype=dtype) + else: + data = random_ints(dtype, size) + return pd.Series(data, dtype=dtype) def str_host_view(list_of_str, to_dtype): @@ -379,29 +408,22 @@ def test_as_column_buffer(data, expected): @pytest.mark.parametrize( - "data,expected", + "data,pyarrow_kwargs,cudf_kwargs", [ ( - pa.array([100, 200, 300], type=pa.decimal128(3)), - cudf.core.column.as_column( - [100, 200, 300], dtype=cudf.core.dtypes.Decimal128Dtype(3, 0) - ), - ), - ( - pa.array([{"a": 1, "b": 3}, {"c": 2, "d": 4}]), - cudf.core.column.as_column([{"a": 1, "b": 3}, {"c": 2, "d": 4}]), - ), - ( - pa.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), - cudf.core.column.as_column( - [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] - ), + [100, 200, 300], + {"type": pa.decimal128(3)}, + {"dtype": cudf.core.dtypes.Decimal128Dtype(3, 0)}, ), + ([{"a": 1, "b": 3}, {"c": 2, "d": 4}], {}, {},), + ([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], {}, {},), ], ) -def test_as_column_arrow_array(data, expected): - actual_column = cudf.core.column.as_column(data) - assert_eq(cudf.Series(actual_column), cudf.Series(expected)) +def test_as_column_arrow_array(data, pyarrow_kwargs, cudf_kwargs): + pyarrow_data = pa.array(data, **pyarrow_kwargs) + cudf_from_pyarrow = as_column(pyarrow_data) + expected = as_column(data, **cudf_kwargs) + assert_eq(cudf.Series(cudf_from_pyarrow), cudf.Series(expected)) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index dd736abd7d0..2e8da615e3e 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -54,13 +54,21 @@ def test_series_neg(): def test_series_ceil(): arr = np.random.random(100) * 100 sr = Series(arr) - np.testing.assert_equal(sr.ceil().to_numpy(), np.ceil(arr)) + with pytest.warns( + FutureWarning, match="Series.ceil and DataFrame.ceil are deprecated" + ): + sr = sr.ceil() + np.testing.assert_equal(sr.to_numpy(), np.ceil(arr)) def test_series_floor(): arr = np.random.random(100) * 100 sr = Series(arr) - np.testing.assert_equal(sr.floor().to_numpy(), np.floor(arr)) + with pytest.warns( + FutureWarning, match="Series.floor and DataFrame.floor are deprecated" + ): + sr = sr.floor() + np.testing.assert_equal(sr.to_numpy(), np.floor(arr)) @pytest.mark.parametrize("nelem", [1, 7, 8, 9, 32, 64, 128]) @@ -72,7 +80,10 @@ def test_validity_ceil(nelem): sr = Series.from_masked_array(data, mask) # Result - res = sr.ceil() + with pytest.warns( + FutureWarning, match="Series.ceil and DataFrame.ceil are deprecated" + ): + res = sr.ceil() na_value = -100000 got = res.fillna(na_value).to_numpy()