diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 764885dd7b6..3eeeac405b3 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -1,5 +1,6 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. +import warnings from collections import abc from io import BytesIO, StringIO @@ -55,6 +56,13 @@ def read_csv( ): """{docstring}""" + if delim_whitespace is not False: + warnings.warn( + "The 'delim_whitespace' keyword in pd.read_csv is deprecated and " + "will be removed in a future version. Use ``sep='\\s+'`` instead", + FutureWarning, + ) + if use_python_file_object and bytes_per_thread is not None: raise ValueError( "bytes_per_thread is only supported when " diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 8171f3a1872..9b08ef30545 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -17,8 +17,12 @@ import cudf from cudf import read_csv -from cudf.core._compat import PANDAS_GE_200 -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_220 +from cudf.testing._utils import ( + assert_eq, + assert_exceptions_equal, + expect_warning_if, +) def make_numeric_dataframe(nrows, dtype): @@ -1263,20 +1267,28 @@ def test_csv_reader_delim_whitespace(): buffer = "1 2 3\n4 5 6" # with header row - cu_df = read_csv(StringIO(buffer), delim_whitespace=True) - pd_df = pd.read_csv(StringIO(buffer), delim_whitespace=True) + with pytest.warns(FutureWarning): + cu_df = read_csv(StringIO(buffer), delim_whitespace=True) + with expect_warning_if(PANDAS_GE_220): + pd_df = pd.read_csv(StringIO(buffer), delim_whitespace=True) assert_eq(pd_df, cu_df) # without header row - cu_df = read_csv(StringIO(buffer), delim_whitespace=True, header=None) - pd_df = pd.read_csv(StringIO(buffer), delim_whitespace=True, header=None) + with pytest.warns(FutureWarning): + cu_df = read_csv(StringIO(buffer), delim_whitespace=True, header=None) + with expect_warning_if(PANDAS_GE_220): + pd_df = pd.read_csv( + StringIO(buffer), delim_whitespace=True, header=None + ) assert pd_df.shape == cu_df.shape # should raise an error if used with delimiter or sep with pytest.raises(ValueError): - read_csv(StringIO(buffer), delim_whitespace=True, delimiter=" ") + with pytest.warns(FutureWarning): + read_csv(StringIO(buffer), delim_whitespace=True, delimiter=" ") with pytest.raises(ValueError): - read_csv(StringIO(buffer), delim_whitespace=True, sep=" ") + with pytest.warns(FutureWarning): + read_csv(StringIO(buffer), delim_whitespace=True, sep=" ") def test_csv_reader_unnamed_cols():