pandas-dev · natmokval · Apr 16, 2024 · Apr 17, 2024 · Apr 17, 2024
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -137,7 +137,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
         encoding_errors: str | None
         dialect: str | csv.Dialect | None
         on_bad_lines: str
-        delim_whitespace: bool | lib.NoDefault
         low_memory: bool
         memory_map: bool
         float_precision: Literal["high", "legacy", "round_trip"] | None
@@ -517,15 +516,13 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
 
 
 class _C_Parser_Defaults(TypedDict):
-    delim_whitespace: Literal[False]
     na_filter: Literal[True]
     low_memory: Literal[True]
     memory_map: Literal[False]
     float_precision: None
 
 
 _c_parser_defaults: _C_Parser_Defaults = {
-    "delim_whitespace": False,
     "na_filter": True,
     "low_memory": True,
     "memory_map": False,
@@ -551,7 +548,6 @@ class _Fwf_Defaults(TypedDict):
     "thousands",
     "memory_map",
     "dialect",
-    "delim_whitespace",
     "quoting",
     "lineterminator",
     "converters",
@@ -783,7 +779,6 @@ def read_csv(
     # Error Handling
     on_bad_lines: str = "error",
     # Internal
-    delim_whitespace: bool | lib.NoDefault = lib.no_default,
     low_memory: bool = _c_parser_defaults["low_memory"],
     memory_map: bool = False,
     float_precision: Literal["high", "legacy", "round_trip"] | None = None,
@@ -833,17 +828,6 @@ def read_csv(
             stacklevel=find_stack_level(),
         )
 
-    if delim_whitespace is not lib.no_default:
-        # GH#55569
-        warnings.warn(
-            "The 'delim_whitespace' keyword in pd.read_csv is deprecated and "
-            "will be removed in a future version. Use ``sep='\\s+'`` instead",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-    else:
-        delim_whitespace = False
-
     # locals() should never be modified
     kwds = locals().copy()
     del kwds["filepath_or_buffer"]
@@ -852,7 +836,6 @@ def read_csv(
     kwds_defaults = _refine_defaults_read(
         dialect,
         delimiter,
-        delim_whitespace,
         engine,
         sep,
         on_bad_lines,
@@ -974,7 +957,6 @@ def read_table(
     # Error Handling
     on_bad_lines: str = "error",
     # Internal
-    delim_whitespace: bool | lib.NoDefault = lib.no_default,
     low_memory: bool = _c_parser_defaults["low_memory"],
     memory_map: bool = False,
     float_precision: Literal["high", "legacy", "round_trip"] | None = None,
@@ -1015,17 +997,6 @@ def read_table(
             stacklevel=find_stack_level(),
         )
 
-    if delim_whitespace is not lib.no_default:
-        # GH#55569
-        warnings.warn(
-            "The 'delim_whitespace' keyword in pd.read_table is deprecated and "
-            "will be removed in a future version. Use ``sep='\\s+'`` instead",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-    else:
-        delim_whitespace = False
-
     # locals() should never be modified
     kwds = locals().copy()
     del kwds["filepath_or_buffer"]
@@ -1034,7 +1005,6 @@ def read_table(
     kwds_defaults = _refine_defaults_read(
         dialect,
         delimiter,
-        delim_whitespace,
         engine,
         sep,
         on_bad_lines,
@@ -1341,9 +1311,8 @@ def _clean_options(
                 engine = "python"
 
         sep = options["delimiter"]
-        delim_whitespace = options["delim_whitespace"]
 
-        if sep is None and not delim_whitespace:
+        if sep is None:
             if engine in ("c", "pyarrow"):
                 fallback_reason = (
                     f"the '{engine}' engine does not support "
@@ -1352,7 +1321,6 @@ def _clean_options(
                 engine = "python"
         elif sep is not None and len(sep) > 1:
             if engine == "c" and sep == r"\s+":
-                result["delim_whitespace"] = True
                 del result["delimiter"]
             elif engine not in ("python", "python-fwf"):
                 # wait until regex engine integrated
@@ -1362,9 +1330,6 @@ def _clean_options(
                     r"different from '\s+' are interpreted as regex)"
                 )
                 engine = "python"
-        elif delim_whitespace:
-            if "python" in engine:
-                result["delimiter"] = r"\s+"
         elif sep is not None:
             encodeable = True
             encoding = sys.getfilesystemencoding() or "utf-8"
@@ -1779,7 +1744,6 @@ def _stringify_na_values(na_values, floatify: bool) -> set[str | float]:
 def _refine_defaults_read(
     dialect: str | csv.Dialect | None,
     delimiter: str | None | lib.NoDefault,
-    delim_whitespace: bool,
     engine: CSVEngine | None,
     sep: str | None | lib.NoDefault,
     on_bad_lines: str | Callable,
@@ -1862,12 +1826,6 @@ def _refine_defaults_read(
     if delimiter is None:
         delimiter = sep
 
-    if delim_whitespace and (delimiter is not lib.no_default):
-        raise ValueError(
-            "Specified a delimiter with both sep and "
-            "delim_whitespace=True; you can only specify one."
-        )
-
     if delimiter == "\n":
         raise ValueError(
             r"Specified \n as separator or delimiter. This forces the python engine "

diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
@@ -480,7 +480,7 @@ def test_read_empty_with_usecols(all_parsers, data, kwargs, expected):
         (
             {
                 "header": None,
-                "delim_whitespace": True,
+                "sep": r"\s+",
                 "skiprows": [0, 1, 2, 3, 5, 6],
                 "skip_blank_lines": True,
             },
@@ -489,7 +489,7 @@ def test_read_empty_with_usecols(all_parsers, data, kwargs, expected):
         # gh-8983: test skipping set of rows after a row with trailing spaces.
         (
             {
-                "delim_whitespace": True,
+                "sep": r"\s+",
                 "skiprows": [1, 2, 3, 5, 6],
                 "skip_blank_lines": True,
             },
@@ -501,21 +501,10 @@ def test_trailing_spaces(all_parsers, kwargs, expected):
     data = "A B C  \nrandom line with trailing spaces    \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n   \n5.1,NaN,10.0\n"  # noqa: E501
     parser = all_parsers
 
-    depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
-
     if parser.engine == "pyarrow":
-        msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
-        with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(
-                FutureWarning, match=depr_msg, check_stacklevel=False
-            ):
-                parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
-        return
+        parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
 
-    with tm.assert_produces_warning(
-        FutureWarning, match=depr_msg, check_stacklevel=False
-    ):
-        result = parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
+    result = parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
     tm.assert_frame_equal(result, expected)
 
 
@@ -815,25 +804,13 @@ def test_read_csv_names_not_accepting_sets(all_parsers):
         parser.read_csv(StringIO(data), names=set("QAZ"))
 
 
+@xfail_pyarrow
 def test_read_table_delim_whitespace_default_sep(all_parsers):
     # GH: 35958
     f = StringIO("a  b  c\n1 -2 -3\n4  5   6")
     parser = all_parsers
 
-    depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"
-
-    if parser.engine == "pyarrow":
-        msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
-        with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(
-                FutureWarning, match=depr_msg, check_stacklevel=False
-            ):
-                parser.read_table(f, delim_whitespace=True)
-        return
-    with tm.assert_produces_warning(
-        FutureWarning, match=depr_msg, check_stacklevel=False
-    ):
-        result = parser.read_table(f, delim_whitespace=True)
+    result = parser.read_table(f, sep=r"\s+")
     expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]})
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
@@ -682,7 +682,6 @@ def test_header_missing_rows(all_parsers):
         parser.read_csv(StringIO(data), header=[0, 1, 2])
 
 
-# ValueError: The 'delim_whitespace' option is not supported with the 'pyarrow' engine
 @xfail_pyarrow
 def test_header_multiple_whitespaces(all_parsers):
     # GH#54931
@@ -695,7 +694,6 @@ def test_header_multiple_whitespaces(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-# ValueError: The 'delim_whitespace' option is not supported with the 'pyarrow' engine
 @xfail_pyarrow
 def test_header_delim_whitespace(all_parsers):
     # GH#54918
@@ -705,11 +703,7 @@ def test_header_delim_whitespace(all_parsers):
 3,4
     """
 
-    depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
-    with tm.assert_produces_warning(
-        FutureWarning, match=depr_msg, check_stacklevel=False
-    ):
-        result = parser.read_csv(StringIO(data), delim_whitespace=True)
+    result = parser.read_csv(StringIO(data), sep=r"\s+")
     expected = DataFrame({"a,b": ["1,2", "3,4"]})
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -593,9 +593,7 @@ def test_skiprows_inference():
 """.strip()
     skiprows = 2
 
-    depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
-        expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
+    expected = read_csv(StringIO(data), skiprows=skiprows, sep=r"\s+")
 
     result = read_fwf(StringIO(data), skiprows=skiprows)
     tm.assert_frame_equal(result, expected)
@@ -611,9 +609,7 @@ def test_skiprows_by_index_inference():
 """.strip()
     skiprows = [0, 2]
 
-    depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
-        expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
+    expected = read_csv(StringIO(data), skiprows=skiprows, sep=r"\s+")
 
     result = read_fwf(StringIO(data), skiprows=skiprows)
     tm.assert_frame_equal(result, expected)

diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -254,29 +254,12 @@ def test_usecols_regex_sep(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_usecols_with_whitespace(all_parsers):
     parser = all_parsers
     data = "a  b  c\n4  apple  bat  5.7\n8  orange  cow  10"
 
-    depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
-
-    if parser.engine == "pyarrow":
-        msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
-        with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(
-                FutureWarning, match=depr_msg, check_stacklevel=False
-            ):
-                parser.read_csv(
-                    StringIO(data), delim_whitespace=True, usecols=("a", "b")
-                )
-        return
-
-    with tm.assert_produces_warning(
-        FutureWarning, match=depr_msg, check_stacklevel=False
-    ):
-        result = parser.read_csv(
-            StringIO(data), delim_whitespace=True, usecols=("a", "b")
-        )
+    result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b"))
     expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
     tm.assert_frame_equal(result, expected)