DEPR: HDFStore.iteritems, read_csv(use_cols) behavior (pandas-dev#49483)

* Remove HDFStore iteritems * Enforce use_cols deprecation in read_csv * Spelling
noatamir · Nov 9, 2022 · abbe952 · abbe952
1 parent 1c1ddf8
commit abbe952
Show file tree

Hide file tree

Showing 7 changed files with 14 additions and 45 deletions.
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -180,7 +180,7 @@ Removal of prior version deprecations/changes
 - Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
 - Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
 - Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
-- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)
+- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems` and :meth:`HDFStore.iteritems`, use ``obj.items`` instead (:issue:`45321`)
 - Removed deprecated :meth:`DatetimeIndex.union_many` (:issue:`45018`)
 - Removed deprecated ``weekofyear`` and ``week`` attributes of :class:`DatetimeArray`, :class:`DatetimeIndex` and ``dt`` accessor in favor of ``isocalendar().week`` (:issue:`33595`)
 - Removed deprecated :meth:`RangeIndex._start`, :meth:`RangeIndex._stop`, :meth:`RangeIndex._step`, use ``start``, ``stop``, ``step`` instead (:issue:`30482`)
@@ -289,6 +289,7 @@ Removal of prior version deprecations/changes
 - Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support. (:issue:`30642`)
 - Enforced disallowing passing an integer ``fill_value`` to :meth:`DataFrame.shift` and :meth:`Series.shift` with datetime64, timedelta64, or period dtypes (:issue:`32591`)
 - Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
+- Enforced disallowing using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
 - Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
 - Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
 - Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -13,6 +13,7 @@ import sys
 import time
 import warnings
 
+from pandas.errors import ParserError
 from pandas.util._exceptions import find_stack_level
 
 from pandas import StringDtype
@@ -971,11 +972,9 @@ cdef class TextReader:
                 all(isinstance(u, int) for u in self.usecols)):
             missing_usecols = [col for col in self.usecols if col >= num_cols]
             if missing_usecols:
-                warnings.warn(
-                    "Defining usecols with out of bounds indices is deprecated "
-                    "and will raise a ParserError in a future version.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
+                raise ParserError(
+                    "Defining usecols without of bounds indices is not allowed. "
+                    f"{missing_usecols} are out of bounds.",
                 )
 
         results = {}

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -1528,8 +1528,6 @@ class ExcelFile:
            - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
              then ``openpyxl`` will be used.
            - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.
-           - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised.
-             This case will raise a ``ValueError`` in a future version of pandas.
 
            .. warning::
 

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -20,7 +20,6 @@
     Sequence,
     cast,
 )
-import warnings
 
 import numpy as np
 
@@ -34,7 +33,6 @@
     EmptyDataError,
     ParserError,
 )
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import is_integer
 from pandas.core.dtypes.inference import is_dict_like
@@ -592,11 +590,9 @@ def _handle_usecols(
                     col for col in self.usecols if col >= num_original_columns
                 ]
                 if missing_usecols:
-                    warnings.warn(
-                        "Defining usecols with out of bounds indices is deprecated "
-                        "and will raise a ParserError in a future version.",
-                        FutureWarning,
-                        stacklevel=find_stack_level(),
+                    raise ParserError(
+                        "Defining usecols without of bounds indices is not allowed. "
+                        f"{missing_usecols} are out of bounds.",
                     )
                 col_indices = self.usecols
 

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -684,18 +684,6 @@ def items(self) -> Iterator[tuple[str, list]]:
         for g in self.groups():
             yield g._v_pathname, g
 
-    def iteritems(self):
-        """
-        iterate on key->group
-        """
-        warnings.warn(
-            "iteritems is deprecated and will be removed in a future version. "
-            "Use .items instead.",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-        yield from self.items()
-
     def open(self, mode: str = "a", **kwargs) -> None:
         """
         Open the file in the specified mode

diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import ParserError
+
 from pandas import (
     DataFrame,
     Index,
@@ -402,20 +404,14 @@ def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols):
 
 @pytest.mark.parametrize("names", [None, ["a", "b"]])
 def test_usecols_indices_out_of_bounds(all_parsers, names):
-    # GH#25623
+    # GH#25623 & GH 41130; enforced in 2.0
     parser = all_parsers
     data = """
 a,b
 1,2
     """
-    with tm.assert_produces_warning(
-        FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
-    ):
-        result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
-    expected = DataFrame({"a": [1], "b": [None]})
-    if names is None and parser.engine == "python":
-        expected = DataFrame({"a": [1]})
-    tm.assert_frame_equal(result, expected)
+    with pytest.raises(ParserError, match="Defining usecols without of bounds"):
+        parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
 
 
 def test_usecols_additional_columns(all_parsers):

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
@@ -1009,15 +1009,6 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path):
             assert len(result)
 
 
-def test_hdfstore_iteritems_deprecated(tmp_path, setup_path):
-    path = tmp_path / setup_path
-    df = DataFrame({"a": [1]})
-    with HDFStore(path, mode="w") as hdf:
-        hdf.put("table", df)
-        with tm.assert_produces_warning(FutureWarning):
-            next(hdf.iteritems())
-
-
 def test_hdfstore_strides(setup_path):
     # GH22073
     df = DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})