Broaden read csv param types (#630)

* gh-623: broaden 'names' param of read_csv
  Broaden the type hint for the 'names' param of read_csv (and read_table, which behaves similarly) from the previous list[str], so that other valid types are accepted by mypy.
* allow None as names param of read_clipboard
  Noticed read_clipboard after making the changes to read_csv and read_table; it calls read_csv, so its signature should match, but it was missing None as an option.
* broaden 'names' param of read_clipboard
  Match prior change to read_csv, since read_clipboard calls read_csv.
* broaden 'names' param of read_excel
  Match prior change to read_csv, read_table, read_clipboard.
* gh-605: broader usecols param type hint
  This fixes the PyCharm tooltip problem in gh-605, and also allows more list-like types of strings (tuples of strings, as well as mutable sequences of strings other than list), and callables that accept hashables, not just strings.
* test that read_excel accepts string for usecols
* test names and usecols correctly exclude strings
  Strings aren't valid arguments here (except for read_excel, where we now have a test to check that a string is accepted). Adding tests to make sure the type hints aren't so wide that they accept string arguments by mistake.
pandas-dev · Apr 6, 2023 · a9dc8ba · a9dc8ba
1 parent 5945ada
commit a9dc8ba
Show file tree

Hide file tree

Showing 5 changed files with 325 additions and 100 deletions.
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -269,6 +269,9 @@ ListLikeExceptSeriesAndStr = TypeVar(
     "ListLikeExceptSeriesAndStr", MutableSequence, np.ndarray, tuple, "Index"
 )
 ListLikeU: TypeAlias = Sequence | np.ndarray | Series | Index
+ListLikeHashable: TypeAlias = (
+    MutableSequence[HashableT] | np.ndarray | tuple[HashableT, ...] | range
+)
 StrLike: TypeAlias = str | np.str_
 IndexIterScalar: TypeAlias = (
     str
@@ -295,6 +298,16 @@ np_ndarray_str: TypeAlias = npt.NDArray[np.str_]
 
 IndexType: TypeAlias = slice | np_ndarray_anyint | Index | list[int] | Series[int]
 MaskType: TypeAlias = Series[bool] | np_ndarray_bool | list[bool]
+UsecolsArgType: TypeAlias = (
+    MutableSequence[str]
+    | tuple[str, ...]
+    | Sequence[int]
+    | Series
+    | Index
+    | np.ndarray
+    | Callable[[HashableT], bool]
+    | None
+)
 # Scratch types for generics
 S1 = TypeVar(
     "S1",

diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi
@@ -11,16 +11,15 @@ from typing import (
 )
 
 from pandas.core.frame import DataFrame
-from pandas.core.indexes.base import Index
-from pandas.core.series import Series
 
 from pandas._typing import (
     CompressionOptions,
     CSVEngine,
     CSVQuoting,
     DtypeArg,
+    ListLikeHashable,
     StorageOptions,
-    npt,
+    UsecolsArgType,
 )
 
 from pandas.io.parsers import TextFileReader
@@ -31,15 +30,9 @@ def read_clipboard(
     *,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,
@@ -94,15 +87,9 @@ def read_clipboard(
     *,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,
@@ -157,15 +144,9 @@ def read_clipboard(
     *,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,

diff --git a/pandas-stubs/io/excel/_base.pyi b/pandas-stubs/io/excel/_base.pyi
@@ -22,8 +22,10 @@ from xlrd.book import Book
 from pandas._typing import (
     Dtype,
     FilePath,
+    ListLikeHashable,
     ReadBuffer,
     StorageOptions,
+    UsecolsArgType,
     WriteExcelBuffer,
 )
 
@@ -40,9 +42,9 @@ def read_excel(
     sheet_name: list[int | str] | None,
     *,
     header: int | Sequence[int] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | Sequence[int] | None = ...,
-    usecols: Sequence[int] | Sequence[str] | Callable[[str], bool] | None = ...,
+    usecols: str | UsecolsArgType = ...,
     dtype: str | Dtype | Mapping[str, str | Dtype] | None = ...,
     engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
     converters: Mapping[int | str, Callable[[object], object]] | None = ...,
@@ -78,9 +80,9 @@ def read_excel(
     sheet_name: int | str = ...,
     *,
     header: int | Sequence[int] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | Sequence[int] | None = ...,
-    usecols: Sequence[int] | Sequence[str] | Callable[[str], bool] | None = ...,
+    usecols: str | UsecolsArgType = ...,
     dtype: str | Dtype | Mapping[str, str | Dtype] | None = ...,
     engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
     converters: Mapping[int | str, Callable[[object], object]] | None = ...,
@@ -155,13 +157,9 @@ class ExcelFile:
         self,
         sheet_name: list[int | str] | None,
         header: int | Sequence[int] | None = ...,
-        names: list[str] | None = ...,
+        names: ListLikeHashable | None = ...,
         index_col: int | Sequence[int] | None = ...,
-        usecols: str
-        | Sequence[int]
-        | Sequence[str]
-        | Callable[[str], bool]
-        | None = ...,
+        usecols: str | UsecolsArgType = ...,
         converters: dict[int | str, Callable[[object], object]] | None = ...,
         true_values: Iterable[Hashable] | None = ...,
         false_values: Iterable[Hashable] | None = ...,
@@ -185,13 +183,9 @@ class ExcelFile:
         self,
         sheet_name: int | str,
         header: int | Sequence[int] | None = ...,
-        names: list[str] | None = ...,
+        names: ListLikeHashable | None = ...,
         index_col: int | Sequence[int] | None = ...,
-        usecols: str
-        | Sequence[int]
-        | Sequence[str]
-        | Callable[[str], bool]
-        | None = ...,
+        usecols: str | UsecolsArgType = ...,
         converters: dict[int | str, Callable[[object], object]] | None = ...,
         true_values: Iterable[Hashable] | None = ...,
         false_values: Iterable[Hashable] | None = ...,

diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi
@@ -16,8 +16,6 @@ from typing import (
 )
 
 from pandas.core.frame import DataFrame
-from pandas.core.indexes.base import Index
-from pandas.core.series import Series
 from typing_extensions import Self
 
 from pandas._typing import (
@@ -26,9 +24,10 @@ from pandas._typing import (
     CSVQuoting,
     DtypeArg,
     FilePath,
+    ListLikeHashable,
     ReadCsvBuffer,
     StorageOptions,
-    npt,
+    UsecolsArgType,
 )
 
 from pandas.io.common import IOHandles
@@ -40,16 +39,9 @@ def read_csv(
     sep: str | None = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | tuple[str, ...]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[int | str, Callable[[str], Any]]
@@ -106,16 +98,9 @@ def read_csv(
     sep: str | None = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | tuple[str, ...]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[int | str, Callable[[str], Any]]
@@ -172,16 +157,9 @@ def read_csv(
     sep: str | None = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | tuple[str, ...]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[int | str, Callable[[str], Any]]
@@ -238,16 +216,9 @@ def read_table(
     sep: str | None = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | tuple[str, ...]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[int | str, Callable[[str], Any]]
@@ -304,16 +275,9 @@ def read_table(
     sep: str | None = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | tuple[str, ...]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[int | str, Callable[[str], Any]]
@@ -370,16 +334,9 @@ def read_table(
     sep: str | None = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
-    names: list[str] | None = ...,
+    names: ListLikeHashable | None = ...,
     index_col: int | str | Sequence[str | int] | Literal[False] | None = ...,
-    usecols: list[str]
-    | tuple[str, ...]
-    | Sequence[int]
-    | Series
-    | Index
-    | npt.NDArray
-    | Callable[[str], bool]
-    | None = ...,
+    usecols: UsecolsArgType = ...,
     dtype: DtypeArg | defaultdict | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[int | str, Callable[[str], Any]]