From d39e4f8c9c2a24c8420336409dcc43e5bfe0d7d9 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 29 May 2020 21:47:23 +0100 Subject: [PATCH] CLN: drop **kwargs from read_excel --- doc/source/whatsnew/v1.1.0.rst | 2 ++ pandas/io/excel/_base.py | 13 ++----------- pandas/tests/io/excel/test_readers.py | 17 ----------------- pandas/tests/io/excel/test_writers.py | 4 +--- 4 files changed, 5 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 88bf0e005a2215..f77c5df938e6ad 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -394,6 +394,8 @@ Backwards incompatible API changes - :meth:`Series.to_timestamp` now raises a ``TypeError`` if the axis is not a :class:`PeriodIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`) - :meth:`Series.to_period` now raises a ``TypeError`` if the axis is not a :class:`DatetimeIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`) - :func: `pandas.api.dtypes.is_string_dtype` no longer incorrectly identifies categorical series as string. +- :func:`read_excel` no longer takes ``**kwds`` arguments. 
This means that passing in the keyword ``chunksize`` now raises a ``TypeError`` + (previously raised a ``NotImplementedError``), while passing in the keyword ``encoding`` now also raises a ``TypeError`` instead of being accepted (:issue:`xxxxx`) ``MultiIndex.get_indexer`` interprets `method` argument differently ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d55bdffe689f23..f9e2327ffa92af 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -285,6 +285,7 @@ def read_excel( nrows=None, na_values=None, keep_default_na=True, + na_filter=True, verbose=False, parse_dates=False, date_parser=None, @@ -293,13 +294,8 @@ def read_excel( skipfooter=0, convert_float=True, mangle_dupe_cols=True, - **kwds, ): - for arg in ("sheet", "sheetname", "parse_cols"): - if arg in kwds: - raise TypeError(f"read_excel() got an unexpected keyword argument `{arg}`") - if not isinstance(io, ExcelFile): io = ExcelFile(io, engine=engine) elif engine and engine != io.engine: @@ -323,6 +319,7 @@ def read_excel( nrows=nrows, na_values=na_values, keep_default_na=keep_default_na, + na_filter=na_filter, verbose=verbose, parse_dates=parse_dates, date_parser=date_parser, @@ -331,7 +328,6 @@ def read_excel( skipfooter=skipfooter, convert_float=convert_float, mangle_dupe_cols=mangle_dupe_cols, - **kwds, ) @@ -861,11 +857,6 @@ def parse( DataFrame or dict of DataFrames DataFrame from the passed in Excel file. 
""" - if "chunksize" in kwds: - raise NotImplementedError( - "chunksize keyword of read_excel is not implemented" - ) - return self._reader.parse( sheet_name=sheet_name, header=header, diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index fd1533dd65dc44..109da630f76a23 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -897,12 +897,6 @@ def test_read_excel_bool_header_arg(self, read_ext): with pytest.raises(TypeError, match=msg): pd.read_excel("test1" + read_ext, header=arg) - def test_read_excel_chunksize(self, read_ext): - # GH 8011 - msg = "chunksize keyword of read_excel is not implemented" - with pytest.raises(NotImplementedError, match=msg): - pd.read_excel("test1" + read_ext, chunksize=100) - def test_read_excel_skiprows_list(self, read_ext): # GH 4903 if pd.read_excel.keywords["engine"] == "pyxlsb": @@ -1048,17 +1042,6 @@ def test_excel_passes_na_filter(self, read_ext, na_filter): expected = DataFrame(expected, columns=["Test"]) tm.assert_frame_equal(parsed, expected) - @pytest.mark.parametrize("arg", ["sheet", "sheetname", "parse_cols"]) - @td.check_file_leaks - def test_unexpected_kwargs_raises(self, read_ext, arg): - # gh-17964 - kwarg = {arg: "Sheet1"} - msg = fr"unexpected keyword argument `{arg}`" - - with pd.ExcelFile("test1" + read_ext) as excel: - with pytest.raises(TypeError, match=msg): - pd.read_excel(excel, **kwarg) - def test_excel_table_sheet_by_index(self, read_ext, df_ref): # For some reason pd.read_excel has no attribute 'keywords' here. # Skipping based on read_ext instead. diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index b909f1f3a958ff..ba759c7766fa5c 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -836,9 +836,7 @@ def test_to_excel_output_encoding(self, ext): with tm.ensure_clean("__tmp_to_excel_float_format__." 
+ ext) as filename: df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") - result = pd.read_excel( - filename, sheet_name="TestSheet", encoding="utf8", index_col=0 - ) + result = pd.read_excel(filename, sheet_name="TestSheet", index_col=0) tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, ext, path):