From 86f8d16bb26f72eb9454e444dabd42510a759723 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 12 Nov 2018 05:04:40 -0800 Subject: [PATCH] DEPR: Deprecate usecols as int in read_excel (#23635) Follow-up to gh-23544. --- doc/source/io.rst | 5 +++++ doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/excel.py | 8 ++++++++ pandas/tests/io/test_excel.py | 33 ++++++++++++++++++++++----------- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index beb1c1daba962c..34dc185c200e63 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2854,6 +2854,11 @@ It is often the case that users will insert columns to do temporary computations in Excel and you may not want to read in those columns. ``read_excel`` takes a ``usecols`` keyword to allow you to specify a subset of columns to parse. +.. deprecated:: 0.24.0 + +Passing in an integer for ``usecols`` has been deprecated. Please pass in a list +of ints from 0 to ``usecols`` inclusive instead. + If ``usecols`` is an integer, then it is assumed to indicate the last column to be parsed. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 73fd526640212a..ffde84a235bd62 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -970,6 +970,7 @@ Deprecations - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) +- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) .. _whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 2e93c237bb7eaa..c25a7670cce442 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -95,6 +95,10 @@ usecols : int, str, list-like, or callable default None * If None, then parse all columns, * If int, then indicates last column to be parsed + + .. deprecated:: 0.24.0 + Pass in a list of ints instead from 0 to `usecols` inclusive. + * If string, then indicates comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of both sides. @@ -778,6 +782,10 @@ def _maybe_convert_usecols(usecols): return usecols if is_integer(usecols): + warnings.warn(("Passing in an integer for `usecols` has been " + "deprecated. Please pass in a list of ints from " + "0 to `usecols` inclusive instead."), + FutureWarning, stacklevel=2) return lrange(usecols + 1) if isinstance(usecols, compat.string_types): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 49a3a3d58672db..9b147d53c06c4b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -105,23 +105,34 @@ def get_exceldf(self, basename, ext, *args, **kwds): class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase - @td.skip_if_no('xlrd', '1.0.1') # GH-22682 + @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 def test_usecols_int(self, ext): - dfref = self.get_csv_refdf('test1') - dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols=3) + df_ref = self.get_csv_refdf("test1") + df_ref = df_ref.reindex(columns=["A", "B", "C"]) - with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + # usecols as int + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df1 = self.get_exceldf("test1", ext, "Sheet1", + index_col=0, usecols=3) + + # usecols as int + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], + index_col=0, usecols=3) + + # parse_cols instead of usecols, usecols as int + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], index_col=0, parse_cols=3) # TODO add index to xls file) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - tm.assert_frame_equal(df3, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + tm.assert_frame_equal(df3, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_list(self, ext):