Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REG: read_excel with engine specified raises on non-path/non-buffer #39586

Merged
merged 11 commits into from
Feb 7, 2021
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Fixed regressions
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
-
- Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`)

.. ---------------------------------------------------------------------------

Expand Down
31 changes: 21 additions & 10 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,26 +1069,37 @@ def __init__(

xlrd_version = LooseVersion(get_version(xlrd))

if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
ext = "xls"
else:
ext = inspect_excel_format(
content_or_path=path_or_buffer, storage_options=storage_options
)

ext = None
if engine is None:
# Only determine ext if it is needed
if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
rhshadrach marked this conversation as resolved.
Show resolved Hide resolved
ext = "xls"
else:
ext = inspect_excel_format(
content_or_path=path_or_buffer, storage_options=storage_options
)

# ext will always be valid, otherwise inspect_excel_format would raise
engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
if engine == "auto":
engine = get_default_engine(ext, mode="reader")

if engine == "xlrd" and ext != "xls" and xlrd_version is not None:
if xlrd_version >= "2":
if engine == "xlrd" and xlrd_version is not None:
if ext is None:
# Need ext to determine ext in order to raise/warn
if isinstance(path_or_buffer, xlrd.Book):
ext = "xls"
else:
ext = inspect_excel_format(
path_or_buffer, storage_options=storage_options
)

if ext != "xls" and xlrd_version >= "2":
raise ValueError(
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
f"only the xls format is supported. Install openpyxl instead."
)
else:
elif ext != "xls":
caller = inspect.stack()[1]
if (
caller.filename.endswith(
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,12 @@ def test_to_excel_with_openpyxl_engine(ext):
).highlight_max()

styled.to_excel(filename, engine="openpyxl")


def test_read_workbook(datapath, ext):
# GH 39528
filename = datapath("io", "data", "excel", "test1" + ext)
wb = openpyxl.load_workbook(filename)
result = pd.read_excel(wb, engine="openpyxl")
expected = pd.read_excel(filename)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It cannot hurt to call wb.close() before tm.assert_frame_equal

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @twoertwein, added.

tm.assert_frame_equal(result, expected)
9 changes: 8 additions & 1 deletion pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from functools import partial
import os
from urllib.error import URLError
from zipfile import BadZipFile

import numpy as np
import pytest
Expand Down Expand Up @@ -685,7 +686,13 @@ def test_missing_file_raises(self, read_ext):

def test_corrupt_bytes_raises(self, read_ext, engine):
bad_stream = b"foo"
with pytest.raises(ValueError, match="File is not a recognized excel file"):
if engine is None or engine == "xlrd":
error = ValueError
msg = "File is not a recognized excel file"
else:
error = BadZipFile
msg = "File is not a zip file"
with pytest.raises(error, match=msg):
pd.read_excel(bad_stream)

@tm.network
Expand Down