Skip to content

Commit

Permalink
Backport PR #41806 on branch 1.2.x (#41835)
Browse files Browse the repository at this point in the history
  • Loading branch information
twoertwein authored Jun 6, 2021
1 parent 29e8da1 commit 60fd04a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the value to replace is a NumPy float array (:issue:`40371`)
- Regression in :class:`ExcelFile` where the file handle was left open after opening a corrupt file failed (:issue:`41778`)

.. ---------------------------------------------------------------------------
Expand Down
11 changes: 10 additions & 1 deletion pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,11 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
elif hasattr(self.handles.handle, "read"):
# N.B. xlrd.Book has a read attribute too
self.handles.handle.seek(0)
self.book = self.load_workbook(self.handles.handle)
try:
self.book = self.load_workbook(self.handles.handle)
except Exception:
self.close()
raise
elif isinstance(self.handles.handle, bytes):
self.book = self.load_workbook(BytesIO(self.handles.handle))
else:
Expand All @@ -406,6 +410,11 @@ def load_workbook(self, filepath_or_buffer):
pass

def close(self):
    """
    Close the workbook, if it can be closed, and then the file handles.

    ``self.book`` is looked up defensively because this method may run
    before the attribute has been assigned (for example when loading the
    workbook failed). ``self.handles`` is closed even when closing the
    workbook raises, so the underlying file handle is not leaked; the
    workbook's exception still propagates to the caller.
    """
    try:
        if hasattr(self, "book") and hasattr(self.book, "close"):
            # pyxlsb: opens a TemporaryFile
            # openpyxl: https://stackoverflow.com/questions/31416842/
            # openpyxl-does-not-close-excel-workbook-in-read-only-mode
            self.book.close()
    finally:
        # Always release the handles, whatever happened above.
        self.handles.close()

@property
Expand Down
6 changes: 0 additions & 6 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,12 +487,6 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
)

def close(self):
    """
    Close the workbook, then delegate to the parent class's ``close``.

    The parent ``close`` runs even when closing the workbook raises, so
    whatever the parent releases is not skipped; the workbook's exception
    still propagates to the caller.
    """
    try:
        # https://stackoverflow.com/questions/31416842/
        # openpyxl-does-not-close-excel-workbook-in-read-only-mode
        self.book.close()
    finally:
        super().close()

@property
def sheet_names(self) -> List[str]:
    """Return the ``sheetnames`` attribute of the loaded workbook."""
    workbook = self.book
    return workbook.sheetnames
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import datetime, time
from functools import partial
import os
from pathlib import Path
from urllib.error import URLError
from zipfile import BadZipFile

Expand Down Expand Up @@ -1273,3 +1274,21 @@ def test_read_datetime_multiindex(self, engine, read_ext):
expected = DataFrame([], columns=expected_column_index)

tm.assert_frame_equal(expected, actual)

def test_corrupt_files_closed(self, request, engine, read_ext):
    """
    Opening a corrupt file with ``pd.ExcelFile`` must not emit warnings.

    A file containing only the text "corrupt" is written with the
    extension under test and opened with the given engine. The reader's
    expected parse errors are swallowed; the test only checks that no
    warning is emitted while opening.
    """
    # GH41778
    if engine is None:
        pytest.skip("an explicit engine is required for this test")

    errors = (BadZipFile, ValueError)
    if engine == "xlrd":
        # xlrd raises its own error type in addition to the common ones.
        import xlrd

        errors = (BadZipFile, ValueError, xlrd.biffh.XLRDError)

    with tm.ensure_clean(f"corrupt{read_ext}") as file:
        Path(file).write_text("corrupt")
        # Expect no warnings at all; an unclosed file handle would
        # presumably surface as a ResourceWarning here.
        with tm.assert_produces_warning(False):
            try:
                pd.ExcelFile(file, engine=engine)
            except errors:
                # Failing to parse is expected for a corrupt file.
                pass

0 comments on commit 60fd04a

Please sign in to comment.