Skip to content

Commit

Permalink
TST: Add xfail test for IndexError when extracting text
Browse files: browse the repository at this point in the history
See #1091
  • Loading branch information
MartinThoma committed Jul 17, 2022
1 parent 5ddf4cb commit a91dce7
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tests/test_page.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -322,3 +322,14 @@ def test_get_fonts(pdf_path, password, embedded, unembedded):
a = a.union(a_tmp)
b = b.union(b_tmp)
assert (a, b) == (embedded, unembedded)


@pytest.mark.xfail(reason="#1091")
def test_text_extraction_issue_1091():
    """Run text extraction over every page of the tika-966635 sample PDF.

    The test is marked ``xfail`` with reason "#1091". Constructing the
    reader is expected to emit a ``PdfReadWarning``.
    """
    pdf_bytes = get_pdf_from_url(
        "https://corpora.tika.apache.org/base/docs/govdocs1/966/966635.pdf",
        name="tika-966635.pdf",
    )
    # Only the reader construction is wrapped in the warning check.
    with pytest.warns(PdfReadWarning):
        reader = PdfReader(BytesIO(pdf_bytes))
    # The extracted text is discarded; the call itself is what is exercised.
    for pdf_page in reader.pages:
        pdf_page.extract_text()

0 comments on commit a91dce7

Please sign in to comment.