Skip to content

Commit

Permalink
BUG: catch the case where w[0] is an IndirectObject instead of an int (
Browse files Browse the repository at this point in the history
…#2154)

Closes #2137
  • Loading branch information
rchen19 authored Sep 10, 2023
1 parent 0ca4d37 commit 4657df5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def compute_space_width(
else:
w = []
while len(w) > 0:
st = w[0]
st = w[0] if isinstance(w[0], int) else w[0].get_object()
second = w[1].get_object()
if isinstance(second, int):
for x in range(st, second):
Expand Down
12 changes: 12 additions & 0 deletions tests/test_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,18 @@ def test_unixxx_glyphs():
assert pat in txt


@pytest.mark.enable_socket()
def test_cmap_compute_space_width():
    """Issue #2137: text extraction of the first page must not raise."""
    # The PDF attached to the GitHub issue has a URL that is too long to pass
    # the code style check, so the original arXiv URL is used instead.
    # url = "https://github.com/py-pdf/pypdf/files/12489914/Morris.et.al.-.2020.-.TextAttack.A.Framework.for.Adversarial.Attacks.Data.Augmentation.and.Adversarial.Training.in.NLP.pdf"
    url = "https://arxiv.org/pdf/2005.05909.pdf"
    name = "TextAttack_paper.pdf"
    pdf_bytes = get_data_from_url(url, name=name)
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    first_page.extract_text()  # no error


@pytest.mark.enable_socket()
def test_tabs_in_cmap():
"""Issue #2173"""
Expand Down

0 comments on commit 4657df5

Please sign in to comment.