From 24197f6b9ece91effab62494c674e9b50b15e3f1 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 11 Apr 2023 22:40:09 +0200
Subject: [PATCH] ROB : invalid startxref pointing 1 char before fixes #1756

---
 pypdf/_reader.py     | 2 ++
 tests/test_reader.py | 9 +++++++++
 2 files changed, 11 insertions(+)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 3b21c29e7..a09c6a4de 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -1879,6 +1879,8 @@ def _get_xref_issues(stream: StreamType, startxref: int) -> int:
         """
         stream.seek(startxref - 1, 0)  # -1 to check character before
         line = stream.read(1)
+        if line == b"j":
+            line = stream.read(1)
         if line not in b"\r\n \t":
             return 1
         line = stream.read(4)
diff --git a/tests/test_reader.py b/tests/test_reader.py
index c1c24fb46..002c59d4b 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1392,3 +1392,12 @@ def test_broken_file_header():
         pdf_data.find(b"xref") - 1,
     )
     PdfReader(io.BytesIO(pdf_data))
+
+
+@pytest.mark.enable_socket()
+def test_iss1756():
+    url = "https://github.com/py-pdf/pypdf/files/11105591/641-Attachment-B-Pediatric-Cardiac-Arrest-8-1-2019.pdf"
+    name = "iss1756.pdf"
+    in_pdf = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    in_pdf.trailer["/ID"]
+    len(in_pdf.pages)