py-pdf · MartinThoma · Jul 20, 2022 · Jul 19, 2022
diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py
@@ -1317,6 +1317,15 @@ def read(self, stream: StreamType) -> None:
                 if found:
                     continue
                 # no xref table found at specified location
+                if "/Root" in self.trailer and not self.strict:
+                    # The trailer already has /Root, so in non-strict mode
+                    # warn and rebuild the xref table instead of failing.
+                    warnings.warn("Invalid parent xref., rebuild xref", PdfReadWarning)
+                    try:
+                        self._rebuild_xref_table(stream)
+                    except Exception as exc:
+                        raise PdfReadError("can not rebuild xref") from exc
+                    break
                 raise PdfReadError("Could not find xref table at specified location")
         # if not zero-indexed, verify that the table is correct; change it if necessary
         if self.xref_index and not self.strict:

diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -770,12 +770,12 @@ def test_get_fields():
     assert dict(fields["c1-1"]) == ({"/FT": "/Btn", "/T": "c1-1"})
 
 
+# also covers issue 1089
+@pytest.mark.filterwarnings("ignore::PyPDF2.errors.PdfReadWarning")
 def test_get_fields_read_else_block():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/934/934771.pdf"
     name = "tika-934771.pdf"
-    with pytest.raises(PdfReadError) as exc:
-        PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
-    assert exc.value.args[0] == "Could not find xref table at specified location"
+    PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
 
 
 def test_get_fields_read_else_block2():
@@ -786,12 +786,11 @@ def test_get_fields_read_else_block2():
     assert fields is None
 
 
+@pytest.mark.filterwarnings("ignore::PyPDF2.errors.PdfReadWarning")
 def test_get_fields_read_else_block3():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/957/957721.pdf"
     name = "tika-957721.pdf"
-    with pytest.raises(PdfReadError) as exc:
-        PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
-    assert exc.value.args[0] == "Could not find xref table at specified location"
+    PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
 
 
 def test_metadata_is_none():