Skip to content

Commit

Permalink
ROB: Deal with invalid Destinations (#1028)
Browse files Browse the repository at this point in the history
Issue: An `AttributeError: 'NoneType' object has no attribute 'get_object'`
was raised when trying to write a page that was merged.

Fix: Guard IndirectObject.get_object access

Closes #997
  • Loading branch information
MartinThoma authored Jun 26, 2022
1 parent 3c750c1 commit 595739f
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
2 changes: 2 additions & 0 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ def _trim_outline(
else:
prev_header_added = False
for j in range(*pages):
if o["/Page"] is None:
continue
if pdf.pages[j].get_object() == o["/Page"].get_object():
o[NameObject("/Page")] = o["/Page"].get_object()
new_outline.append(o)
Expand Down
5 changes: 4 additions & 1 deletion PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,10 @@ def __init__(self, idnum: int, generation: int, pdf: Any) -> None: # PdfReader
self.pdf = pdf

def get_object(self) -> Optional[PdfObject]:
return self.pdf.get_object(self).get_object()
obj = self.pdf.get_object(self)
if obj is None:
return None
return obj.get_object()

def __repr__(self) -> str:
return f"IndirectObject({self.idnum!r}, {self.generation!r})"
Expand Down
20 changes: 19 additions & 1 deletion tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from PyPDF2 import PdfReader, PdfWriter
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from PyPDF2.constants import TypFitArguments as TF
from PyPDF2.errors import PdfReadError, PdfReadWarning, PdfStreamError
from PyPDF2.generic import (
Expand Down Expand Up @@ -473,3 +473,21 @@ def test_bool_repr():

# cleanup
os.remove("tmp-fields-report.txt")


def test_issue_997():
    """Regression test for issue #997 (invalid destinations while merging).

    Appends the PDF attached to the issue to a ``PdfMerger`` and writes the
    merged result to disk. Appending is expected to emit a
    ``PdfReadWarning`` whose message matches ``"not defined"``; the whole
    append/write sequence must complete without raising.
    """
    url = "https://github.com/py-pdf/PyPDF2/files/8908874/Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf"
    name = "gh-issue-997.pdf"
    merged_filename = "tmp-out.pdf"

    merger = PdfMerger()
    try:
        with pytest.warns(PdfReadWarning, match="not defined"):
            # The error from issue #997 was raised here before the fix.
            merger.append(BytesIO(get_pdf_from_url(url, name=name)))
        with open(merged_filename, "wb") as f:
            merger.write(f)
    finally:
        # Release the merger and remove the output file even when the test
        # fails, so a failure does not leak a temp file into later runs.
        merger.close()
        if os.path.exists(merged_filename):
            os.remove(merged_filename)

0 comments on commit 595739f

Please sign in to comment.