Skip to content

Commit

Permalink
fix issues with missing destinations (#604)
Browse files Browse the repository at this point in the history
#604 

Root cause: the PDF was probably extracted from another document by a tool that did not extract the destinations properly.

changes:

    getDestinationPageNumber returns -1 when the destination's page is a NullObject
    when strict=False, a destination pointing to the first page is returned to prevent the error (no change when strict=True)
    note: a warning is generated

Test added using the sample PDF (Resources/issue-604.pdf).

(Duplicate of #821, redone to match the refactoring.)
  • Loading branch information
pubpub-zz committed Apr 30, 2022
1 parent 5e86977 commit 56af049
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
14 changes: 13 additions & 1 deletion PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,8 @@ def _getPageNumberByIndirect(self, indirectRef):
id2num[x.indirectRef.idnum] = i
self._pageId2Num = id2num

if isinstance(indirectRef, NullObject):
return -1
if isinstance(indirectRef, int):
idnum = indirectRef
else:
Expand Down Expand Up @@ -595,7 +597,17 @@ def getDestinationPageNumber(self, destination):
def _buildDestination(self, title, array):
    """
    Build a ``Destination`` object from a raw destination array.

    :param title: title of the destination.
    :param array: destination array; its first two items are the page
        reference and the fit type, and any remaining items are forwarded
        to ``Destination`` as extra positional arguments.
    :return: the ``Destination`` built from ``array``. If it cannot be
        built and ``self.strict`` is false, a ``/Fit`` destination
        pointing to the first page is returned instead.
    :raises PdfReadError: if the destination cannot be built and
        ``self.strict`` is true.
    """
    page, typ = array[0:2]
    array = array[2:]
    try:
        return Destination(title, page, typ, *array)
    except PdfReadError:
        # The warning is emitted in both modes, before deciding whether to
        # re-raise. %-formatting (rather than "+") keeps the report itself
        # from failing with TypeError when the title is not a plain str.
        warnings.warn("Unknown destination : %s %s" % (title, array))
        if self.strict:
            raise
        # Lenient mode: fall back to a link to the first page so that the
        # caller still receives a usable destination.
        return Destination(
            title, self.getPage(0).indirectRef, TextStringObject("/Fit")
        )


def _buildOutline(self, node):
dest, title, outline = None, None, None
Expand Down
Binary file added Resources/issue-604.pdf
Binary file not shown.
36 changes: 36 additions & 0 deletions Tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,42 @@ def test_reader_properties():
assert reader.pageMode is None
assert reader.isEncrypted is False

@pytest.mark.parametrize("strict", [True, False])
def test_issue604(strict):
    """
    Test with invalid destinations.

    In strict mode, reading the outlines must raise a ``PdfReadError``
    whose message mentions "Unknown Destination". In non-strict mode, the
    outlines must be readable and every bookmark must be resolvable to a
    page number without raising.
    """
    with open(os.path.join(RESOURCE_ROOT, "issue-604.pdf"), "rb") as f:
        if strict:
            # Both statements stay inside the context manager because the
            # error may surface either while opening the file or while
            # reading the outlines.
            with pytest.raises(PdfReadError, match="Unknown Destination"):
                pdf = PdfFileReader(f, strict=strict)
                pdf.getOutlines()
            return  # no usable bookmarks in strict mode

        pdf = PdfFileReader(f, strict=strict)
        bookmarks = pdf.getOutlines()

        def getDestPages(x):
            # An outline entry is either a destination or a (nested) list
            # of entries; mirror that structure with 1-based page numbers.
            if isinstance(x, list):
                return [getDestPages(y) for y in x]
            return pdf.getDestinationPageNumber(x) + 1

        # Smoke check: resolving every bookmark must not raise. The stream
        # is still open here because the reader is used to resolve pages.
        for b in bookmarks:
            getDestPages(b)

def test_decode_permissions():
reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
Expand Down

0 comments on commit 56af049

Please sign in to comment.