Skip to content

Commit

Permalink
Address 3820
Browse files Browse the repository at this point in the history
When creating link destinations from a PDF's names dictionary as created by `.resolve_names()`, we incorrectly use strings with %-escaped spaces (and other characters) as keys to find dictionary values.
This change reverts %-escaped names to their original form, e.g. every "%20" substring is converted back to a space.
  • Loading branch information
JorjMcKie committed Aug 30, 2024
1 parent 9c19e27 commit 877d7e5
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6835,6 +6835,17 @@ def uri_to_dict(uri):
else:
ret[item] = None
return ret

def unescape(name):
    """Unescape '%AB' substrings to chr(0xAB).

    A doubled percent sign ("%%") is treated as an escaped literal '%'.
    A '%' that is not followed by exactly two hexadecimal digits is not a
    valid escape; it is copied to the result unchanged instead of raising
    ValueError or being mis-decoded.

    Args:
        name: string that may contain %-escaped characters.

    Returns:
        The string with every valid '%XX' escape replaced by chr(0xXX).
    """
    hex_digits = "0123456789abcdefABCDEF"
    # Rewrite "%%" as "%25" so an escaped '%' goes through the normal path.
    head, *escaped = name.replace("%%", "%25").split("%")
    parts = [head]
    for item in escaped:
        code = item[:2]
        # Check the digits explicitly: int() alone would also accept a sign
        # or surrounding whitespace (e.g. "+1"), and fail on short input.
        if len(code) == 2 and code[0] in hex_digits and code[1] in hex_digits:
            parts.append(chr(int(code, base=16)))
            parts.append(item[2:])
        else:
            # Malformed escape: restore the '%' removed by split().
            parts.append("%" + item)
    return "".join(parts)

if rlink and not self.uri.startswith("#"):
self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
Expand Down Expand Up @@ -6862,7 +6873,7 @@ def uri_to_dict(uri):
m = re.match('^#nameddest=(.*)', self.uri)
assert document
if document and m:
named = m.group(1)
named = unescape(m.group(1))
self.named = document.resolve_names().get(named)
if self.named is None:
# document.resolve_names() does not contain an
Expand Down
Binary file added tests/resources/test-3820.pdf
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/test_toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
circular = os.path.join(scriptdir, "resources", "circular-toc.pdf")
full_toc = os.path.join(scriptdir, "resources", "full_toc.txt")
simple_toc = os.path.join(scriptdir, "resources", "simple_toc.txt")
file_3820 = os.path.join(scriptdir, "resources", "test-3820.pdf")
doc = pymupdf.open(filename)


Expand Down Expand Up @@ -274,3 +275,14 @@ def test_3400():
links_actual.append( (page_i, link) )

assert links_actual == links_expected



def test_3820():
    """Check each extended TOC entry's page against its destination dict.

    For every entry of the non-simple TOC of the test file, the third item
    (1-based page number) must equal the "page" value of the fourth item
    (destination dictionary) plus one.
    """
    document = pymupdf.open(file_3820)
    for entry in document.get_toc(simple=False):
        _level, _title, page_number, destination = entry
        assert page_number == destination["page"] + 1


0 comments on commit 877d7e5

Please sign in to comment.