Skip to content

Commit

Permalink
Merge pull request #1210 from Kozea/fix_1126
Browse files Browse the repository at this point in the history
Support download attribute in <a> for attachment
  • Loading branch information
liZe authored Sep 10, 2020
2 parents d935217 + a9822fb commit 37445db
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 75 deletions.
16 changes: 10 additions & 6 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def _gather_links_and_bookmarks(box, bookmarks, links, anchors, matrix):
has_link = link and not isinstance(box, boxes.TextBox)
# In case of duplicate IDs, only the first is an anchor.
has_anchor = anchor_name and anchor_name not in anchors
is_attachment = hasattr(box, 'is_attachment') and box.is_attachment
is_attachment = getattr(box, 'is_attachment', False)
download_name = getattr(box, 'attachment_download', None)

if has_bookmark or has_link or has_anchor:
pos_x, pos_y, width, height = box.hit_area()
Expand All @@ -133,9 +134,11 @@ def _gather_links_and_bookmarks(box, bookmarks, links, anchors, matrix):
if matrix:
link = (
link_type, target, rectangle_aabb(
matrix, pos_x, pos_y, width, height))
matrix, pos_x, pos_y, width, height), download_name)
else:
link = (link_type, target, (pos_x, pos_y, width, height))
link = (
link_type, target, (pos_x, pos_y, width, height),
download_name)
links.append(link)
if matrix and (has_bookmark or has_anchor):
pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
Expand Down Expand Up @@ -487,14 +490,15 @@ def resolve_links(self):
for page in self.pages:
page_links = []
for link in page.links:
link_type, anchor_name, rectangle = link
link_type, anchor_name, rectangle, _ = link
if link_type == 'internal':
if anchor_name not in anchors:
LOGGER.error(
'No anchor #%s for internal URI reference',
anchor_name)
else:
page_links.append((link_type, anchor_name, rectangle))
page_links.append(
(link_type, anchor_name, rectangle, None))
else:
# External link
page_links.append(link)
Expand Down Expand Up @@ -565,7 +569,7 @@ def add_hyperlinks(self, links, anchors, context, scale):
# defined by cairo when drawing targets. This would give a feeling
# similar to what browsers do with links that span multiple lines.
for link in links:
link_type, link_target, rectangle = link
link_type, link_target, rectangle, _ = link
if link_type == 'external':
attributes = "rect=[{} {} {} {}] uri='{}'".format(*(
[int(round(i * scale)) for i in rectangle] +
Expand Down
1 change: 1 addition & 0 deletions weasyprint/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def handle_td(element, box, _get_image_from_uri, _base_url):
def handle_a(element, box, _get_image_from_uri, base_url):
    """Handle the ``rel`` and ``download`` attributes of ``<a>``.

    Two attributes are stored on ``box``:

    - ``is_attachment``: the result of
      ``element_has_link_type(element, 'attachment')``;
    - ``attachment_download``: the value of the element's ``download``
      attribute, as returned by ``element.get('download')`` (presumably
      ``None`` when the attribute is missing; confirm against the element
      type used by the caller).

    ``_get_image_from_uri`` and ``base_url`` are accepted but not used here.

    :return:
        A one-item list containing the same ``box``.

    """
    attachment_flag = element_has_link_type(element, 'attachment')
    box.is_attachment = attachment_flag

    requested_name = element.get('download')
    box.attachment_download = requested_name

    return [box]


Expand Down
46 changes: 7 additions & 39 deletions weasyprint/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

import hashlib
import io
import mimetypes
import os
import re
import string
Expand Down Expand Up @@ -361,7 +360,7 @@ def _write_compressed_file_object(pdf, file):
return object_number


def _get_filename_from_result(url, result):
def _get_filename_from_url(url):
"""Derive a filename from a fetched resource.
This is either the filename returned by the URL fetcher, the last URL path
Expand All @@ -371,12 +370,6 @@ def _get_filename_from_result(url, result):

filename = None

# A given filename will always take precedence
if result:
filename = result.get('filename')
if filename:
return filename

# The URL path likely contains a filename, which is a good second guess
if url:
split = urlsplit(url)
Expand All @@ -385,32 +378,7 @@ def _get_filename_from_result(url, result):
if filename == '':
filename = None

if filename is None:
# The URL lacks a path altogether. Use a synthetic name.

# Using guess_extension is a great idea, but sadly the extension is
# probably random, depending on the alignment of the stars, which car
# you're driving and which software has been installed on your machine.
#
# Unfortunately this isn't even idempotent on one machine, because the
# extension can depend on PYTHONHASHSEED if mimetypes has multiple
# extensions to offer
extension = None
if result:
mime_type = result.get('mime_type')
if mime_type == 'text/plain':
# text/plain has a plethora of extensions - all garbage
extension = '.txt'
else:
extension = mimetypes.guess_extension(mime_type) or '.bin'
else:
extension = '.bin'

filename = 'attachment' + extension
else:
filename = unquote(filename)

return filename
return 'attachment.bin' if filename is None else unquote(filename)


def _write_pdf_embedded_files(pdf, attachments, url_fetcher):
Expand Down Expand Up @@ -438,7 +406,7 @@ def _write_pdf_embedded_files(pdf, attachments, url_fetcher):
return pdf.write_new_object(b''.join(content))


def _write_pdf_attachment(pdf, attachment, url_fetcher):
def _write_pdf_attachment(pdf, attachment, url_fetcher, download_name=None):
"""Write an attachment to the PDF stream.
:return:
Expand Down Expand Up @@ -466,7 +434,7 @@ def _write_pdf_attachment(pdf, attachment, url_fetcher):

# TODO: Use the result object from a URL fetch operation to provide more
# details on the possible filename
filename = _get_filename_from_result(url, None)
filename = download_name or _get_filename_from_url(url)

return pdf.write_new_object(pdf_format(
'<< /Type /Filespec /F () /UF {0!P} /EF << /F {1} 0 R >> '
Expand Down Expand Up @@ -509,11 +477,11 @@ def write_pdf_metadata(fileobj, scale, url_fetcher, attachments,
# because two links might have the same href, but different titles.
annot_files = {}
for page_links in attachment_links:
for link_type, target, rectangle in page_links:
for link_type, target, rectangle, download_name in page_links:
if link_type == 'attachment' and target not in annot_files:
# TODO: use the title attribute as description
annot_files[target] = _write_pdf_attachment(
pdf, (target, None), url_fetcher)
pdf, (target, None), url_fetcher, download_name)

for pdf_page, document_page, page_links in zip(
pdf.pages, pages, attachment_links):
Expand Down Expand Up @@ -555,7 +523,7 @@ def write_pdf_metadata(fileobj, scale, url_fetcher, attachments,
# would give a feeling similar to what browsers do with links that
# span multiple lines.
annotations = []
for link_type, target, rectangle in page_links:
for link_type, target, rectangle, _ in page_links:
if link_type == 'attachment' and annot_files[target] is not None:
matrix = cairo.Matrix(
xx=scale, yy=-scale, y0=document_page.height * scale)
Expand Down
77 changes: 48 additions & 29 deletions weasyprint/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,12 @@ def _round_meta(pages):
anchors[anchor_name] = round(pos_x, 6), round(pos_y, 6)
links = page.links
for i, link in enumerate(links):
link_type, target, (pos_x, pos_y, width, height) = link
link_type, target, (pos_x, pos_y, width, height), download_name = (
link)
link = (
link_type, target, (round(pos_x, 6), round(pos_y, 6),
round(width, 6), round(height, 6)))
round(width, 6), round(height, 6)),
download_name)
links[i] = link
bookmarks = page.bookmarks
for i, (level, label, (pos_x, pos_y), state) in enumerate(bookmarks):
Expand Down Expand Up @@ -720,28 +722,28 @@ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
</p>
''', [
[
('external', 'http://weasyprint.org', (0, 0, 30, 20)),
('external', 'http://weasyprint.org', (0, 0, 30, 30)),
('internal', 'lipsum', (10, 100, 32, 20)),
('internal', 'lipsum', (10, 100, 32, 32))
('external', 'http://weasyprint.org', (0, 0, 30, 20), None),
('external', 'http://weasyprint.org', (0, 0, 30, 30), None),
('internal', 'lipsum', (10, 100, 32, 20), None),
('internal', 'lipsum', (10, 100, 32, 32), None)
],
[('internal', 'hello', (0, 0, 200, 30))],
[('internal', 'hello', (0, 0, 200, 30), None)],
], [
{'hello': (0, 200)},
{'lipsum': (0, 0)}
], [
(
[
('external', 'http://weasyprint.org', (0, 0, 30, 20)),
('external', 'http://weasyprint.org', (0, 0, 30, 30)),
('internal', 'lipsum', (10, 100, 32, 20)),
('internal', 'lipsum', (10, 100, 32, 32))
('external', 'http://weasyprint.org', (0, 0, 30, 20), None),
('external', 'http://weasyprint.org', (0, 0, 30, 30), None),
('internal', 'lipsum', (10, 100, 32, 20), None),
('internal', 'lipsum', (10, 100, 32, 32), None)
],
[('hello', 0, 200)],
),
(
[
('internal', 'hello', (0, 0, 200, 30))
('internal', 'hello', (0, 0, 200, 30), None)
],
[('lipsum', 0, 0)]),
])
Expand All @@ -751,28 +753,30 @@ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
<body style="width: 200px">
<a href="../lipsum/é_%E9" style="display: block; margin: 10px 5px">
''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))]],
(5, 10, 190, 0), None)]],
[{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))], [])],
(5, 10, 190, 0), None)], [])],
base_url='http://weasyprint.org/foo/bar/')

assert_links(
'''
<body style="width: 200px">
<div style="display: block; margin: 10px 5px;
-weasy-link: url(../lipsum/é_%E9)">
''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))]],
(5, 10, 190, 0), None)]],
[{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))], [])],
(5, 10, 190, 0), None)], [])],
base_url='http://weasyprint.org/foo/bar/')

# Relative URI reference without a base URI: allowed for links
assert_links(
'''
<body style="width: 200px">
<a href="../lipsum" style="display: block; margin: 10px 5px">
''', [[('external', '../lipsum', (5, 10, 190, 0))]], [{}],
[([('external', '../lipsum', (5, 10, 190, 0))], [])], base_url=None)
''', [[('external', '../lipsum', (5, 10, 190, 0), None)]], [{}],
[([('external', '../lipsum', (5, 10, 190, 0), None)], [])],
base_url=None)

# Relative URI reference without a base URI: not supported for -weasy-link
assert_links(
Expand All @@ -791,11 +795,11 @@ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
<a href="#lipsum" id="lipsum"
style="display: block; margin: 10px 5px"></a>
<a href="http://weasyprint.org/" style="display: block"></a>
''', [[('internal', 'lipsum', (5, 10, 190, 0)),
('external', 'http://weasyprint.org/', (0, 10, 200, 0))]],
''', [[('internal', 'lipsum', (5, 10, 190, 0), None),
('external', 'http://weasyprint.org/', (0, 10, 200, 0), None)]],
[{'lipsum': (5, 10)}],
[([('internal', 'lipsum', (5, 10, 190, 0)),
('external', 'http://weasyprint.org/', (0, 10, 200, 0))],
[([('internal', 'lipsum', (5, 10, 190, 0), None),
('external', 'http://weasyprint.org/', (0, 10, 200, 0), None)],
[('lipsum', 5, 10)])],
base_url=None)

Expand All @@ -805,9 +809,10 @@ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
<div style="-weasy-link: url(#lipsum);
margin: 10px 5px" id="lipsum">
''',
[[('internal', 'lipsum', (5, 10, 190, 0))]],
[[('internal', 'lipsum', (5, 10, 190, 0), None)]],
[{'lipsum': (5, 10)}],
[([('internal', 'lipsum', (5, 10, 190, 0))], [('lipsum', 5, 10)])],
[([('internal', 'lipsum', (5, 10, 190, 0), None)],
[('lipsum', 5, 10)])],
base_url=None)

assert_links(
Expand All @@ -817,10 +822,11 @@ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
<a href="#lipsum"></a>
<a href="#missing" id="lipsum"></a>
''',
[[('internal', 'lipsum', (0, 0, 200, 15)),
('internal', 'missing', (0, 15, 200, 15))]],
[[('internal', 'lipsum', (0, 0, 200, 15), None),
('internal', 'missing', (0, 15, 200, 15), None)]],
[{'lipsum': (0, 15)}],
[([('internal', 'lipsum', (0, 0, 200, 15))], [('lipsum', 0, 15)])],
[([('internal', 'lipsum', (0, 0, 200, 15), None)],
[('lipsum', 0, 15)])],
base_url=None,
warnings=[
'ERROR: No anchor #missing for internal URI reference'])
Expand All @@ -831,11 +837,24 @@ def assert_links(html, expected_links_by_page, expected_anchors_by_page,
<a href="#lipsum" id="lipsum" style="display: block; height: 20px;
transform: rotate(90deg) scale(2)">
''',
[[('internal', 'lipsum', (30, 10, 40, 200))]],
[[('internal', 'lipsum', (30, 10, 40, 200), None)]],
[{'lipsum': (70, 10)}],
[([('internal', 'lipsum', (30, 10, 40, 200))], [('lipsum', 70, 10)])],
[([('internal', 'lipsum', (30, 10, 40, 200), None)],
[('lipsum', 70, 10)])],
round=True)

# Download for attachment
assert_links(
'''
<body style="width: 200px">
<a rel=attachment href="pattern.png" download="wow.png"
style="display: block; margin: 10px 5px">
''', [[('attachment', 'pattern.png',
(5, 10, 190, 0), 'wow.png')]],
[{}], [([('attachment', 'pattern.png',
(5, 10, 190, 0), 'wow.png')], [])],
base_url=None)


# Make relative URL references work with our custom URL scheme.
uses_relative.append('weasyprint-custom')
Expand Down
2 changes: 1 addition & 1 deletion weasyprint/tools/navigator.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def render_template(url):
for width, height, data_url, links, anchors in get_pages(html):
write('<section style="width: {0}px; height: {1}px">\n'
' <img src="{2}">\n'.format(width, height, data_url))
for link_type, target, (pos_x, pos_y, width, height) in links:
for link_type, target, (pos_x, pos_y, width, height), _ in links:
href = ('#' + target if link_type == 'internal'
else '/view/' + target)
write(' <a style="left: {0}px; top: {1}px; '
Expand Down

0 comments on commit 37445db

Please sign in to comment.