Skip to content

Commit

Permalink
Merge pull request #1695 from lutrasecurity/lang
Browse files Browse the repository at this point in the history
Add language information to PDF
  • Loading branch information
liZe authored Aug 18, 2022
2 parents 9a4c5fc + 42db8bd commit 67cd41a
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 2 deletions.
4 changes: 4 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,7 @@ def assert_meta(html, **meta):
meta.setdefault('created', None)
meta.setdefault('modified', None)
meta.setdefault('attachments', [])
meta.setdefault('lang', None)
meta.setdefault('custom', {})
assert vars(FakeHTML(string=html).render().metadata) == meta

Expand All @@ -1011,6 +1012,7 @@ def test_html_meta_1():
def test_html_meta_2():
assert_meta(
'''
<html lang="en"><head>
<meta name=author content="I Me &amp; Myself">
<meta name=author content="Smith, John">
<title>Test document</title>
Expand All @@ -1027,6 +1029,7 @@ def test_html_meta_2():
<meta name=dcterms.modified content=2013>
<meta name=keywords content="Python; pydyf">
<meta name=description content="Blah… ">
</head></html>
''',
authors=['I Me & Myself', 'Smith, John'],
title='Test document',
Expand All @@ -1035,6 +1038,7 @@ def test_html_meta_2():
description="Blah… ",
created='2011-04',
modified='2013',
lang='en',
custom={'dummy': 'ignored'})


Expand Down
5 changes: 4 additions & 1 deletion weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class DocumentMetadata:
"""
def __init__(self, title=None, authors=None, description=None,
keywords=None, generator=None, created=None, modified=None,
attachments=None, custom=None):
attachments=None, lang=None, custom=None):
#: The title of the document, as a string or :obj:`None`.
#: Extracted from the ``<title>`` element in HTML
#: and written to the ``/Title`` info field in PDF.
Expand Down Expand Up @@ -145,6 +145,9 @@ def __init__(self, title=None, authors=None, description=None,
#: Extracted from the ``<link rel=attachment>`` elements in HTML
#: and written to the ``/EmbeddedFiles`` dictionary in PDF.
self.attachments = attachments or []
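#: Document language as a BCP 47 language tag, as a string or :obj:`None`.
#: Extracted from the ``lang`` attribute of the ``<html>`` element in HTML
#: and written to the ``/Lang`` entry of the catalog in PDF.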
self.lang = lang
#: Custom metadata, as a dict whose keys are the metadata names and
#: values are the metadata values.
self.custom = custom or {}
Expand Down
3 changes: 2 additions & 1 deletion weasyprint/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def get_html_metadata(html):
modified = None
attachments = []
custom = {}
lang = html.etree_element.attrib.get('lang', None)
for element in html.wrapper_element.query_all('title', 'meta', 'link'):
element = element.etree_element
if element.tag == 'title' and title is None:
Expand Down Expand Up @@ -305,7 +306,7 @@ def get_html_metadata(html):
return dict(title=title, description=description, generator=generator,
keywords=keywords, authors=authors,
created=created, modified=modified,
attachments=attachments, custom=custom)
attachments=attachments, lang=lang, custom=custom)


def strip_whitespace(string):
Expand Down
2 changes: 2 additions & 0 deletions weasyprint/pdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ def generate_pdf(pages, url_fetcher, metadata, fonts, target, zoom,
if metadata.modified:
pdf.info['ModDate'] = pydyf.String(
_w3c_date_to_pdf(metadata.modified, 'modified'))
if metadata.lang:
pdf.catalog['Lang'] = pydyf.String(metadata.lang)
if custom_metadata:
for key, value in metadata.custom.items():
key = ''.join(char for char in key if char.isalnum())
Expand Down

0 comments on commit 67cd41a

Please sign in to comment.