Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add language information to PDF #1695

Merged
merged 2 commits into from
Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,7 @@ def assert_meta(html, **meta):
meta.setdefault('created', None)
meta.setdefault('modified', None)
meta.setdefault('attachments', [])
meta.setdefault('lang', None)
meta.setdefault('custom', {})
assert vars(FakeHTML(string=html).render().metadata) == meta

Expand All @@ -1011,6 +1012,7 @@ def test_html_meta_1():
def test_html_meta_2():
assert_meta(
'''
<html lang="en"><head>
<meta name=author content="I Me &amp; Myself">
<meta name=author content="Smith, John">
<title>Test document</title>
Expand All @@ -1027,6 +1029,7 @@ def test_html_meta_2():
<meta name=dcterms.modified content=2013>
<meta name=keywords content="Python; pydyf">
<meta name=description content="Blah… ">
</head></html>
''',
authors=['I Me & Myself', 'Smith, John'],
title='Test document',
Expand All @@ -1035,6 +1038,7 @@ def test_html_meta_2():
description="Blah… ",
created='2011-04',
modified='2013',
lang='en',
custom={'dummy': 'ignored'})


Expand Down
5 changes: 4 additions & 1 deletion weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class DocumentMetadata:
"""
def __init__(self, title=None, authors=None, description=None,
keywords=None, generator=None, created=None, modified=None,
attachments=None, custom=None):
attachments=None, lang=None, custom=None):
#: The title of the document, as a string or :obj:`None`.
#: Extracted from the ``<title>`` element in HTML
#: and written to the ``/Title`` info field in PDF.
Expand Down Expand Up @@ -145,6 +145,9 @@ def __init__(self, title=None, authors=None, description=None,
#: Extracted from the ``<link rel=attachment>`` elements in HTML
#: and written to the ``/EmbeddedFiles`` dictionary in PDF.
self.attachments = attachments or []
#: Document language as BCP 47 language tags.
#: Extracted from ``<html lang=lang>`` in HTML.
Comment on lines +148 to +149
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- #: Document language as BCP 47 language tags.
- #: Extracted from ``<html lang=lang>`` in HTML.
+ #: Document language, as a string or :obj:`None`.
+ #: Extracted from ``<html lang=lang>`` in HTML
+ #: and written as Lang to the document catalog.

self.lang = lang
#: Custom metadata, as a dict whose keys are the metadata names and
#: values are the metadata values.
self.custom = custom or {}
Expand Down
3 changes: 2 additions & 1 deletion weasyprint/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def get_html_metadata(html):
modified = None
attachments = []
custom = {}
lang = html.etree_element.attrib.get('lang', None)
for element in html.wrapper_element.query_all('title', 'meta', 'link'):
element = element.etree_element
if element.tag == 'title' and title is None:
Expand Down Expand Up @@ -305,7 +306,7 @@ def get_html_metadata(html):
return dict(title=title, description=description, generator=generator,
keywords=keywords, authors=authors,
created=created, modified=modified,
attachments=attachments, custom=custom)
attachments=attachments, lang=lang, custom=custom)


def strip_whitespace(string):
Expand Down
2 changes: 2 additions & 0 deletions weasyprint/pdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ def generate_pdf(pages, url_fetcher, metadata, fonts, target, zoom,
if metadata.modified:
pdf.info['ModDate'] = pydyf.String(
_w3c_date_to_pdf(metadata.modified, 'modified'))
if metadata.lang:
pdf.catalog['Lang'] = pydyf.String(metadata.lang)
if custom_metadata:
for key, value in metadata.custom.items():
key = ''.join(char for char in key if char.isalnum())
Expand Down