From 2656b4e4dc917bb246232b2b1466e0e7af58ee96 Mon Sep 17 00:00:00 2001
From: Konstantin Weddige <konstantin@skathi.net>
Date: Fri, 22 Jul 2022 10:06:54 +0200
Subject: [PATCH 1/2] Add language to PDF

---
 weasyprint/document.py     | 5 ++++-
 weasyprint/html.py         | 3 ++-
 weasyprint/pdf/__init__.py | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/weasyprint/document.py b/weasyprint/document.py
index 9f7580346..923757d05 100644
--- a/weasyprint/document.py
+++ b/weasyprint/document.py
@@ -104,7 +104,7 @@ class DocumentMetadata:
     """
     def __init__(self, title=None, authors=None, description=None,
                  keywords=None, generator=None, created=None, modified=None,
-                 attachments=None, custom=None):
+                 attachments=None, lang=None, custom=None):
         #: The title of the document, as a string or :obj:`None`.
         #: Extracted from the ``<title>`` element in HTML
         #: and written to the ``/Title`` info field in PDF.
@@ -145,6 +145,9 @@ def __init__(self, title=None, authors=None, description=None,
         #: Extracted from the ``<link rel=attachment>`` elements in HTML
         #: and written to the ``/EmbeddedFiles`` dictionary in PDF.
         self.attachments = attachments or []
+        #: Document language, as a BCP 47 language tag string or :obj:`None`.
+        #: Extracted from ``<html lang>`` and written to ``/Lang`` in PDF.
+        self.lang = lang
         #: Custom metadata, as a dict whose keys are the metadata names and
         #: values are the metadata values.
         self.custom = custom or {}
diff --git a/weasyprint/html.py b/weasyprint/html.py
index cedf94765..3af11751a 100644
--- a/weasyprint/html.py
+++ b/weasyprint/html.py
@@ -267,6 +267,7 @@ def get_html_metadata(html):
     modified = None
     attachments = []
     custom = {}
+    lang = html.etree_element.attrib.get('lang', None)
     for element in html.wrapper_element.query_all('title', 'meta', 'link'):
         element = element.etree_element
         if element.tag == 'title' and title is None:
@@ -305,7 +306,7 @@ def get_html_metadata(html):
     return dict(title=title, description=description, generator=generator,
                 keywords=keywords, authors=authors,
                 created=created, modified=modified,
-                attachments=attachments, custom=custom)
+                attachments=attachments, lang=lang, custom=custom)
 
 
 def strip_whitespace(string):
diff --git a/weasyprint/pdf/__init__.py b/weasyprint/pdf/__init__.py
index 9e40d1c85..99f25c75e 100644
--- a/weasyprint/pdf/__init__.py
+++ b/weasyprint/pdf/__init__.py
@@ -417,6 +417,8 @@ def generate_pdf(pages, url_fetcher, metadata, fonts, target, zoom,
     if metadata.modified:
         pdf.info['ModDate'] = pydyf.String(
             _w3c_date_to_pdf(metadata.modified, 'modified'))
+    if metadata.lang:
+        pdf.catalog['Lang'] = pydyf.String(metadata.lang)
     if custom_metadata:
         for key, value in metadata.custom.items():
             key = ''.join(char for char in key if char.isalnum())

From 42db8bdb033bcc111db549cafffefaafdeced398 Mon Sep 17 00:00:00 2001
From: Konstantin Weddige <konstantin@skathi.net>
Date: Fri, 22 Jul 2022 12:14:46 +0200
Subject: [PATCH 2/2] Update tests

---
 tests/test_api.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_api.py b/tests/test_api.py
index 83c2ddd5f..65a5d3ab8 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -998,6 +998,7 @@ def assert_meta(html, **meta):
     meta.setdefault('created', None)
     meta.setdefault('modified', None)
     meta.setdefault('attachments', [])
+    meta.setdefault('lang', None)
     meta.setdefault('custom', {})
     assert vars(FakeHTML(string=html).render().metadata) == meta
 
@@ -1011,6 +1012,7 @@ def test_html_meta_1():
 def test_html_meta_2():
     assert_meta(
         '''
+            <html lang="en"><head>
             <meta name=author content="I Me &amp; Myself">
             <meta name=author content="Smith, John">
             <title>Test document</title>
@@ -1027,6 +1029,7 @@ def test_html_meta_2():
             <meta name=dcterms.modified content=2013>
             <meta name=keywords content="Python; pydyf">
             <meta name=description content="Blah… ">
+            </head></html>
         ''',
         authors=['I Me & Myself', 'Smith, John'],
         title='Test document',
@@ -1035,6 +1038,7 @@ def test_html_meta_2():
         description="Blah… ",
         created='2011-04',
         modified='2013',
+        lang='en',
         custom={'dummy': 'ignored'})