@validate("language_code")
def _valid_language_code(self, proposal):
    """Validate a proposed ``language_code`` against the ISO 639-1 list.

    The candidate is read from ``proposal["value"]`` rather than from
    ``self.language_code``: the validator runs before the new value is
    stored on the instance, so the attribute still holds the previous
    value at this point.

    Returns the proposed code when it is listed in ``iso639_1``; otherwise
    logs a warning and returns the trait's default value.
    """
    value = proposal["value"]
    if value not in iso639_1:
        # ``Logger.warn`` is a deprecated alias; use ``warning`` with lazy
        # %-style arguments so the message text stays exactly the same.
        self.log.warning(
            '"%s" is not an ISO 639-1 language code. '
            'It has been replaced by the default value "en".',
            value,
        )
        return proposal["trait"].default_value
    return value
test_javascript_output(self): nb = v4.new_notebook( @@ -103,13 +103,12 @@ def test_attachments(self): (output, resources) = HTMLExporter(template_name="classic").from_file( self._get_notebook(nb_name="attachment.ipynb") ) - check_for_png = re.compile(r']*?)>') + check_for_png = re.compile(r'') result = check_for_png.search(output) assert result - self.assertTrue(result.group(0).strip().startswith('= ' 'symbols' '(' - ''x y z'' + '\'x y z\'' ')' ) for no_input_flag in (False, True): @@ -382,7 +382,7 @@ def test_no_input(self): with open("notebook1.html", encoding="utf8") as f: text = f.read() - assert no_input_flag == ("In [" not in text) + assert no_input_flag == ("In\xa0[" not in text) assert no_input_flag == ("Out[6]" not in text) assert no_input_flag == (input_content_html not in text) @@ -580,7 +580,7 @@ def test_not_embedding_images_htmlexporter(self): with open("notebook5_embed_images.html", encoding="utf8") as f: text = f.read() assert "./containerized_deployments.jpeg" in text - assert "src='./containerized_deployments.jpeg'" in text + assert 'src="./containerized_deployments.jpeg"' in text assert text.count("data:image/jpeg;base64") == 0 def test_embedding_images_htmlexporter(self): diff --git a/nbconvert/utils/iso639_1.py b/nbconvert/utils/iso639_1.py new file mode 100644 index 000000000..ce5c2c664 --- /dev/null +++ b/nbconvert/utils/iso639_1.py @@ -0,0 +1,191 @@ +""" List of ISO639-1 language code""" + +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
# Two-letter language codes accepted for ``language_code``, in alphabetical
# order. The table is split on whitespace, yielding a plain list of strings.
iso639_1 = """
aa ab ae af ak am an ar as av ay az
ba be bg bh bi bm bn bo br bs
ca ce ch co cr cs cu cv cy
da de dv dz
ee el en eo es et eu
fa ff fi fj fo fr fy
ga gd gl gn gu gv
ha he hi ho hr ht hu hy hz
ia id ie ig ii ik io is it iu
ja jv
ka kg ki kj kk kl km kn ko kr ks ku kv kw ky
la lb lg li ln lo lt lu lv
mg mh mi mk ml mn mr ms mt my
na nb nd ne ng nl nn no nr nv ny
oc oj om or os
pa pi pl ps pt
qu
rm rn ro ru rw
sa sc sd se sg si sk sl sm sn so sq sr ss st su sv sw
ta te tg th ti tk tl tn to tr ts tt tw ty
ug uk ur uz
ve vi vo
wa wo
xh
yi yo
za zh zu
""".split()
{%- block html_head -%} @@ -92,13 +92,15 @@ div#notebook-container{ {% block body_header %} -