Sphinx support: add docutils support files (#1931)

See #2, #1385 for context. Supersedes #1566. This is the docutils parsing, transforms and writing part, building on PR #1930. It contains a pseudo-package, `pep_sphinx_extensions`, which itself contains: ### Docutils parsing: - `PEPParser` - collates transforms and interfaces with Sphinx core - `PEPRole` - deals with :PEP:`blah` in RST source ### Docutils transforms: - `PEPContents` (Creates table of contents without page title) - `PEPFooter` (Deals with footnotes, link to source, last modified commit) - `PEPHeaders` (Parses RFC2822 headers) - `PEPTitle` - Creates document title from PEP headers - `PEPZero` - Masks email addresses and creates links to PEP numbers from tables in `pep-0000.rst` ### Docutils HTML output: - `PEPTranslator` - Overrides to the default HTML translator to enable better matching of the current PEP styles
python · Jun 9, 2021 · 0f3bbd9 · 0f3bbd9
1 parent 3533799
commit 0f3bbd9
Show file tree

Hide file tree

Showing 12 changed files with 615 additions and 3 deletions.
diff --git a/build.py b/build.py
@@ -14,7 +14,7 @@ def create_parser():
     # flags / options
     parser.add_argument("-f", "--fail-on-warning", action="store_true")
     parser.add_argument("-n", "--nitpicky", action="store_true")
-    parser.add_argument("-j", "--jobs", type=int)
+    parser.add_argument("-j", "--jobs", type=int, default=1)
 
     # extra build steps
     parser.add_argument("-i", "--index-file", action="store_true")  # for PEP 0

diff --git a/conf.py b/conf.py
@@ -1,16 +1,24 @@
 """Configuration for building PEPs using Sphinx."""
 
+import sys
+from pathlib import Path
+
+sys.path.append(str(Path("pep_sphinx_extensions").absolute()))
+
 # -- Project information -----------------------------------------------------
 
 project = "PEPs"
 master_doc = "contents"
 
 # -- General configuration ---------------------------------------------------
 
+# Add any Sphinx extension module names here, as strings.
+extensions = ["pep_sphinx_extensions", "sphinx.ext.githubpages"]
+
 # The file extensions of source files. Sphinx uses these suffixes as sources.
 source_suffix = {
-    ".rst": "restructuredtext",
-    ".txt": "restructuredtext",
+    ".rst": "pep",
+    ".txt": "pep",
 }
 
 # List of patterns (relative to source dir) to ignore when looking for source files.
@@ -32,6 +40,7 @@
 # -- Options for HTML output -------------------------------------------------
 
 # HTML output settings
+html_math_renderer = "maths_to_html"  # Maths rendering
 html_show_copyright = False  # Turn off miscellany
 html_show_sphinx = False
 html_title = "peps.python.org"  # Set <title/>
diff --git a/pep_sphinx_extensions/__init__.py b/pep_sphinx_extensions/__init__.py
@@ -0,0 +1,47 @@
+"""Sphinx extensions for performant PEP processing"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from sphinx.environment import default_settings
+from docutils.writers.html5_polyglot import HTMLTranslator
+
+from pep_sphinx_extensions.pep_processor.html import pep_html_translator
+from pep_sphinx_extensions.pep_processor.parsing import pep_parser
+from pep_sphinx_extensions.pep_processor.parsing import pep_role
+
+if TYPE_CHECKING:
+    from sphinx.application import Sphinx
+
+# Sphinx doesn't allow custom settings or Readers, so the shared
+# sphinx.environment.default_settings mapping is patched in place instead.
+# These values belong in docutils.conf, but are set here for now so that
+# pep2html.py is not affected.
+default_settings.update(
+    {
+        "pep_references": True,
+        "rfc_references": True,
+        "pep_base_url": "",
+        "pep_file_url_template": "pep-%04d.html",
+        # disable using docutils.conf whilst running both PEP generators
+        "_disable_config": True,
+    }
+)
+
+
+def _depart_maths(*_args, **_kwargs) -> None:
+    """Do nothing when departing a maths node.
+
+    Used as the "depart" half of the maths renderer pairs registered in
+    ``setup``. Any positional or keyword arguments are accepted and ignored,
+    so the function cannot raise ``TypeError`` if it is ever invoked with
+    arguments (e.g. a translator and a node) rather than with none.
+    """
+    return None  # No-op callable for the type checker
+
+
+def setup(app: Sphinx) -> dict[str, bool]:
+    """Register the PEP parser, role, HTML translator and maths renderer.
+
+    Returns the extension metadata dict, which declares the extension safe
+    for parallel reading and writing.
+    """
+    # Parsing: source parser carrying the PEP transforms, plus the :pep: role override
+    app.add_source_parser(pep_parser.PEPParser)
+    app.add_role("pep", pep_role.PEPRole(), override=True)
+
+    # Writing: docutils node visitor overrides for the HTML builder
+    app.set_translator("html", pep_html_translator.PEPTranslator)
+
+    # Maths: docutils' HTML visit methods, each paired with the no-op depart callable
+    app.add_html_math_renderer(
+        "maths_to_html",
+        (HTMLTranslator.visit_math, _depart_maths),
+        (HTMLTranslator.visit_math_block, _depart_maths),
+    )
+
+    # Parallel safety: https://www.sphinx-doc.org/en/master/extdev/index.html#extension-metadata
+    metadata = {"parallel_read_safe": True, "parallel_write_safe": True}
+    return metadata
diff --git a/pep_sphinx_extensions/config.py b/pep_sphinx_extensions/config.py
@@ -0,0 +1,6 @@
+"""Miscellaneous configuration variables for the PEP Sphinx extensions."""
+
+# str.format template for a PEP file stem: the number is right-aligned and
+# zero-filled to a width of four characters
+pep_stem = "pep-{:0>4}"
+# str.format template for a PEP HTML page name, derived from the stem template
+pep_url = pep_stem + ".html"
+# GitHub base URLs (master branch) for viewing a file and for its commit history
+pep_vcs_url = "https://github.com/python/peps/blob/master/"
+pep_commits_url = "https://github.com/python/peps/commits/master/"
diff --git a/pep_sphinx_extensions/pep_processor/html/pep_html_translator.py b/pep_sphinx_extensions/pep_processor/html/pep_html_translator.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from docutils import nodes
+import sphinx.writers.html5 as html5
+
+if TYPE_CHECKING:
+    from sphinx.builders import html
+
+
+class PEPTranslator(html5.HTML5Translator):
+    """Custom RST -> HTML translation rules for PEPs."""
+
+    def __init__(self, document: nodes.document, builder: html.StandaloneHTMLBuilder):
+        """Initialise the base translator, then switch ``compact_simple`` off."""
+        super().__init__(document, builder)
+        self.compact_simple: bool = False
+
+    @staticmethod
+    def should_be_compact_paragraph(node: nodes.paragraph) -> bool:
+        """Check if paragraph should be compact.
+
+        Omitting <p/> tags around paragraph nodes gives visually compact lists.
+
+        Returns ``True`` only when all of the following hold:
+
+        - the parent is neither a document nor a compound node;
+        - the only non-default attribute (if any) is ``classes``, and it
+          holds nothing other than "first" and/or "last";
+        - the paragraph is the first visible child of its parent, ignoring
+          an initial label node.
+
+        """
+        # Never compact paragraphs that are children of document or compound.
+        if isinstance(node.parent, (nodes.document, nodes.compound)):
+            return False
+
+        # Check for custom attributes in paragraph.
+        for key, value in node.non_default_attributes().items():
+            # Any attribute other than "classes" prevents compaction, as does a
+            # "classes" value containing anything besides "first" / "last".
+            # ``or`` short-circuits, so the value of a non-"classes" attribute
+            # is never passed to set() and therefore need not be iterable.
+            if key != "classes" or not set(value) <= {"first", "last"}:
+                return False
+
+        # Only first paragraph can be compact (ignoring initial label & invisible nodes)
+        first = isinstance(node.parent[0], nodes.label)  # bool used as slice start: 0 or 1
+        visible_siblings = [child for child in node.parent.children[first:] if not isinstance(child, nodes.Invisible)]
+        if visible_siblings[0] is not node:
+            return False
+
+        # otherwise, the paragraph should be compact
+        return True
+
+    def visit_paragraph(self, node: nodes.paragraph) -> None:
+        """Remove <p> tags if possible.
+
+        The matching end tag (or an empty string for a compact paragraph) is
+        pushed onto ``self.context`` for ``depart_paragraph`` to emit.
+        """
+        if self.should_be_compact_paragraph(node):
+            self.context.append("")
+        else:
+            self.body.append(self.starttag(node, "p", ""))
+            self.context.append("</p>\n")
+
+    def depart_paragraph(self, _: nodes.paragraph) -> None:
+        """Add corresponding end tag from `visit_paragraph`."""
+        self.body.append(self.context.pop())
+
+    def depart_label(self, node) -> None:
+        """PEP link/citation block cleanup with italicised backlinks.
+
+        Closes the label's ``<span>``, then the ``<dt>``, and opens the
+        ``<dd>``. When the ``footnote_backlinks`` setting is enabled, a single
+        back reference additionally closes an ``<a>`` tag (presumably opened
+        when the label was visited), and multiple back references are listed
+        as numbered links in an italicised ``fn-backref`` span.
+        """
+        if not self.settings.footnote_backlinks:
+            self.body.append("</span>")
+            self.body.append("</dt>\n<dd>")
+            return
+
+        # If only one reference to this footnote
+        back_references = node.parent["backrefs"]
+        if len(back_references) == 1:
+            self.body.append("</a>")
+
+        # Close the tag
+        self.body.append("</span>")
+
+        # If more than one reference
+        if len(back_references) > 1:
+            back_links = [f"<a href='#{ref}'>{i}</a>" for i, ref in enumerate(back_references, start=1)]
+            back_links_str = ", ".join(back_links)
+            # The class attribute must be closed by exactly one quote to be valid HTML
+            self.body.append(f"<span class='fn-backref'><em> ({back_links_str}) </em></span>")
+
+        # Close the def tags
+        self.body.append("</dt>\n<dd>")
+
+    def unknown_visit(self, node: nodes.Node) -> None:
+        """No processing for unknown node types."""
+        pass
diff --git a/pep_sphinx_extensions/pep_processor/parsing/pep_parser.py b/pep_sphinx_extensions/pep_processor/parsing/pep_parser.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from sphinx import parsers
+
+from pep_sphinx_extensions.pep_processor.transforms import pep_headers
+from pep_sphinx_extensions.pep_processor.transforms import pep_title
+from pep_sphinx_extensions.pep_processor.transforms import pep_contents
+from pep_sphinx_extensions.pep_processor.transforms import pep_footer
+
+if TYPE_CHECKING:
+    from docutils import transforms
+
+
+class PEPParser(parsers.RSTParser):
+    """reStructuredText parser that runs the custom PEP transforms."""
+
+    # Names this parser answers to; "pep" is the one used by source_suffix in conf.py
+    supported = ("pep", "python-enhancement-proposal")
+
+    def __init__(self):
+        """Create the parser, marking documents as containing RFC 2822 headers."""
+        super().__init__(rfc2822=True)
+
+    def get_transforms(self) -> list[type[transforms.Transform]]:
+        """Return the PEP transform classes (the parent's list is not consulted)."""
+        pep_transforms = [
+            pep_headers.PEPHeaders,
+            pep_title.PEPTitle,
+            pep_contents.PEPContents,
+            pep_footer.PEPFooter,
+        ]
+        return pep_transforms
diff --git a/pep_sphinx_extensions/pep_processor/parsing/pep_role.py b/pep_sphinx_extensions/pep_processor/parsing/pep_role.py
@@ -0,0 +1,16 @@
+from sphinx import roles
+
+from pep_sphinx_extensions.config import pep_url
+
+
+class PEPRole(roles.PEP):
+    """Replacement for the built-in ``:pep:`` role with custom URI building."""
+
+    def build_uri(self) -> str:
+        """Build the PEP URI from the role target.
+
+        The target is split at the first "#": the part before it is converted
+        to an integer and formatted into the ``pep_url`` template, prefixed by
+        the document's ``pep_base_url`` setting; a non-empty part after it is
+        re-attached as the URI fragment.
+        """
+        settings = self.inliner.document.settings
+        number, _, anchor = self.target.partition("#")
+        uri = settings.pep_base_url + pep_url.format(int(number))
+        return f"{uri}#{anchor}" if anchor else uri
diff --git a/pep_sphinx_extensions/pep_processor/transforms/pep_contents.py b/pep_sphinx_extensions/pep_processor/transforms/pep_contents.py
@@ -0,0 +1,63 @@
+from pathlib import Path
+
+from docutils import nodes
+from docutils import transforms
+from docutils.transforms import parts
+
+
+class PEPContents(transforms.Transform):
+    """Insert a horizontal rule and a contents placeholder into PEP documents."""
+
+    # Use same priority as docutils.transforms.Contents
+    default_priority = 380
+
+    def apply(self) -> None:
+        """Add the "Contents" topic, preceded by a transition, to a PEP file.
+
+        Documents whose source path does not match ``pep-*`` are left untouched.
+        """
+        document = self.document
+        if not Path(document["source"]).match("pep-*"):
+            return  # only PEP files get a table of contents
+
+        # Build the "Contents" topic and register it as an implicit target
+        topic = nodes.topic("", nodes.title("", "Contents"), classes=["contents"])
+        if not document.has_name("contents"):
+            topic["names"].append("contents")
+        document.note_implicit_target(topic)
+
+        # A pending node lets the Contents transform fill in the entries later
+        placeholder = nodes.pending(Contents)
+        topic += placeholder
+        document.note_pending(placeholder)
+
+        # Both nodes go in at index 2 of the document's first child; inserting
+        # the transition second places the horizontal rule before the contents
+        container = document[0]
+        container.insert(2, topic)
+        container.insert(2, nodes.transition())
+
+
+class Contents(parts.Contents):
+    """Build Table of Contents from document."""
+
+    def __init__(self, document, startnode=None):
+        """Initialise the transform; ``toc_id`` and ``backlinks`` are set in ``apply``."""
+        super().__init__(document, startnode)
+
+        # used in parts.Contents.build_contents
+        self.toc_id = self.backlinks = None
+
+    def apply(self) -> None:
+        """Replace the pending placeholder with the generated contents list."""
+        placeholder = self.startnode
+        topic = placeholder.parent
+        settings = self.document.settings
+
+        # used in parts.Contents.build_contents
+        self.toc_id = topic["ids"][0]
+        self.backlinks = settings.toc_backlinks
+
+        # let the writer (or output software) build the contents list?
+        if getattr(settings, "use_latex_toc", False):
+            # move customisation settings to the parent node
+            topic.attributes.update(placeholder.details)
+            topic.remove(placeholder)
+            return
+
+        toc = self.build_contents(self.document[0])
+        if toc:
+            placeholder.replace_self(toc)
+        else:
+            # if no contents, remove the empty placeholder
+            topic.parent.remove(topic)