Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sphinx - PEP 0 generation #1932

Merged
merged 37 commits into from
Jun 12, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4a52b3c
Add PEP 0 parser
AA-Turner Apr 20, 2021
d3771d4
Add PEP 0 writer
AA-Turner Apr 20, 2021
c8268fb
Add PEP 0 generator and authors override
AA-Turner Apr 20, 2021
85ae140
Add/update build and run
AA-Turner Apr 20, 2021
835adfc
Simplify `create_index_file`
AA-Turner May 7, 2021
530ca9a
Special status handling
AA-Turner Jun 9, 2021
2578fe2
Add constants for PEP related magic strings
AA-Turner Jun 9, 2021
c839d51
Prefer checking on class
AA-Turner Jun 9, 2021
a9b0559
Add PEP.hide_status, use constants
AA-Turner Jun 9, 2021
77c5492
Remove comment from 2008 (current method works fine)
AA-Turner Jun 9, 2021
f6f7b65
Clarify intent of for-else loop
AA-Turner Jun 9, 2021
d0513e2
Hook in to Sphinx (oops, missed when splitting out this PR)
AA-Turner Jun 9, 2021
b8d9eff
Rename AUTHORS.csv for clarity
AA-Turner Jun 9, 2021
4b0d042
Sort and strip spaces
AA-Turner Jun 9, 2021
a993eed
Prefer `authors_overrides` name
AA-Turner Jun 9, 2021
92fe1fb
Add pep_0_errors.py
AA-Turner Jun 9, 2021
3f695ab
Move author_sort_by to writer
AA-Turner Jun 9, 2021
327fd1b
PEP init misc
AA-Turner Jun 9, 2021
403bff3
Split out Author
AA-Turner Jun 9, 2021
0d9bf61
Drop pep_0 prefix
AA-Turner Jun 9, 2021
dedb043
Pass title length as an argument
AA-Turner Jun 9, 2021
84518a3
Add constants.py to hold global type / status values
AA-Turner Jun 9, 2021
5164571
Capitalise constants
AA-Turner Jun 9, 2021
29738c5
Capitalise constants
AA-Turner Jun 9, 2021
918a4b9
Update PEP classification algorithm
AA-Turner Jun 9, 2021
70011e0
Extract static methods to module level
AA-Turner Jun 9, 2021
e72bed1
Add emit_text, emit_pep_row
AA-Turner Jun 9, 2021
32454c8
Use constants in writer.py
AA-Turner Jun 9, 2021
e42938a
Sort imports
AA-Turner Jun 9, 2021
d4447ab
Sort constants
AA-Turner Jun 9, 2021
5ebcb9d
Fix sorting in historical and dead PEPs
AA-Turner Jun 9, 2021
a4a4f50
Extract static methods to module level
AA-Turner Jun 9, 2021
1ec8438
Extract static methods to module level (parser.py
AA-Turner Jun 9, 2021
de9ab25
Make Author a NamedTuple
AA-Turner Jun 9, 2021
4cb6e8c
Fix author duplication bug with NamedTuples
AA-Turner Jun 9, 2021
1e62868
Revert to old PEP classification algorithm
AA-Turner Jun 10, 2021
48b72c2
Define PEP equality
AA-Turner Jun 10, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions AUTHORS.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Full Name, Surname First, Name Reference
Ernest W. Durbin III, "Durbin, Ernest W., III", Durbin
Inada Naoki, "Inada, Naoki", Inada
Guido van Rossum, "van Rossum, Guido (GvR)", GvR
Just van Rossum, "van Rossum, Just (JvR)", JvR
The Python core team and community, The Python core team and community, python-dev
P.J. Eby, "Eby, Phillip J.", Eby
Greg Ewing, "Ewing, Gregory", Ewing
Jim Jewett, "Jewett, Jim J.", Jewett
Nathaniel Smith, "Smith, Nathaniel J.", Smith
Martin v. Löwis, "von Löwis, Martin", von Löwis

11 changes: 11 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
from pathlib import Path
import shutil

from sphinx.application import Sphinx

Expand All @@ -22,6 +23,13 @@ def create_parser():
return parser.parse_args()


def create_index_file(html_root: Path):
    """Duplicate the rendered PEP 0 page as the root index.html so that /peps/ works.

    Nothing is written when ``pep-0000/index.html`` is not an existing file
    below *html_root*.
    """
    rendered_pep_zero = html_root / "pep-0000" / "index.html"
    if not rendered_pep_zero.is_file():
        return
    shutil.copy(rendered_pep_zero, html_root / "index.html")


if __name__ == "__main__":
args = create_parser()

Expand Down Expand Up @@ -52,3 +60,6 @@ def create_parser():
)
app.builder.copysource = False # Prevent unneeded source copying - we link direct to GitHub
app.build()

if args.index_file:
create_index_file(build_directory)
288 changes: 288 additions & 0 deletions pep_sphinx_extensions/pep_zero_generator/pep_0_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
"""Code for handling object representation of a PEP."""

from __future__ import annotations

from email.parser import HeaderParser
from pathlib import Path
import re
import textwrap
from typing import NamedTuple
import unicodedata


class PEPError(Exception):
    """Error describing a problem found in a PEP source file.

    Attributes:
        filename: Path of the PEP file the error refers to.
        number: The PEP number, or None when it was not supplied.

    """

    def __init__(self, error: str, pep_file: Path, pep_number: int | None = None):
        super().__init__(error)
        self.filename = pep_file
        self.number = pep_number

    def __str__(self):
        # The file name always prefixes the message; the PEP number is only
        # prepended when one was given.
        located_msg = f"({self.filename}): {super().__str__()}"
        if self.number is None:
            return located_msg
        return f"PEP {self.number} {located_msg}"


class Name(NamedTuple):
    """Decomposed parts of an author's name; parts that do not apply are None."""

    name: str | None = None  # mononym
    forename: str | None = None
    surname: str | None = None
    suffix: str | None = None


class Author:
    """Represent PEP authors.

    Attributes:
        last_first: The author's name in Surname, Forename, Suffix order.
        nick: Author's nickname for PEP tables. Defaults to surname.
        email: The author's email address.
        _first_last: The author's full name, unchanged

    """
    __slots__ = "last_first", "nick", "email", "_first_last"

    def __init__(self, author_email_tuple: tuple[str, str], authors_exceptions: dict[str, dict[str, str]]):
        """Parse the name and email address of an author.

        Args:
            author_email_tuple: ``(full name, email)`` pair for the author.
            authors_exceptions: Overrides keyed by the stripped full name; each
                value supplies the "Surname First" and "Name Reference" strings
                used verbatim instead of parsing the name.

        Raises:
            ValueError: If the name is empty and has no override.
        """
        name, email = author_email_tuple
        self._first_last: str = name.strip()
        self.email: str = email.lower()

        self.last_first: str = ""
        self.nick: str = ""

        if self._first_last in authors_exceptions:
            name_dict = authors_exceptions[self._first_last]
            self.last_first = name_dict["Surname First"]
            self.nick = name_dict["Name Reference"]
            return

        name_parts = self._parse_name(self._first_last)
        if name_parts.name is not None:
            self.last_first = self.nick = name_parts.name
        else:
            # Name is an immutable NamedTuple, so work on a local copy of the
            # surname rather than assigning back to the tuple.
            surname = name_parts.surname
            # Slice instead of index so a one-character surname cannot raise.
            if surname[1:2] == ".":
                # Add an escape to avoid docutils turning `v.` into `22.`.
                surname = f"\\{surname}"
            self.last_first = f"{surname}, {name_parts.forename}"
            self.nick = surname

        if name_parts.suffix is not None:
            self.last_first += f", {name_parts.suffix}"

    def __hash__(self):
        return hash(self.last_first)

    def __eq__(self, other):
        if not isinstance(other, Author):
            return NotImplemented
        return self.last_first == other.last_first

    def __len__(self):
        # Length of the NFC-normalised display name.
        return len(unicodedata.normalize("NFC", self.last_first))

    @staticmethod
    def _parse_name(full_name: str) -> Name:
        """Decompose a full name into parts.

        If a mononym (e.g, 'Aahz') then return the full name. If there are
        suffixes in the name (e.g. ', Jr.' or 'III'), then find and extract
        them. If there is a middle initial followed by a full stop, then
        combine the following words into a surname (e.g. N. Vander Weele). If
        there is a leading, lowercase portion to the last name (e.g. 'van' or
        'von') then include it in the surname.

        Raises:
            ValueError: If there is no name before the first comma.
        """
        possible_suffixes = {"Jr", "Jr.", "II", "III"}

        pre_suffix, _, raw_suffix = full_name.partition(",")
        # split() without an argument never yields empty strings, so the
        # emptiness check below is reachable and repeated spaces are harmless.
        name_parts = pre_suffix.split()
        suffix = raw_suffix.strip() or None

        if not name_parts:
            raise ValueError("Name is empty!")
        if len(name_parts) == 1:
            return Name(name=name_parts[0], suffix=suffix)
        if len(name_parts) == 2:
            return Name(forename=name_parts[0], surname=name_parts[1], suffix=suffix)

        # handles rogue uncaught suffixes
        if name_parts[-1] in possible_suffixes:
            trailing_suffix = name_parts.pop()
            # Only join with the comma-separated suffix when there is one;
            # formatting None into the string would give e.g. "III None".
            suffix = f"{trailing_suffix} {suffix}" if suffix else trailing_suffix

        # handles von, van, v. etc.
        if name_parts[-2].islower():
            forename = " ".join(name_parts[:-2])
            surname = " ".join(name_parts[-2:])
            return Name(forename=forename, surname=surname, suffix=suffix)

        # handles double surnames after a middle initial (e.g. N. Vander Weele)
        # The final word is excluded from the search so that the surname can
        # never end up empty.
        initial_positions = [i for i, part in enumerate(name_parts[:-1]) if part.endswith(".")]
        if initial_positions:
            split_position = initial_positions[-1] + 1
            forename = " ".join(name_parts[:split_position])
            surname = " ".join(name_parts[split_position:])
            return Name(forename=forename, surname=surname, suffix=suffix)

        # default to using the last item as the surname
        forename = " ".join(name_parts[:-1])
        return Name(forename=forename, surname=name_parts[-1], suffix=suffix)


def author_sort_by(author: Author) -> str:
    """Return the sort key for an author, skipping lower-cased words in the surname.

    The surname is the text of ``author.last_first`` before the first comma.
    The key starts at its first word that begins with an upper-case letter, is
    lower-cased and NFKD-normalised. When no word is capitalised the whole
    surname is used.
    """
    surname = author.last_first.split(",")[0]
    words = surname.split()
    first_capitalised = next(
        (index for index, word in enumerate(words) if word[0].isupper()),
        None,
    )
    if first_capitalised is None:
        # If no capitals, use the whole string
        return unicodedata.normalize("NFKD", surname.lower())
    key = " ".join(words[first_capitalised:]).lower()
    return unicodedata.normalize("NFKD", key)


class PEP:
    """Representation of PEPs.

    Attributes:
        number : PEP number.
        title : PEP title.
        pep_type : The type of PEP. Can only be one of the values from PEP.type_values.
        status : The PEP's status. Value must be found in PEP.status_values.
        authors : A list of the authors.

    """

    # The required RFC 822 headers for all PEPs.
    required_headers = {"PEP", "Title", "Author", "Status", "Type", "Created"}

    # Valid values for the Type header.
    type_values = {"Standards Track", "Informational", "Process"}
    # Valid values for the Status header.
    # Active PEPs can only be for Informational or Process PEPs.
    status_values = {
        "Accepted", "Provisional", "Rejected", "Withdrawn",
        "Deferred", "Final", "Active", "Draft", "Superseded",
    }

    def raise_pep_error(self, msg: str, pep_num: bool = False) -> None:
        """Raise a PEPError for this PEP's file; this method never returns.

        Args:
            msg: The error description.
            pep_num: Whether to include ``self.number`` in the error. Leave
                False while the number has not been parsed yet.

        Raises:
            PEPError: Always.
        """
        pep_number = self.number if pep_num else None
        raise PEPError(msg, self.filename, pep_number=pep_number)

    def __init__(self, filename: Path, author_lookup: dict, title_length: int):
        """Read the PEP file at *filename* and validate its headers.

        Args:
            filename: Path of the PEP file. The text after the first four
                characters of its stem must be the PEP number.
            author_lookup: Author name overrides, handed to each Author.
            title_length: Maximum length of the title returned by title_abbr.

        Raises:
            PEPError: If a required header is missing, the PEP number is not
                an integer or does not match the file name, the Type or
                Status is invalid, or no authors are found.
        """
        self.filename: Path = filename
        self.title_length: int = title_length

        # Parse the headers.
        pep_text = filename.read_text("UTF8")
        metadata = HeaderParser().parsestr(pep_text)
        required_header_misses = self.required_headers - set(metadata.keys())
        if required_header_misses:
            # Sorted so that the message is the same on every run.
            msg = f"PEP is missing required headers ({', '.join(sorted(required_header_misses))})"
            self.raise_pep_error(msg)

        try:
            self.number: int = int(metadata["PEP"])
        except ValueError:
            self.raise_pep_error("PEP number isn't an integer")

        # Check PEP number matches filename
        if self.number != int(filename.stem[4:]):
            self.raise_pep_error(f"PEP number does not match file name ({filename})", pep_num=True)

        # Title
        self.title: str = metadata["Title"]

        # Type
        self.pep_type: str = metadata["Type"]
        if self.pep_type not in self.type_values:
            self.raise_pep_error(f"{self.pep_type} is not a valid Type value", pep_num=True)

        # Status
        status = metadata["Status"]
        if status not in self.status_values:
            if status == "April Fool!":  # See PEP 401 :)
                status = "Rejected"
            else:
                self.raise_pep_error(f"{status} is not a valid Status value", pep_num=True)

        # Special case for Active PEPs.
        if status == "Active" and self.pep_type not in {"Process", "Informational"}:
            msg = "Only Process and Informational PEPs may have an Active status"
            self.raise_pep_error(msg, pep_num=True)

        # Special case for Provisional PEPs.
        if status == "Provisional" and self.pep_type != "Standards Track":
            msg = "Only Standards Track PEPs may have a Provisional status"
            self.raise_pep_error(msg, pep_num=True)
        self.status: str = status

        # Parse PEP authors
        self.authors: list[Author] = self.parse_authors(metadata["Author"], author_lookup)

    def parse_authors(self, author_header: str, author_lookup: dict) -> list[Author]:
        """Parse Author header line

        Raises:
            PEPError: If the header contains no authors.
        """
        authors_and_emails = self._parse_author(author_header)
        if not authors_and_emails:
            # raise_pep_error raises by itself; no extra `raise` is needed.
            self.raise_pep_error("no authors found", pep_num=True)
        return [Author(author_tuple, author_lookup) for author_tuple in authors_and_emails]

    # The three supported citation styles:
    #   Name <email>, email (Name), and bare comma-separated names.
    angled = re.compile(r"(?P<author>.+?) <(?P<email>.+?)>(,\s*)?")
    paren = re.compile(r"(?P<email>.+?) \((?P<author>.+?)\)(,\s*)?")
    simple = re.compile(r"(?P<author>[^,]+)(,\s*)?")

    @staticmethod
    def _parse_author(data: str) -> list[tuple[str, str]]:
        """Return a list of author names and emails.

        The styles are tried in the order angled, paren, simple; the first
        style that yields any author is used. The email is an empty string
        for the simple style.
        """
        # XXX Consider using email.utils.parseaddr (doesn't work with names
        # lacking an email address.

        author_list: list[tuple[str, str]] = []
        for regex in (PEP.angled, PEP.paren, PEP.simple):
            for match in regex.finditer(data):
                # Watch out for suffixes like 'Jr.' when they are comma-separated
                # from the name and thus cause issues when *all* names are only
                # separated by commas.
                match_dict = match.groupdict()
                author = match_dict["author"]
                looks_like_suffix = not author.partition(" ")[1] and author.endswith(".")
                if looks_like_suffix and author_list:
                    # Entries are (name, email) tuples: re-attach the suffix to
                    # the previous author's name only.
                    prev_author, _prev_email = author_list.pop()
                    author = ", ".join([prev_author, author])
                email = match_dict.get("email", "")
                author_list.append((author, email))
            # If authors were found then stop searching as only expect one
            # style of author citation.
            if author_list:
                break
        return author_list

    @property
    def title_abbr(self) -> str:
        """Shorten the title to be no longer than the max title length."""
        if len(self.title) <= self.title_length:
            return self.title
        # Reserve four characters for the trailing " ...".
        wrapped_title, *_excess = textwrap.wrap(self.title, self.title_length - 4)
        return f"{wrapped_title} ..."

    @property
    def pep(self) -> dict[str, str | int]:
        """Return the line entry for the PEP."""
        return {
            # how the type is to be represented in the index
            "type": self.pep_type[0].upper(),
            "number": self.number,
            "title": self.title_abbr,
            # how the status should be represented in the index
            "status": self.status[0].upper() if self.status not in {"Draft", "Active"} else " ",
            # the author list as a comma-separated with only last names
            "authors": ", ".join(x.nick for x in self.authors),
        }
Loading