-
-
Notifications
You must be signed in to change notification settings - Fork 14.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #130047 from NixOS/doc-manpage-role
doc: Add support for manpage references
- Loading branch information
Showing
14 changed files
with
398 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
--[[ | ||
Converts Code AST nodes produced by pandoc’s DocBook reader | ||
from citerefentry elements into the AST of the corresponding | ||
reStructuredText role. | ||
We use a subset of MyST syntax (CommonMark with features from rST), | ||
so let’s use the rST AST for rST features. | ||
Reference: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage | ||
]] | ||
|
||
--- Rewrites a Code node carrying the `citerefentry` class into the shape
-- of an rST `manpage` role: the class becomes `interpreted-text` and the
-- `role` attribute is set to `manpage`. Other classes pass through as-is.
-- @param elem the Code node to inspect (modified in place)
-- @return the same node
function Code(elem)
  local function to_role_class(class)
    if class ~= 'citerefentry' then
      return class
    end
    -- Record the role on the node while swapping out the class name.
    elem.attributes['role'] = 'manpage'
    return 'interpreted-text'
  end

  elem.classes = elem.classes:map(to_role_class)
  return elem
end
10 changes: 10 additions & 0 deletions
10
doc/labelless-link-is-xref.lua → ...docbook-writer/labelless-link-is-xref.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
--[[ | ||
Converts AST for reStructuredText roles into corresponding | ||
DocBook elements. | ||
Currently, only a subset of roles is supported. | ||
Reference: | ||
List of roles: | ||
https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html | ||
manpage: | ||
https://tdg.docbook.org/tdg/5.1/citerefentry.html | ||
file: | ||
https://tdg.docbook.org/tdg/5.1/filename.html | ||
]] | ||
|
||
-- Entities for the characters that must not appear verbatim in XML text.
local xml_entities = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;' }

--- Escapes XML special characters so that literal text can be embedded
-- in raw DocBook markup.
-- @param text string to escape
-- @return the escaped string
local function escape_xml(text)
  -- The parentheses drop gsub's second result (the substitution count).
  return (text:gsub('[&<>]', xml_entities))
end

--- Converts a Code node that represents an rST role into raw DocBook.
-- Supported roles: `manpage` (becomes citerefentry) and `file` (becomes
-- filename). The node text is XML-escaped before being wrapped in tags.
-- @param elem the Code node to inspect
-- @return a `docbook` RawInline for supported roles; nothing otherwise,
--   which leaves the node unchanged
function Code(elem)
  if not elem.classes:includes('interpreted-text') then
    return
  end

  local role = elem.attributes['role']
  local tag = nil
  local content = nil

  if role == 'manpage' then
    tag = 'citerefentry'
    -- Split `name(section)` on the raw text, then escape the pieces.
    local title, volnum = elem.text:match('^(.+)%((%w+)%)$')
    if title == nil then
      -- No volnum in parentheses.
      title = elem.text
    end
    content = '<refentrytitle>' .. escape_xml(title) .. '</refentrytitle>'
    if volnum ~= nil then
      -- volnum matched %w+ and therefore needs no escaping.
      content = content .. '<manvolnum>' .. volnum .. '</manvolnum>'
    end
  elseif role == 'file' then
    tag = 'filename'
    content = escape_xml(elem.text)
  end

  if tag ~= nil then
    return pandoc.RawInline('docbook', '<' .. tag .. '>' .. content .. '</' .. tag .. '>')
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--[[ | ||
Turns a manpage reference into a link when a URL for it is defined | ||
in the man_urls table below. | ||
]] | ||
|
||
-- Online locations of known manual pages, keyed by `name(section)`.
local man_urls = {
  ["tmpfiles.d(5)"] = "https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html",
  ["nix.conf(5)"] = "https://nixos.org/manual/nix/stable/#sec-conf-file",
  ["systemd.time(7)"] = "https://www.freedesktop.org/software/systemd/man/systemd.time.html",
  ["systemd.timer(5)"] = "https://www.freedesktop.org/software/systemd/man/systemd.timer.html",
}

--- Wraps a `manpage` role Code node in a Link when its text has an entry
-- in `man_urls`.
-- @param elem the Code node to inspect
-- @return a Link around the node, or nothing when it is not a manpage
--   role or has no known URL
function Code(elem)
  if not elem.classes:includes('interpreted-text') then
    return
  end
  if elem.attributes['role'] ~= 'manpage' then
    return
  end

  local url = man_urls[elem.text]
  if url == nil then
    return
  end
  return pandoc.Link(elem, url)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
--[[ | ||
Replaces Str AST nodes containing {role}, followed by a Code node | ||
by a Code node with attrs that would be produced by rST reader | ||
from the role syntax. | ||
This is to emulate MyST syntax in Pandoc. | ||
(MyST is a CommonMark flavour with rST features mixed in.) | ||
Reference: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point | ||
]] | ||
|
||
--- Merges each `{role}` Str node that is directly followed by a Code node
-- into that Code node: the Str is removed, the Code gets a `role`
-- attribute and the `interpreted-text` class.
-- @param inlines list of inline nodes (modified in place)
-- @return the same list
function Inlines(inlines)
  -- Walk backwards: removing an entry only shifts entries at higher
  -- indices, which have already been examined.
  for pos = #inlines - 1, 1, -1 do
    local marker, code = inlines[pos], inlines[pos + 1]
    local role = nil
    if marker.tag == 'Str' and code.tag == 'Code' then
      -- docutils supports alphanumeric strings separated by [-._:]
      -- We are slightly more liberal for simplicity.
      role = marker.text:match('^{([-._+:%w]+)}$')
    end

    if role ~= nil then
      inlines:remove(pos)
      code.attributes['role'] = role
      code.classes:insert('interpreted-text')
    end
  end
  return inlines
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
--[[ | ||
Replaces Code nodes with attrs that would be produced by rST reader | ||
from the role syntax by a Str AST node containing {role}, followed by a Code node. | ||
This is to emulate MyST syntax in Pandoc. | ||
(MyST is a CommonMark flavour with rST features mixed in.) | ||
Reference: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point | ||
]] | ||
|
||
--- Turns a Code node shaped like an rST role back into MyST syntax:
-- a `{role}` Str node followed by the Code node, stripped of the
-- `interpreted-text` class and the `role` attribute.
-- @param elem the Code node to inspect (modified in place when it is a role)
-- @return a two-element list `{ Str, Code }` for role nodes; nothing
--   otherwise, which leaves the node unchanged
function Code(elem)
  local role = elem.attributes['role']

  if not elem.classes:includes('interpreted-text') or role == nil then
    return
  end

  local function is_ordinary_class(class)
    return class ~= 'interpreted-text'
  end
  elem.classes = elem.classes:filter(is_ordinary_class)
  elem.attributes['role'] = nil

  local marker = pandoc.Str('{' .. role .. '}')
  return { marker, elem }
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=. -i bash -p pandoc

# This script is temporarily needed while we transition the manual to
# CommonMark. It converts DocBook files into our CommonMark flavour.
#
# Usage: db-to-md.sh [--debug] FILE...
#
# Each FILE is written next to itself as NAME.chapter.md or NAME.section.md,
# depending on its root element. With --debug, pandoc's JSON AST is written
# instead (with a .json extension) and the intermediate files are kept so
# that they can be inspected.

debug=
files=()

while [ "$#" -gt 0 ]; do
  i="$1"; shift 1
  case "$i" in
    --debug)
      debug=1
      ;;
    *)
      files+=("$i")
      ;;
  esac
done

# Prints an error prefixed with the script name and aborts.
die() {
  echo "db-to-md.sh: $*" >&2
  exit 1
}

echo "WARNING: This is an experimental script and might not preserve all formatting." >&2
echo "Please report any issues you discover." >&2

outExtension="md"
if [[ $debug ]]; then
  outExtension="json"
fi

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# NOTE: Keep in sync with Nixpkgs manual (/doc/Makefile).
# TODO: Remove raw-attribute when we can get rid of DocBook altogether.
pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute
targetLang="commonmark${pandoc_commonmark_enabled_extensions}+smart"
if [[ $debug ]]; then
  targetLang=json
fi
pandoc_flags=(
  # Not needed:
  # - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies)
  # - media extraction (was only required for diagram generator)
  # - myst-reader/roles.lua (only relevant for MyST → DocBook)
  # - link-unix-man-references.lua (links should only be added to display output)
  # - docbook-writer/rst-roles.lua (only relevant for → DocBook)
  # - docbook-writer/labelless-link-is-xref.lua (only relevant for → DocBook)
  "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua"
  "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/myst-writer/roles.lua"
  "--lua-filter=$DIR/doc/unknown-code-language.lua"
  -f docbook
  -t "$targetLang"
  --tab-stop=2
  --wrap=none
)

# Intermediate files created with mktemp. They are removed on exit unless
# --debug was given, because the debug messages below point at them.
tempFiles=()
cleanup() {
  if [[ ! $debug ]] && [ "${#tempFiles[@]}" -gt 0 ]; then
    rm -f -- "${tempFiles[@]}"
  fi
}
trap cleanup EXIT

for file in "${files[@]}"; do
  if [[ ! -f "$file" ]]; then
    die "$file does not exist"
  fi

  # NOTE(review): xmllint is not among the packages requested on the
  # nix-shell line above; presumably it comes from the caller's
  # environment. Confirm.
  rootElement=$(xmllint --xpath 'name(//*)' "$file")

  if [[ $rootElement = chapter ]]; then
    extension=".chapter.$outExtension"
  elif [[ $rootElement = section ]]; then
    extension=".section.$outExtension"
  else
    die "$file contains an unsupported root element $rootElement"
  fi

  outFile="${file%".section.xml"}"
  outFile="${outFile%".chapter.xml"}"
  outFile="${outFile%".xml"}$extension"

  temp1=$(mktemp) || die "could not create a temporary file"
  tempFiles+=("$temp1")
  # Quote the helper path so a checkout path with spaces still works, and
  # stop on failure instead of feeding a broken file to the next step.
  "$DIR/doc/escape-code-markup.py" "$file" "$temp1" || die "escaping code markup in $file failed"
  if [[ $debug ]]; then
    echo "Converted $file to $temp1" >&2
  fi

  temp2=$(mktemp) || die "could not create a temporary file"
  tempFiles+=("$temp2")
  "$DIR/doc/replace-xrefs-by-empty-links.py" "$temp1" "$temp2" || die "replacing xrefs in $file failed"
  if [[ $debug ]]; then
    echo "Converted $temp1 to $temp2" >&2
  fi

  pandoc "$temp2" -o "$outFile" "${pandoc_flags[@]}" || die "pandoc failed to convert $file"
  echo "Converted $file to $outFile" >&2
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#! /usr/bin/env nix-shell | ||
#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml | ||
|
||
""" | ||
Pandoc will strip any markup within code elements so | ||
let’s escape them so that they can be handled manually. | ||
""" | ||
|
||
import lxml.etree as ET | ||
import re | ||
import sys | ||
|
||
def replace_element_by_text(el: ET.Element, text: str) -> None:
    """
    Replace ``el`` in its parent with ``text``, keeping the element's tail.

    The text is appended to the tail of the preceding sibling, or to the
    parent's own text when ``el`` is the first child. Nothing happens when
    ``el`` has no parent.

    Author: bernulf
    Source: https://stackoverflow.com/a/10520552/160386
    SPDX-License-Identifier: CC-BY-SA-3.0
    """
    replacement = text + (el.tail or "")
    parent = el.getparent()
    if parent is None:
        return

    sibling = el.getprevious()
    if sibling is None:
        parent.text = (parent.text or "") + replacement
    else:
        sibling.tail = (sibling.tail or "") + replacement
    parent.remove(el)
|
||
# Namespace URI used to restrict the element lookup to DocBook elements.
DOCBOOK_NS = "http://docbook.org/ns/docbook"

# Local names of the elements that pandoc’s DocBook reader strips markup from.
# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs
code_elements = [
    # Read as CodeBlock
    "literallayout",
    "screen",
    "programlisting",

    # Read as inline Code
    "classname",
    "code",
    "filename",
    "envar",
    "literal",
    "computeroutput",
    "prompt",
    "parameter",
    "option",
    "markup",
    "wordasword",
    "command",
    "varname",
    "function",
    "type",
    "symbol",
    "constant",
    "userinput",
    "systemitem",
]
|
||
# One namespace declaration (default or prefixed) plus its leading whitespace.
XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
# The first tag of a string, optionally preceded by whitespace.
ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')


def remove_xmlns(match: re.Match) -> str:
    """
    Return the matched text with every xmlns declaration stripped.

    Intended as a replacement callback; the match is expected to cover
    an opening tag.
    """
    opening_tag = match.group(0)
    return XMLNS_REGEX.sub('', opening_tag)
|
||
if __name__ == '__main__':
    # Validate the arguments explicitly: an `assert` is skipped under
    # `python -O`, which would turn a missing argument into an IndexError
    # further down instead of a usage message.
    if len(sys.argv) < 3:
        sys.exit("usage: escape-code-markup.py <input> <output>")

    tree = ET.parse(sys.argv[1])
    name_predicate = " or ".join([f"local-name()='{el}'" for el in code_elements])

    # Select every direct child element of a DocBook code element.
    for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"):
        text = ET.tostring(markup, encoding=str)

        # tostring adds xmlns attributes to the element we want to stringify
        # as if it was supposed to be usable standalone.
        # We are just converting it to CDATA so we do not care.
        # Let’s strip the namespace declarations to keep the code clean.
        #
        # Note that this removes even namespaces that were potentially
        # in the original file. Though, that should be very rare –
        # most of the time, we will stringify empty DocBook elements
        # like <xref> or <co> or, at worst, <link> with xlink:href attribute.
        #
        # Also note that the regex expects the root element to be first
        # thing in the string. But that should be fine, the tostring method
        # does not produce XML declaration or doctype by default.
        text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text)

        replace_element_by_text(markup, text)

    tree.write(sys.argv[2])
Oops, something went wrong.