Skip to content

Commit

Permalink
Merge pull request #130047 from NixOS/doc-manpage-role
Browse files Browse the repository at this point in the history
doc: Add support for manpage references
  • Loading branch information
jtojnar authored Sep 7, 2021
2 parents 82d19cb + 7baf180 commit 1e75936
Show file tree
Hide file tree
Showing 14 changed files with 398 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@
# Nixpkgs build-support
/pkgs/build-support/writers @lassulus @Profpatsch

# Nixpkgs documentation
/maintainers/scripts/db-to-md.sh @jtojnar @ryantm
/maintainers/scripts/doc @jtojnar @ryantm
/doc/build-aux/pandoc-filters @jtojnar
/doc/contributing/contributing-to-documentation.chapter.md @jtojnar

# NixOS Internals
/nixos/default.nix @nbp @infinisil
/nixos/lib/from-env.nix @nbp @infinisil
Expand Down
9 changes: 7 additions & 2 deletions doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@ MD_TARGETS=$(addsuffix .xml, $(basename $(shell find . -type f -regex '.*\.md$$'
PANDOC ?= pandoc

pandoc_media_dir = media
# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh).
# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh) and conversion script (/maintainers/scripts/db-to-md.sh).
# TODO: Remove raw-attribute when we can get rid of DocBook altogether.
pandoc_commonmark_enabled_extensions = +attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute
# Not needed:
# - docbook-reader/citerefentry-to-rst-role.lua (only relevant for DocBook → MarkDown/rST/MyST)
pandoc_flags = --extract-media=$(pandoc_media_dir) \
--lua-filter=$(PANDOC_LUA_FILTERS_DIR)/diagram-generator.lua \
--lua-filter=labelless-link-is-xref.lua \
--lua-filter=build-aux/pandoc-filters/myst-reader/roles.lua \
--lua-filter=build-aux/pandoc-filters/link-unix-man-references.lua \
--lua-filter=build-aux/pandoc-filters/docbook-writer/rst-roles.lua \
--lua-filter=build-aux/pandoc-filters/docbook-writer/labelless-link-is-xref.lua \
-f commonmark$(pandoc_commonmark_enabled_extensions)+smart

.PHONY: all
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--[[
Converts Code AST nodes produced by pandoc’s DocBook reader
from citerefentry elements into AST for corresponding role
for reStructuredText.
We use subset of MyST syntax (CommonMark with features from rST)
so let’s use the rST AST for rST features.
Reference: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage
]]

function Code(elem)
elem.classes = elem.classes:map(function (x)
if x == 'citerefentry' then
elem.attributes['role'] = 'manpage'
return 'interpreted-text'
else
return x
end
end)

return elem
end
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
--[[
Converts Link AST nodes with empty label to DocBook xref elements.
This is a temporary script to be able use cross-references conveniently
using syntax taken from MyST, while we still use docbook-xsl
for generating the documentation.
Reference: https://myst-parser.readthedocs.io/en/latest/using/syntax.html#targets-and-cross-referencing
]]

local function starts_with(start, str)
return str:sub(1, #start) == start
end
Expand Down
36 changes: 36 additions & 0 deletions doc/build-aux/pandoc-filters/docbook-writer/rst-roles.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
--[[
Converts AST for reStructuredText roles into corresponding
DocBook elements.
Currently, only a subset of roles is supported.
Reference:
List of roles:
https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html
manpage:
https://tdg.docbook.org/tdg/5.1/citerefentry.html
file:
https://tdg.docbook.org/tdg/5.1/filename.html
]]

function Code(elem)
if elem.classes:includes('interpreted-text') then
local tag = nil
local content = elem.text
if elem.attributes['role'] == 'manpage' then
tag = 'citerefentry'
local title, volnum = content:match('^(.+)%((%w+)%)$')
if title == nil then
-- No volnum in parentheses.
title = content
end
content = '<refentrytitle>' .. title .. '</refentrytitle>' .. (volnum ~= nil and ('<manvolnum>' .. volnum .. '</manvolnum>') or '')
elseif elem.attributes['role'] == 'file' then
tag = 'filename'
end

if tag ~= nil then
return pandoc.RawInline('docbook', '<' .. tag .. '>' .. content .. '</' .. tag .. '>')
end
end
end
18 changes: 18 additions & 0 deletions doc/build-aux/pandoc-filters/link-unix-man-references.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
--[[
Turns a manpage reference into a link, when a mapping is defined
in the unix-man-urls.lua file.
]]

local man_urls = {
["tmpfiles.d(5)"] = "https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html",
["nix.conf(5)"] = "https://nixos.org/manual/nix/stable/#sec-conf-file",
["systemd.time(7)"] = "https://www.freedesktop.org/software/systemd/man/systemd.time.html",
["systemd.timer(5)"] = "https://www.freedesktop.org/software/systemd/man/systemd.timer.html",
}

function Code(elem)
local is_man_role = elem.classes:includes('interpreted-text') and elem.attributes['role'] == 'manpage'
if is_man_role and man_urls[elem.text] ~= nil then
return pandoc.Link(elem, man_urls[elem.text])
end
end
29 changes: 29 additions & 0 deletions doc/build-aux/pandoc-filters/myst-reader/roles.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
--[[
Replaces Str AST nodes containing {role}, followed by a Code node
by a Code node with attrs that would be produced by rST reader
from the role syntax.
This is to emulate MyST syntax in Pandoc.
(MyST is a CommonMark flavour with rST features mixed in.)
Reference: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point
]]

function Inlines(inlines)
for i = #inlines-1,1,-1 do
local first = inlines[i]
local second = inlines[i+1]
local correct_tags = first.tag == 'Str' and second.tag == 'Code'
if correct_tags then
-- docutils supports alphanumeric strings separated by [-._:]
-- We are slightly more liberal for simplicity.
local role = first.text:match('^{([-._+:%w]+)}$')
if role ~= nil then
inlines:remove(i)
second.attributes['role'] = role
second.classes:insert('interpreted-text')
end
end
end
return inlines
end
25 changes: 25 additions & 0 deletions doc/build-aux/pandoc-filters/myst-writer/roles.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
--[[
Replaces Code nodes with attrs that would be produced by rST reader
from the role syntax by a Str AST node containing {role}, followed by a Code node.
This is to emulate MyST syntax in Pandoc.
(MyST is a CommonMark flavour with rST features mixed in.)
Reference: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point
]]

function Code(elem)
local role = elem.attributes['role']

if elem.classes:includes('interpreted-text') and role ~= nil then
elem.classes = elem.classes:filter(function (c)
return c ~= 'interpreted-text'
end)
elem.attributes['role'] = nil

return {
pandoc.Str('{' .. role .. '}'),
elem,
}
end
end
7 changes: 7 additions & 0 deletions doc/contributing/contributing-to-documentation.chapter.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ Additionally, the following syntax extensions are currently used:

This syntax is taken from [MyST](https://myst-parser.readthedocs.io/en/latest/using/syntax.html#targets-and-cross-referencing).

- []{#ssec-contributing-markup-inline-roles}
If you want to link to a man page, you can use `` {manpage}`nix.conf(5)` ``, which will turn into {manpage}`nix.conf(5)`.

The references will turn into links when a mapping exists in {file}`doc/build-aux/pandoc-filters/unix-man-urls.lua`.

This syntax is taken from [MyST](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point). Though, the feature originates from [reStructuredText](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage) with slightly different syntax.

- []{#ssec-contributing-markup-admonitions}
**Admonitions**, set off from the text to bring attention to something.

Expand Down
88 changes: 88 additions & 0 deletions maintainers/scripts/db-to-md.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=. -i bash -p pandoc

# This script is temporarily needed while we transition the manual to
# CommonMark. It converts DocBook files into our CommonMark flavour.

debug=
files=()

while [ "$#" -gt 0 ]; do
i="$1"; shift 1
case "$i" in
--debug)
debug=1
;;
*)
files+=("$i")
;;
esac
done

echo "WARNING: This is an experimental script and might not preserve all formatting." > /dev/stderr
echo "Please report any issues you discover." > /dev/stderr

outExtension="md"
if [[ $debug ]]; then
outExtension="json"
fi

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# NOTE: Keep in sync with Nixpkgs manual (/doc/Makefile).
# TODO: Remove raw-attribute when we can get rid of DocBook altogether.
pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute
targetLang="commonmark${pandoc_commonmark_enabled_extensions}+smart"
if [[ $debug ]]; then
targetLang=json
fi
pandoc_flags=(
# Not needed:
# - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies)
# - media extraction (was only required for diagram generator)
# - myst-reader/roles.lua (only relevant for MyST → DocBook)
# - link-unix-man-references.lua (links should only be added to display output)
# - docbook-writer/rst-roles.lua (only relevant for → DocBook)
# - docbook-writer/labelless-link-is-xref.lua (only relevant for → DocBook)
"--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua"
"--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/myst-writer/roles.lua"
"--lua-filter=$DIR/doc/unknown-code-language.lua"
-f docbook
-t "$targetLang"
--tab-stop=2
--wrap=none
)

for file in "${files[@]}"; do
if [[ ! -f "$file" ]]; then
echo "db-to-md.sh: $file does not exist" > /dev/stderr
exit 1
else
rootElement=$(xmllint --xpath 'name(//*)' "$file")

if [[ $rootElement = chapter ]]; then
extension=".chapter.$outExtension"
elif [[ $rootElement = section ]]; then
extension=".section.$outExtension"
else
echo "db-to-md.sh: $file contains an unsupported root element $rootElement" > /dev/stderr
exit 1
fi

outFile="${file%".section.xml"}"
outFile="${outFile%".chapter.xml"}"
outFile="${outFile%".xml"}$extension"
temp1=$(mktemp)
$DIR/doc/escape-code-markup.py "$file" "$temp1"
if [[ $debug ]]; then
echo "Converted $file to $temp1" > /dev/stderr
fi
temp2=$(mktemp)
$DIR/doc/replace-xrefs-by-empty-links.py "$temp1" "$temp2"
if [[ $debug ]]; then
echo "Converted $temp1 to $temp2" > /dev/stderr
fi
pandoc "$temp2" -o "$outFile" "${pandoc_flags[@]}"
echo "Converted $file to $outFile" > /dev/stderr
fi
done
97 changes: 97 additions & 0 deletions maintainers/scripts/doc/escape-code-markup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml

"""
Pandoc will strip any markup within code elements so
let’s escape them so that they can be handled manually.
"""

import lxml.etree as ET
import re
import sys

def replace_element_by_text(el: ET.Element, text: str) -> None:
"""
Author: bernulf
Source: https://stackoverflow.com/a/10520552/160386
SPDX-License-Identifier: CC-BY-SA-3.0
"""
text = text + (el.tail or "")
parent = el.getparent()
if parent is not None:
previous = el.getprevious()
if previous is not None:
previous.tail = (previous.tail or "") + text
else:
parent.text = (parent.text or "") + text
parent.remove(el)

DOCBOOK_NS = "http://docbook.org/ns/docbook"

# List of elements that pandoc’s DocBook reader strips markup from.
# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs
code_elements = [
# CodeBlock
"literallayout",
"screen",
"programlisting",
# Code (inline)
"classname",
"code",
"filename",
"envar",
"literal",
"computeroutput",
"prompt",
"parameter",
"option",
"markup",
"wordasword",
"command",
"varname",
"function",
"type",
"symbol",
"constant",
"userinput",
"systemitem",
]

XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')

def remove_xmlns(match: re.Match) -> str:
"""
Removes xmlns attributes.
Expects a match containing an opening tag.
"""
return XMLNS_REGEX.sub('', match.group(0))

if __name__ == '__main__':
assert len(sys.argv) >= 3, "usage: escape-code-markup.py <input> <output>"

tree = ET.parse(sys.argv[1])
name_predicate = " or ".join([f"local-name()='{el}'" for el in code_elements])

for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"):
text = ET.tostring(markup, encoding=str)

# tostring adds xmlns attributes to the element we want to stringify
# as if it was supposed to be usable standalone.
# We are just converting it to CDATA so we do not care.
# Let’s strip the namespace declarations to keep the code clean.
#
# Note that this removes even namespaces that were potentially
# in the original file. Though, that should be very rare –
# most of the time, we will stringify empty DocBook elements
# like <xref> or <co> or, at worst, <link> with xlink:href attribute.
#
# Also note that the regex expects the root element to be first
# thing in the string. But that should be fine, the tostring method
# does not produce XML declaration or doctype by default.
text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text)

replace_element_by_text(markup, text)

tree.write(sys.argv[2])
Loading

0 comments on commit 1e75936

Please sign in to comment.