Skip to content

Commit

Permalink
add rudimentary HTML converter named asciidoc-to-html
Browse files Browse the repository at this point in the history
  • Loading branch information
mojavelinux committed Mar 1, 2024
1 parent 88a369e commit 7fa2a64
Show file tree
Hide file tree
Showing 3 changed files with 314 additions and 2 deletions.
69 changes: 69 additions & 0 deletions bin/asciidoc-to-html
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env node

'use strict'

const { Console } = require('node:console')
const fs = require('node:fs')
const parse = require('asciidoc-parsing-lab')
const { parseArgs } = require('node:util')

// Command-line option table. The type/short/multiple/default keys are handed to
// util.parseArgs; desc, hint, and choices are read by printUsage to build the help text.
const options = {
  // repeatable: -a name=value
  attribute: {
    type: 'string',
    short: 'a',
    multiple: true,
    desc: 'set one or more AsciiDoc attributes',
    hint: 'name=value',
  },
  embedded: {
    type: 'boolean',
    short: 'e',
    desc: 'output document without root element for embedding',
  },
  // output format, which also supplies the derived output file extension
  format: {
    type: 'string',
    short: 'f',
    default: 'html',
    desc: 'generate the specified format',
    hint: 'ext',
    choices: ['html', 'asg'],
  },
  output: {
    type: 'string',
    short: 'o',
    desc: 'specify a different output file or - for stdout',
    hint: 'path',
  },
  timings: {
    type: 'boolean',
    short: 't',
    desc: 'print a timings report to stderr',
  },
  help: {
    type: 'boolean',
    short: 'h',
    desc: 'output this help and exit',
  },
}

// Parse the command line. strict is disabled, so parseArgs does not throw on
// unknown options or mismatched option values.
const { positionals, values: opts } = parseArgs({ args: process.argv.slice(2), options, strict: false })
const [sourceFile] = positionals

// printUsage terminates the process, so nothing below runs after either call.
if (opts.help) printUsage(options)
if (sourceFile == null || !sourceFile.endsWith('.adoc')) printUsage(options, true)

// Unless --output is given, swap the .adoc extension for the requested format.
const outputFile = opts.output ?? sourceFile.replace(/\.adoc$/, '.' + opts.format)

// --embedded is shorthand for setting the "embedded" attribute.
if (opts.embedded) {
  opts.attribute ??= []
  opts.attribute.push('embedded')
}

// The timer reports to stderr so it never mixes with converted output on stdout.
const timer = opts.timings ? new Console(process.stderr) : undefined
if (timer) timer.time('elapsed')

const source = fs.readFileSync(sourceFile, 'utf8').trimEnd()
// NOTE parseInlines significantly increases the parsing time
const asg = parse(source, { attributes: opts.attribute, parseInlines: true, showWarnings: true })

let output
if (opts.format === 'asg') {
  output = require('#test-harness').stringifyASG(asg)
} else {
  output = require('asciidoc-parsing-lab/html-converter')(asg).trimEnd()
}

if (outputFile === '-') {
  console.log(output)
} else {
  fs.writeFileSync(outputFile, output + '\n', 'utf8')
}

if (timer) timer.timeEnd('elapsed')

/**
 * Prints the usage statement and terminates the process.
 *
 * With a truthy `error`, only the synopsis and a pointer to --help are written
 * to stderr and the exit code is 1. Otherwise the full help, with one line per
 * entry in `options`, is written to stdout and the exit code is 0.
 *
 * @param {Object<string, Object>} options - Option table keyed by long option name. The
 *   short, hint, multiple, choices, default, and desc keys of each entry are used.
 * @param {boolean} [error] - Whether usage is being printed because of invalid arguments.
 */
function printUsage (options, error) {
  const synopsis = 'Usage: asciidoc-to-html [OPTION]... FILE'
  let usage
  if (error) {
    usage = [synopsis, "Run 'asciidoc-to-html --help' for more information."]
  } else {
    usage = [
      synopsis,
      'Convert the specified AsciiDoc FILE to the specified output file and format.',
      'Example: asciidoc-to-html README.adoc',
      '',
    ]
    for (const [long, { short, choices, default: default_, hint, multiple, desc }] of Object.entries(options)) {
      // The hint belongs to the long form, so show it even when there is no short alias.
      const option = `${short ? `-${short}, ` : ''}--${long}${hint ? ' ' + hint : ''}`
      let description = desc
      if (multiple) description += '; can be specified more than once'
      if (choices) description += ` [${choices.join(', ')}]`
      if (default_) description += ` (default: ${default_})`
      usage.push(` ${option.padEnd(27, ' ')}${description}`)
    }
    usage.push('', 'If --output is not specified, the output file path is derived from FILE (e.g., README.html).')
  }
  const stream = error ? process.stderr : process.stdout
  for (const line of usage) stream.write(line + '\n')
  process.exit(error ? 1 : 0)
}
241 changes: 241 additions & 0 deletions lib/html-converter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
'use strict'

// Maps the variant of an inline span node to the HTML element used to render it.
const HTML_TAG_NAME_BY_SPAN_VARIANT = {
  code: 'code',
  emphasis: 'em',
  strong: 'strong',
}

/**
 * Converts a block-level node, and recursively its children, to HTML.
 *
 * The node is dispatched on its `name`. A `document` node builds a fresh
 * attribute map from its own attributes and header attributes, and produces a
 * full HTML page unless the `embedded` attribute is set, in which case only the
 * <article> element is produced. A node with an unrecognized name produces no
 * output and logs a warning.
 *
 * @param {Object} node - The block node to convert.
 * @param {Object} [documentAttributes] - Map of attribute name to entry object,
 *   shared across the traversal. Ignored for a `document` node, which creates its own.
 *   An `attributes` node writes its entries into this map unless the existing
 *   entry is marked `locked`.
 * @returns {string} The HTML for the node, or an empty string if it yields no output.
 */
function convert (node, documentAttributes) {
  // Children are converted in order with one shared attribute map, so an
  // `attributes` node can affect the siblings that follow it.
  const convertBlocks = (blocks, attrs) => {
    let html = ''
    for (const child of blocks) html += convert(child, attrs)
    return html
  }
  let output = ''
  switch (node.name) {
    case 'document': {
      const attrs = Object.assign({}, node.attributes)
      let convertedTitle
      const header = node.header
      if (header) {
        if (header.attributes) {
          for (const [name, { value }] of Object.entries(header.attributes)) {
            // a locked entry cannot be overridden by the document header
            if (!attrs[name]?.locked) attrs[name] = { value, origin: 'header' }
          }
        }
        if (header.title) convertedTitle = convertInlines(header.title)
      }
      const standalone = attrs.embedded == null
      if (standalone) {
        output += '<!DOCTYPE html>\n<html>\n<head>\n'
        // FIXME downconvert contents of title tag to plain text
        if (convertedTitle) output += `<title>${convertedTitle}</title>\n`
        output += `<style>\n${css()}\n</style>\n`
        output += '</head>\n<body>\n'
      }
      output += '<article>\n'
      if (convertedTitle) output += `<header>\n<h1>${convertedTitle}</h1>\n</header>\n`
      output += convertBlocks(node.blocks, attrs)
      output += '</article>'
      if (standalone) output += '\n</body>\n</html>'
      break
    }
    case 'paragraph': {
      // with the hardbreaks option, every newline in the paragraph becomes a line break
      const hardbreaks = node.metadata?.options.includes('hardbreaks')
      const content = hardbreaks ? convertInlines(node.inlines).replace(/\n/g, '<br>') : convertInlines(node.inlines)
      output += `<p${commonAttributes(node.metadata)}>${content}</p>\n`
      break
    }
    case 'section': {
      // the document title occupies h1, so the heading level is shifted down by one
      const tagName = `h${node.level + 1}`
      output += `<section${commonAttributes(node.metadata)}>\n`
      output += `<${tagName}>${convertInlines(node.title)}</${tagName}>\n`
      output += convertBlocks(node.blocks, documentAttributes)
      output += '</section>\n'
      break
    }
    case 'preamble':
      // Q: should preamble have an enclosure?
      output += convertBlocks(node.blocks, documentAttributes)
      break
    case 'heading': {
      const tagName = `h${node.level + 1}`
      output += `<${tagName}${commonAttributes(node.metadata, 'discrete')}>${convertInlines(node.title)}</${tagName}>\n`
      break
    }
    case 'literal':
    case 'listing':
      if (node.metadata?.attributes.style === 'source') {
        const language = node.metadata.attributes.language
        const codeAttrs = language ? ` data-lang="${language}"` : ''
        output += `<pre${commonAttributes(node.metadata)}><code${codeAttrs}>${convertInlines(node.inlines)}</code></pre>\n`
      } else {
        output += `<pre${commonAttributes(node.metadata)}>${convertInlines(node.inlines)}</pre>\n`
      }
      break
    case 'list': {
      const ordered = node.variant === 'ordered'
      const tagName = ordered ? 'ol' : 'ul'
      let listAttrs = ''
      if (ordered) {
        const start = node.metadata?.attributes.start
        if (start) listAttrs = ` start="${start}"`
      }
      output += `<${tagName}${commonAttributes(node.metadata)}${listAttrs}>\n`
      for (const item of node.items) {
        output += '<li>\n'
        output += `<span class="principal">${convertInlines(item.principal)}</span>\n`
        output += convertBlocks(item.blocks, documentAttributes)
        output += '</li>\n'
      }
      output += `</${tagName}>\n`
      break
    }
    case 'dlist':
      output += `<dl${commonAttributes(node.metadata)}>\n`
      for (const item of node.items) {
        for (const term of item.terms) output += `<dt>${convertInlines(term)}</dt>\n`
        // omit the <dd> when the entry has terms only
        if (item.principal || item.blocks.length) {
          output += '<dd>\n'
          if (item.principal) output += `<span class="principal">${convertInlines(item.principal)}</span>\n`
          output += convertBlocks(item.blocks, documentAttributes)
          output += '</dd>\n'
        }
      }
      output += '</dl>\n'
      break
    case 'admonition':
      output += `<div${commonAttributes(node.metadata, 'admonition')} data-severity="${node.variant}">\n`
      output += convertBlocks(node.blocks, documentAttributes)
      output += '</div>\n'
      break
    case 'sidebar':
      output += `<aside${commonAttributes(node.metadata)}>\n`
      output += convertBlocks(node.blocks, documentAttributes)
      output += '</aside>\n'
      break
    case 'example':
      output += `<div${commonAttributes(node.metadata, 'example')}>\n`
      output += convertBlocks(node.blocks, documentAttributes)
      output += '</div>\n'
      break
    case 'image': {
      // fall back to an empty alt instead of rendering the string "undefined"
      const alt = node.metadata?.attributes.alt ?? ''
      output += `<figure${commonAttributes(node.metadata)}>\n`
      output += `<img src="${node.target}" alt="${alt}">\n`
      output += '</figure>\n'
      break
    }
    case 'attributes':
      // attribute entries in the body update the shared map but produce no output
      for (const [name, { value }] of Object.entries(node.attributes)) {
        if (!documentAttributes[name]?.locked) documentAttributes[name] = { value, origin: 'body' }
      }
      break
    default:
      console.warn(`${node.name} not converted`)
  }
  return output
}

/**
 * Provides the default stylesheet that is embedded in standalone HTML output.
 *
 * @returns {string} The CSS rules, newline-separated, with no leading or trailing whitespace.
 */
function css () {
  const rules = [
    'body {',
    'color: #222222;',
    'font-family: sans-serif;',
    'margin: 0;',
    '}',
    'article {',
    'display: flow-root;',
    'margin: 2em auto;',
    'width: 80vw;',
    '}',
    'article > header h1 {',
    'margin-top: 0;',
    'font-size: 2em;',
    '}',
    'article > :first-child:not(header) {',
    'margin-top: 0;',
    '}',
    'a {',
    'color: #0000cc;',
    '}',
    'p,',
    'li > .principal:first-child,',
    'dd > .principal:first-child {',
    'line-height: 1.6;',
    '}',
    'dt {',
    'font-weight: bold;',
    '}',
    'dd {',
    'margin-left: 1.5em;',
    '}',
    'code,',
    'pre {',
    'color: #aa0000;',
    'font-size: 1.25em;',
    '}',
    'pre {',
    'line-height: 1.25;',
    '}',
    'pre code {',
    'font-size: inherit;',
    '}',
    '.admonition,',
    '.example {',
    'border: 1px solid currentColor;',
    'margin-block: 1em 0;',
    'padding: 0 1em;',
    '}',
    '.admonition::before {',
    'content: attr(data-severity);',
    'display: block;',
    'font-weight: bold;',
    'text-transform: uppercase;',
    'margin-top: 1em;',
    '}',
    'figure {',
    'margin-left: 0;',
    '}',
    'img {',
    'display: inline-block;',
    'max-width: 100%;',
    'vertical-align: middle;',
    '}',
  ]
  return rules.join('\n')
}

/**
 * Converts a list of inline nodes to an HTML string.
 *
 * Text nodes are emitted as is, except that a " +" at the end of a line is
 * turned into a <br>. Ref nodes become links and span nodes are wrapped in the
 * element mapped to their variant. A span with an unmapped variant is emitted
 * without a wrapper, and a node with an unrecognized name is skipped; both log
 * a warning.
 *
 * @param {Object[]} nodes - The inline nodes to convert.
 * @returns {string} The concatenated HTML for all nodes.
 */
function convertInlines (nodes) {
  let html = ''
  for (const node of nodes) {
    switch (node.name) {
      case 'text':
        // FIXME grammar should be giving us a hard break inline
        html += node.value.replace(/ \+(?=\n)/g, '<br>')
        break
      case 'ref':
        html += `<a href="${node.target}">${convertInlines(node.inlines)}</a>`
        break
      case 'span': {
        const content = convertInlines(node.inlines)
        // own-property check so a variant such as "constructor" cannot resolve via the prototype
        if (Object.hasOwn(HTML_TAG_NAME_BY_SPAN_VARIANT, node.variant)) {
          const tagName = HTML_TAG_NAME_BY_SPAN_VARIANT[node.variant]
          html += `<${tagName}>${content}</${tagName}>`
        } else {
          // keep the text rather than emitting an invalid <undefined> element
          console.warn(`${node.variant} span not converted`)
          html += content
        }
        break
      }
      default:
        console.warn(`${node.name} not converted`)
    }
  }
  return html
}

/**
 * Builds the HTML attribute string shared by block elements: id, class, and
 * any attribute whose name starts with "data-".
 *
 * @param {Object} [metadata] - Block metadata; its id, roles, and attributes keys are used.
 * @param {string} [primaryRole] - Role to list first in the class attribute.
 * @returns {string} The attributes, each preceded by a space, or an empty string if there are none.
 */
function commonAttributes (metadata, primaryRole) {
  if (!metadata) return primaryRole ? ` class="${primaryRole}"` : ''
  const { attributes = {}, id, roles: secondaryRoles = [] } = metadata
  const roles = primaryRole ? [primaryRole, ...secondaryRoles] : [...secondaryRoles]
  let result = ''
  if (id) result += ` id="${id}"`
  if (roles.length) result += ` class="${roles.join(' ')}"`
  // data attributes are always appended, including when both id and class are present
  for (const name of Object.keys(attributes)) {
    if (name.startsWith('data-')) result += ` ${name}="${attributes[name]}"`
  }
  return result
}

// Expose the block converter function as this module's single export.
module.exports = convert
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
"gen:preprocessor-parser": "peggy -c grammar/asciidoc-preprocessor-config.js -t '' > /dev/null"
},
"bin": {
"asciidoc-tck-adapter": "bin/asciidoc-tck-adapter"
"asciidoc-tck-adapter": "bin/asciidoc-tck-adapter",
"asciidoc-to-html": "bin/asciidoc-to-html"
},
"main": "lib/index.js",
"exports": {
".": "./lib/index.js",
"./package.json": "./package.json"
"./package.json": "./package.json",
"./html-converter": "./lib/html-converter.js"
},
"imports": {
"#attrlist-parser": "./lib/asciidoc-attrlist-parser.js",
Expand Down

0 comments on commit 7fa2a64

Please sign in to comment.