diff --git a/.eslintrc.json b/.eslintrc.json index 7d05e3a6..9bfe4f01 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -39,6 +39,7 @@ ], "rules": { "prettier/prettier": "error", + "no-inner-declarations": "off", "consistent-return": "off", "no-eq-null": "error", "no-floating-decimal": "error", diff --git a/package-lock.json b/package-lock.json index d6921d5a..44d9d491 100644 --- a/package-lock.json +++ b/package-lock.json @@ -753,9 +753,9 @@ } }, "ecmarkdown": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/ecmarkdown/-/ecmarkdown-4.0.0.tgz", - "integrity": "sha512-lcF5Hn+cBdN8l8H5XLtHthKPpa5gBLTH8DkAcuHXFCp/6eVXLtuj8VFOzeEJvhejzC1FvO61xkR6f8VBcWApTQ==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ecmarkdown/-/ecmarkdown-5.0.1.tgz", + "integrity": "sha512-Fey6+oJ7jd3csflBdbQlQtL5qmpx1a9mJqNH/N+bpinYA2RxWbKeZ1W5rZtoPEvATFE7sooXX9GAskuCA+5aZg==", "requires": { "escape-html": "^1.0.1" } @@ -1239,9 +1239,9 @@ "dev": true }, "grammarkdown": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/grammarkdown/-/grammarkdown-2.1.2.tgz", - "integrity": "sha512-r0aMErRxHj1YxJU2OFo0z/8GD7TNBmt35aaCqiDwVcRiu8n01GV8dDP0bKY6CbECLwzPK/PUoh0wnXuJNEfBJg==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/grammarkdown/-/grammarkdown-2.2.0.tgz", + "integrity": "sha512-tWDnlX5pAJhOCoha7G71UPpuRDeUeD81W5GXCumSirePoFGMqaskDZJSpEGQwcn29CMXK8cdOakDDIKSmnL/NQ==", "requires": { "@esfx/async-canceltoken": "^1.0.0-pre.13", "@esfx/cancelable": "^1.0.0-pre.13", diff --git a/package.json b/package.json index d4f50c01..b605bd7b 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "build-release": "tsc -p src", "prepublish": "safe-publish-latest && npm run build-release", "pretest": "npm run build", - "test": "mocha && npm run lint", + "test": "mocha", "lint": "eslint --ext .js,.ts .", "format": "prettier --write .", "update-pages": "node bin/ecmarkup.js spec/index.html _index.html && git checkout gh-pages && rm index.html && mv _index.html index.html && git add index.html && git commit -m \"update pages\" && git checkout master", @@ -34,8 +34,8 @@ "dependencies": { "bluebird": "^3.7.2", "chalk": "^1.1.3", - "ecmarkdown": "4.0.0", - "grammarkdown": "^2.1.2", + "ecmarkdown": "5.0.1", + "grammarkdown": "^2.2.0", "highlight.js": "^9.17.1", "html-escape": "^1.0.2", "js-yaml": "^3.13.1", diff --git a/src/Algorithm.ts b/src/Algorithm.ts index 8f079484..1ed2e33c 100644 --- a/src/Algorithm.ts +++ b/src/Algorithm.ts @@ -6,14 +6,21 @@ import * as emd from 'ecmarkdown'; /*@internal*/ export default class Algorithm extends Builder { static enter(context: Context) { + context.inAlg = true; const { node } = context; + if ('ecmarkdownOut' in node) { + // i.e., we already parsed this during the lint phase + // @ts-ignore + node.innerHTML = node.ecmarkdownOut.replace(/((?:\s+|>)[!?])\s+(\w+\s*\()/g, '$1 $2'); + return; + } + // replace spaces after !/? with   to prevent bad line breaking const html = emd .algorithm(node.innerHTML) .replace(/((?:\s+|>)[!?])\s+(\w+\s*\()/g, '$1 $2'); node.innerHTML = html; - context.inAlg = true; } static exit(context: Context) { diff --git a/src/Grammar.ts b/src/Grammar.ts index 336a8b21..425f8712 100644 --- a/src/Grammar.ts +++ b/src/Grammar.ts @@ -9,6 +9,13 @@ const globalEndTagRe = /<\/?(emu-\w+|h?\d|p|ul|table|pre|code)\b[^>]*>/gi; /*@internal*/ export default class Grammar extends Builder { static enter({ spec, node }: Context) { + if ('grammarkdownOut' in node) { + // i.e., we already parsed this during the lint phase + // @ts-ignore + node.innerHTML = node.grammarkdownOut; + return; + } + let content: string; let possiblyMalformed = true; if (spec.sourceText) { @@ -21,8 +28,9 @@ export default class Grammar extends Builder { // the parser was able to find a matching end tag. const start = location.startTag.endOffset as number; const end = location.endTag.startOffset as number; - content = spec.sourceText.slice(start, end); + content = spec.sourceText!.slice(start, end); } else { + // TODO this is not reached // the parser was *not* able to find a matching end tag. Try to recover by finding a // possible end tag, otherwise read the rest of the source text. const start = (globalEndTagRe.lastIndex = location.endOffset as number); @@ -35,6 +43,7 @@ export default class Grammar extends Builder { globalEndTagRe.lastIndex = 0; } } else { + // TODO this is not reached // can't read location for whatever reason, so fallback to innerHTML content = node.innerHTML; } diff --git a/src/Spec.ts b/src/Spec.ts index e1de72a8..ae2c2fea 100644 --- a/src/Spec.ts +++ b/src/Spec.ts @@ -28,6 +28,7 @@ import Xref from './Xref'; import Eqn from './Eqn'; import Biblio from './Biblio'; import { autolink, replacerForNamespace, NO_CLAUSE_AUTOLINK } from './autolinker'; +import { lint } from './lint/lint'; import { CancellationToken } from 'prex'; const DRAFT_DATE_FORMAT = { year: 'numeric', month: 'long', day: 'numeric', timeZone: 'UTC' }; @@ -105,7 +106,7 @@ export default class Spec { fetch: (file: string, token: CancellationToken) => PromiseLike, dom: any, opts?: Options, - sourceText?: string, + sourceText?: string, // TODO we need not support this being undefined token = CancellationToken.none ) { opts = opts || {}; @@ -217,6 +218,16 @@ export default class Spec { }; const document = this.doc; + + if (this.opts.reportLintErrors) { + this._log('Linting...'); + const source = this.sourceText; + if (source === undefined) { + throw new Error('Cannot lint when source text is not available'); + } + lint(this.opts.reportLintErrors, source, this.dom, document); + } + const walker = document.createTreeWalker(document.body, 1 | 4 /* elements and text nodes */); walk(walker, context); diff --git a/src/args.ts b/src/args.ts index 1493c552..22dbd1f9 100644 --- a/src/args.ts +++ b/src/args.ts @@ -19,6 +19,12 @@ export const argParser = nomnom jsOut: { full: 'js-out', metavar: 'FILE', help: 'Write Emu JS dependencies to FILE' }, toc: { flag: true, help: "Don't include the table of contents" }, oldToc: { full: 'old-toc', flag: true, help: 'Use the old table of contents styling' }, + lintSpec: { + full: 'lint-spec', + flag: true, + default: false, + help: 'Enforce some style and correctness checks', + }, verbose: { flag: true, default: false, help: 'Display document build progress' }, version: { abbr: 'v', diff --git a/src/cli.ts b/src/cli.ts index 13a95416..a0d57159 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,8 +1,12 @@ +import type { Options } from './ecmarkup'; +import type { LintingError } from './lint/lint'; + import { argParser } from './args'; const args = argParser.parse(); -import * as ecmarkup from './ecmarkup'; import * as fs from 'fs'; +import * as chalk from 'chalk'; +import * as ecmarkup from './ecmarkup'; import * as utils from './utils'; const debounce: (_: () => Promise) => () => Promise = require('promise-debounce'); @@ -16,10 +20,33 @@ if (args.js) { args.jsOut = args.js; } +if (args.lintSpec && args.watch) { + console.error('Cannot use --lint-spec with --watch'); + process.exit(1); +} + const watching = new Map(); const build = debounce(async function build() { try { - const spec = await ecmarkup.build(args.infile, utils.readFile, args); + const opts: Options = { ...args }; + if (args.lintSpec) { + opts.reportLintErrors = (errors: LintingError[]) => { + console.error( + chalk.underline('Linting errors:') + + '\n' + + errors.map(e => `${args.infile}:${e.line}:${e.column}: ${e.message}`).join('\n') + + '\n' + ); + throw new Error( + `There ${ + errors.length === 1 + ? 'was a linting error' + : 'were ' + errors.length + ' linting errors' + }.` + ); + }; + } + const spec = await ecmarkup.build(args.infile, utils.readFile, opts); const pending: Promise[] = []; if (args.biblio) { diff --git a/src/ecmarkup.ts b/src/ecmarkup.ts index 14d4464e..6184dacd 100644 --- a/src/ecmarkup.ts +++ b/src/ecmarkup.ts @@ -1,4 +1,5 @@ import type { BiblioEntry } from './Biblio'; +import type { LintingError } from './lint/lint'; import Spec from './Spec'; import * as utils from './utils'; @@ -24,6 +25,7 @@ export interface Options { contributors?: string; toc?: boolean; oldToc?: boolean; + lintSpec?: boolean; verbose?: boolean; cssOut?: string; jsOut?: string; @@ -31,6 +33,7 @@ export interface Options { outfile?: string; boilerplate?: Boilerplate; ecma262Biblio?: boolean; + reportLintErrors?: (errors: LintingError[]) => void; } export async function build( diff --git a/src/lint/algorithm-error-reporter-type.ts b/src/lint/algorithm-error-reporter-type.ts new file mode 100644 index 00000000..74040c8e --- /dev/null +++ b/src/lint/algorithm-error-reporter-type.ts @@ -0,0 +1,9 @@ +export type Reporter = ({ + line, + column, + message, +}: { + line: number; + column: number; + message: string; +}) => void; diff --git a/src/lint/collect-algorithm-diagnostics.ts b/src/lint/collect-algorithm-diagnostics.ts new file mode 100644 index 00000000..b259a009 --- /dev/null +++ b/src/lint/collect-algorithm-diagnostics.ts @@ -0,0 +1,72 @@ +import type { Node as EcmarkdownNode, Observer } from 'ecmarkdown'; + +import type { Reporter } from './algorithm-error-reporter-type'; +import type { LintingError } from './lint'; + +import { parseAlgorithm, visit } from 'ecmarkdown'; + +import { getLocation } from './utils'; +import lintAlgorithmLineEndings from './rules/algorithm-line-endings'; + +let algorithmRules = [lintAlgorithmLineEndings]; + +function composeObservers(...observers: Observer[]): Observer { + return { + enter(node: EcmarkdownNode) { + for (let observer of observers) { + observer.enter?.(node); + } + }, + exit(node: EcmarkdownNode) { + for (let observer of observers) { + observer.exit?.(node); + } + }, + }; +} + +export function collectAlgorithmDiagnostics( + dom: any, + sourceText: string, + algorithms: { element: Element; tree?: EcmarkdownNode }[] +) { + let lintingErrors: LintingError[] = []; + + for (let algorithm of algorithms) { + let element = algorithm.element; + let location = getLocation(dom, element); + + let reporter: Reporter = ({ + line, + column, + message, + }: { + line: number; + column: number; + message: string; + }) => { + // jsdom's lines and columns are both 1-based + // ecmarkdown has 1-based line numbers and 0-based column numbers + // we want 1-based for both + let trueLine = location.startTag.line + line - 1; + let trueCol = column; + if (line === 1) { + trueCol += + location.startTag.col + (location.startTag.endOffset - location.startTag.startOffset); + } else { + trueCol += 1; + } + lintingErrors.push({ line: trueLine, column: trueCol, message }); + }; + + let observer = composeObservers(...algorithmRules.map(f => f(reporter, element))); + let tree = parseAlgorithm( + sourceText.slice(location.startTag.endOffset, location.endTag.startOffset), + { trackPositions: true } + ); + visit(tree, observer); + algorithm.tree = tree; + } + + return lintingErrors; +} diff --git a/src/lint/collect-grammar-diagnostics.ts b/src/lint/collect-grammar-diagnostics.ts new file mode 100644 index 00000000..c01704e2 --- /dev/null +++ b/src/lint/collect-grammar-diagnostics.ts @@ -0,0 +1,201 @@ +import type { LintingError } from './lint'; + +import { + Grammar as GrammarFile, + SyncHost, + EmitFormat, + Parser as GrammarParser, + SyntaxKind, + SymbolSpan, + Diagnostics, + Production, + Parameter, + skipTrivia, +} from 'grammarkdown'; + +import { + grammarkdownLocationToTrueLocation, + getLocation, + getProductions, + rhsMatches, +} from './utils'; + +export function collectGrammarDiagnostics( + dom: any, + sourceText: string, + mainGrammar: { element: Element; source: string }[], + sdos: { grammar: Element; alg: Element }[], + earlyErrors: { grammar: Element; lists: HTMLUListElement[] }[] +) { + // ******************* + // Parse the grammar with Grammarkdown and collect its diagnostics, if any + + let grammarParser = new GrammarParser(); + // TODO use CoreSyncHost once published + let fakeHost: SyncHost = { + readFileSync(file: string) { + let idx = parseInt(file); + if (idx.toString() !== file || idx < 0 || idx >= mainGrammar.length) { + throw new Error('tried to read non-existent ' + file); + } + return mainGrammar[idx].source; + }, + resolveFile(file: string) { + return file; + }, + normalizeFile(file: string) { + return file; + }, + getSourceFileSync(file: string) { + return grammarParser.parseSourceFile(file, this.readFileSync(file)); + }, + } as any; // good enough! + let compilerOptions = { + format: EmitFormat.ecmarkup, + noChecks: false, + noUnusedParameters: true, + }; + let grammar = new GrammarFile(Object.keys(mainGrammar), compilerOptions, fakeHost); + grammar.parseSync(); + grammar.checkSync(); + + let lintingErrors: LintingError[] = []; + let unusedParameterErrors: Map> = new Map(); + + if (grammar.diagnostics.size > 0) { + // `detailedMessage: false` prevents prepending line numbers, which is good because we're going to make our own + grammar.diagnostics + .getDiagnosticInfos({ formatMessage: true, detailedMessage: false }) + .forEach(m => { + let idx = +m.sourceFile!.filename; + let grammarLoc = getLocation(dom, mainGrammar[idx].element); + + let { line, column } = grammarkdownLocationToTrueLocation( + grammarLoc, + m.range!.start.line, + m.range!.start.character + ); + + let error = { line, column, message: m.formattedMessage! }; + lintingErrors.push(error); + + if (m.code === Diagnostics.Parameter_0_is_unused.code) { + let param = m.node as Parameter; + let navigator = grammar.resolver.createNavigator(param)!; + navigator.moveToAncestor(node => node.kind === SyntaxKind.Production); + let nodeName = (navigator.getNode() as Production).name.text!; + let paramName = param.name.text!; + if (!unusedParameterErrors.has(nodeName)) { + unusedParameterErrors.set(nodeName, new Map()); + } + let paramToError = unusedParameterErrors.get(nodeName)!; + paramToError.set(paramName, error); + } + }); + } + + // ******************* + // Check that SDOs and Early Errors are defined in terms of productions which actually exist + // Also filter out any "unused parameter" warnings for grammar productions for which the parameter is used in an early error or SDO + + let oneOffGrammars: { grammarEle: Element; grammar: GrammarFile }[] = []; + let actualGrammarProductions = getProductions(grammar); + let grammarsAndRules = [ + ...sdos.map(s => ({ grammar: s.grammar, rules: [s.alg], type: 'syntax-directed operation' })), + ...earlyErrors.map(e => ({ grammar: e.grammar, rules: e.lists, type: 'early error' })), + ]; + for (let { grammar: grammarEle, rules: rulesEles, type } of grammarsAndRules) { + let grammarLoc = getLocation(dom, grammarEle); + let grammarHost = SyncHost.forFile( + sourceText.slice(grammarLoc.startTag.endOffset, grammarLoc.endTag.startOffset) + ); + let grammar = new GrammarFile([grammarHost.file], {}, grammarHost); + grammar.parseSync(); + oneOffGrammars.push({ grammarEle, grammar }); + let productions = getProductions(grammar); + + function getLocationInGrammar(pos: number) { + let file = grammar.sourceFiles[0]; + let posWithoutWhitespace = skipTrivia(file.text, pos, file.text.length); + let { line: gmdLine, character: gmdCharacter } = file.lineMap.positionAt( + posWithoutWhitespace + ); + + return grammarkdownLocationToTrueLocation(grammarLoc, gmdLine, gmdCharacter); + } + + for (let [name, { production, rhses }] of productions) { + let originalRhses = actualGrammarProductions.get(name)?.rhses; + if (originalRhses === undefined) { + let { line, column } = getLocationInGrammar(production.pos); + lintingErrors.push({ + line, + column, + message: `Could not find a definition for LHS in ${type}`, + }); + continue; + } + for (let rhs of rhses) { + if (!originalRhses.some(o => rhsMatches(rhs, o))) { + let { line, column } = getLocationInGrammar(rhs.pos); + lintingErrors.push({ + line, + column, + message: `Could not find a production matching RHS in ${type}`, + }); + } + + if (rhs.kind === SyntaxKind.RightHandSide) { + (function noGrammarRestrictions(s: SymbolSpan | undefined) { + if (s === undefined) { + return; + } + if (s.symbol.kind === SyntaxKind.NoSymbolHereAssertion) { + let { line, column } = getLocationInGrammar(s.symbol.pos); + lintingErrors.push({ + line, + column, + message: `Productions referenced in ${type}s should not include "no LineTerminator here" restrictions`, + }); + } + // We could also enforce that lookahead restrictions are absent, but in some cases they actually do add clarity, so we just don't enforce it either way. + + noGrammarRestrictions(s.next); + })(rhs.head); + + if (rhs.constraints !== undefined) { + let { line, column } = getLocationInGrammar(rhs.constraints.pos); + lintingErrors.push({ + line, + column, + message: `Productions referenced in ${type}s should not be gated on grammar parameters`, + }); + } + } + } + + // Filter out unused parameter errors for which the parameter is actually used in an SDO or Early Error + if (unusedParameterErrors.has(name)) { + let paramToError = unusedParameterErrors.get(name)!; + for (let [paramName, error] of paramToError) { + // This isn't the most elegant check, but it works. + if (rulesEles.some(r => r.innerHTML.indexOf('[' + paramName + ']') !== -1)) { + paramToError.delete(paramName); + // Yes, there's definitely big-O faster ways of doing this, but in practice this is probably faster for the sizes we will encounter. + let index = lintingErrors.indexOf(error); + if (index === -1) { + throw new Error('unreachable: tried to clear non-existent error'); + } + lintingErrors.splice(index, 1); + } + } + } + } + } + + return { + grammar, + oneOffGrammars, + lintingErrors, + }; +} diff --git a/src/lint/collect-nodes.ts b/src/lint/collect-nodes.ts new file mode 100644 index 00000000..58cd3cb7 --- /dev/null +++ b/src/lint/collect-nodes.ts @@ -0,0 +1,105 @@ +import type { Node as EcmarkdownNode } from 'ecmarkdown'; + +import { getLocation } from './utils'; + +export function collectNodes(sourceText: string, dom: any, document: Document) { + let mainGrammar: { element: Element; source: string }[] = []; + let sdos: { grammar: Element; alg: Element }[] = []; + let earlyErrors: { grammar: Element; lists: HTMLUListElement[] }[] = []; + let algorithms: { element: Element; tree?: EcmarkdownNode }[] = []; + + let inAnnexB = false; + let lintWalker = document.createTreeWalker(document.body, 1 /* elements */); + function visitCurrentNode() { + let node: Element = lintWalker.currentNode as Element; + + let thisNodeIsAnnexB = + node.nodeName === 'EMU-ANNEX' && + node.id === 'sec-additional-ecmascript-features-for-web-browsers'; + if (thisNodeIsAnnexB) { + inAnnexB = true; + } + + // Don't bother collecting early errors and SDOs from Annex B. + // This is mostly so we don't have to deal with having two inconsistent copies of some of the grammar productions. + if (!inAnnexB) { + if (node.nodeName === 'EMU-CLAUSE') { + // Look for early errors + let first = node.firstElementChild; + if (first !== null && first.nodeName === 'H1') { + let title = first.textContent ?? ''; + if (title.trim() === 'Static Semantics: Early Errors') { + let grammar = null; + let lists: HTMLUListElement[] = []; + for (let child of (node.children as any) as Iterable) { + if (child.nodeName === 'EMU-GRAMMAR') { + if (grammar !== null) { + if (lists.length === 0) { + throw new Error( + 'unrecognized structure for early errors: grammar without errors' + ); + } + earlyErrors.push({ grammar, lists }); + } + grammar = child; + lists = []; + } else if (child.nodeName === 'UL') { + if (grammar === null) { + throw new Error( + 'unrecognized structure for early errors: errors without correspondinig grammar' + ); + } + lists.push(child as HTMLUListElement); + } + } + if (grammar === null) { + throw new Error('unrecognized structure for early errors: no grammars'); + } + if (lists.length === 0) { + throw new Error('unrecognized structure for early errors: grammar without errors'); + } + earlyErrors.push({ grammar, lists }); + } + } + } else if (node.nodeName === 'EMU-GRAMMAR') { + // Look for grammar definitions and SDOs + if (node.getAttribute('type') === 'definition') { + let loc = getLocation(dom, node); + let start = loc.startTag.endOffset; + let end = loc.endTag.startOffset; + let realSource = sourceText.slice(start, end); + mainGrammar.push({ element: node as Element, source: realSource }); + } else if (node.getAttribute('type') !== 'example') { + let next = lintWalker.nextSibling() as Element; + if (next) { + if (next.nodeName === 'EMU-ALG') { + sdos.push({ grammar: node, alg: next }); + } + lintWalker.previousSibling(); + } + } + } + } + + if (node.nodeName === 'EMU-ALG' && node.getAttribute('type') !== 'example') { + algorithms.push({ element: node }); + } + + let firstChild = lintWalker.firstChild(); + if (firstChild) { + while (true) { + visitCurrentNode(); + let next = lintWalker.nextSibling(); + if (!next) break; + } + lintWalker.parentNode(); + } + + if (thisNodeIsAnnexB) { + inAnnexB = false; + } + } + visitCurrentNode(); + + return { mainGrammar, sdos, earlyErrors, algorithms }; +} diff --git a/src/lint/lint.ts b/src/lint/lint.ts new file mode 100644 index 00000000..04321c98 --- /dev/null +++ b/src/lint/lint.ts @@ -0,0 +1,67 @@ +import { emit } from 'ecmarkdown'; + +import { collectNodes } from './collect-nodes'; +import { collectGrammarDiagnostics } from './collect-grammar-diagnostics'; +import { collectAlgorithmDiagnostics } from './collect-algorithm-diagnostics'; + +export type LintingError = { line: number; column: number; message: string }; + +/* +Currently this checks +- grammarkdown's built-in sanity checks +- the productions in the definition of each early error and SDO are defined in the main grammar +- those productions do not include `[no LineTerminator here]` restrictions or `[+flag]` gating +- the algorithm linting rules imported above + +There's more to do: +https://github.com/tc39/ecmarkup/issues/173 +*/ +export function lint( + report: (errors: LintingError[]) => void, + sourceText: string, + dom: any, + document: Document +) { + let { mainGrammar, sdos, earlyErrors, algorithms } = collectNodes(sourceText, dom, document); + + let { grammar, oneOffGrammars, lintingErrors } = collectGrammarDiagnostics( + dom, + sourceText, + mainGrammar, + sdos, + earlyErrors + ); + + lintingErrors.push(...collectAlgorithmDiagnostics(dom, sourceText, algorithms)); + + if (lintingErrors.length > 0) { + lintingErrors.sort((a, b) => a.line - b.line); + report(lintingErrors); + } + + // Stash intermediate results for later use + // This isn't actually necessary for linting, but we might as well avoid redoing work later when we can. + + grammar.emitSync(undefined, (file, source) => { + let name = +file.split('.')[0]; + let node = mainGrammar[name].element; + if ('grammarkdownOut' in node) { + throw new Error('unexpectedly regenerating grammarkdown output for node ' + name); + } + // @ts-ignore we are intentionally adding a property here + node.grammarkdownOut = source; + }); + for (let { grammarEle, grammar } of oneOffGrammars) { + grammar.emitSync(undefined, (file, source) => { + if ('grammarkdownOut' in grammarEle) { + throw new Error('unexpectedly regenerating grammarkdown output'); + } + // @ts-ignore we are intentionally adding a property here + grammarEle.grammarkdownOut = source; + }); + } + for (let { element, tree } of algorithms) { + // @ts-ignore we are intentionally adding a property here + element.ecmarkdownOut = emit(tree!); + } +} diff --git a/src/lint/rules/algorithm-line-endings.ts b/src/lint/rules/algorithm-line-endings.ts new file mode 100644 index 00000000..f89c2823 --- /dev/null +++ b/src/lint/rules/algorithm-line-endings.ts @@ -0,0 +1,240 @@ +import type { Node as EcmarkdownNode, Observer } from 'ecmarkdown'; +import type { Reporter } from '../algorithm-error-reporter-type'; + +/* +Checks that every algorithm step has one of these forms: + +- `If foo, bar.` +- `If foo, then,` + substeps +- `Else if foo, bar.` +- `Else if foo, then.` + substeps +- `Else, baz.` +- `Else,` + substeps +- `Repeat,` + substeps +- `Repeat, while foo,` + substeps +- `Repeat, until foo,` + substeps +- `For each foo, bar.` +- `For each foo, do` + substeps +- `Other.` +- `Other:` + substeps +*/ +export default function (report: Reporter, node: Element): Observer { + if (node.getAttribute('type') === 'example') { + return {}; + } + return { + enter(node: EcmarkdownNode) { + if (node.name !== 'ordered-list-item') { + return; + } + let firstIndex = 0; + while (firstIndex < node.contents.length && node.contents[firstIndex].name === 'tag') { + ++firstIndex; + } + if (firstIndex === node.contents.length) { + report({ + line: node.location!.start.line, + column: node.location!.start.column, + message: 'expected line to contain non-tag elements', + }); + return; + } + let first = node.contents[firstIndex]; + + let last = node.contents[node.contents.length - 1]; + + // Special case: if the step has a figure, it should end in `:` + if (last.name === 'tag' && last.contents === '') { + let count = 1; + let lastIndex = node.contents.length - 2; + if (lastIndex < 0) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: 'could not find matching
tag', + }); + return; + } + while (count > 0) { + last = node.contents[lastIndex]; + if (last.name === 'tag') { + if (last.contents === '
') { + --count; + } else if (last.contents === '
') { + ++count; + } + } + --lastIndex; + if (lastIndex < 0) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: 'could not find matching
tag', + }); + return; + } + } + last = node.contents[lastIndex]; + if (last.name !== 'text') { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected line to end with text (found ${last.name})`, + }); + return; + } + if (!/:\n +$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: 'expected line with figure to end with ":"', + }); + } + return; + } + + if (last.name !== 'text') { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected line to end with text (found ${last.name})`, + }); + return; + } + + let initialText = first.name === 'text' ? first.contents : ''; + let hasSubsteps = node.sublist !== null; + + if (/^(?:If |Else if)/.test(initialText)) { + if (hasSubsteps) { + if (node.sublist!.name === 'ol') { + if (!/, then$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "If" with substeps to end with ", then" (found ${JSON.stringify( + last.contents + )})`, + }); + } + } else { + if (!/:$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "If" with list to end with ":" (found ${JSON.stringify( + last.contents + )})`, + }); + } + } + } else { + if (!/(?:\.|\.\)|:)$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "If" without substeps to end with "." or ":" (found ${JSON.stringify( + last.contents + )})`, + }); + } + } + } else if (/^Else/.test(initialText)) { + if (hasSubsteps) { + if (node.contents.length === 1 && first.contents === 'Else,') { + return; + } + if (!/,$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "Else" with substeps to end with "," (found ${JSON.stringify( + last.contents + )})`, + }); + } + } else { + if (!/(?:\.|\.\)|:)$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "Else" without substeps to end with "." or ":" (found ${JSON.stringify( + last.contents + )})`, + }); + } + } + } else if (/^Repeat/.test(initialText)) { + if (!hasSubsteps) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: 'expected "Repeat" to have substeps', + }); + } + if (node.contents.length === 1 && first.contents === 'Repeat,') { + return; + } + if (!/^Repeat, (?:while|until) /.test(initialText)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "Repeat" to start with "Repeat, while " or "Repeat, until " (found ${JSON.stringify( + initialText + )})`, + }); + } + if (!/,$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: 'expected "Repeat" to end with ","', + }); + } + } else if (/^For each/.test(initialText)) { + if (hasSubsteps) { + if (!/, do$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "For each" with substeps to end with ", do" (found ${JSON.stringify( + last.contents + )})`, + }); + } + } else { + if (!/(?:\.|\.\))$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected "For each" without substeps to end with "." (found ${JSON.stringify( + last.contents + )})`, + }); + } + } + } else { + if (hasSubsteps) { + if (!/:$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected freeform line with substeps to end with ":" (found ${JSON.stringify( + last.contents + )})`, + }); + } + } else if (!/(?:\.|\.\))$/.test(last.contents)) { + report({ + line: node.contents[0].location!.start.line, + column: node.contents[0].location!.start.column, + message: `expected freeform line to end with "." (found ${JSON.stringify( + last.contents + )})`, + }); + } + } + return; + }, + }; +} diff --git a/src/lint/utils.ts b/src/lint/utils.ts new file mode 100644 index 00000000..edbcb7f8 --- /dev/null +++ b/src/lint/utils.ts @@ -0,0 +1,196 @@ +import type { + Production, + ProductionBody, + RightHandSide, + OneOfList, + SymbolSpan, + LexicalSymbol, + Terminal, + Nonterminal, + ButNotSymbol, + OneOfSymbol, + ArgumentList, +} from 'grammarkdown'; + +import { Grammar as GrammarFile, SyntaxKind } from 'grammarkdown'; + +export function grammarkdownLocationToTrueLocation( + elementLoc: ReturnType, + gmdLine: number, + gmdCharacter: number +) { + // jsdom's lines and columns are both 1-based + // grammarkdown's lines and columns ("characters") are both 0-based + // we want 1-based for both + let line = elementLoc.startTag.line + gmdLine; + let column = + gmdLine === 0 + ? elementLoc.startTag.col + + (elementLoc.startTag.endOffset - elementLoc.startTag.startOffset) + + gmdCharacter + : gmdCharacter + 1; + return { line, column }; +} + +export function getLocation(dom: any, node: Element) { + let loc = dom.nodeLocation(node); + if (!loc || !loc.startTag || !loc.endTag) { + throw new Error('could not find location'); + } + return loc; +} + +export function getProductions(grammar: GrammarFile) { + let productions: Map< + string, + { production: Production; rhses: (RightHandSide | OneOfList)[] } + > = new Map(); + grammar.rootFiles.forEach(f => + f.elements.forEach(e => { + if (e.kind !== SyntaxKind.Production) { + // The alternatives supported by Grammarkdown are imports and defines, which ecma-262 does not use. + throw new Error('Grammar contains non-production node ' + JSON.stringify(e)); + } + if (typeof e.body === 'undefined') { + throw new Error('production lacks body ' + JSON.stringify(e)); + } + if (!productions.has(e.name.text!)) { + productions.set(e.name.text!, { production: e as Production, rhses: [] }); + } + productions.get(e.name.text!)!.rhses.push(...productionBodies(e.body)); + }) + ); + return productions; +} + +function productionBodies(body: ProductionBody) { + switch (body.kind) { + case SyntaxKind.RightHandSideList: + return body.elements!; + case SyntaxKind.OneOfList: + case SyntaxKind.RightHandSide: + return [body]; + default: + // @ts-ignore + throw new Error('unknown production body type ' + body.constructor.name); + } +} + +// these "matches" functions are not symmetric: +// the first parameter is permitted to omit flags and _opt nodes present on the second, but not conversely +export function rhsMatches(a: RightHandSide | OneOfList, b: RightHandSide | OneOfList) { + if (a.kind !== b.kind) { + return false; + } + switch (a.kind) { + case SyntaxKind.RightHandSide: { + let aHead = a.head; + let bHead = (b as RightHandSide).head; + if (aHead === undefined || bHead === undefined) { + throw new Error('RHS must have content'); + } + if (aHead.symbol.kind === SyntaxKind.EmptyAssertion) { + if (aHead.next !== undefined) { + throw new Error('empty assertions should not have other content'); + } + return bHead.symbol.kind === SyntaxKind.EmptyAssertion || canBeEmpty(bHead); + } + return symbolSpanMatches(aHead, bHead); + } + default: + throw new Error('unknown rhs type ' + a.constructor.name); + } +} + +function symbolSpanMatches(a: SymbolSpan | undefined, b: SymbolSpan | undefined): boolean { + if (a === undefined) { + return canBeEmpty(b); + } + + if (a !== undefined && b !== undefined && symbolMatches(a.symbol, b.symbol)) { + return symbolSpanMatches(a.next, b.next); + } + + // sometimes when there is an optional terminal or nonterminal we give distinct implementations for each case, rather than one implementation which represents both + // which means both `a b c` and `a c` must match `a b? c` + // TODO reconsider whether ECMA-262 should have these + if (b !== undefined && canSkipSymbol(b.symbol)) { + return symbolSpanMatches(a, b.next); + } + + return false; +} + +function canBeEmpty(b: SymbolSpan | undefined): boolean { + return b === undefined || (canSkipSymbol(b.symbol) && canBeEmpty(b.next)); +} + +function canSkipSymbol(a: LexicalSymbol) { + return ( + a.kind === SyntaxKind.NoSymbolHereAssertion || + a.kind === SyntaxKind.LookaheadAssertion || + a.kind === SyntaxKind.ProseAssertion || + (a as Terminal | Nonterminal).questionToken !== undefined + ); +} + +function symbolMatches(a: LexicalSymbol, b: LexicalSymbol): boolean { + if (a.kind !== b.kind) { + return false; + } + switch (a.kind) { + case SyntaxKind.Terminal: + return a.text === (b as Terminal).text; + case SyntaxKind.Nonterminal: + if (a.argumentList !== undefined) { + if ((b as Nonterminal).argumentList === undefined) { + return false; + } + if (!argumentListMatches(a.argumentList, (b as Nonterminal).argumentList!)) { + return false; + } + } + return a.name.text === (b as Nonterminal).name.text; + case SyntaxKind.ButNotSymbol: + if (a.right === undefined || (b as ButNotSymbol).right === undefined) { + throw new Error('"but not" production cannot be empty'); + } + return ( + symbolMatches(a.left, (b as ButNotSymbol).left) && + symbolMatches(a.right, (b as ButNotSymbol).right!) + ); + case SyntaxKind.EmptyAssertion: + case SyntaxKind.LookaheadAssertion: + case SyntaxKind.ProseAssertion: + return true; + case SyntaxKind.OneOfSymbol: + if (a.symbols === undefined || (b as OneOfSymbol).symbols === undefined) { + throw new Error('"one of" production cannot be empty'); + } + return ( + a.symbols.length === (b as OneOfSymbol).symbols!.length && + a.symbols.every((s, i) => symbolMatches(s, (b as OneOfSymbol).symbols![i])) + ); + default: + throw new Error('unknown symbol type ' + a.constructor.name); + } +} + +function argumentListMatches(a: ArgumentList, b: ArgumentList) { + if (a.elements === undefined || b.elements === undefined) { + throw new Error('argument lists must have elements'); + } + return ( + a.elements.length === b.elements.length && + a.elements.every((ae, i) => { + let be = b.elements![i]; + if (ae.operatorToken === undefined || be.operatorToken === undefined) { + throw new Error('arguments must have operators'); + } + if (ae.name === undefined || be.name === undefined) { + throw new Error('arguments must have names'); + } + return ae.operatorToken.kind === be.operatorToken.kind && ae.name.text === be.name.text; + }) + ); +} diff --git a/test/cli.js b/test/cli.js index fce917ac..878a6142 100644 --- a/test/cli.js +++ b/test/cli.js @@ -10,7 +10,7 @@ describe('ecmarkup#cli', () => { it('exits with an error on error', () => { assert.throws(() => { - execSync('./bin/ecmarkup.js test/malformed.bad.html', { encoding: 'utf8' }); + execSync('./bin/ecmarkup.js test/malformed.bad.html', { encoding: 'utf8', stdio: 'ignore' }); }); }); }); diff --git a/test/lint-algorithm-line-endings.js b/test/lint-algorithm-line-endings.js new file mode 100644 index 00000000..0b12f5b9 --- /dev/null +++ b/test/lint-algorithm-line-endings.js @@ -0,0 +1,80 @@ +'use strict'; + +let { assertLint, lintLocationMarker: M, positioned } = require('./lint-helpers'); + +describe('linting algorithms', function () { + describe('line endings', function () { + it('simple', async function () { + await assertLint( + positioned` + 1. ${M}testing + `, + 'expected freeform line to end with "." (found "testing")' + ); + }); + + it('inline', async function () { + await assertLint( + positioned`1. ${M}testing`, + 'expected freeform line to end with "." (found "testing")' + ); + }); + + it('repeat', async function () { + await assertLint( + positioned` + 1. ${M}Repeat, while _x_ < 10 + 1. Foo. + `, + 'expected "Repeat" to end with ","' + ); + }); + + it('inline if', async function () { + await assertLint( + positioned` + 1. ${M}If _x_, then + `, + 'expected "If" without substeps to end with "." or ":" (found ", then")' + ); + }); + + it('multiline if', async function () { + await assertLint( + positioned` + 1. ${M}If _x_, + 1. Foo. + `, + 'expected "If" with substeps to end with ", then" (found ",")' + ); + }); + + it('negative', async function () { + await assertLint({ html: ` + + 1. If foo, bar. + 1. Else if foo, bar. + 1. Else, bar. + 1. If foo, then + 1. Substep. + 1. Else if foo, then + 1. Substep. + 1. Else, + 1. Substep. + 1. Repeat, + 1. Substep. + 1. Repeat, while foo, + 1. Substep. + 1. Repeat, until foo, + 1. Substep. + 1. For each foo, do bar. + 1. For each foo, do + 1. Substep. + 1. Other. + 1. Other: + 1. Substep. + + ` }); + }); + }); +}); diff --git a/test/lint-helpers.js b/test/lint-helpers.js new file mode 100644 index 00000000..1480de23 --- /dev/null +++ b/test/lint-helpers.js @@ -0,0 +1,58 @@ +'use strict'; + +let assert = require('assert'); +let emu = require('../lib/ecmarkup'); + +let lintLocationMarker = {}; + +function positioned(literalParts, ...interpolatedParts) { + let markerIndex = interpolatedParts.indexOf(lintLocationMarker); + if (markerIndex < 0 || markerIndex !== interpolatedParts.lastIndexOf(lintLocationMarker)) { + throw new Error('alg template tag must interpolate the location marker exactly once'); + } + let offset, line, column; + let str = literalParts[0]; + for (let i = 0; i < literalParts.length - 1; ++i) { + if (i === markerIndex) { + offset = str.length; + let lines = str.split('\n'); + line = lines.length; + column = lines[lines.length - 1].length + 1; + } else { + str += String(interpolatedParts[i]); + } + str += literalParts[i + 1]; + } + return { offset, line, column, html: str }; +} + +async function assertLint({ offset, line, column, html }, message = null) { + let reportLintErrors; + let reported = false; + + if (message === null) { + reportLintErrors = errors => { + throw new Error('unexpected errors ' + JSON.stringify(errors)); + }; + } else { + reportLintErrors = errors => { + reported = true; + assert.equal(errors.length, 1, 'should have exactly one error'); + assert.deepStrictEqual(errors[0], { + line, + column, + message, + }); + }; + } + + await emu.build('test-example.emu', async () => html, { + ecma262Biblio: false, + copyright: false, + reportLintErrors, + }); + assert.equal(reported, message !== null); +} + + +module.exports = { assertLint, lintLocationMarker, positioned }; diff --git a/test/lint.js b/test/lint.js new file mode 100644 index 00000000..d4217a2f --- /dev/null +++ b/test/lint.js @@ -0,0 +1,89 @@ +'use strict'; + +let { assertLint, positioned, lintLocationMarker: M } = require('./lint-helpers'); + +describe('linting whole program', function () { + describe('grammar validity', function () { + it('unused parameters', async function () { + await assertLint( + positioned` + + Statement[${M}a]: \`;\` + + `, + "Parameter 'a' is unused." + ); + }); + + it('missing parameters', async function () { + await assertLint( + positioned` + + Foo[a]: + [+a] \`a\` + \`b\` + + Bar: ${M}Foo + + `, + "There is no argument given for parameter 'a'." + ); + }); + + it('error in inline grammar', async function () { + await assertLint( + positioned` + Statement[${M}a]: \`;\` + `, + "Parameter 'a' is unused." + ); + }); + }); + + describe('grammar+SDO validity', function () { + it('undefined nonterminals in SDOs', async function () { + await assertLint( + positioned` + + ${M}Statement: EmptyStatement + + + 1. Return Foo. + + `, + 'Could not find a definition for LHS in syntax-directed operation' + ); + }); + + it('error in inline grammar', async function () { + await assertLint( + positioned` + ${M}Statement: EmptyStatement + + 1. Return Foo. + + `, + 'Could not find a definition for LHS in syntax-directed operation' + ); + }); + + it('undefined productions in SDOs', async function () { + await assertLint( + positioned` + + Statement: \`;\` + + +

SS: Foo

+ + Statement: ${M}EmptyStatement + + + 1. Return Foo. + + `, + 'Could not find a production matching RHS in syntax-directed operation' + ); + }); + }); +});