Skip to content

Commit

Permalink
feat: Generate lists from RegExp patterns (#161)
Browse files Browse the repository at this point in the history
* feat: 'listOf' patterns
* test: New test cases
* test: New baseline
* Extend Config Schema (Backwards Compatible)
  • Loading branch information
about-code authored Jun 1, 2021
1 parent 3f39970 commit c99d5c6
Show file tree
Hide file tree
Showing 34 changed files with 771 additions and 32 deletions.
4 changes: 4 additions & 0 deletions conf/v5/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@
"description": "The page title for the index file. If missing the application uses a default value."
,"type": "string"
}
,"pattern": {
"description": "A regular expression which, when it matches text, generates an entry in the given list. The expression may contain a capture group which extracts a list item title. A match results in a URL-addressable HTML node being added to the output."
,"type": "string"
}
}
}
,"i18n": {
Expand Down
98 changes: 81 additions & 17 deletions lib/anchorizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const api = {};
const HTML_COMMENT = /<!-- (.*) -->/;
const TABLE_LABEL_REGEXP = /<!-- table: (.*) -->/;
const TABLE_LABEL_REGEXP2 = /(.*):(\s|\n|$)/;
const REGEXP_SANITIZE_HTML_ATTR = /["<>]/g;

/**
* Anchorizer searches the abstract syntax tree for figures and tables
Expand All @@ -25,23 +26,23 @@ const TABLE_LABEL_REGEXP2 = /(.*):(\s|\n|$)/;
*/
api.anchorizer = function(args) {
    const {context} = args;
    const generateFilesConf = context.conf.generateFiles;
    const listOf = generateFilesConf.listOf;

    // Without any listOf config there is nothing to anchorize, so hand back
    // an identity transformer. (Assumes that listOfFigures and listOfTables
    // configs have been merged with the listOf config during context
    // initialization.)
    if (!listOf || listOf.length === 0) {
        return (tree) => tree;
    }

    return (tree, vFile) => {
        // A fresh slugger per file, so generated ids are counted per file.
        const slugger = new GitHubSlugger();
        // The visitor receives the whole generateFiles config; each
        // anchorize* function derives its own anchor class from it.
        uVisit(tree, getNodeVisitor(context, vFile, slugger, generateFilesConf));
        return tree;
    };
};

function getNodeVisitor(context, file, slugger, figuresClass, tablesClass) {
function getNodeVisitor(context, file, slugger, generateFilesConf) {
let htmlNodeDistance = 99;
return function(node, index, parent) {
const t = node.type;
Expand All @@ -51,13 +52,63 @@ function getNodeVisitor(context, file, slugger, figuresClass, tablesClass) {
htmlNodeDistance++;
}
if (htmlNodeDistance >= 3 && (t === "image" || t === "imageReference")) {
return anchorizeImage(node, index, parent, slugger, figuresClass);
return anchorizeImage(node, index, parent, slugger, generateFilesConf);
} else if (htmlNodeDistance >= 2 && t === "table") {
return anchorizeTable(node, index, parent, slugger, tablesClass);
return anchorizeTable(node, index, parent, slugger, generateFilesConf);
} else if (htmlNodeDistance >= 3 && t === "paragraph") {
return anchorizePattern(node, index, parent, slugger, generateFilesConf);
}
};
}

/**
 * Prepends an HTML anchor tag to a markdown paragraph node for every
 * `listOf` configuration whose `pattern` matches the paragraph's text.
 *
 * The anchor title is the pattern's first capture group if it matched
 * something, otherwise the whole matched text. The anchor id is a slug of
 * the first 20 characters of that title.
 *
 * @param {Node} node The node being visited
 * @param {number} index Position of `node` within `parent.children`
 * @param {Node} parent Parent of `node`; anchors are spliced into its children
 * @param {GitHubSlugger} slugger Slug algorithm to generate URL-friendly anchor ids
 * @param {object} generateFilesConf The `generateFiles` config; only `listOf` is read
 * @returns {number} Index at which tree visiting is to be continued
 */
function anchorizePattern(node, index, parent, slugger, generateFilesConf) {
    const {listOf} = generateFilesConf;
    const text = getNodeText(node) || "";
    // Tracks where `node` currently sits; it moves right with every anchor
    // inserted in front of it.
    let nodeIndex = index;

    for (const {class: anchorClass, pattern} of listOf) {
        if (!pattern) {
            continue;
        }
        const match = text.match(pattern);
        if (!match) {
            continue;
        }
        // Prefer the capture group as the title. Without one (or when it
        // matched nothing) use the whole match. match[0] already is the
        // full match, so there is no need to wrap the pattern in another
        // group and match again (which would also renumber backreferences
        // inside the pattern).
        const matchedTitle = match[1] || match[0];
        const id = slugger.slug(matchedTitle.slice(0, 20));
        const title = matchedTitle
            .replace(REGEXP_SANITIZE_HTML_ATTR, "")
            .trim();

        parent.children.splice(nodeIndex, 0,
            html(`<span id="${id}" class="${anchorClass}" title="${title}"></span>`)
        );
        // One node was inserted in front of `node`.
        nodeIndex += 1;
    }
    // proceed tree visiting with "next-to-current" node
    return nodeIndex + 1;
}


/**
* Prepends an HTML anchor tag to a markdown image to be able to directly
* link and navigate to it. Anchorization allows to create a combined
Expand All @@ -74,7 +125,10 @@ function getNodeVisitor(context, file, slugger, figuresClass, tablesClass) {
* @param {GitHubSlugger} slugger Slug algorithm to generate URL-friendly anchor ids
* @param {string} anchorClass
*/
function anchorizeImage(node, index, parent, slugger, anchorClass) {
function anchorizeImage(node, index, parent, slugger, generateFilesConf) {

const listOfFigures = generateFilesConf.listOfFigures;
const anchorClass = listOfFigures ? listOfFigures.class : "figure";
// Note: The file-specific slugger internally counts figures with id "figure".
const t = node.type;
const id = slugger.slug(node.title || node.alt || "figure");
Expand All @@ -85,10 +139,14 @@ function anchorizeImage(node, index, parent, slugger, anchorClass) {
title = node.alt || node.label;
}
parent.children.splice(index, 0,
html(`<a id="${id}" class="${anchorClass}" title="${title}">`),
html(`<a id="${id}" class="${anchorClass}" title="${title.replace(REGEXP_SANITIZE_HTML_ATTR, "")}">`),
html("</a>")
);
return index + 3;
// Remember to increment 'index' by the number of nodes inserted via splicing
); // `------------.
index += 2; // <----'

// proceed tree visiting with "next-to-current" node
return index + 1;
}

/**
Expand All @@ -107,13 +165,19 @@ function anchorizeImage(node, index, parent, slugger, anchorClass) {
* @param {GitHubSlugger} slugger Slug algorithm for URL-friendly anchors
* @param {string} anchorClass
*/
function anchorizeTable(node, index, parent, slugger, generateFilesConf) {
    const listOfTables = generateFilesConf.listOfTables;
    const anchorClass = listOfTables ? listOfTables.class : "table";
    // A table may have no label (the id falls back to "" in that case), so
    // normalize to a string before calling string methods on it.
    const label = getTableLabel(node, index, parent) || "";
    const id = label ? slugger.slug(label) : "";
    // Strip characters that would break out of the title attribute.
    const title = label.replace(REGEXP_SANITIZE_HTML_ATTR, "");

    parent.children.splice(index, 0,
        html(`<a id="${id}" class="${anchorClass}" title="${title}" />`)
    );
    // One node was inserted in front of the table: step over the anchor
    // (+1) and proceed tree visiting with the "next-to-current" node (+1).
    return index + 2;
}

function getTableLabel(node, index, parent) {
Expand Down
24 changes: 11 additions & 13 deletions md/doc/vuepress.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,17 @@ More information see [README.md](../README.md).

## Markdown Extensions

Vuepress lists a few [Markdown Extensions](https://vuepress.vuejs.org/guide/markdown.html) like *Frontmatter*, *Emojis* etc.
Make sure to read [Markdown Syntax Extensions](../README.md#markdown-syntax-extensions), if your input files contain syntax sugar not covered by the [CommonMark] spec.
Below is a list of remark plug-ins you may consider:

| Vuepress Markdown Extension | remark plug-in required with glossarify-md |
| ------------------------------------- | ------------------------------------------------------------------------------- |
| [Frontmatter][vp-frontmatter] | [remark-frontmatter](http://unifiedjs.com/explore/package/remark-frontmatter/) |
| [Custom Containers][vp-cc] | [remark-directive](https://github.com/remarkjs/remark-directive) |
| [GitHub Style Tables][vp-gh-tables] | None (glossarify-md loads [remark-gfm](https://github.com/remarkjs/remark-gfm)) |
| [Table of Contents][vp-toc] `[[toc]]` | None |
| [Emoji][vp-emoji] | None |
| [Line Highlighting Codeblocks][vp-lh] | None |
| [Import Code Snippets][vp-code] | None |
Vuepress has a few [Markdown Extensions](https://vuepress.vuejs.org/guide/markdown.html). Most of them work out of the box. *Frontmatter*, however, requires a plug-in to work with glossarify-md. Read [Markdown Syntax Extensions](../README.md#markdown-syntax-extensions) for using glossarify-md with Markdown syntax not covered by the [CommonMark] Spec.

| Vuepress Markdown Extension | remark plug-in required with glossarify-md |
| ------------------------------------- | ------------------------------------------------------------------------------ |
| [Frontmatter][vp-frontmatter] | [remark-frontmatter](http://unifiedjs.com/explore/package/remark-frontmatter/) |
| [Custom Containers][vp-cc] | None |
| [GitHub Style Tables][vp-gh-tables] | None |
| [Table of Contents][vp-toc] `[[toc]]` | None |
| [Emoji][vp-emoji] | None |
| [Line Highlighting Codeblocks][vp-lh] | None |
| [Import Code Snippets][vp-code] | None |

[vp-frontmatter]: https://vuepress.vuejs.org/guide/markdown.html#frontmatter
[vp-gh-tables]: https://vuepress.vuejs.org/guide/markdown.html#github-style-tables
Expand Down
20 changes: 20 additions & 0 deletions test/input/config-listOf/pattern/tc1-text-paragraph/document.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Tests for pattern-based lists

## Test Case

GIVEN a paragraph
AND a listOf configuration
AND text containing "Test Case: 1" which matches a listOf-pattern
THEN the system MUST insert an HTML element
AND MUST insert the HTML in front of the whole paragraph
AND generate a list WITH a single list item
AND the list item label MUST be the part which matches the pattern, only.

GIVEN a paragraph
AND a listOf configuration
AND text containing "Test Case: 1" which matches a listOf-pattern
AND text containing "Test Case: 1" twice
THEN the system MUST insert an HTML element only once
AND MUST insert the HTML in front of the whole paragraph
AND generate a list WITH a single list item
AND the list item label MUST be the part which matches the pattern, only.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"$schema": "../../../../../conf/v5/schema.json",
"baseDir": ".",
"linking": {
"paths": "relative",
"mentions": "all"
},
"includeFiles": [
"."
],
"excludeFiles": [],
"indexing": {
"groupByHeadingDepth": 0
},
"generateFiles": {
"listOf": [
{
"class": "test",
"file": "./list.md",
"title": "Test Case",
"pattern": "Test Case: [0-9]{1,3}"
}
]
},
"glossaries": [
{
"file": "./glossary.md"
}
],
"ignoreCase": false,
"outDir": "../../../../output-actual/config-listOf/pattern/tc1-text-paragraph",
"dev": {
"printInputAst": false,
"reproducablePaths": true,
"printOutputAst": false
}
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Tests for pattern-based lists

## Test Case

GIVEN a configuration
~~~md
{
"generateFiles": {
"listOf": [{
"class": "test",
"file": "list.md",
"title": "Test Case",
"pattern": "Test Case: [0-9]{1,3}"
}]
}
}
~~~
AND this mdAst *text* node: "Test Case: 2"
THEN the system MUST prepend an HTML element WITH attributes
- `id` whose value is a Slug limited to 20 characters in length
- AND `title` whose value is the value of the text node
- AND `class` whose value is `test`
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"$schema": "../../../../../conf/v5/schema.json",
"baseDir": ".",
"linking": {
"paths": "relative",
"mentions": "all"
},
"includeFiles": [
"."
],
"excludeFiles": [],
"indexing": {
"groupByHeadingDepth": 0
},
"generateFiles": {
"listOf": [
{
"class": "test",
"file": "./list.md",
"title": "Test Case",
"pattern": "Test Case: [0-9]{1,3}"
}
]
},
"glossaries": [
{
"file": "./glossary.md"
}
],
"ignoreCase": false,
"outDir": "../../../../output-actual/config-listOf/pattern/tc2-formatted-paragraph",
"dev": {
"printInputAst": false,
"reproducablePaths": true,
"printOutputAst": false
}
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Tests for pattern-based lists

## Test Case

GIVEN a configuration

```md
{
"generateFiles": {
"listOf": [ {
"class": "test",
"file": "./list.md",
"title": "Test Case",
"pattern": ":::[ ]?tip Tipp[:]? ([a-zA-Z0-9].*)"
}]
}
}
```

AND a container node

:::tip Tipp: Extract me
The title of this container should be extracted
:::

THEN the system MUST generate a list item with list item label `Extract me`
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"$schema": "../../../../../conf/v5/schema.json",
"baseDir": ".",
"linking": {
"paths": "relative",
"mentions": "all"
},
"includeFiles": [
"."
],
"excludeFiles": [],
"indexing": {
"groupByHeadingDepth": 0
},
"generateFiles": {
"listOf": [
{
"class": "test",
"file": "./list.md",
"title": "Test Case",
"pattern": ":::[ ]?tip Tipp[:]? ([a-zA-Z0-9].*)"
}
]
},
"glossaries": [
{
"file": "./glossary.md"
}
],
"ignoreCase": false,
"outDir": "../../../../output-actual/config-listOf/pattern/tc3-title-capture-group",
"dev": {
"printInputAst": false,
"reproducablePaths": true,
"printOutputAst": false
}
}
Empty file.
Loading

0 comments on commit c99d5c6

Please sign in to comment.