feat: Generate lists from RegExp patterns (#161)

* feat: 'listOf' patterns * test: New test cases * test: New baseline * Extend Config Schema (Backwards Compatible)
about-code · Jun 1, 2021 · c99d5c6 · c99d5c6
1 parent 3f39970
commit c99d5c6
Show file tree

Hide file tree

Showing 34 changed files with 771 additions and 32 deletions.
diff --git a/conf/v5/schema.json b/conf/v5/schema.json
@@ -225,6 +225,10 @@
                     "description": "The page title for the index file. If missing the application uses a default value."
                     ,"type": "string"
                 }
+                ,"pattern": {
+                    "description": "A regular expression which when matching against text will generate an entry in the given list. The expression may contain a capture group which extracts a list item title. A match will result in an URL-addressable HTML node being added to the output."
+                    ,"type": "string"
+                }
             }
         }
         ,"i18n": {

diff --git a/lib/anchorizer.js b/lib/anchorizer.js
@@ -8,6 +8,7 @@ const api = {};
 const HTML_COMMENT = /<!-- (.*) -->/;
 const TABLE_LABEL_REGEXP = /<!-- table: (.*) -->/;
 const TABLE_LABEL_REGEXP2 = /(.*):(\s|\n|$)/;
+const REGEXP_SANITIZE_HTML_ATTR = /["<>]/g;
 
 /**
  * Anchorizer searches the abstract syntax tree for figures and tables
@@ -25,23 +26,23 @@ const TABLE_LABEL_REGEXP2 = /(.*):(\s|\n|$)/;
  */
 api.anchorizer = function(args) {
     const {context} = args;
-    const {listOf, listOfFigures, listOfTables} = context.conf.generateFiles;
+    const generateFilesConf = context.conf.generateFiles;
+    const listOf = generateFilesConf.listOf;
+
+    // If there's a listOf-config (assumes that listOfFigures and listOfTables
+    // configs have been merged with listOf config during context initialization)
     if (listOf.length > 0) {
-        // assumes that listOfFigures and listOfTables configs have been
-        // pushed to listOf configs during context initialization.
-        const figuresClass = listOfFigures ? listOfFigures.class : "figure";
-        const tablesClass  = listOfTables  ? listOfTables.class  : "table";
         return (tree, vFile) => {
             const slugger = new GitHubSlugger();
-            uVisit(tree, getNodeVisitor(context, vFile, slugger, figuresClass, tablesClass));
+            uVisit(tree, getNodeVisitor(context, vFile, slugger, generateFilesConf));
             return tree;
         };
     } else {
         return (tree) => tree;
     }
 };
 
-function getNodeVisitor(context, file, slugger, figuresClass, tablesClass) {
+function getNodeVisitor(context, file, slugger, generateFilesConf) {
     let htmlNodeDistance = 99;
     return function(node, index, parent) {
         const t = node.type;
@@ -51,13 +52,63 @@ function getNodeVisitor(context, file, slugger, figuresClass, tablesClass) {
             htmlNodeDistance++;
         }
         if (htmlNodeDistance >= 3 && (t === "image" || t === "imageReference")) {
-            return anchorizeImage(node, index, parent, slugger, figuresClass);
+            return anchorizeImage(node, index, parent, slugger, generateFilesConf);
         } else if (htmlNodeDistance >= 2 && t === "table") {
-            return anchorizeTable(node, index, parent, slugger, tablesClass);
+            return anchorizeTable(node, index, parent, slugger, generateFilesConf);
+        } else if (htmlNodeDistance >= 3 && t === "paragraph") {
+            return anchorizePattern(node, index, parent, slugger, generateFilesConf);
         }
     };
 }
 
+/**
+ * Inserts an HTML anchor (`<span>`) in front of a markdown paragraph node for each `listOf` pattern the paragraph text matches
+ *
+ * @param {Node} node
+ * @param {number} index
+ * @param {Node} parent
+ * @param {GitHubSlugger} slugger Slug algorithm to generate URL-friendly anchor ids
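+ * @param {object} generateFilesConf Value of config option 'generateFiles'; its 'listOf' entries provide the 'pattern' and 'class' used for the anchor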
+ */
+function anchorizePattern(node, index, parent, slugger, generateFilesConf) {
+    const {listOf} = generateFilesConf;
+    const text = getNodeText(node) || "";
+
+    for (let i = 0, len = listOf.length; i < len; i++) {
+        const anchorClass = listOf[i].class;
+        const pattern = listOf[i].pattern;
+        let title = "";
+        let id = "";
+        if (! pattern) {
+            continue;
+        }
+        const match = text.match(pattern);
+        if (! match) {
+            continue;
+        }
+        if (match[1]) {
+            // there's a capture group to extract the title
+            title = match[1];
+            id = `${slugger.slug(title.substr(0, 20))}`;
+        } else {
+            // apply default capture group to use the matching pattern itself as the title
+            title = text.match(`(${pattern})`)[1];
+            id = `${slugger.slug(title.substr(0, 20))}`;
+        }
+        title = title
+            .replace(REGEXP_SANITIZE_HTML_ATTR, "")
+            .trim();
+        parent.children.splice(index, 0,
+            html(`<span id="${id}" class="${anchorClass}" title="${title}"></span>`)
+            // Remember to increment 'index' by the number of nodes inserted via splicing
+        );  // `------------.
+        index += 1; // <----'
+    }
+    // proceed tree visiting with "next-to-current" node
+    return index + 1;
+}
+
+
 /**
  * Prepends an HTML anchor tag to a markdown image to be able to directly
  * link and navigate to it. Anchorization allows to create a combined
@@ -74,7 +125,10 @@ function getNodeVisitor(context, file, slugger, figuresClass, tablesClass) {
  * @param {GitHubSlugger} slugger Slug algorithm to generate URL-friendly anchor ids
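  * @param {object} generateFilesConf Value of config option 'generateFiles'; 'listOfFigures.class' provides the anchor class (default "figure")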
  */
-function anchorizeImage(node, index, parent, slugger, anchorClass) {
+function anchorizeImage(node, index, parent, slugger, generateFilesConf) {
+
+    const listOfFigures = generateFilesConf.listOfFigures;
+    const anchorClass = listOfFigures ? listOfFigures.class : "figure";
     // Note: The file-specific slugger internally counts figures with id "figure".
     const t = node.type;
     const id = slugger.slug(node.title || node.alt || "figure");
@@ -85,10 +139,14 @@ function anchorizeImage(node, index, parent, slugger, anchorClass) {
         title = node.alt || node.label;
     }
     parent.children.splice(index, 0,
-        html(`<a id="${id}" class="${anchorClass}" title="${title}">`),
+        html(`<a id="${id}" class="${anchorClass}" title="${title.replace(REGEXP_SANITIZE_HTML_ATTR, "")}">`),
         html("</a>")
-    );
-    return index + 3;
+        // Remember to increment 'index' by the number of nodes inserted via splicing
+    );  // `------------.
+    index += 2; // <----'
+
+    // proceed tree visiting with "next-to-current" node
+    return index + 1;
 }
 
 /**
@@ -107,13 +165,19 @@ function anchorizeImage(node, index, parent, slugger, anchorClass) {
  * @param {GitHubSlugger} slugger Slug algorithm for URL-friendly anchors
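  * @param {object} generateFilesConf Value of config option 'generateFiles'; 'listOfTables.class' provides the anchor class (default "table")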
  */
-function anchorizeTable(node, index, parent, slugger, anchorClass) {
+function anchorizeTable(node, index, parent, slugger, generateFilesConf) {
+    const listOfTables = generateFilesConf.listOfTables;
+    const anchorClass = listOfTables ? listOfTables.class : "table";
     const label = getTableLabel(node, index, parent);
     const id = label ? slugger.slug(label) : "";
     parent.children.splice(index, 0,
-        html(`<a id="${id}" class="${anchorClass}" title="${label}" />`)
-    );
-    return index + 2;
+        html(`<a id="${id}" class="${anchorClass}" title="${label.replace(REGEXP_SANITIZE_HTML_ATTR, "")}" />`)
+        // Remember to increment 'index' by the number of nodes inserted via splicing
+    );  // `------------.
+    index += 1; // <----'
+
+    // proceed tree visiting with "next-to-current" node
+    return index + 1;
 }
 
 function getTableLabel(node, index, parent) {

diff --git a/md/doc/vuepress.md b/md/doc/vuepress.md
@@ -93,19 +93,17 @@ More information see [README.md](../README.md).
 
 ## Markdown Extensions
 
-Vuepress lists a few [Markdown Extensions](https://vuepress.vuejs.org/guide/markdown.html) like *Frontmatter*, *Emojis* etc.
-Make sure to read [Markdown Syntax Extensions](../README.md#markdown-syntax-extensions), if your input files contain syntax sugar not covered by the [CommonMark] spec.
-Below is a list of remark plug-ins you may consider:
-
-|      Vuepress Markdown Extension      |                   remark plug-in required with glossarify-md                    |
-| ------------------------------------- | ------------------------------------------------------------------------------- |
-| [Frontmatter][vp-frontmatter]         | [remark-frontmatter](http://unifiedjs.com/explore/package/remark-frontmatter/)  |
-| [Custom Containers][vp-cc]            | [remark-directive](https://github.com/remarkjs/remark-directive)                |
-| [GitHub Style Tables][vp-gh-tables]   | None (glossarify-md loads [remark-gfm](https://github.com/remarkjs/remark-gfm)) |
-| [Table of Contents][vp-toc] `[[toc]]` | None                                                                            |
-| [Emoji][vp-emoji]                     | None                                                                            |
-| [Line Highlighting Codeblocks][vp-lh] | None                                                                            |
-| [Import Code Snippets][vp-code]       | None                                                                            |
+Vuepress has a few [Markdown Extensions](https://vuepress.vuejs.org/guide/markdown.html). Most of them work out of the box; *Frontmatter*, however, requires a plug-in to work with glossarify-md. See [Markdown Syntax Extensions](../README.md#markdown-syntax-extensions) for using glossarify-md with Markdown syntax not covered by the [CommonMark] spec.
+
+|      Vuepress Markdown Extension      |                   remark plug-in required with glossarify-md                   |
+| ------------------------------------- | ------------------------------------------------------------------------------ |
+| [Frontmatter][vp-frontmatter]         | [remark-frontmatter](http://unifiedjs.com/explore/package/remark-frontmatter/) |
+| [Custom Containers][vp-cc]            | None                                                                           |
+| [GitHub Style Tables][vp-gh-tables]   | None                                                                           |
+| [Table of Contents][vp-toc] `[[toc]]` | None                                                                           |
+| [Emoji][vp-emoji]                     | None                                                                           |
+| [Line Highlighting Codeblocks][vp-lh] | None                                                                           |
+| [Import Code Snippets][vp-code]       | None                                                                           |
 
 [vp-frontmatter]: https://vuepress.vuejs.org/guide/markdown.html#frontmatter
 [vp-gh-tables]: https://vuepress.vuejs.org/guide/markdown.html#github-style-tables

diff --git a/test/input/config-listOf/pattern/tc1-text-paragraph/document.md b/test/input/config-listOf/pattern/tc1-text-paragraph/document.md
@@ -0,0 +1,20 @@
+# Tests for pattern-based lists
+
+## Test Case
+
+GIVEN a paragraph
+AND a listOf configuration
+AND text containing "Test Case: 1" which matches a listOf-pattern
+THEN the system MUST insert an HTML element
+AND MUST insert the HTML in front of the whole paragraph
+AND generate a list WITH a single list item
+AND the list item label MUST be the part which matches the pattern, only.
+
+GIVEN a paragraph
+AND a listOf configuration
+AND text containing "Test Case: 1" which matches a listOf-pattern
+AND text containing "Test Case: 1" twice
+THEN the system MUST insert an HTML element only once
+AND MUST insert the HTML in front of the whole paragraph
+AND generate a list WITH a single list item
+AND the list item label MUST be the part which matches the pattern, only.
diff --git a/test/input/config-listOf/pattern/tc1-text-paragraph/glossarify-md.conf.json b/test/input/config-listOf/pattern/tc1-text-paragraph/glossarify-md.conf.json
@@ -0,0 +1,37 @@
+{
+  "$schema": "../../../../../conf/v5/schema.json",
+  "baseDir": ".",
+  "linking": {
+    "paths": "relative",
+    "mentions": "all"
+  },
+  "includeFiles": [
+    "."
+  ],
+  "excludeFiles": [],
+  "indexing": {
+    "groupByHeadingDepth": 0
+  },
+  "generateFiles": {
+    "listOf": [
+      {
+        "class": "test",
+        "file": "./list.md",
+        "title": "Test Case",
+        "pattern": "Test Case: [0-9]{1,3}"
+      }
+    ]
+  },
+  "glossaries": [
+    {
+      "file": "./glossary.md"
+    }
+  ],
+  "ignoreCase": false,
+  "outDir": "../../../../output-actual/config-listOf/pattern/tc1-text-paragraph",
+  "dev": {
+    "printInputAst": false,
+    "reproducablePaths": true,
+    "printOutputAst": false
+  }
+}
diff --git a/test/input/config-listOf/pattern/tc1-text-paragraph/glossary.md b/test/input/config-listOf/pattern/tc1-text-paragraph/glossary.md
diff --git a/test/input/config-listOf/pattern/tc2-formatted-paragraph/document.md b/test/input/config-listOf/pattern/tc2-formatted-paragraph/document.md
@@ -0,0 +1,22 @@
+# Tests for pattern-based lists
+
+## Test Case
+
+GIVEN a configuration
+~~~md
+{
+   "generateFiles": {
+       "listOf": [{
+         "class": "test",
+         "file": "list.md",
+         "title": "Test Case",
+         "pattern": "Test Case: [0-9]{1,3}"
+       }]
+   }
+}
+~~~
+AND this mdAst *text* node: "Test Case: 2"
+THEN the system MUST prepend an HTML element WITH attributes
+  - `id` whose value is a Slug limited to 20 characters in length
+  - AND `title` whose value is the value of the text node
+  - AND `class` whose value is `test`
diff --git a/test/input/config-listOf/pattern/tc2-formatted-paragraph/glossarify-md.conf.json b/test/input/config-listOf/pattern/tc2-formatted-paragraph/glossarify-md.conf.json
@@ -0,0 +1,37 @@
+{
+  "$schema": "../../../../../conf/v5/schema.json",
+  "baseDir": ".",
+  "linking": {
+    "paths": "relative",
+    "mentions": "all"
+  },
+  "includeFiles": [
+    "."
+  ],
+  "excludeFiles": [],
+  "indexing": {
+    "groupByHeadingDepth": 0
+  },
+  "generateFiles": {
+    "listOf": [
+      {
+        "class": "test",
+        "file": "./list.md",
+        "title": "Test Case",
+        "pattern": "Test Case: [0-9]{1,3}"
+      }
+    ]
+  },
+  "glossaries": [
+    {
+      "file": "./glossary.md"
+    }
+  ],
+  "ignoreCase": false,
+  "outDir": "../../../../output-actual/config-listOf/pattern/tc2-formatted-paragraph",
+  "dev": {
+    "printInputAst": false,
+    "reproducablePaths": true,
+    "printOutputAst": false
+  }
+}
diff --git a/test/input/config-listOf/pattern/tc2-formatted-paragraph/glossary.md b/test/input/config-listOf/pattern/tc2-formatted-paragraph/glossary.md
diff --git a/test/input/config-listOf/pattern/tc3-title-capture-group/document.md b/test/input/config-listOf/pattern/tc3-title-capture-group/document.md
@@ -0,0 +1,26 @@
+# Tests for pattern-based lists
+
+## Test Case
+
+GIVEN a configuration
+
+```md
+{
+   "generateFiles": {
+       "listOf": [      {
+        "class": "test",
+        "file": "./list.md",
+        "title": "Test Case",
+        "pattern": ":::[ ]?tip Tipp[:]? ([a-zA-Z0-9].*)"
+      }]
+   }
+}
+```
+
+AND a container node
+
+:::tip Tipp: Extract me
+The title of this container should be extracted
+:::
+
+THEN the system MUST generate a list item with list item label `Extract me`
diff --git a/test/input/config-listOf/pattern/tc3-title-capture-group/glossarify-md.conf.json b/test/input/config-listOf/pattern/tc3-title-capture-group/glossarify-md.conf.json
@@ -0,0 +1,37 @@
+{
+  "$schema": "../../../../../conf/v5/schema.json",
+  "baseDir": ".",
+  "linking": {
+    "paths": "relative",
+    "mentions": "all"
+  },
+  "includeFiles": [
+    "."
+  ],
+  "excludeFiles": [],
+  "indexing": {
+    "groupByHeadingDepth": 0
+  },
+  "generateFiles": {
+    "listOf": [
+      {
+        "class": "test",
+        "file": "./list.md",
+        "title": "Test Case",
+        "pattern": ":::[ ]?tip Tipp[:]? ([a-zA-Z0-9].*)"
+      }
+    ]
+  },
+  "glossaries": [
+    {
+      "file": "./glossary.md"
+    }
+  ],
+  "ignoreCase": false,
+  "outDir": "../../../../output-actual/config-listOf/pattern/tc3-title-capture-group",
+  "dev": {
+    "printInputAst": false,
+    "reproducablePaths": true,
+    "printOutputAst": false
+  }
+}
diff --git a/test/input/config-listOf/pattern/tc3-title-capture-group/glossary.md b/test/input/config-listOf/pattern/tc3-title-capture-group/glossary.md