Skip to content

Commit

Permalink
fix: escape terms containing characters with special meaning in regExp
Browse files Browse the repository at this point in the history
chore(): Count term occurrences based on AST nodes. Don't count occurrences in term's own definition.
This change is necessary because, since linkification of term occurrences was
introduced for glossaries as well, we also counted occurrences in the term's own
definition. As a result we were no longer able to detect orphan terms and term
definitions no longer used anywhere else.
  • Loading branch information
about-code committed Dec 23, 2019
1 parent cc199a3 commit 1815304
Show file tree
Hide file tree
Showing 29 changed files with 714 additions and 126 deletions.
28 changes: 28 additions & 0 deletions lib/counter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
const uVisit = require('unist-util-visit');
const {getLinkUrl} = require('./ast-tools');
/**
 * Unified plug-in which counts occurrences and mentions of a term.
 *
 * Visits every 'term-occurrence' node of the tree and calls
 * `countOccurrence()` on each term definition attached to that node. A mention
 * is not counted when it is located in the term's own definition, i.e. when
 * the current file is the glossary file in which the term has been defined
 * AND the link url of the occurrence's heading node equals the term's anchor.
 *
 * @param {object} context Accepted for parity with the other plug-ins; not
 *  read by the counter itself.
 * @returns {Function} Attacher whose transformer returns the tree it was given.
 */
function counter(context) {
    // Only ask for the heading's link url when the current file is the
    // glossary defining the term; in any other file every mention counts.
    const isInOwnDefinition = (term, node, vFile) =>
        vFile.path === term.glossary.vFile.path
        && getLinkUrl(node.headingNode) === term.anchor;

    return () => (tree, vFile) => {
        uVisit(tree, 'term-occurrence', (node) => {
            node.termDefs.forEach((term) => {
                if (!isInOwnDefinition(term, node, vFile)) {
                    term.countOccurrence();
                }
            });
        });
        return tree;
    };
}

module.exports = { counter };
3 changes: 2 additions & 1 deletion lib/indexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ const index = {}
/**
 * Unified plug-in which feeds 'term-occurrence' nodes to the index visitor.
 *
 * When `getIndexFilename(context)` yields a falsy value (presumably: no index
 * file has been configured - confirm against getIndexFilename) the plug-in is
 * a pass-through. Otherwise every 'term-occurrence' node of the tree is
 * visited with the visitor created by `getNodeVisitor()`.
 *
 * In both cases the transformer returns the tree it was given, so that it
 * behaves consistently regardless of the configuration.
 *
 * @param {object} context Passed on to getIndexFilename() and getNodeVisitor().
 * @returns {Function} Attacher whose transformer returns the tree.
 */
function indexer(context) {
    const indexFilename = getIndexFilename(context);
    if (!indexFilename) {
        // Nothing to index: hand the tree back untouched. Previously a
        // transformer returning `undefined` was returned first, which left
        // this corrected variant unreachable.
        return () => (tree, vFile) => tree;
    }
    return () => (tree, vFile) => {
        // File name of the document which is currently being processed.
        const currentDocFilename = `${vFile.dirname}/${vFile.basename}`;
        uVisit(tree, 'term-occurrence', getNodeVisitor(context, indexFilename, currentDocFilename));
        return tree;
    };
}
Expand Down
4 changes: 2 additions & 2 deletions lib/linker.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const Term = require("./term.js");
const linkifyRegex = require("./linkify");
const {indexer} = require("./indexer");
const {counter} = require("./counter");
const {printAst, noopCompiler} = require("./ast-tools.js");
const {toForwardSlash} = require("./pathplus");
const path = require("path");
Expand Down Expand Up @@ -44,6 +45,7 @@ api.linkTermOccurrences = function(context) {
.use(remark_ref_links)
.use(remark_link_headings, {behavior: 'wrap'})
.use(indexer(context))
.use(counter(context))
.use(printAst(context.opts.dev.printOutputAst))
.use(noopCompiler)
.use(remark_stringify)
Expand Down Expand Up @@ -142,7 +144,6 @@ function linkify(txtNode, headingNode, termDefs, context, vFile) {
headingNode: headingNode
});
if (! hasMultipleDefs) {
term.countOccurrence();
if (term.hint) {
if (/\$\{term\}/.test(term.hint)) {
linkNode.children[0].value = term.hint.replace("${term}", linkNode.children[0].value);
Expand All @@ -169,7 +170,6 @@ function linkify(txtNode, headingNode, termDefs, context, vFile) {
paragraph.children = childr
.slice(0, linkIndex + 1)
.concat(termDefs.map((t, i) => {
t.countOccurrence();
return {
type: "link",
title: t.getShortDescription(),
Expand Down
3 changes: 2 additions & 1 deletion lib/term.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ function updateRegExp(term) {
// sort by length descending to create a regExp which tests for the longest
// term first.
termAndAliases
.map((term) => escapeRegExp(term))
.sort((term1, term2) => term2.length - term1.length)
.forEach((alias, idx) => regExp += (idx > 0 ? "|" : "") + alias);
.forEach((term, idx) => regExp += (idx > 0 ? "|" : "") + term)
regExp += `)`;

if (term.ignoreCase) {
Expand Down
7 changes: 3 additions & 4 deletions test/input/config-shared/glossarify-md.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
"experimentalFootnotes": true,
"keepRawFiles": [],
"glossaries": [
{ "file": "./markdown/glossary.md", "termHint": "" },
{ "file": "./substring_behavior/glossary.md", "termHint": ""},
{ "file": "./term_descriptions/glossary.md", "termHint": ""},
{ "file": "./aliases_and_synonyms/glossary.md", "termHint": ""}
{ "file": "./markdown/glossary.md", "termHint": "" }
,{ "file": "./term_descriptions/glossary.md", "termHint": "" }
,{ "file": "./aliases_and_synonyms/glossary.md", "termHint": "" }
],
"linking": "relative",
"ignoreCase": false,
Expand Down
15 changes: 15 additions & 0 deletions test/input/config-tailored/count-term-occurrences/document.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Count Term Occurrences

## Once

GIVEN a document
AND it mentions the term "Mentioned-in-document-once" once
AND nowhere else
THEN the term's term occurrence count MUST be 1.

## Twice

GIVEN a document
AND it mentions the term "Mentioned-in-document-twice" once
AND it mentions the term "Mentioned-in-document-twice" twice
THEN the term's term occurrence count MUST be 2.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"$schema": "../../../../conf.schema.json",
"baseDir": ".",
"outDir": "../../../output-actual/config-tailored/count-term-occurrences",
"includeFiles": ["."],
"excludeFiles": [],
"keepRawFiles": [],
"glossaries": [
{ "file": "./glossary.md", "termHint": "" }
],
"linking": "relative",
"ignoreCase": false,
"dev": {
"termsFile": "../../../output-actual/config-tailored/count-term-occurrences/terms.json",
"printInputAst": false,
"reproducablePaths": true,
"printOutputAst": false
}
}
52 changes: 52 additions & 0 deletions test/input/config-tailored/count-term-occurrences/glossary.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Glossary

## Zero

GIVEN a term "Zero"
AND the term "Zero" is mentioned twice in its own definition
AND nowhere else
THEN this term's occurrence count MUST be 0.

## One

GIVEN a term "One"
AND the term is mentioned only once in a subsequent term definition
AND nowhere else
THEN this term's occurrence count MUST be 1.

#### mention-one-once

GIVEN this definition
AND it mentions another term "One" once
AND itself is being mentioned nowhere else
THEN the other term's occurrence count MUST be 1
AND this term's occurrence count MUST be 0.

## Two

GIVEN a term "Two"
AND the term is mentioned two times in a subsequent term definition
AND nowhere else
THEN this term's occurrence count MUST be 2.

#### mention-two-twice

GIVEN this definition
AND it mentions another term "Two" Two times
AND itself is being mentioned nowhere else
THEN the other term's occurrence count MUST be 2
AND this term's occurrence count MUST be 0.

## Mentioned-in-document-once

GIVEN a term
AND it is mentioned once in a document
AND not in the glossary
THEN this term's occurrence count MUST be 1.

## Mentioned-in-document-twice

GIVEN a term
AND it is mentioned twice in a document
AND not in the glossary
THEN this term's occurrence count MUST be 2.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"$schema": "../../../../conf.schema.json",
"baseDir": ".",
"outDir": "../../../output-actual/config-tailored/glossary-crosslinks",
"includeFiles": ["."],
"excludeFiles": [],
"experimentalFootnotes": true,
"keepRawFiles": [],
"glossaries": [
{ "file": "./glossary-g1.md", "termHint": "" }
,{ "file": "./glossary-g2.md", "termHint": "" }
],
"linking": "relative",
"ignoreCase": false,
"dev": {
"termsFile": "../../../output-actual/config-tailored/glossary-crosslinks/terms.json",
"printInputAst": false,
"reproducablePaths": true,
"printOutputAst": false
}
}
21 changes: 21 additions & 0 deletions test/input/config-tailored/glossary-crosslinks/glossary-g1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Glossary G1

## Term A

"Term A" only exists for the sake of being linked to from within this glossary.

## Term B

GIVEN a term "Term A" AND a definition of "Term B" in the same glossary G1
THEN any occurrence of "Term A" in the definition of "Term B" MUST be linked
to the definition of "Term A".

## Term C

GIVEN a term "Term C" in glossary G1 AND a term "Term D" in another glossary G2
THEN any occurrence of "Term D" in glossary G1 MUST be linked to the definition
of "Term D".

## Term E

"Term E" only exists for the sake of being linked to from glossary G2.
12 changes: 12 additions & 0 deletions test/input/config-tailored/glossary-crosslinks/glossary-g2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Glossary G2

## Term D

"Term D" only exists for the sake of being linked to by the definition of
"Term C" in Glossary G1.

## Term F

GIVEN a term "Term F" in glossary G2 AND a term "Term E" in another glossary G1
THEN any occurrence of "Term E" in glossary G2 MUST be linked to the definition
of "Term E".
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"$schema": "../../../../conf.schema.json",
"baseDir": ".",
"outDir": "../../../output-actual/config-tailored/substring_behavior",
"includeFiles": ["."],
"excludeFiles": [],
"experimentalFootnotes": true,
"keepRawFiles": [],
"glossaries": [
{ "file": "./glossary.md", "termHint": "" }
],
"linking": "relative",
"ignoreCase": false,
"dev": {
"termsFile": "../../../output-actual/config-tailored/substring_behavior/terms.json",
"printInputAst": false,
"reproducablePaths": true,
"printOutputAst": false
}
}
7 changes: 3 additions & 4 deletions test/output-expected/config-shared/glossarify-md.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
"experimentalFootnotes": true,
"keepRawFiles": [],
"glossaries": [
{ "file": "./markdown/glossary.md", "termHint": "" },
{ "file": "./substring_behavior/glossary.md", "termHint": ""},
{ "file": "./term_descriptions/glossary.md", "termHint": ""},
{ "file": "./aliases_and_synonyms/glossary.md", "termHint": ""}
{ "file": "./markdown/glossary.md", "termHint": "" }
,{ "file": "./term_descriptions/glossary.md", "termHint": "" }
,{ "file": "./aliases_and_synonyms/glossary.md", "termHint": "" }
],
"linking": "relative",
"ignoreCase": false,
Expand Down
102 changes: 0 additions & 102 deletions test/output-expected/config-shared/terms.json
Original file line number Diff line number Diff line change
@@ -1,106 +1,4 @@
[
{
"shortDesc": "GIVEN a term 'ÄÖ' WITH term 'Ä' being a substring of it and itself being substring of 'ÄÖÜ'",
"term": "'ÄÖ'",
"hint": "",
"longDesc": "GIVEN a term 'ÄÖ' WITH term 'Ä' being a substring of it and itself being substring of 'ÄÖÜ'",
"anchor": "#äö",
"glossary": {
"file": "./substring_behavior/glossary.md",
"termHint": "",
"basePath": "/{redacted}/input/config-shared/substring_behavior/glossary.md",
"outPath": "/{redacted}/output-actual/config-shared/substring_behavior/glossary.md"
},
"regex": {},
"aliases": [],
"ignoreCase": false,
"countOccurrenceTotal": 7
},
{
"shortDesc": "GIVEN a term 'ÄÖÜ' WITH terms 'ÄÖ' and 'Ä' being substrings of it",
"term": "'ÄÖÜ'",
"hint": "",
"longDesc": "GIVEN a term 'ÄÖÜ' WITH terms 'ÄÖ' and 'Ä' being substrings of it",
"anchor": "#äöü",
"glossary": {
"file": "./substring_behavior/glossary.md",
"termHint": "",
"basePath": "/{redacted}/input/config-shared/substring_behavior/glossary.md",
"outPath": "/{redacted}/output-actual/config-shared/substring_behavior/glossary.md"
},
"regex": {},
"aliases": [],
"ignoreCase": false,
"countOccurrenceTotal": 7
},
{
"shortDesc": "GIVEN an atomic term 'A' WITH term 'A' being a substring of 'AB' and 'ABC'",
"term": "A",
"hint": "",
"longDesc": "GIVEN an atomic term 'A' WITH term 'A' being a substring of 'AB' and 'ABC'",
"anchor": "#a",
"glossary": {
"file": "./substring_behavior/glossary.md",
"termHint": "",
"basePath": "/{redacted}/input/config-shared/substring_behavior/glossary.md",
"outPath": "/{redacted}/output-actual/config-shared/substring_behavior/glossary.md"
},
"regex": {},
"aliases": [],
"ignoreCase": false,
"countOccurrenceTotal": 9
},
{
"shortDesc": "GIVEN an atomic non-ASCII term 'Ä' WITH term 'Ä' being a substring of 'ÄÖ' and 'ÄÖÜ'",
"term": "Ä",
"hint": "",
"longDesc": "GIVEN an atomic non-ASCII term 'Ä' WITH term 'Ä' being a substring of 'ÄÖ' and 'ÄÖÜ'",
"anchor": "",
"glossary": {
"file": "./substring_behavior/glossary.md",
"termHint": "",
"basePath": "/{redacted}/input/config-shared/substring_behavior/glossary.md",
"outPath": "/{redacted}/output-actual/config-shared/substring_behavior/glossary.md"
},
"regex": {},
"aliases": [],
"ignoreCase": false,
"countOccurrenceTotal": 9
},
{
"shortDesc": "GIVEN a term 'AB' WITH term A being a substring of it and itself being substring of 'ABC'",
"term": "AB",
"hint": "",
"longDesc": "GIVEN a term 'AB' WITH term A being a substring of it and itself being substring of 'ABC'",
"anchor": "#ab",
"glossary": {
"file": "./substring_behavior/glossary.md",
"termHint": "",
"basePath": "/{redacted}/input/config-shared/substring_behavior/glossary.md",
"outPath": "/{redacted}/output-actual/config-shared/substring_behavior/glossary.md"
},
"regex": {},
"aliases": [],
"ignoreCase": false,
"countOccurrenceTotal": 7
},
{
"shortDesc": "GIVEN a term 'ABC' WITH terms 'AB' and A being substrings of it",
"term": "ABC",
"hint": "",
"longDesc": "GIVEN a term 'ABC' WITH terms 'AB' and A being substrings of it",
"anchor": "#abc",
"glossary": {
"file": "./substring_behavior/glossary.md",
"termHint": "",
"basePath": "/{redacted}/input/config-shared/substring_behavior/glossary.md",
"outPath": "/{redacted}/output-actual/config-shared/substring_behavior/glossary.md"
},
"regex": {},
"aliases": [],
"ignoreCase": false,
"countOccurrenceTotal": 7
},
{
"shortDesc": "",
"term": "Alias substring behavior",
Expand Down
Loading

0 comments on commit 1815304

Please sign in to comment.