Skip to content

Commit

Permalink
feat: Improved spacing in and around bold, italic, and strikethrough …
Browse files Browse the repository at this point in the history
…tags
  • Loading branch information
nonara committed Jul 11, 2021
1 parent 1e59887 commit 8198524
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 9 deletions.
9 changes: 5 additions & 4 deletions src/config.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { isWhiteSpaceOnly, surround, trimNewLines } from './utilities';
import { isWhiteSpaceOnly, surround, tagSurround, trimNewLines } from './utilities';
import { PostProcessResult, TranslatorConfigObject } from './translator';
import { NodeHtmlMarkdownOptions } from './options';
import { Options as NodeHtmlParserOptions } from 'node-html-parser'
Expand Down Expand Up @@ -29,6 +29,7 @@ export const contentlessElements = [ 'BR', 'HR', 'IMG' ];
// region: Options
/* ****************************************************************************************************************** */

// noinspection RegExpUnnecessaryNonCapturingGroup
export const defaultOptions: Readonly<NodeHtmlMarkdownOptions> = Object.freeze({
preferNativeParser: false,
codeFence: '```',
Expand Down Expand Up @@ -97,7 +98,7 @@ export const defaultTranslators: TranslatorConfigObject = {
postprocess: ({ content, options: { strongDelimiter } }) =>
isWhiteSpaceOnly(content)
? PostProcessResult.RemoveNode
: content.replace(/^[^\S\r\n]*?(\S+.*?)[^\S\r\n]*?$/gm, surround('$1', strongDelimiter))
: tagSurround(content, strongDelimiter)
},

/* Strikethrough */
Expand All @@ -106,7 +107,7 @@ export const defaultTranslators: TranslatorConfigObject = {
postprocess: ({ content }) =>
isWhiteSpaceOnly(content)
? PostProcessResult.RemoveNode
: content.replace(/^[^\S\r\n]*?(\S+.*?)[^\S\r\n]*?$/gm, '~~$1~~')
: tagSurround(content, '~~')
},

/* Italic / Emphasis */
Expand All @@ -115,7 +116,7 @@ export const defaultTranslators: TranslatorConfigObject = {
postprocess: ({ content, options: { emDelimiter } }) =>
isWhiteSpaceOnly(content)
? PostProcessResult.RemoveNode
: content.replace(/^[^\S\r\n]*?(\S+.*?)[^\S\r\n]*?$/gm, surround('$1', emDelimiter))
: tagSurround(content, emDelimiter)
},

/* Lists (ordered & unordered) */
Expand Down
82 changes: 80 additions & 2 deletions src/utilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,89 @@ export const trimNewLines = (s: string) => s.replace(/^\n+|\n+$/g, '');
export const surround = (source: string, surroundStr: string) => `${surroundStr}${source}${surroundStr}`;
export const isWhiteSpaceOnly = (s: string) => !/\S/.test(s);

/**
 * Break a string into lines while remembering which terminator ended each one.
 *
 * Recognised terminators are `\r\n`, a lone `\r`, and a lone `\n`. Each entry holds the line's text
 * (terminator excluded) and the exact terminator that followed it. Text after the final terminator
 * is returned as a last entry with an empty `newLineChar`; if the input ends with a terminator
 * (or is empty) no extra trailing entry is produced.
 *
 * @param s - Text to split
 * @returns Lines in order of appearance, each paired with its original line terminator
 */
export function splitSpecial(s: string) {
  const lines: { text: string, newLineChar: '\r' | '\n' | '\r\n' | '' }[] = [];

  // `\r\n` is listed first so a CRLF pair is consumed as one terminator rather than two
  const terminator = /\r\n|\r|\n/g;
  let cursor = 0;

  for (let hit = terminator.exec(s); hit !== null; hit = terminator.exec(s)) {
    lines.push({
      text: s.slice(cursor, hit.index),
      // The pattern can only ever match one of these three literals
      newLineChar: hit[0] as '\r' | '\n' | '\r\n'
    });
    cursor = hit.index + hit[0].length;
  }

  // Remaining characters form a final, unterminated line
  if (cursor < s.length) lines.push({ text: s.slice(cursor), newLineChar: '' });

  return lines;
}

/**
 * Surround tag content with a delimiter, moving leading/trailing whitespace to outside the delimiters.
 *
 * The content is handled one line at a time (as split by `splitSpecial`), and every line keeps its
 * original terminator. For each line:
 *  - a line with no non-whitespace character is emitted untouched;
 *  - otherwise the span from the first to the last non-whitespace character is wrapped in `surroundStr`,
 *    and only the single whitespace character directly adjacent to that span on each side (if any)
 *    is kept, placed outside the delimiters. Any further outer whitespace is dropped.
 *
 * @param content - Already-translated inner content of the tag
 * @param surroundStr - Delimiter placed on both sides of each line's content (e.g. `**`, `_`, `~~`)
 * @returns The content with every non-blank line wrapped in the delimiter
 */
export function tagSurround(content: string, surroundStr: string) {
  const nonSpace = /[\S]/;
  let output = '';

  for (const line of splitSpecial(content)) {
    const { text, newLineChar } = line;

    const first = text.search(nonSpace);

    // Whitespace-only (or empty) line: pass it through without delimiters
    if (first < 0) {
      output += text + newLineChar;
      continue;
    }

    // Walk back from the end; guaranteed to stop because index `first` is non-whitespace
    let last = text.length - 1;
    while (!nonSpace.test(text.charAt(last))) --last;

    // Keep at most one whitespace character on each side, the one touching the content
    const before = first > 0 ? text.charAt(first - 1) : '';
    const after = last < text.length - 1 ? text.charAt(last + 1) : '';
    const inner = text.slice(first, last + 1);

    output += before + surroundStr + inner + surroundStr + after + newLineChar;
  }

  return output;
}

export const getTrailingWhitespaceInfo = (s: string): { whitespace: number, newLines: number } => {
const res = { whitespace: 0, newLines: 0 };
const minI = Math.max(s.length - 10, 0);
for (let i = s.length-1; i >= minI; i--) {
const token = s.slice(i, i+1);
for (let i = s.length - 1; i >= minI; i--) {
const token = s.slice(i, i + 1);
if (!/\s/.test(token)) break;
res.whitespace++;
if ([ '\r', '\n' ].includes(token)) ++res.newLines;
Expand Down
8 changes: 5 additions & 3 deletions test/default-tags.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,20 @@ describe(`Default Tags`, () => {

test(`Bold (b, strong)`, () => {
const res = translate(`<b>a<del>b</del><br><br>c<br>d</b><strong>a<del>b</del><br><br>c<br>d</strong>`);
const exp = `**a~~b~~**\n \n**c**\n**d**`;
const exp = `**a~~b~~** \n \n**c** \n**d**`;
expect(res).toBe(exp + ' ' + exp);
});

test(`Strikethrough (del, s, strike)`, () => {
const res = translate(`<del>a<em>b</em><br><br>c<br>d</del><s>a<em>b</em><br><br>c<br>d</s><strike>a<em>b</em><br><br>c<br>d</strike>`);
expect(res).toBe(`~~a_b_~~\n \n~~c~~\n~~d~~ `.repeat(3).trim());
const exp = `~~a_b_~~ \n \n~~c~~ \n~~d~~`;
expect(res).toBe(exp + ' ' + exp + ' ' + exp);
});

test(`Italic / Emphasis (em, i)`, () => {
const res = translate(`<em>a <s>b</s><br><br>c<br>d</em><i>a <s>b</s><br><br>c<br>d</i>`);
expect(res).toBe(`_a ~~b~~_\n \n_c_\n_d_ `.repeat(2).trim());
const exp = `_a ~~b~~_ \n \n_c_ \n_d_`;
expect(res).toBe(exp + ' ' + exp);
});

test(`Link (a)`, () => {
Expand Down
35 changes: 35 additions & 0 deletions test/special-cases.test.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,54 @@
import { NodeHtmlMarkdown } from '../src';


/* ****************************************************************************************************************** *
* Config
* ****************************************************************************************************************** */

/** Inline text-formatting tags covered by the whitespace special-case tests. */
const textFormatTags = [ 'strong', 'b', 'del', 's', 'strike', 'em', 'i' ] as const;

/**
 * Build a lookup from each tag in `textFormatTags` to the delimiter expected around its content:
 * the instance's `strongDelimiter` option for bold tags, its `emDelimiter` option for emphasis tags,
 * and the literal `~~` for strikethrough tags.
 */
const getDelims = (instance: NodeHtmlMarkdown) => {
  const delimiterFor = (tag: typeof textFormatTags[number]) => {
    switch (tag) {
      case 'b':
      case 'strong':
        return instance.options.strongDelimiter;
      case 'em':
      case 'i':
        return instance.options.emDelimiter;
      case 'del':
      case 's':
      case 'strike':
        return '~~';
    }
  };

  return Object.fromEntries(textFormatTags.map(tag => [ tag, delimiterFor(tag) ]));
};


/* ****************************************************************************************************************** *
* Tests
* ****************************************************************************************************************** */

// Regression tests for inputs that need special handling.
describe(`Special Cases`, () => {
  // A single converter instance (default options) shared by every test in this suite
  let instance: NodeHtmlMarkdown;
  // Tag name -> delimiter expected for that tag, derived from the instance's options
  let delims: ReturnType<typeof getDelims>;
  const translate = (html: string) => instance.translate(html);
  beforeAll(() => {
    instance = new NodeHtmlMarkdown();
    delims = getDelims(instance);
  });

  test(`Removes uncaught Doctype`, () => {
    const res = translate(`<!DOCTYPE html>abc`);
    expect(res).toBe(`abc`);
  });

  describe(`Whitespace handled for leading / trailing whitespace in tags`, () => {
    test.each(textFormatTags)(`%s`, tag => {
      const delim = delims[tag];

      // Whitespace (regular and non-breaking) just inside the tag must end up outside the delimiters.
      // The expected value must be a template literal: without the opening backtick this line does not parse.
      expect(translate(`<p><${tag}> &nbsp;Label:&nbsp; </${tag}>Value</p>`)).toBe(`${delim}Label:${delim} Value`);
      expect(translate(`<p><${tag}>&nbsp; Label: &nbsp;</${tag}>Value</p>`)).toBe(` ${delim}Label:${delim} Value`);
    });
  });
});

0 comments on commit 8198524

Please sign in to comment.