Skip to content

Commit

Permalink
fix: Correct nesting and other possible issues in child nodes of A tag (closes #25)
Browse files Browse the repository at this point in the history
  • Loading branch information
nonara committed Aug 23, 2021
1 parent 422bcf9 commit dcd6b20
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 7 deletions.
13 changes: 12 additions & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ export const defaultTranslators: TranslatorConfigObject = {
if (node.textContent === href) return { content: `<${href}>` };

return {
postprocess: ({ content }) => content.replace(/(?:\r?\n)+/g, ''),
postprocess: ({ content }) => content.replace(/(?:\r?\n)+/g, ' '),
childTranslators: visitor.instance.aTagTranslators,
prefix: '[',
postfix: ']' + (!options.useLinkReferenceDefinitions
? `(${href}${title ? ` "${title}"` : ''})`
Expand Down Expand Up @@ -226,6 +227,16 @@ export const defaultCodeBlockTranslators: TranslatorConfigObject = {
'img': { recurse: false }
}

/**
 * Translator configuration for elements nested inside an anchor (`a`) element.
 *
 * `NodeHtmlMarkdown` loads these entries into its `aTagTranslators` collection, which the
 * `a` translator supplies as `childTranslators` for a link's contents.
 *
 * NOTE(review): there is no entry for `a` itself, so a nested anchor presumably is not
 * rendered as a separate link and only its contents are emitted — confirm against the
 * visitor's handling of tags without a translator.
 */
export const aTagTranslatorConfig: TranslatorConfigObject = {
// Line breaks and horizontal rules emit a bare newline and are not recursed into; the `a`
// translator's `postprocess` step then replaces runs of newlines in the link text.
'br': { content: '\n', recurse: false },
'hr': { content: '\n', recurse: false },
// The remaining entries reuse the matching `defaultTranslators` configs as-is.
'pre': defaultTranslators['pre'],
'strong,b': defaultTranslators['strong,b'],
'del,s,strike': defaultTranslators['del,s,strike'],
'em,i': defaultTranslators['em,i'],
'img': defaultTranslators['img']
}

// endregion


Expand Down
7 changes: 6 additions & 1 deletion src/main.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { NodeHtmlMarkdownOptions } from './options';
import { TranslatorCollection, TranslatorConfigObject } from './translator';
import {
defaultBlockElements, defaultCodeBlockTranslators, defaultIgnoreElements, defaultOptions, defaultTranslators
aTagTranslatorConfig, defaultBlockElements, defaultCodeBlockTranslators, defaultIgnoreElements, defaultOptions,
defaultTranslators
} from './config';
import { parseHTML } from './utilities';
import { getMarkdownForHtmlNodes } from './visitor';
Expand All @@ -23,6 +24,7 @@ type Options = Partial<NodeHtmlMarkdownOptions>

export class NodeHtmlMarkdown {
public translators = new TranslatorCollection();
public aTagTranslators = new TranslatorCollection();
public codeBlockTranslators = new TranslatorCollection();
public readonly options: NodeHtmlMarkdownOptions

Expand Down Expand Up @@ -50,6 +52,9 @@ export class NodeHtmlMarkdown {
for (const [ elems, cfg ] of Object.entries({ ...defaultCodeBlockTranslators, ...customCodeBlockTranslators }))
this.codeBlockTranslators.set(elems, cfg, true);

for (const [ elems, cfg ] of Object.entries(aTagTranslatorConfig))
this.aTagTranslators.set(elems, cfg, true);

// TODO - Workaround for upstream issue (may not be fixed) - https://github.com/taoqf/node-html-parser/issues/78
if (!this.options.textReplace) this.options.textReplace = [];
this.options.textReplace.push([ /^<!DOCTYPE.*>/gmi, '' ]);
Expand Down
5 changes: 3 additions & 2 deletions src/visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ export class Visitor {
if (textOnly || !isElementNode(node)) return;

/* Handle element node */
const translators = metadata?.translators ?? this.instance.translators;
const translatorCfgOrFactory = translators[node.tagName] as TranslatorConfig | TranslatorConfigFactory;
const translatorCfgOrFactory: TranslatorConfig | TranslatorConfigFactory | undefined =
metadata?.translators ? metadata.translators[node.tagName] : this.instance.translators[node.tagName];

/* Update metadata with list detail */
switch (node.tagName) {
Expand All @@ -183,6 +183,7 @@ export class Visitor {
...metadata,
preserveWhitespace: true
}
break;
}
if (metadata) this.nodeMetadata.set(node, metadata);

Expand Down
4 changes: 3 additions & 1 deletion test/default-tags.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ describe(`Default Tags`, () => {
<a href="${url}">a<br><br>b<strong>c</strong></a>
<a>a<strong>b</strong></a> <!-- This node is treated as text due to no href -->
<a href="${url}">${url}</a>
<!-- see: https://github.com/crosstype/node-html-markdown/issues/25 -->
<a href="${url}">a<a href="2">nested</a><img src="${url}">b</a>
`);
expect(res).toBe(`[a b**c**](${url}) a**b** <${url}> `);
expect(res).toBe(`[a b**c**](${url}) a**b** <${url}> [anested![](${url})b](${url}) `);
});

test(`Image (img)`, () => {
Expand Down
4 changes: 2 additions & 2 deletions test/options.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,13 +265,13 @@ text`);
instance.options.useLinkReferenceDefinitions = false;
let res = translate(html);
expect(res).toBe(
`Hello: [a b**c**](${url}) a**b** [link2](${url}/other) [repeat link](${url}) <${url}> Goodbye!`
`Hello: [a b**c**](${url}) a**b** [link2](${url}/other) [repeat link](${url}) <${url}> Goodbye!`
);

instance.options.useLinkReferenceDefinitions = true;
res = translate(html);
expect(res).toBe(
`Hello: [a b**c**][1] a**b** [link2][2] [repeat link][1] <${url}> Goodbye!\n\n[1]: ${url}\n[2]: ${url}/other`
`Hello: [a b**c**][1] a**b** [link2][2] [repeat link][1] <${url}> Goodbye!\n\n[1]: ${url}\n[2]: ${url}/other`
);

instance.options.useLinkReferenceDefinitions = originalUseLinkReferenceDefinitions;
Expand Down

0 comments on commit dcd6b20

Please sign in to comment.