Skip to content

Commit

Permalink
feat: Added table support
Browse files Browse the repository at this point in the history
  • Loading branch information
otolab authored and Ron S committed Apr 20, 2022
1 parent 751af8c commit faaebe8
Show file tree
Hide file tree
Showing 131 changed files with 246 additions and 4 deletions.
Empty file modified .editorconfig
100755 → 100644
Empty file.
Empty file modified .github/FUNDING.yml
100755 → 100644
Empty file.
Empty file modified .github/workflows/build.yml
100755 → 100644
Empty file.
Empty file modified .github/workflows/publish.yml
100755 → 100644
Empty file.
Empty file modified .gitignore
100755 → 100644
Empty file.
Empty file modified .idea/.name
100755 → 100644
Empty file.
Empty file modified .idea/codeStyles/Project.xml
100755 → 100644
Empty file.
Empty file modified .idea/codeStyles/codeStyleConfig.xml
100755 → 100644
Empty file.
Empty file modified .idea/inspectionProfiles/Project_Default.xml
100755 → 100644
Empty file.
Empty file modified .idea/modules.xml
100755 → 100644
Empty file.
Empty file modified .idea/node-html-markdown.iml
100755 → 100644
Empty file.
Empty file modified .idea/vcs.xml
100755 → 100644
Empty file.
Empty file modified CHANGELOG.md
100755 → 100644
Empty file.
Empty file modified README.md
100755 → 100644
Empty file.
Empty file modified benchmark/LICENSE.md
100755 → 100644
Empty file.
Empty file modified benchmark/README.md
100755 → 100644
Empty file.
Empty file modified benchmark/_run.js
100755 → 100644
Empty file.
Empty file modified benchmark/execute.js
100755 → 100644
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file modified benchmark/index.js
100755 → 100644
Empty file.
Empty file modified benchmark/package.json
100755 → 100644
Empty file.
Empty file modified benchmark/wrapper/node-html-makrdown_reuse.js
100755 → 100644
Empty file.
Empty file modified benchmark/wrapper/node-html-markdown.js
100755 → 100644
Empty file.
Empty file modified benchmark/wrapper/turndown.js
100755 → 100644
Empty file.
Empty file modified benchmark/wrapper/turndown_reuse.js
100755 → 100644
Empty file.
Empty file modified benchmark/yarn.lock
100755 → 100644
Empty file.
Empty file modified jest.config.js
100755 → 100644
Empty file.
Empty file modified package.json
100755 → 100644
Empty file.
110 changes: 109 additions & 1 deletion src/config.ts
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { isWhiteSpaceOnly, surround, tagSurround, trimNewLines } from './utilities';
import { isWhiteSpaceOnly, splitSpecial, surround, tagSurround, trimNewLines } from './utilities';
import { PostProcessResult, TranslatorConfigObject } from './translator';
import { NodeHtmlMarkdownOptions } from './options';
import { Options as NodeHtmlParserOptions } from 'node-html-parser'
Expand Down Expand Up @@ -182,6 +182,62 @@ export const defaultTranslators: TranslatorConfigObject = {
}
},

/* Table */
// Rebuilds the rows emitted by the table child translators into an aligned pipe table:
// every column is padded to its widest cell, a dashed separator follows the first row,
// and a caption stored in the node metadata (if any) is placed on the line above the table.
'table': ({ visitor }) => ({
  surroundingNewlines: 2,
  childTranslators: visitor.instance.tableTranslators,
  overrideMetadata: true,
  postprocess: ({ content, nodeMetadata, node }) => {
    /* Collect cells per row and the widest cell per column */
    const widths: number[] = [];
    const table: string[][] = [];

    // Each split segment is one rendered row; strip its leading + trailing pipes
    const lines = splitSpecial(content).map(segment => segment.text.replace(/^(?:\|\s+)?(.+)\s*\|\s*$/, '$1'));

    for (const line of lines) {
      if (!line) continue;

      const cells = line.split(' |').map(cell => cell.trim());
      cells.forEach((cell, idx) => {
        widths[idx] = Math.max(widths[idx] ?? 0, cell.length);
      });

      table.push(cells);
    }

    // Nothing usable was rendered - drop the table entirely
    if (table.length < 1) return PostProcessResult.RemoveNode;

    /* Render */
    const output: string[] = [];

    const caption = nodeMetadata.get(node)!.tableMeta!.caption;
    if (caption) output.push(caption);

    table.forEach((cells, rowIndex) => {
      // Rows with fewer cells than the widest row are filled with blank, padded cells
      const padded = widths.map((width, idx) => (cells[idx] ?? '').padEnd(width));
      output.push('| ' + padded.join(' | ') + ' |');

      // The first row is treated as the header, so the separator goes right after it
      if (rowIndex === 0) output.push('|' + widths.map(width => ' ' + '-'.repeat(width) + ' |').join(''));
    });

    return output.join('\n') + '\n';
  }
}),

/* Link */
'a': ({ node, options, visitor }) => {
const href = node.getAttribute('href');
Expand Down Expand Up @@ -239,12 +295,64 @@ export const defaultTranslators: TranslatorConfigObject = {
},
}

/**
 * Translators applied to the children of a `<table>` element.
 *
 * Rows and cells are rendered into a raw, pipe-delimited form (`| a | b |` per line) which the
 * `table` translator's postprocess step later splits on ` |` and re-aligns. Captions are not
 * rendered in place; they are stored on the table's metadata and emitted by the table translator.
 */
export const tableTranslatorConfig: TranslatorConfigObject = {
  /* Table Caption */
  'caption': ({ visitor }) => ({
    surroundingNewlines: false,
    childTranslators: visitor.instance.tableCellTranslators,
    overrideMetadata: true,
    postprocess: ({ content, nodeMetadata, node }) => {
      // Collapse the caption onto a single line and stash it (bold) on the table metadata
      const caption = content.replace(/(?:\r?\n)+/g, ' ').trim();
      if (caption) nodeMetadata.get(node)!.tableMeta!.caption = '__' + caption + '__'

      // The caption node itself never produces output
      return PostProcessResult.RemoveNode;
    },
  }),

  /* Table row */
  'tr': ({ visitor }) => ({
    surroundingNewlines: false,
    childTranslators: visitor.instance.tableRowTranslators,
    overrideMetadata: true,
    postfix: '\n',
    prefix: '| ',
    // A row whose content does not end in a cell terminator (' |') rendered no cells - drop it
    postprocess: ({ content }) => !/ \|\s*$/.test(content) ? PostProcessResult.RemoveNode : content
  }),

  /* Table cell, (header cell) */
  'th,td': ({ visitor }) => ({
    surroundingNewlines: false,
    childTranslators: visitor.instance.tableCellTranslators, // FIXME: Circular references should be avoided.
    overrideMetadata: true,
    prefix: ' ',
    postfix: ' |',
    postprocess: ({ content }) =>
      trimNewLines(content)
        // Escape EVERY pipe in the cell text. A string pattern would only replace the first
        // occurrence, leaving later ' |' sequences to be mistaken for column delimiters.
        .replace(/\|/g, '\\|')
        .replace(/(?:\r?\n)+/g, ' ')
        .trim()
  }),
}

/**
 * Translators applied to the children of a table row (`<tr>`).
 * Only cells are mapped here; the cell translator is shared with `tableTranslatorConfig`.
 */
export const tableRowTranslatorConfig: TranslatorConfigObject = {
'th,td': tableTranslatorConfig['th,td']
}

/**
 * Translators applied to the content of table cells (`<th>`, `<td>`) and captions.
 * Each listed inline tag reuses its entry from `defaultTranslators`; no other tags are mapped here.
 */
export const tableCellTranslatorConfig: TranslatorConfigObject = {
'a': defaultTranslators['a'],
'strong,b': defaultTranslators['strong,b'],
'del,s,strike': defaultTranslators['del,s,strike'],
'em,i': defaultTranslators['em,i'],
'img': defaultTranslators['img']
}

/**
 * Default translator set for code blocks.
 * NOTE(review): presumably installed as the instance's `codeBlockTranslators` - confirm in main.ts.
 */
export const defaultCodeBlockTranslators: TranslatorConfigObject = {
// Line break becomes a literal newline
'br': { content: `\n`, recurse: false },
// Horizontal rule becomes a plain '---'
'hr': { content: '---', recurse: false },
// Headings are wrapped in square brackets
'h1,h2,h3,h4,h5,h6': { prefix: '[', postfix: ']' },
// Lists reuse the default list translators
'ol,ul': defaultTranslators['ol,ul'],
'li': defaultTranslators['li'],
// Table rows are only separated by surrounding newlines here (no pipe-table formatting)
'tr': { surroundingNewlines: true },
'img': { recurse: false }
}

Expand Down
Empty file modified src/index.ts
100755 → 100644
Empty file.
15 changes: 13 additions & 2 deletions src/main.ts
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { NodeHtmlMarkdownOptions } from './options';
import { TranslatorCollection, TranslatorConfigObject } from './translator';
import {
aTagTranslatorConfig, defaultBlockElements, defaultCodeBlockTranslators, defaultIgnoreElements, defaultOptions,
defaultTranslators
defaultTranslators, tableCellTranslatorConfig, tableRowTranslatorConfig, tableTranslatorConfig
} from './config';
import { parseHTML } from './utilities';
import { getMarkdownForHtmlNodes } from './visitor';
Expand All @@ -26,6 +26,9 @@ export class NodeHtmlMarkdown {
public translators = new TranslatorCollection();
public aTagTranslators = new TranslatorCollection();
public codeBlockTranslators = new TranslatorCollection();
public tableTranslators = new TranslatorCollection();
public tableRowTranslators = new TranslatorCollection();
public tableCellTranslators = new TranslatorCollection();
public readonly options: NodeHtmlMarkdownOptions

constructor(options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject) {
Expand Down Expand Up @@ -55,12 +58,20 @@ export class NodeHtmlMarkdown {
for (const [ elems, cfg ] of Object.entries(aTagTranslatorConfig))
this.aTagTranslators.set(elems, cfg, true);

for (const [ elems, cfg ] of Object.entries(tableTranslatorConfig))
this.tableTranslators.set(elems, cfg, true);

for (const [ elems, cfg ] of Object.entries(tableRowTranslatorConfig))
this.tableRowTranslators.set(elems, cfg, true);

for (const [ elems, cfg ] of Object.entries(tableCellTranslatorConfig))
this.tableCellTranslators.set(elems, cfg, true);

// TODO - Workaround for upstream issue (may not be fixed) - https://github.com/taoqf/node-html-parser/issues/78
if (!this.options.textReplace) this.options.textReplace = [];
this.options.textReplace.push([ /^<!DOCTYPE.*>/gmi, '' ]);
}


/* ********************************************************* */
// region: Static Methods
/* ********************************************************* */
Expand Down
Empty file modified src/nodes.ts
100755 → 100644
Empty file.
Empty file modified src/options.ts
100755 → 100644
Empty file.
5 changes: 5 additions & 0 deletions src/translator.ts
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ export interface TranslatorConfig {
* Custom translator collection to use for child HTML nodes
*/
childTranslators?: TranslatorCollection

/**
* Force overwrite node metadata (for custom translators)
*/
overrideMetadata?: boolean
}

export enum PostProcessResult {
Expand Down
Empty file modified src/utilities.ts
100755 → 100644
Empty file.
13 changes: 12 additions & 1 deletion src/visitor.ts
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ export interface NodeMetadata {
noEscape?: boolean
preserveWhitespace?: boolean
translators?: TranslatorConfigObject
tableMeta?: {
node: ElementNode,
caption?: string
}
}

export type NodeMetadataMap = Map<ElementNode, NodeMetadata>
Expand Down Expand Up @@ -184,6 +188,13 @@ export class Visitor {
preserveWhitespace: true
}
break;
case 'TABLE':
metadata = {
...metadata,
tableMeta: {
node: node
}
}
}
if (metadata) this.nodeMetadata.set(node, metadata);

Expand All @@ -205,7 +216,7 @@ export class Visitor {
if (cfg.ignore) return;

/* Update metadata if needed */
if ((cfg.noEscape && !metadata?.noEscape) || (cfg.childTranslators && !metadata?.translators)) {
if ((cfg.noEscape && !metadata?.noEscape) || (cfg.childTranslators && (!metadata?.translators || cfg.overrideMetadata))) {
metadata = { ...metadata, noEscape: cfg.noEscape, translators: cfg.childTranslators };
this.nodeMetadata.set(node, metadata);
}
Expand Down
5 changes: 5 additions & 0 deletions test/default-tags-codeblock.test.ts
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,9 @@ describe(`Default Tags`, () => {
expect(res).toBe(getExpected(` \n \n* a \nb\n \n \n* b \n \n * c \n d \n \n 1. e \n f\n \n `));
});
});

// Table markup passed through translateAsBlock: the <tr> content ends up on its own line and
// the <td> text is emitted as plain text, without pipe-table formatting.
test(`Table`, () => {
const res = translateAsBlock('a<tr>b</tr>c<table><td>X</td></table>');
expect(res).toBe(getExpected(`a\nb\nc\n\nX\n\n`));
})
});
Empty file modified test/default-tags.test.ts
100755 → 100644
Empty file.
Empty file modified test/special-cases.test.ts
100755 → 100644
Empty file.
102 changes: 102 additions & 0 deletions test/table.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { NodeHtmlMarkdown } from '../src';


/* ****************************************************************************************************************** *
* Tests
* ****************************************************************************************************************** */

/**
 * Table translation tests.
 *
 * Expected output is a pipe table in which every cell is padded with trailing spaces to the width
 * of the widest cell in its column, followed (after the first row) by a separator row of dashes of
 * that same width. Rows with fewer cells than the widest row are filled with blank cells.
 */
describe(`Table`, () => {
  let instance: NodeHtmlMarkdown;
  const translate = (html: string) => instance.translate(html);
  beforeAll(() => {
    instance = new NodeHtmlMarkdown();
  });

  test(`Single row, Single column table`, () => {
    const expected = `| col1 |\n| ---- |`;

    // <th>, <td> and a bare <td> without an enclosing <tr> must all render the same
    expect(translate(`<table><tr><th> col1 </th></tr></table>`)).toBe(expected);
    expect(translate(`<table><tr><td> col1 </td></tr></table>`)).toBe(expected);
    expect(translate(`<table><td> col1 </td></table>`)).toBe(expected);
  });

  test(`Single row table`, () => {
    const expected = `| col1 | col2 |\n| ---- | ---- |`;

    expect(translate(`<table><tr><th> col1 </th><td>col2 </td></tr></table>`)).toBe(expected);
    expect(translate(`<table><tr><td> col1 </td><td>col2 </td></table>`)).toBe(expected);
    expect(translate(`<table><td> col1 </td><td>col2 </td></table>`)).toBe(expected);
  });

  test(`Table with caption`, () => {
    // The caption is rendered bold on the line above the table, wherever it appears in the markup
    const expected =
      `__Hello__\n` +
      `| col1 | col2 |\n` +
      `| ---- | ---- |`;

    expect(translate(`<table><caption>Hello</caption><tr><th> col1 </th><td>col2 </td></tr></table>`)).toBe(expected);
    expect(translate(`<table><th> col1 </th><td>col2 </td><caption>Hello</caption></table>`)).toBe(expected);
  });

  describe(`Special Cases`, () => {
    test(`"|" is escaped`, () => {
      expect(translate(`<table><tr><td>A|B</td></tr></table>`)).toBe(`| A\\|B |\n| ---- |`);
    });

    test(`Pads cells`, () => {
      const html = `<table>
<tr><td>abc</td><td>def</td><td>ghi</td></tr>
<tr><td>abc1</td><td>def123</td><td>ghi1234567</td></tr>
<tr><td>a</td><td>def1234</td><td>c</td></tr>
</table>`;
      // Column widths are 4 / 7 / 10 (widest cell per column); shorter cells get trailing spaces
      const expected =
        `| abc  | def     | ghi        |\n` +
        `| ---- | ------- | ---------- |\n` +
        `| abc1 | def123  | ghi1234567 |\n` +
        `| a    | def1234 | c          |`;

      expect(translate(html)).toBe(expected);
    });

    test(`Nested tables are not supported`, () => {
      // The inner table is flattened to its text content inside the outer cell
      const html = `<table><tr><td><table><tr><td>nested</td></tr></table></td><td>abc</td></tr></table>`;
      expect(translate(html)).toBe(`| nested | abc |\n| ------ | --- |`);
    });

    test(`Supports inline tags + mismatched rows`, () => {
      const html = `
<table>
<thead>
<tr>
<th>COL1</th>
<th>C
O
L2</th>
</tr>
</thead>
<tbody>
<tr>
<th><b>b</b></th>
<td><i>i</i></td>
<td><a href="link">a</a></td>
<td><img src="file"></td>
</tr>
<tr>
<th><ul><li>list</li><li></li></ul></th>
<td><hr></td>
<td><h1>h1</h1></td>
</tr>
</tbody>
</table>
`;

      // Column widths are 5 / 6 / 9 / 9; rows with fewer cells are completed with blank cells
      const expected =
        `| COL1  | C O L2 |           |           |\n` +
        `| ----- | ------ | --------- | --------- |\n` +
        `| **b** | _i_    | [a](link) | ![](file) |\n` +
        `| list  |        | h1        |           |`;

      expect(translate(html)).toBe(expected);
    });
  });
});
Empty file modified test/tsconfig.json
100755 → 100644
Empty file.
Empty file modified transformer.js
100755 → 100644
Empty file.
Empty file modified tsconfig.base.json
100755 → 100644
Empty file.
Empty file modified tsconfig.json
100755 → 100644
Empty file.
Empty file modified yarn.lock
100755 → 100644
Empty file.

2 comments on commit faaebe8

@nhj7
Copy link

@nhj7 nhj7 commented on faaebe8 Apr 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit solved my problem. 😊👍

@nonara
Copy link
Collaborator

@nonara nonara commented on faaebe8 Apr 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nhj7 Glad to hear it! Thanks goes to @otolab for the help on this.

Please sign in to comment.