Skip to content

Commit

Permalink
feat: Added performance enhancements + improved benchmark display
Browse files Browse the repository at this point in the history
  • Loading branch information
nonara committed Nov 28, 2020
1 parent efd731b commit 4777441
Show file tree
Hide file tree
Showing 12 changed files with 287 additions and 106 deletions.
44 changes: 26 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,39 @@ npm i -S node-html-parser

## Benchmarks
```
node-html-markdown (reused instance): 47.2261 ms/file ± 29.1069 (avg bytes/sec: 2.086)
node-html-markdown : 55.2654 ms/file ± 38.2978 (avg bytes/sec: 1.782)
turndown (reused instance) : 66.8692 ms/file ± 30.9974 (avg bytes/sec: 1.473)
turndown : 68.9757 ms/file ± 35.0430 (avg bytes/sec: 1.428)
--------------------------------------------------------------------------------------
Comparison to fastest (node-html-markdown (reused instance)):
node-html-markdown: -14.55%
turndown (reused instance): -29.38%
turndown: -31.53%
node-html-markdown (reused instance): 46.0008 ms/file ± 27.3827 (avg bytes/sec: 2.141)
node-html-markdown : 47.0208 ms/file ± 28.4478 (avg bytes/sec: 2.095)
turndown : 80.7753 ms/file ± 33.3519 (avg bytes/sec: 1.219)
turndown (reused instance) : 65.9743 ms/file ± 32.3411 (avg bytes/sec: 1.493)
--------------------------------------------------------------------------------------
Estimated processing times (fastest to slowest):
[node-html-markdown (reused instance)]
100 kB: 0.05sec
1 MB: 0.50sec
50 MB: 25.14sec
1 GB: 8min, 34.80sec
50 GB: 7hr, 8min, 59.91sec
100 kB: 48ms
1 MB: 490ms
50 MB: 24.48sec
1 GB: 8min, 21sec
50 GB: 6hr, 57min, 52sec
[turndown (reused instance)]
100 kB: 0.07sec
1 MB: 0.71sec
50 MB: 35.59sec
1 GB: 12min, 8.92sec
50 GB: 10hr, 7min, 26.09sec
100 kB: 69ms
1 MB: 702ms
50 MB: 35.12sec
1 GB: 11min, 59sec
50 GB: 9hr, 59min, 18sec
--------------------------------------------------------------------------------------
Comparison to fastest (node-html-markdown (reused instance)):
node-html-markdown: -2.17%
turndown (reused instance): -30.27%
turndown: -43.05%
--------------------------------------------------------------------------------------
```

## Usage
Expand Down
3 changes: 2 additions & 1 deletion benchmark/_run.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ process.on('message', function (item) {
total: Benchmark.TOTAL,
complete: '=',
incomplete: ' ',
width: 50
width: 50,
clear: true
});

const parser = require(item.parser);
Expand Down
23 changes: 17 additions & 6 deletions benchmark/execute.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ const wrappers = fs.readdirSync(path.join(__dirname, 'wrapper'))

const MAX_WIDTH = Math.max(...wrappers.map(wrapper => wrapper.name.length));

const SEPARATOR = '\n' + '-'.repeat(MAX_WIDTH + 50) + '\n';

// endregion


Expand Down Expand Up @@ -50,7 +52,9 @@ function humanTime(seconds) {

for (const n of [ hours, minutes, s ]) if (!isFinite(n) || isNaN(n)) return 'N/A';

return `${hours ? hours + 'hr, ' : ''}${minutes ? minutes + 'min, ' : ''}${s.toFixed(2)}sec`;
return (!hours && !minutes && seconds < 1) ? `${Math.round((s % 1) * 1000)}ms` :
(!hours && !minutes) ? `${s.toFixed(2)}sec` :
`${hours ? hours + 'hr, ' : ''}${minutes ? minutes + 'min, ' : ''}${Math.round(s)}sec`;
}

// endregion
Expand All @@ -64,10 +68,12 @@ function humanTime(seconds) {
if (!quickMode) console.log('NOTE: Large mode is generally less reliable in most environments!');
const stats = [];

console.log(SEPARATOR);

async.eachSeries(
wrappers,
function (item, done) {
const runner = fork(path.join(__dirname, '_run.js'), void 0, { env: { QUICK_MODE: quickMode }});
const runner = fork(path.join(__dirname, '_run.js'), void 0, { env: { QUICK_MODE: quickMode, LOG_PERF: true }});
runner.send(item);
runner.on('message', function (stat) {
const name = formatName(item.name);
Expand All @@ -76,7 +82,7 @@ function humanTime(seconds) {
const avgBytesPerSec = (stat.avgBytesPerMs / 1000).toPrecision(4);

stats.push({ name, ...stat });
console.log(`\n${name}: ${mean} ms/file ± ${sd} (avg bytes/sec: ${avgBytesPerSec})`);
console.log(`${name}: ${mean} ms/file ± ${sd} (avg bytes/sec: ${avgBytesPerSec})`);
});

runner.on('close', function (n) {
Expand All @@ -85,10 +91,14 @@ function humanTime(seconds) {
});
},
function () {
console.log(SEPARATOR);
console.log(
`\nTotal Files: ${stats[0].totalFiles}\nAvg. file size: ${humanFileSize(stats[0].avgFileSize)}\n`);
`Total Files: ${stats[0].totalFiles}\n`+
`Avg. file size: ${humanFileSize(stats[0].avgFileSize)}`
);

/* Get speed estimates */
console.log(SEPARATOR);
console.log(`Estimated processing times (fastest to slowest):`);
const sortedStats = [ ...stats ].sort((a,b) => b.avgBytesPerMs - a.avgBytesPerMs)
sortedStats.forEach(({ name, avgBytesPerMs }) => {
Expand All @@ -103,13 +113,14 @@ function humanTime(seconds) {
});

/* Get comparisons */
console.log(`\nComparison to fastest (${sortedStats[0].name.trim()}): `);
console.log(SEPARATOR);
console.log(`Comparison to fastest (${sortedStats[0].name.trim()}): \n`);
const fastestMean = sortedStats[0].mean;
sortedStats.slice(1).forEach(({ name, mean }) =>
console.log(` ${name.trim()}: -${((1 - (fastestMean / mean)) * 100).toFixed(2)}%`)
);

console.log('');
console.log(SEPARATOR);
}
);
})();
Expand Down
4 changes: 2 additions & 2 deletions benchmark/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
},
"dependencies": {
"async": "^3.1.0",
"node-html-markdown": "link:../",
"summary": "^1.0.0",
"turndown": "^7.0.0",
"node-html-markdown": "link:../"
"turndown": "^7.0.0"
},
"devDependencies": {
"progress": "^2.0.3"
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"test": "jest",
"test:coverage": "jest --collect-coverage",
"------------- ": "-------------",
"prepare": "cd benchmark && yarn install"
"prepare": "ts-patch patch tsc --silent && cd benchmark && yarn install"
},
"files": [
"README.md",
Expand Down Expand Up @@ -52,6 +52,7 @@
"jest": "^26.4.2",
"ts-jest": "^26.4.1",
"ts-node": "^9.0.0",
"ts-patch": "^1.3.1",
"typescript": "^4.0.3"
}
}
2 changes: 2 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ export const defaultIgnoreElements = [
'SOURCE', 'STYLE', 'TRACK', 'WBR'
];

export const contentlessElements = [ 'BR', 'HR', 'IMG' ];

// endregion


Expand Down
11 changes: 7 additions & 4 deletions src/nodes.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import * as NHParser from 'node-html-parser';
import { CommentNode, NodeType, TextNode } from 'node-html-parser';
import { CommentNode, NodeType } from 'node-html-parser';


/* ****************************************************************************************************************** */
// region: Types
/* ****************************************************************************************************************** */

export { TextNode, NodeType, CommentNode }
export { NodeType, CommentNode }

/* ********************************************************* *
* Merged Nodes - Unions of node-html-parser and common DOM
* ********************************************************* */

export type HtmlNode = NHParser.Node | Node
export type ElementNode = NHParser.HTMLElement | HTMLElement
type NodeBase = { preserve?: boolean }

export type HtmlNode = (NHParser.Node | Node) & NodeBase
export type ElementNode = (NHParser.HTMLElement | HTMLElement) & NodeBase
export type TextNode = (NHParser.TextNode) & NodeBase

// endregion

Expand Down
39 changes: 36 additions & 3 deletions src/utilities.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { NodeHtmlMarkdownOptions } from './options';
import { ElementNode } from './nodes';
import { ElementNode, HtmlNode } from './nodes';
import { nodeHtmlParserConfig } from './config';


Expand Down Expand Up @@ -78,6 +78,7 @@ export function parseHTML(html: string, options: NodeHtmlMarkdownOptions): Eleme
let nodeHtmlParse: ReturnType<typeof getNodeHtmlParser>;

/* If specified, try to parse with native engine, fallback to node-html-parser */
perfStart('parse');
let el: ElementNode | undefined;
if (options.preferNativeParser) {
try {
Expand All @@ -86,12 +87,44 @@ export function parseHTML(html: string, options: NodeHtmlMarkdownOptions): Eleme
catch (e) {
nodeHtmlParse = getNodeHtmlParser();
if (nodeHtmlParse) console.warn('Native DOM parser encountered an error during parse', e);
throw e;
else throw e;
}
}
else nodeHtmlParse = getNodeHtmlParser();

return el || nodeHtmlParse!(html, nodeHtmlParserConfig).removeWhitespace();
if (!el) el = nodeHtmlParse!(html, nodeHtmlParserConfig).removeWhitespace();
perfStop('parse');

return el;
}

// endregion


/* ****************************************************************************************************************** */
// region: General
/* ****************************************************************************************************************** */

export function getChildNodes<T extends HtmlNode | Node>(node: T): T[]
export function getChildNodes(node: HtmlNode | Node): (Node | HtmlNode)[] {
if (!isNodeList(node.childNodes)) return node.childNodes;

const res: (ChildNode)[] = [];
node.childNodes.forEach(n => res.push(n));

return res;

function isNodeList(v: any): v is NodeListOf<ChildNode> {
return (v != null) || (typeof v[Symbol.iterator] === 'function');
}
}

export function perfStart(label: string) {
if (process.env.LOG_PERF) console.time(label);
}

export function perfStop(label: string) {
if (process.env.LOG_PERF) console.timeEnd(label);
}

// endregion
Loading

0 comments on commit 4777441

Please sign in to comment.