Skip to content

Commit

Permalink
refactor(options): Revamp option handling (#2916)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored Dec 24, 2022
1 parent fa00a9d commit 16c3149
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 74 deletions.
2 changes: 1 addition & 1 deletion src/api/manipulation.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1974,7 +1974,7 @@ describe('$(...)', () => {
});

it('() : should preserve parsing options', () => {
const $ = load('<div>π</div>', { decodeEntities: false });
const $ = load('<div>π</div>', { xml: { decodeEntities: false } });
const $div = $('div');

expect($div.text()).toBe($div.clone().text());
Expand Down
2 changes: 1 addition & 1 deletion src/batteries.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ describe('stringStream', () => {
});

it('should use htmlparser2 for XML', (cb) => {
const stream = cheerio.stringStream({ xmlMode: true }, (err, $) => {
const stream = cheerio.stringStream({ xml: true }, (err, $) => {
expect(err).toBeNull();

expect($.html()).toBe(TEST_HTML);
Expand Down
18 changes: 12 additions & 6 deletions src/batteries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export * from './index.js';

import type { CheerioAPI, CheerioOptions } from './index.js';
import { load } from './index.js';
import { flatten as flattenOptions, type InternalOptions } from './options.js';
import { flattenOptions, type InternalOptions } from './options.js';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';

// eslint-disable-next-line n/file-extension-in-import
Expand Down Expand Up @@ -58,7 +58,7 @@ function _stringStream(
options: InternalOptions | undefined,
cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable {
if (options && (options.xmlMode || options._useHtmlParser2)) {
if (options?._useHtmlParser2) {
const handler: DomHandler = new DomHandler(
(err) => cb(err, load(handler.root)),
options
Expand All @@ -67,10 +67,14 @@ function _stringStream(
return new Htmlparser2Stream(handler, options);
}

const stream = new Parse5Stream({
...options,
treeAdapter: htmlparser2Adapter,
});
options ??= {};
options.treeAdapter ??= htmlparser2Adapter;

if (options.scriptingEnabled !== false) {
options.scriptingEnabled = true;
}

const stream = new Parse5Stream(options);

finished(stream, (err) => cb(err, load(stream.document)));

Expand Down Expand Up @@ -102,6 +106,7 @@ function _stringStream(
* writeStream
* );
* ```
*
* @param options - The options to pass to Cheerio.
* @param cb - The callback to call when the stream is finished.
* @returns The writable stream.
Expand Down Expand Up @@ -176,6 +181,7 @@ const defaultRequestOptions: UndiciStreamOptions = {
*
* const $ = await cheerio.fromURL('https://example.com');
* ```
*
* @param url - The URL to load the document from.
* @param options - The options to pass to Cheerio.
* @returns The loaded document.
Expand Down
29 changes: 12 additions & 17 deletions src/cheerio.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -452,9 +452,8 @@ describe('cheerio', () => {
describe('parse5 options', () => {
// Should parse noscript tags only with false option value
test('{scriptingEnabled: ???}', () => {
const opt = 'scriptingEnabled';
const options: CheerioOptions = {};
let result;
let result: Cheerio<Element>;

// [default] scriptingEnabled: true - tag contains one text element
result = cheerio.load(noscript)('noscript');
Expand All @@ -463,7 +462,7 @@ describe('cheerio', () => {
expect(result[0].children[0].type).toBe('text');

// ScriptingEnabled: false - content of noscript will be parsed
options[opt] = false;
options.scriptingEnabled = false;
result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0].children).toHaveLength(2);
Expand All @@ -474,7 +473,7 @@ describe('cheerio', () => {
// ScriptingEnabled: ??? - should act as true
const values = [undefined, null, 0, ''];
for (const val of values) {
options[opt] = val as any;
options.scriptingEnabled = val as any;
result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0].children).toHaveLength(1);
Expand All @@ -484,29 +483,25 @@ describe('cheerio', () => {

// Should contain location data only with truthy option value
test('{sourceCodeLocationInfo: ???}', () => {
const prop = 'sourceCodeLocation';
const opt = 'sourceCodeLocationInfo';
const options: CheerioOptions = {};
let result;
let i;

// Location data should not be present
let values = [undefined, null, 0, false, ''];
for (i = 0; i < values.length; i++) {
options[opt] = values[i] as any;
result = cheerio.load(noscript, options)('noscript');
for (let i = 0; i < values.length; i++) {
options.sourceCodeLocationInfo = values[i] as any;
const result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0]).not.toHaveProperty(prop);
expect(result[0]).not.toHaveProperty('sourceCodeLocation');
}

// Location data should be present
values = [true, 1, 'test'];
for (i = 0; i < values.length; i++) {
options[opt] = values[i] as any;
result = cheerio.load(noscript, options)('noscript');
for (let i = 0; i < values.length; i++) {
options.sourceCodeLocationInfo = values[i] as any;
const result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0]).toHaveProperty(prop);
expect(typeof (result[0] as any)[prop]).toBe('object');
expect(result[0]).toHaveProperty('sourceCodeLocation');
expect(typeof (result[0] as any)['sourceCodeLocation']).toBe('object');
}
});
});
Expand Down
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import renderWithHtmlparser2 from 'dom-serializer';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';

const parse = getParse((content, options, isDocument, context) =>
options.xmlMode || options._useHtmlParser2
options._useHtmlParser2
? parseWithHtmlparser2(content, options)
: parseWithParse5(content, options, isDocument, context)
);
Expand All @@ -52,7 +52,7 @@ const parse = getParse((content, options, isDocument, context) =>
* @see {@link https://cheerio.js.org#loading} for additional usage information.
*/
export const load = getLoad(parse, (dom, options) =>
options.xmlMode || options._useHtmlParser2
options._useHtmlParser2
? renderWithHtmlparser2(dom, options)
: renderWithParse5(dom)
);
Expand Down
10 changes: 3 additions & 7 deletions src/load.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import {
type CheerioOptions,
type InternalOptions,
default as defaultOptions,
flatten as flattenOptions,
flattenOptions,
} from './options.js';
import * as staticMethods from './static.js';
import { Cheerio } from './cheerio.js';
Expand Down Expand Up @@ -114,7 +113,7 @@ export function getLoad(
throw new Error('cheerio.load() expects a string');
}

const internalOpts = { ...defaultOptions, ...flattenOptions(options) };
const internalOpts = flattenOptions(options);
const initialRoot = parse(content, internalOpts, isDocument, null);

/**
Expand Down Expand Up @@ -157,10 +156,7 @@ export function getLoad(
// $($)
if (selector && isCheerio<Result>(selector)) return selector;

const options = {
...internalOpts,
...flattenOptions(opts),
};
const options = flattenOptions(opts, internalOpts);
const r =
typeof root === 'string'
? [parse(root, options, false, null)]
Expand Down
72 changes: 49 additions & 23 deletions src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,25 @@ export type Parse5Options = Parse5ParserOptions<Htmlparser2TreeAdapterMap>;
* Please note that parser-specific options are _only recognized_ if the
* relevant parser is used.
*/
export interface CheerioOptions extends HTMLParser2Options, Parse5Options {
/** Recommended way of configuring htmlparser2 when wanting to parse XML. */
export interface CheerioOptions extends Parse5Options {
/**
* Recommended way of configuring htmlparser2 when wanting to parse XML.
*
* This will switch Cheerio to use htmlparser2.
*
* @default false
*/
xml?: HTMLParser2Options | boolean;

/** The base URI for the document. Used for the `href` and `src` props. */
/**
* Enable xml mode, which will switch Cheerio to use htmlparser2.
*
* @deprecated Please use the `xml` option instead.
* @default false
*/
xmlMode?: boolean;

/** The base URI for the document. Used to resolve the `href` and `src` props. */
baseURI?: string | URL; // eslint-disable-line n/no-unsupported-features/node-builtins

/**
Expand Down Expand Up @@ -70,7 +84,9 @@ export interface CheerioOptions extends HTMLParser2Options, Parse5Options {
}

/** Internal options for Cheerio. */
export interface InternalOptions extends Omit<CheerioOptions, 'xml'> {
export interface InternalOptions
extends HTMLParser2Options,
Omit<CheerioOptions, 'xml'> {
/**
* Whether to use htmlparser2.
*
Expand All @@ -79,17 +95,8 @@ export interface InternalOptions extends Omit<CheerioOptions, 'xml'> {
_useHtmlParser2?: boolean;
}

const defaultOpts: CheerioOptions = {
xml: false,
decodeEntities: true,
};

/** Cheerio default options. */
export default defaultOpts;

const xmlModeDefault: InternalOptions = {
_useHtmlParser2: true,
xmlMode: true,
/**
 * Internal options used when the caller supplies neither options nor base
 * options. `_useHtmlParser2` is off here, so htmlparser2 is not selected
 * unless a caller opts in.
 */
const defaultOpts: InternalOptions = {
_useHtmlParser2: false,
};

/**
Expand All @@ -98,14 +105,33 @@ const xmlModeDefault: InternalOptions = {
* This will set `_useHtmlParser2` to true if `xml` is set to true.
*
* @param options - The options to flatten.
* @param baseOptions - The base options to use.
* @returns The flattened options.
*/
export function flatten(
options?: CheerioOptions | null
): InternalOptions | undefined {
return options?.xml
? typeof options.xml === 'boolean'
? xmlModeDefault
: { ...xmlModeDefault, ...options.xml }
: options ?? undefined;
/**
 * Collapse user-facing options into the flat internal option object.
 *
 * When `options` is falsy, `baseOptions` (or the module defaults) is returned
 * as-is. Otherwise a new object is built with `baseOptions` first and
 * `options` layered on top.
 *
 * A truthy `xml` forces both `_useHtmlParser2` and `xmlMode` on; if `xml` is
 * anything other than `true`, its fields are copied over last and therefore
 * win. `xmlMode` on its own forces `_useHtmlParser2` on.
 *
 * @param options - The options to flatten.
 * @param baseOptions - The base options to layer `options` over.
 * @returns The flattened options.
 */
export function flattenOptions(
  options?: CheerioOptions | null,
  baseOptions?: InternalOptions
): InternalOptions {
  // Nothing to merge: hand back the base (or the defaults) untouched.
  if (!options) {
    return baseOptions ?? defaultOpts;
  }

  const merged: InternalOptions = {
    _useHtmlParser2: !!options.xmlMode,
    ...baseOptions,
    ...options,
  };

  const xmlSetting = options.xml;

  if (!xmlSetting) {
    // Legacy `xmlMode` flag: must win over a `_useHtmlParser2: false` from the base.
    if (options.xmlMode) {
      merged._useHtmlParser2 = true;
    }

    return merged;
  }

  merged._useHtmlParser2 = true;
  merged.xmlMode = true;

  // An `xml` value other than `true` carries parser settings; apply them last.
  if (xmlSetting !== true) {
    Object.assign(merged, xmlSetting);
  }

  return merged;
}
5 changes: 3 additions & 2 deletions src/parse.spec.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import type { Document, Element } from 'domhandler';
import { getParse } from './parse.js';
import defaultOpts from './options.js';

import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
import { parseWithParse5 } from './parsers/parse5-adapter.js';

const defaultOpts = { _useHtmlParser2: false };

const parse = getParse((content, options, isDocument, context) =>
options.xmlMode || options._useHtmlParser2
options._useHtmlParser2
? parseWithHtmlparser2(content, options)
: parseWithParse5(content, options, isDocument, context)
);
Expand Down
17 changes: 7 additions & 10 deletions src/parsers/parse5-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,15 @@ export function parseWithParse5(
isDocument: boolean,
context: ParentNode | null
): Document {
const opts = {
scriptingEnabled:
typeof options.scriptingEnabled === 'boolean'
? options.scriptingEnabled
: true,
treeAdapter: htmlparser2Adapter,
sourceCodeLocationInfo: options.sourceCodeLocationInfo,
};
options.treeAdapter ??= htmlparser2Adapter;

if (options.scriptingEnabled !== false) {
options.scriptingEnabled = true;
}

return isDocument
? parseDocument(content, opts)
: parseFragment(context, content, opts);
? parseDocument(content, options)
: parseFragment(context, content, options);
}

const renderOpts = { treeAdapter: htmlparser2Adapter };
Expand Down
8 changes: 3 additions & 5 deletions src/static.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ import { textContent } from 'domutils';
import {
type InternalOptions,
type CheerioOptions,
default as defaultOptions,
flatten as flattenOptions,
flattenOptions as flattenOptions,
} from './options.js';
import type { ExtractedMap, ExtractMap } from './api/extract.js';

Expand Down Expand Up @@ -85,9 +84,8 @@ export function html(
* so fallback non-existing options to the default ones.
*/
const opts = {
...defaultOptions,
...this?._options,
...flattenOptions(options ?? {}),
...flattenOptions(options),
};

return render(this, toRender, opts);
Expand Down Expand Up @@ -166,7 +164,7 @@ export function parseHTML(
keepScripts = context;
}

const parsed = this.load(data, defaultOptions, false);
const parsed = this.load(data, this._options, false);
if (!keepScripts) {
parsed('script').remove();
}
Expand Down

0 comments on commit 16c3149

Please sign in to comment.