From 9ea5bf2eef722f1dd63ae9283c4c209f97813df7 Mon Sep 17 00:00:00 2001 From: Nolan Lawson <nolan@nolanlawson.com> Date: Sun, 29 Sep 2024 17:20:10 -0700 Subject: [PATCH 1/2] fix: fix `<noframes>` parsing Fixes #972 --- packages/parse5/lib/parser/index.test.ts | 38 ++++++++++++++++++++++++ packages/parse5/lib/parser/index.ts | 13 ++++---- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/packages/parse5/lib/parser/index.test.ts b/packages/parse5/lib/parser/index.test.ts index 890594e0..3a017ba9 100644 --- a/packages/parse5/lib/parser/index.test.ts +++ b/packages/parse5/lib/parser/index.test.ts @@ -3,6 +3,7 @@ import { parseFragment, parse } from 'parse5'; import { jest } from '@jest/globals'; import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; +import type { Element, TextNode } from '../tree-adapters/default.js'; generateParsingTests( 'parser', @@ -110,4 +111,41 @@ describe('parser', () => { expect(onItemPop).toHaveBeenLastCalledWith(bodyElement.childNodes[0], bodyElement); }); }); + + describe('rawtext parsing', () => { + it.each([ + ['iframe'], + ['noembed'], + ['noframes'], + ['noscript'], + ['script'], + ['style'], + ['textarea'], + ['title'], + ['xmp'], + ])('<%s>', (tagName) => { + const html = `<r><${tagName}><math id="</${tagName}><b>should be outside</b>">`; + const fragment = parseFragment(html); + + expect(fragment.childNodes.length).toBe(1); + const r = fragment.childNodes[0] as Element; + expect(r.nodeName).toBe('r'); + expect(r.childNodes).toHaveLength(3); + expect(r.childNodes.map(_ => _.nodeName)).toEqual([ + tagName, + 'b', + '#text' + ]); + + const target = r.childNodes[0] as Element; + expect(target.childNodes).toHaveLength(1); + expect(target.childNodes[0].nodeName).toBe('#text'); + expect((target.childNodes[0] as TextNode).value).toBe('<math id="'); + + const b = r.childNodes[1] as Element; + expect(b.childNodes).toHaveLength(1); + 
expect(b.childNodes[0].nodeName).toBe('#text'); + expect((b.childNodes[0] as TextNode).value).toBe('should be outside'); + }); + }); }); diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts index 5dd908ca..8a0fcb34 100644 --- a/packages/parse5/lib/parser/index.ts +++ b/packages/parse5/lib/parser/index.ts @@ -2195,9 +2195,9 @@ function iframeStartTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: p._switchToTextParsing(token, TokenizerMode.RAWTEXT); } -//NOTE: here we assume that we always act as an user agent with enabled plugins, so we parse -//<noembed> as rawtext. -function noembedStartTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagToken): void { +//NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse +//<noembed>/<noframes> as rawtext. +function rawTextStartTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagToken): void { p._switchToTextParsing(token, TokenizerMode.RAWTEXT); } @@ -2449,8 +2449,9 @@ function startTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagTo optgroupStartTagInBody(p, token); break; } - case $.NOEMBED: { - noembedStartTagInBody(p, token); + case $.NOEMBED: + case $.NOFRAMES: { + rawTextStartTagInBody(p, token); break; } case $.FRAMESET: { @@ -2463,7 +2464,7 @@ function startTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagTo } case $.NOSCRIPT: { if (p.options.scriptingEnabled) { - noembedStartTagInBody(p, token); + rawTextStartTagInBody(p, token); } else { genericStartTagInBody(p, token); } From b8a27629783771fa0a6a2341e97e49c88c03cf5e Mon Sep 17 00:00:00 2001 From: Nolan Lawson <nolan@nolanlawson.com> Date: Sun, 29 Sep 2024 18:14:46 -0700 Subject: [PATCH 2/2] chore: prettier --- packages/parse5/lib/parser/index.test.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/parse5/lib/parser/index.test.ts b/packages/parse5/lib/parser/index.test.ts index 
3a017ba9..249d00c7 100644 --- a/packages/parse5/lib/parser/index.test.ts +++ b/packages/parse5/lib/parser/index.test.ts @@ -131,11 +131,7 @@ describe('parser', () => { const r = fragment.childNodes[0] as Element; expect(r.nodeName).toBe('r'); expect(r.childNodes).toHaveLength(3); - expect(r.childNodes.map(_ => _.nodeName)).toEqual([ - tagName, - 'b', - '#text' - ]); + expect(r.childNodes.map((_) => _.nodeName)).toEqual([tagName, 'b', '#text']); const target = r.childNodes[0] as Element; expect(target.childNodes).toHaveLength(1);