diff --git a/packages/happy-dom/src/config/ForeignAttributeConfig.ts b/packages/happy-dom/src/config/ForeignAttributeConfig.ts new file mode 100644 index 00000000..0654a856 --- /dev/null +++ b/packages/happy-dom/src/config/ForeignAttributeConfig.ts @@ -0,0 +1,20 @@ +import NamespaceURI from './NamespaceURI.js'; + +/** + * Foreign attribute config. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#adjust-foreign-attributes + */ +export default <{ [key: string]: string }>{ + 'xlink:actuate': NamespaceURI.xlink, + 'xlink:arcrole': NamespaceURI.xlink, + 'xlink:href': NamespaceURI.xlink, + 'xlink:role': NamespaceURI.xlink, + 'xlink:show': NamespaceURI.xlink, + 'xlink:title': NamespaceURI.xlink, + 'xlink:type': NamespaceURI.xlink, + 'xml:lang': NamespaceURI.xml, + 'xml:space': NamespaceURI.xml, + xmlns: NamespaceURI.xmlns, + 'xmlns:xlink': NamespaceURI.xmlns +}; diff --git a/packages/happy-dom/src/config/HTMLElementConfig.ts b/packages/happy-dom/src/config/HTMLElementConfig.ts index 51536ee3..29c47d5c 100644 --- a/packages/happy-dom/src/config/HTMLElementConfig.ts +++ b/packages/happy-dom/src/config/HTMLElementConfig.ts @@ -9,10 +9,6 @@ export default < className: string; contentModel: HTMLElementConfigContentModelEnum; forbiddenDescendants?: string[]; - permittedDescendants?: string[]; - permittedParents?: string[]; - addPermittedParent?: string; - moveForbiddenDescendant?: { exclude: string[] }; escapesSVGNamespace?: boolean; }; } @@ -133,7 +129,7 @@ export default < }, caption: { className: 'HTMLTableCaptionElement', - contentModel: HTMLElementConfigContentModelEnum.textOrComments + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, cite: { className: 'HTMLElement', @@ -146,13 +142,11 @@ export default < }, col: { className: 'HTMLTableColElement', - contentModel: HTMLElementConfigContentModelEnum.noDescendants, - permittedParents: ['colgroup'] + contentModel: HTMLElementConfigContentModelEnum.noDescendants }, colgroup: { className: 
'HTMLTableColElement', - contentModel: HTMLElementConfigContentModelEnum.permittedDescendants, - permittedDescendants: ['col'] + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, data: { className: 'HTMLDataElement', @@ -357,7 +351,41 @@ export default < }, p: { className: 'HTMLParagraphElement', - contentModel: HTMLElementConfigContentModelEnum.anyDescendants, + contentModel: HTMLElementConfigContentModelEnum.noForbiddenFirstLevelDescendants, + forbiddenDescendants: [ + 'address', + 'article', + 'aside', + 'blockquote', + 'details', + 'div', + 'dl', + 'fieldset', + 'figcaption', + 'figure', + 'footer', + 'form', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'header', + 'hgroup', + 'hr', + 'main', + 'menu', + 'nav', + 'ol', + 'pre', + 'search', + 'section', + 'table', + 'ul', + 'p' + ], + appendElementForNonMatchingEndTag: true, escapesSVGNamespace: true }, param: { @@ -456,43 +484,28 @@ export default < }, table: { className: 'HTMLTableElement', - contentModel: HTMLElementConfigContentModelEnum.permittedDescendants, - permittedDescendants: ['caption', 'colgroup', 'thead', 'tfoot', 'tbody'], - moveForbiddenDescendant: { exclude: [] }, + contentModel: HTMLElementConfigContentModelEnum.anyDescendants, escapesSVGNamespace: true }, tbody: { className: 'HTMLTableSectionElement', - contentModel: HTMLElementConfigContentModelEnum.permittedDescendants, - permittedDescendants: ['tr'], - permittedParents: ['table'], - moveForbiddenDescendant: { exclude: ['caption', 'colgroup', 'thead', 'tfoot', 'tbody'] } + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, td: { className: 'HTMLTableCellElement', - contentModel: HTMLElementConfigContentModelEnum.noForbiddenFirstLevelDescendants, - forbiddenDescendants: ['td', 'th', 'tr', 'tbody', 'tfoot', 'thead'], - permittedParents: ['tr'] + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, tfoot: { className: 'HTMLTableSectionElement', - contentModel: 
HTMLElementConfigContentModelEnum.permittedDescendants, - permittedDescendants: ['tr'], - permittedParents: ['table'], - moveForbiddenDescendant: { exclude: ['caption', 'colgroup', 'thead', 'tfoot', 'tbody'] } + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, th: { className: 'HTMLTableCellElement', - contentModel: HTMLElementConfigContentModelEnum.noForbiddenFirstLevelDescendants, - forbiddenDescendants: ['td', 'th', 'tr', 'tbody', 'tfoot', 'thead'], - permittedParents: ['tr'] + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, thead: { className: 'HTMLTableSectionElement', - contentModel: HTMLElementConfigContentModelEnum.permittedDescendants, - permittedDescendants: ['tr'], - permittedParents: ['table'], - moveForbiddenDescendant: { exclude: ['caption', 'colgroup', 'thead', 'tfoot', 'tbody'] } + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, time: { className: 'HTMLTimeElement', @@ -504,11 +517,7 @@ export default < }, tr: { className: 'HTMLTableRowElement', - contentModel: HTMLElementConfigContentModelEnum.permittedDescendants, - permittedDescendants: ['td', 'th'], - permittedParents: ['tbody', 'tfoot', 'thead'], - addPermittedParent: 'tbody', - moveForbiddenDescendant: { exclude: ['caption', 'colgroup', 'thead', 'tfoot', 'tbody', 'tr'] } + contentModel: HTMLElementConfigContentModelEnum.anyDescendants }, track: { className: 'HTMLTrackElement', diff --git a/packages/happy-dom/src/config/HTMLElementConfigContentModelEnum.ts b/packages/happy-dom/src/config/HTMLElementConfigContentModelEnum.ts index 1c609f61..5197c5f9 100644 --- a/packages/happy-dom/src/config/HTMLElementConfigContentModelEnum.ts +++ b/packages/happy-dom/src/config/HTMLElementConfigContentModelEnum.ts @@ -4,8 +4,6 @@ enum HTMLElementConfigContentModelEnum { noFirstLevelSelfDescendants = 'noFirstLevelSelfDescendants', noForbiddenFirstLevelDescendants = 'noForbiddenFirstLevelDescendants', noDescendants = 'noDescendants', - permittedDescendants = 
'permittedDescendants', - textOrComments = 'textOrComments', anyDescendants = 'anyDescendants' } diff --git a/packages/happy-dom/src/config/MathMLAttributeConfig.ts b/packages/happy-dom/src/config/MathMLAttributeConfig.ts new file mode 100644 index 00000000..ee81aef6 --- /dev/null +++ b/packages/happy-dom/src/config/MathMLAttributeConfig.ts @@ -0,0 +1,8 @@ +/** + * Math ML attribute config. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#adjust-mathml-attributes + */ +export default <{ [key: string]: string }>{ + definitionurl: 'definitionURL' +}; diff --git a/packages/happy-dom/src/config/SVGAttributeConfig.ts b/packages/happy-dom/src/config/SVGAttributeConfig.ts new file mode 100644 index 00000000..e2a5134f --- /dev/null +++ b/packages/happy-dom/src/config/SVGAttributeConfig.ts @@ -0,0 +1,65 @@ +/** + * SVG attribute config. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#adjust-svg-attributes + */ +export default <{ [key: string]: string }>{ + attributename: 'attributeName', + attributetype: 'attributeType', + basefrequency: 'baseFrequency', + baseprofile: 'baseProfile', + calcmode: 'calcMode', + clippathunits: 'clipPathUnits', + diffuseconstant: 'diffuseConstant', + edgemode: 'edgeMode', + filterunits: 'filterUnits', + glyphref: 'glyphRef', + gradienttransform: 'gradientTransform', + gradientunits: 'gradientUnits', + kernelmatrix: 'kernelMatrix', + kernelunitlength: 'kernelUnitLength', + keypoints: 'keyPoints', + keysplines: 'keySplines', + keytimes: 'keyTimes', + lengthadjust: 'lengthAdjust', + limitingconeangle: 'limitingConeAngle', + markerheight: 'markerHeight', + markerunits: 'markerUnits', + markerwidth: 'markerWidth', + maskcontentunits: 'maskContentUnits', + maskunits: 'maskUnits', + numoctaves: 'numOctaves', + pathlength: 'pathLength', + patterncontentunits: 'patternContentUnits', + patterntransform: 'patternTransform', + patternunits: 'patternUnits', + pointsatx: 'pointsAtX', + pointsaty: 'pointsAtY', + pointsatz: 
'pointsAtZ', + preservealpha: 'preserveAlpha', + preserveaspectratio: 'preserveAspectRatio', + primitiveunits: 'primitiveUnits', + refx: 'refX', + refy: 'refY', + repeatcount: 'repeatCount', + repeatdur: 'repeatDur', + requiredextensions: 'requiredExtensions', + requiredfeatures: 'requiredFeatures', + specularconstant: 'specularConstant', + specularexponent: 'specularExponent', + spreadmethod: 'spreadMethod', + startoffset: 'startOffset', + stddeviation: 'stdDeviation', + stitchtiles: 'stitchTiles', + surfacescale: 'surfaceScale', + systemlanguage: 'systemLanguage', + tablevalues: 'tableValues', + targetx: 'targetX', + targety: 'targetY', + textlength: 'textLength', + viewbox: 'viewBox', + viewtarget: 'viewTarget', + xchannelselector: 'xChannelSelector', + ychannelselector: 'yChannelSelector', + zoomandpan: 'zoomAndPan' +}; diff --git a/packages/happy-dom/src/html-parser/HTMLParser.ts b/packages/happy-dom/src/html-parser/HTMLParser.ts index 31b41455..b2c4627b 100755 --- a/packages/happy-dom/src/html-parser/HTMLParser.ts +++ b/packages/happy-dom/src/html-parser/HTMLParser.ts @@ -18,6 +18,12 @@ import HTMLHtmlElement from '../nodes/html-html-element/HTMLHtmlElement.js'; import XMLEncodeUtility from '../utilities/XMLEncodeUtility.js'; import NodeTypeEnum from '../nodes/node/NodeTypeEnum.js'; import MathMLElementConfig from '../config/MathMLElementConfig.js'; +import Attr from '../nodes/attr/Attr.js'; +import ForeignAttributeConfig from '../config/ForeignAttributeConfig.js'; +import SVGAttributeConfig from '../config/SVGAttributeConfig.js'; +import MathMLAttributeConfig from '../config/MathMLAttributeConfig.js'; +import HTMLParserErrorCodeEnum from './HTMLParserErrorCodeEnum.js'; +import HTMLFormElement from '../nodes/html-form-element/HTMLFormElement.js'; /** * Markup RegExp. @@ -47,7 +53,7 @@ const MARKUP_REGEXP = /<([^\s/!>?]+)|<\/([^\s/!>?]+)\s*>|(|--!>)|("). 
*/ const ATTRIBUTE_REGEXP = - /\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*([a-zA-Z0-9-_:.$@?{}/<]+)|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*"([^"]*)("{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*'([^']*)('{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)/gm; + /\s*([^="'\/\s]+)\s*=\s*([^"'\s]+)|([^="'\/\s]+)\s*=\s*"([^"]+)("){0,1}|\s*([^="'\/\s]+)\s*=\s*'([^']+)('){0,1}|\s*([^="'\/\s]+)/gm; /** * Document type attribute RegExp. @@ -68,14 +74,45 @@ const SPACE_IN_BEGINNING_REGEXP = /^\s+/; /** * Markup read state (which state the parser is in). + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#tokenization */ enum MarkupReadStateEnum { - any = 'any', - startTag = 'startTag', + data = 'data', + tagName = 'tagName', comment = 'comment', documentType = 'documentType', processingInstruction = 'processingInstruction', - rawTextElement = 'rawTextElement' + rawText = 'rawText' +} + +/** + * Insertion mode. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#insertion-mode + */ +enum InsertionModeEnum { + initial = 'initial', + beforeHTML = 'beforeHTML', + beforeHead = 'beforeHead', + inHead = 'inHead', + inHeadNoscript = 'inHeadNoscript', + afterHead = 'afterHead', + inBody = 'inBody', + inTable = 'inTable', + inCaption = 'inCaption', + inColumnGroup = 'inColumnGroup', + inTableBody = 'inTableBody', + inRow = 'inRow', + inCell = 'inCell', + inSelect = 'inSelect', + inSelectInTable = 'inSelectInTable', + inTemplate = 'inTemplate', + afterBody = 'afterBody', + inFrameset = 'inFrameset', + afterFrameset = 'afterFrameset', + afterAfterBody = 'afterAfterBody', + afterAfterFrameset = 'afterAfterFrameset' } /** @@ -87,63 +124,52 @@ interface IDocumentType { systemId: string; } -/** - * How much of the HTML document that has been parsed (where the parser level is). 
- */ -enum HTMLDocumentStructureLevelEnum { - root = 0, - doctype = 1, - documentElement = 2, - head = 3, - additionalHeadWithoutBody = 4, - body = 5, - afterBody = 6 -} - -interface IHTMLDocumentStructure { - nodes: { - doctype: DocumentType; - documentElement: HTMLHtmlElement; - head: HTMLHeadElement; - body: HTMLBodyElement; - }; - level: HTMLDocumentStructureLevelEnum; -} - /** * HTML parser. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html */ export default class HTMLParser { private window: BrowserWindow; - private evaluateScripts: boolean = false; + private enableScripts: boolean = false; + private framesetOk: boolean = true; + private elementPointer: { + html: HTMLHtmlElement | null; + head: HTMLHeadElement | null; + body: HTMLBodyElement | null; + form: HTMLFormElement | null; + } = { + html: null, + head: null, + body: null, + form: null + }; private rootNode: Element | DocumentFragment | Document | null = null; private rootDocument: Document | null = null; private nodeStack: Node[] = []; private tagNameStack: string[] = []; - private documentStructure: IHTMLDocumentStructure | null = null; private startTagIndex = 0; private markupRegExp: RegExp | null = null; - private nextElement: Element | null = null; - private currentNode: Node | null = null; - private readState: MarkupReadStateEnum = MarkupReadStateEnum.any; + private readState: MarkupReadStateEnum = MarkupReadStateEnum.data; + private insertionMode: InsertionModeEnum = InsertionModeEnum.initial; /** * Constructor. * * @param window Window. * @param [options] Options. 
- * @param [options.evaluateScripts] Set to "true" to enable script execution + * @param [options.enableScripts] Set to "true" to enable script execution */ constructor( window: BrowserWindow, options?: { - evaluateScripts?: boolean; + enableScripts?: boolean; } ) { this.window = window; - if (options?.evaluateScripts) { - this.evaluateScripts = true; + if (options?.enableScripts) { + this.enableScripts = true; } } /** @@ -160,66 +186,30 @@ export default class HTMLParser { this.rootNode = rootNode || this.window.document.createDocumentFragment(); this.rootDocument = this.rootNode instanceof Document ? this.rootNode : this.window.document; this.nodeStack = [this.rootNode]; - this.tagNameStack = [null]; - this.currentNode = this.rootNode; - this.readState = MarkupReadStateEnum.any; - this.documentStructure = null; + this.readState = MarkupReadStateEnum.data; this.startTagIndex = 0; this.markupRegExp = new RegExp(MARKUP_REGEXP, 'gm'); + this.insertionMode = this.getInitialInsertionMode(this.rootNode); - if (this.rootNode instanceof Document) { - const { doctype, documentElement, head, body } = this.rootNode; - - if (!documentElement || !head || !body) { - throw new Error( - 'Failed to parse HTML: The root node must have "documentElement", "head" and "body".\n\nWe should not end up here and it is therefore a bug in Happy DOM. Please report this issue.' 
- ); - } - - this.documentStructure = { - nodes: { - doctype: doctype || null, - documentElement, - head, - body - }, - level: HTMLDocumentStructureLevelEnum.root - }; - } - - if (this.rootNode instanceof this.window.HTMLHtmlElement) { - const head = this.rootDocument.createElement('head'); - const body = this.rootDocument.createElement('body'); - while (this.rootNode[PropertySymbol.nodeArray].length > 0) { - this.rootNode[PropertySymbol.removeChild]( - this.rootNode[PropertySymbol.nodeArray][ - this.rootNode[PropertySymbol.nodeArray].length - 1 - ] - ); - } - - this.rootNode[PropertySymbol.appendChild](head); - this.rootNode[PropertySymbol.appendChild](body); - - this.documentStructure = { - nodes: { - doctype: null, - documentElement: this.rootNode, - head, - body - }, - level: HTMLDocumentStructureLevelEnum.documentElement - }; + if (rootNode instanceof Element || rootNode instanceof DocumentFragment) { + this.elementPointer.html = this.rootNode[PropertySymbol.ownerDocument].documentElement; + this.elementPointer.head = this.rootNode[PropertySymbol.ownerDocument].head; + this.elementPointer.body = this.rootNode[PropertySymbol.ownerDocument].body; + } else { + this.elementPointer.html = null; + this.elementPointer.head = null; + this.elementPointer.body = null; } let match: RegExpExecArray; let lastIndex = 0; + let tagName: string | null = null; html = String(html); while ((match = this.markupRegExp.exec(html))) { switch (this.readState) { - case MarkupReadStateEnum.any: + case MarkupReadStateEnum.data: // Plain text between tags. if ( match.index !== lastIndex && @@ -230,10 +220,9 @@ export default class HTMLParser { if (match[1]) { // Start tag. - this.nextElement = this.getStartTagElement(match[1]); - + tagName = StringUtility.asciiUpperCase(match[1]); this.startTagIndex = this.markupRegExp.lastIndex; - this.readState = MarkupReadStateEnum.startTag; + this.readState = MarkupReadStateEnum.tagName; } else if (match[2]) { // End tag. 
this.parseEndTag(match[2]); @@ -255,24 +244,24 @@ export default class HTMLParser { } break; - case MarkupReadStateEnum.startTag: + case MarkupReadStateEnum.tagName: // End of start tag // match[2] is matching an end tag in case the start tag wasn't closed (e.g. "<div\n</div>" instead of "<div>\n</div>"). // match[7] is matching "/>" (e.g. "<img/>"). // match[8] is matching ">" (e.g. "<div>"). if (match[7] || match[8] || match[2]) { - if (this.nextElement) { - const attributeString = html.substring( + const attributes = this.parseAttributes( + html.substring( this.startTagIndex, match[2] ? this.markupRegExp.lastIndex - 1 : match.index - ); - const isSelfClosed = !!match[7]; + ) + ); - this.parseEndOfStartTag(attributeString, isSelfClosed); - } else { - // If "nextElement" is set to null, the tag is not allowed (<html>, <head> and <body> are not allowed in an HTML fragment or to be nested). - this.readState = MarkupReadStateEnum.any; + // If attributes are null, the attribute string wasn't complete. + // We should continue parsing until the next end of start tag. + if (attributes !== null) { + this.parseStartTag(tagName, attributes, !!match[7]); } } break; @@ -298,16 +287,11 @@ export default class HTMLParser { this.parseComment('?' + html.substring(this.startTagIndex, match.index)); } break; - case MarkupReadStateEnum.rawTextElement: - // End tag of raw text content. - - //