From 04f889995b669abc3a4945a6177cc81a9dbff0fa Mon Sep 17 00:00:00 2001 From: Kuba Niegowski Date: Sun, 16 Jan 2022 18:48:22 +0100 Subject: [PATCH 1/2] HTMLDataProcessor should preserve leading HTML comments, script and style tags if only content of the document body was provided. --- .../src/dataprocessor/htmldataprocessor.js | 38 +- .../tests/dataprocessor/htmldataprocessor.js | 324 ++++++++++++------ 2 files changed, 221 insertions(+), 141 deletions(-) diff --git a/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js b/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js index e88c4c768a0..94566de2c68 100644 --- a/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js +++ b/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js @@ -12,8 +12,6 @@ import BasicHtmlWriter from './basichtmlwriter'; import DomConverter from '../view/domconverter'; -import isComment from '@ckeditor/ckeditor5-utils/src/dom/iscomment'; - /** * The HTML data processor class. * This data processor implementation uses HTML as input and output data. @@ -116,37 +114,15 @@ export default class HtmlDataProcessor { * @returns {DocumentFragment} */ _toDom( data ) { - const document = this.domParser.parseFromString( data, 'text/html' ); - const fragment = document.createDocumentFragment(); - - // The rules for parsing an HTML string can be read on https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml. - // - // In short, parsing tokens in an HTML string starts with the so-called "initial" insertion mode. When a DOM parser is in this - // state and encounters a comment node, it inserts this comment node as the last child of the newly-created `HTMLDocument` object. - // The parser then proceeds to successive insertion modes during parsing subsequent tokens and appends in the `HTMLDocument` object - // other nodes (like , , ). This causes that the first leading comments from HTML string become the first nodes - // in the `HTMLDocument` object, but not in the collection, because they are ultimately located before the element. - // - // Therefore, so that such leading comments do not disappear, they all are moved from the `HTMLDocument` object to the document - // fragment, until the element is encountered. - // - // See: https://github.com/ckeditor/ckeditor5/issues/9861. - let documentChildNode = document.firstChild; - - while ( !documentChildNode.isSameNode( document.documentElement ) ) { - const node = documentChildNode; - - documentChildNode = documentChildNode.nextSibling; - - // It seems that `DOMParser#parseFromString()` adds only comment nodes directly to the `HTMLDocument` object, before the - // node. The condition below is just to be sure we are moving only comment nodes. - - /* istanbul ignore else */ - if ( isComment( node ) ) { - fragment.appendChild( node ); - } + // Wrap data with a so leading non-layout nodes (like ' + '

' + '' + 'Paragraph' + - '' + - '

' + - '' + - '' - ); + '

' + ); - expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE ); - expect( paragraph.outerHTML ).to.equal( - '

' + - '' + - 'Paragraph' + - '' + - '

' - ); - } ); + expect( bodyDocumentFragment.childNodes.length ).to.equal( 4 ); - it( 'should insert leading comment nodes from HTML string into collection #1', () => { - const bodyDocumentFragment = dataProcessor._toDom( - '' + - '' + - '

Heading

' + - '

Paragraph

' + - '' + - '' - ); + const [ + comment1, + style, + script, + paragraph + ] = bodyDocumentFragment.childNodes; + + expect( comment1.nodeType ).to.equal( Node.COMMENT_NODE ); + expect( comment1.data ).to.equal( ' Comment 1 ' ); + + expect( style.nodeType ).to.equal( Node.ELEMENT_NODE ); + expect( style.outerHTML ).to.equal( '' ); - const [ - comment1, - comment2, - heading, - paragraph, - comment3, - comment4 - ] = bodyDocumentFragment.childNodes; + expect( script.nodeType ).to.equal( Node.ELEMENT_NODE ); + expect( script.outerHTML ).to.equal( '' ); - expect( bodyDocumentFragment.childNodes.length ).to.equal( 6 ); + expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE ); + expect( paragraph.outerHTML ).to.equal( '

Paragraph

' ); + } ); + } ); - expect( comment1.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment1.data ).to.equal( ' Comment 1 ' ); + describe( 'full HTML document', () => { + it( 'should ignore leading non-layout elements if tag is provided', () => { + const bodyDocumentFragment = dataProcessor._toDom( + '' + + '' + + '' + + '' + + '

' + + '' + + 'Paragraph' + + '

' + + '' + ); - expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment2.data ).to.equal( ' Comment 2 ' ); + expect( bodyDocumentFragment.childNodes.length ).to.equal( 1 ); - expect( comment3.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment3.data ).to.equal( ' Comment 3 ' ); + const [ paragraph ] = bodyDocumentFragment.childNodes; + const [ comment2, text ] = paragraph.childNodes; - expect( comment4.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment4.data ).to.equal( ' Comment 4 ' ); + expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE ); + expect( comment2.data ).to.equal( ' Comment 2 ' ); - expect( heading.nodeType ).to.equal( Node.ELEMENT_NODE ); - expect( heading.outerHTML ).to.equal( '

Heading

' ); + expect( text.nodeType ).to.equal( Node.TEXT_NODE ); + expect( text.data ).to.equal( 'Paragraph' ); + } ); - expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE ); - expect( paragraph.outerHTML ).to.equal( '

Paragraph

' ); - } ); + it( 'should ignore leading non-layout elements if tag is provided', () => { + const bodyDocumentFragment = dataProcessor._toDom( + '' + + '' + + '' + + '' + + '

' + + '' + + 'Paragraph' + + '

' + + '' + ); - it( 'should insert leading comment nodes from HTML string into collection #2', () => { - // The existence of the tag causes that DOMParser inserts this element into the . Moreover, all subsequent comment - // nodes (up until the node, that is not valid inside the , which is the

in our case) are also inserted into the - // . So both and nodes, that are located between the and

in the HTML - // string, are insterted into the . - const bodyDocumentFragment = dataProcessor._toDom( - '' + - '' + - '' + // inserted into the by DOMParser#parseFromString() - '' + // inserted into the by DOMParser#parseFromString() - '' + // inserted into the by DOMParser#parseFromString() - '

Heading

' + - '

Paragraph

' + - '' + - '' - ); + expect( bodyDocumentFragment.childNodes.length ).to.equal( 1 ); - const [ - comment1, - comment2, - heading, - paragraph, - comment5, - comment6 - ] = bodyDocumentFragment.childNodes; + const [ paragraph ] = bodyDocumentFragment.childNodes; + const [ comment2, text ] = paragraph.childNodes; - expect( bodyDocumentFragment.childNodes.length ).to.equal( 6 ); + expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE ); + expect( comment2.data ).to.equal( ' Comment 2 ' ); - expect( comment1.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment1.data ).to.equal( ' Comment 1 ' ); + expect( text.nodeType ).to.equal( Node.TEXT_NODE ); + expect( text.data ).to.equal( 'Paragraph' ); + } ); - expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment2.data ).to.equal( ' Comment 2 ' ); + it( 'should ignore leading non-layout elements if tag is provided', () => { + const bodyDocumentFragment = dataProcessor._toDom( + '' + + '' + + '' + + '' + + '

' + + '' + + 'Paragraph' + + '

' + ); - expect( comment5.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment5.data ).to.equal( ' Comment 5 ' ); + expect( bodyDocumentFragment.childNodes.length ).to.equal( 1 ); - expect( comment6.nodeType ).to.equal( Node.COMMENT_NODE ); - expect( comment6.data ).to.equal( ' Comment 6 ' ); + const [ paragraph ] = bodyDocumentFragment.childNodes; + const [ comment2, text ] = paragraph.childNodes; - expect( heading.nodeType ).to.equal( Node.ELEMENT_NODE ); - expect( heading.outerHTML ).to.equal( '

Heading

' ); + expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE ); + expect( comment2.data ).to.equal( ' Comment 2 ' ); - expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE ); - expect( paragraph.outerHTML ).to.equal( '

Paragraph

' ); + expect( text.nodeType ).to.equal( Node.TEXT_NODE ); + expect( text.data ).to.equal( 'Paragraph' ); + } ); } ); } ); From f17a94152009bcfc5fd8e1aedf691267c32b79f7 Mon Sep 17 00:00:00 2001 From: Aleksander Nowodzinski Date: Mon, 17 Jan 2022 11:16:24 +0100 Subject: [PATCH 2/2] Applied review suggestions. --- .../ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js b/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js index e11b1e6ab69..28565ef3626 100644 --- a/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js +++ b/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js @@ -334,7 +334,7 @@ describe( 'HtmlDataProcessor', () => { expect( text.data ).to.equal( 'Paragraph' ); } ); - it( 'should ignore leading non-layout elements if tag is provided', () => { + it( 'should ignore leading non-layout elements if tag is provided', () => { const bodyDocumentFragment = dataProcessor._toDom( '' + '' +