From 04f889995b669abc3a4945a6177cc81a9dbff0fa Mon Sep 17 00:00:00 2001
From: Kuba Niegowski
Date: Sun, 16 Jan 2022 18:48:22 +0100
Subject: [PATCH 1/2] HTMLDataProcessor should preserve leading HTML comments,
script and style tags if only content of the document body was provided.
---
.../src/dataprocessor/htmldataprocessor.js | 38 +-
.../tests/dataprocessor/htmldataprocessor.js | 324 ++++++++++++------
2 files changed, 221 insertions(+), 141 deletions(-)
diff --git a/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js b/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js
index e88c4c768a0..94566de2c68 100644
--- a/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js
+++ b/packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js
@@ -12,8 +12,6 @@
import BasicHtmlWriter from './basichtmlwriter';
import DomConverter from '../view/domconverter';
-import isComment from '@ckeditor/ckeditor5-utils/src/dom/iscomment';
-
/**
* The HTML data processor class.
* This data processor implementation uses HTML as input and output data.
@@ -116,37 +114,15 @@ export default class HtmlDataProcessor {
* @returns {DocumentFragment}
*/
_toDom( data ) {
- const document = this.domParser.parseFromString( data, 'text/html' );
- const fragment = document.createDocumentFragment();
-
- // The rules for parsing an HTML string can be read on https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml.
- //
- // In short, parsing tokens in an HTML string starts with the so-called "initial" insertion mode. When a DOM parser is in this
- // state and encounters a comment node, it inserts this comment node as the last child of the newly-created `HTMLDocument` object.
- // The parser then proceeds to successive insertion modes during parsing subsequent tokens and appends in the `HTMLDocument` object
- // other nodes (like , , ). This causes that the first leading comments from HTML string become the first nodes
- // in the `HTMLDocument` object, but not in the collection, because they are ultimately located before the element.
- //
- // Therefore, so that such leading comments do not disappear, they all are moved from the `HTMLDocument` object to the document
- // fragment, until the element is encountered.
- //
- // See: https://github.com/ckeditor/ckeditor5/issues/9861.
- let documentChildNode = document.firstChild;
-
- while ( !documentChildNode.isSameNode( document.documentElement ) ) {
- const node = documentChildNode;
-
- documentChildNode = documentChildNode.nextSibling;
-
- // It seems that `DOMParser#parseFromString()` adds only comment nodes directly to the `HTMLDocument` object, before the
- // node. The condition below is just to be sure we are moving only comment nodes.
-
- /* istanbul ignore else */
- if ( isComment( node ) ) {
- fragment.appendChild( node );
- }
+ // Wrap data with a so leading non-layout nodes (like ' +
'' +
'' +
'Paragraph' +
- '' +
- '
' +
- '' +
- ''
- );
+ '
'
+ );
- expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE );
- expect( paragraph.outerHTML ).to.equal(
- '' +
- '' +
- 'Paragraph' +
- '' +
- '
'
- );
- } );
+ expect( bodyDocumentFragment.childNodes.length ).to.equal( 4 );
- it( 'should insert leading comment nodes from HTML string into collection #1', () => {
- const bodyDocumentFragment = dataProcessor._toDom(
- '' +
- '' +
- 'Heading
' +
- 'Paragraph
' +
- '' +
- ''
- );
+ const [
+ comment1,
+ style,
+ script,
+ paragraph
+ ] = bodyDocumentFragment.childNodes;
+
+ expect( comment1.nodeType ).to.equal( Node.COMMENT_NODE );
+ expect( comment1.data ).to.equal( ' Comment 1 ' );
+
+ expect( style.nodeType ).to.equal( Node.ELEMENT_NODE );
+ expect( style.outerHTML ).to.equal( '' );
- const [
- comment1,
- comment2,
- heading,
- paragraph,
- comment3,
- comment4
- ] = bodyDocumentFragment.childNodes;
+ expect( script.nodeType ).to.equal( Node.ELEMENT_NODE );
+ expect( script.outerHTML ).to.equal( '' );
- expect( bodyDocumentFragment.childNodes.length ).to.equal( 6 );
+ expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE );
+ expect( paragraph.outerHTML ).to.equal( 'Paragraph
' );
+ } );
+ } );
- expect( comment1.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment1.data ).to.equal( ' Comment 1 ' );
+ describe( 'full HTML document', () => {
+ it( 'should ignore leading non-layout elements if tag is provided', () => {
+ const bodyDocumentFragment = dataProcessor._toDom(
+ '' +
+ '' +
+ '' +
+ '' +
+ '' +
+ '' +
+ 'Paragraph' +
+ '
' +
+ ''
+ );
- expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment2.data ).to.equal( ' Comment 2 ' );
+ expect( bodyDocumentFragment.childNodes.length ).to.equal( 1 );
- expect( comment3.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment3.data ).to.equal( ' Comment 3 ' );
+ const [ paragraph ] = bodyDocumentFragment.childNodes;
+ const [ comment2, text ] = paragraph.childNodes;
- expect( comment4.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment4.data ).to.equal( ' Comment 4 ' );
+ expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE );
+ expect( comment2.data ).to.equal( ' Comment 2 ' );
- expect( heading.nodeType ).to.equal( Node.ELEMENT_NODE );
- expect( heading.outerHTML ).to.equal( 'Heading
' );
+ expect( text.nodeType ).to.equal( Node.TEXT_NODE );
+ expect( text.data ).to.equal( 'Paragraph' );
+ } );
- expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE );
- expect( paragraph.outerHTML ).to.equal( 'Paragraph
' );
- } );
+ it( 'should ignore leading non-layout elements if tag is provided', () => {
+ const bodyDocumentFragment = dataProcessor._toDom(
+ '' +
+ '' +
+ '' +
+ '' +
+ '' +
+ '' +
+ 'Paragraph' +
+ '
' +
+ ''
+ );
- it( 'should insert leading comment nodes from HTML string into collection #2', () => {
- // The existence of the tag causes that DOMParser inserts this element into the . Moreover, all subsequent comment
- // nodes (up until the node, that is not valid inside the , which is the in our case) are also inserted into the
- // . So both and nodes, that are located between the and in the HTML
- // string, are insterted into the .
- const bodyDocumentFragment = dataProcessor._toDom(
- '' +
- '' +
- '' + // inserted into the by DOMParser#parseFromString()
- '' + // inserted into the by DOMParser#parseFromString()
- '' + // inserted into the by DOMParser#parseFromString()
- 'Heading
' +
- '
Paragraph
' +
- '' +
- ''
- );
+ expect( bodyDocumentFragment.childNodes.length ).to.equal( 1 );
- const [
- comment1,
- comment2,
- heading,
- paragraph,
- comment5,
- comment6
- ] = bodyDocumentFragment.childNodes;
+ const [ paragraph ] = bodyDocumentFragment.childNodes;
+ const [ comment2, text ] = paragraph.childNodes;
- expect( bodyDocumentFragment.childNodes.length ).to.equal( 6 );
+ expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE );
+ expect( comment2.data ).to.equal( ' Comment 2 ' );
- expect( comment1.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment1.data ).to.equal( ' Comment 1 ' );
+ expect( text.nodeType ).to.equal( Node.TEXT_NODE );
+ expect( text.data ).to.equal( 'Paragraph' );
+ } );
- expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment2.data ).to.equal( ' Comment 2 ' );
+ it( 'should ignore leading non-layout elements if tag is provided', () => {
+ const bodyDocumentFragment = dataProcessor._toDom(
+ '' +
+ '' +
+ '' +
+ '' +
+ '' +
+ '' +
+ 'Paragraph' +
+ '
'
+ );
- expect( comment5.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment5.data ).to.equal( ' Comment 5 ' );
+ expect( bodyDocumentFragment.childNodes.length ).to.equal( 1 );
- expect( comment6.nodeType ).to.equal( Node.COMMENT_NODE );
- expect( comment6.data ).to.equal( ' Comment 6 ' );
+ const [ paragraph ] = bodyDocumentFragment.childNodes;
+ const [ comment2, text ] = paragraph.childNodes;
- expect( heading.nodeType ).to.equal( Node.ELEMENT_NODE );
- expect( heading.outerHTML ).to.equal( 'Heading
' );
+ expect( comment2.nodeType ).to.equal( Node.COMMENT_NODE );
+ expect( comment2.data ).to.equal( ' Comment 2 ' );
- expect( paragraph.nodeType ).to.equal( Node.ELEMENT_NODE );
- expect( paragraph.outerHTML ).to.equal( 'Paragraph
' );
+ expect( text.nodeType ).to.equal( Node.TEXT_NODE );
+ expect( text.data ).to.equal( 'Paragraph' );
+ } );
} );
} );
From f17a94152009bcfc5fd8e1aedf691267c32b79f7 Mon Sep 17 00:00:00 2001
From: Aleksander Nowodzinski
Date: Mon, 17 Jan 2022 11:16:24 +0100
Subject: [PATCH 2/2] Applied review suggestions.
---
.../ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js b/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js
index e11b1e6ab69..28565ef3626 100644
--- a/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js
+++ b/packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js
@@ -334,7 +334,7 @@ describe( 'HtmlDataProcessor', () => {
expect( text.data ).to.equal( 'Paragraph' );
} );
- it( 'should ignore leading non-layout elements if tag is provided', () => {
+ it( 'should ignore leading non-layout elements if tag is provided', () => {
const bodyDocumentFragment = dataProcessor._toDom(
'' +
'' +