Skip to content

Commit

Permalink
LibWeb: Lazily merge text nodes when invoking HTML parser incrementally
Browse files Browse the repository at this point in the history
Instead of always inserting a new text node, we now continue appending
to an existing text node if the parser's character insertion point is
a suitable text node.

This fixes an issue where multiple invocations of document.write() would
create unnecessary sequences of text nodes. Such sequences are now
merged automatically.

19 new passes in WPT/html/syntax/parsing/ :^)
  • Loading branch information
awesomekling committed Feb 19, 2025
1 parent 08d1a25 commit f93bb15
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 29 deletions.
13 changes: 8 additions & 5 deletions Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1209,24 +1209,24 @@ void HTMLParser::parse_generic_raw_text_element(HTMLToken& token)
m_insertion_mode = InsertionMode::Text;
}

static bool is_empty_text_node(DOM::Node const* node)
static bool is_text_node(DOM::Node const* node)
{
return node && node->is_text() && static_cast<DOM::Text const*>(node)->data().is_empty();
return node && node->is_text();
}

DOM::Text* HTMLParser::find_character_insertion_node()
{
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
if (adjusted_insertion_location.insert_before_sibling) {
if (is_empty_text_node(adjusted_insertion_location.insert_before_sibling->previous_sibling()))
if (is_text_node(adjusted_insertion_location.insert_before_sibling->previous_sibling()))
return static_cast<DOM::Text*>(adjusted_insertion_location.insert_before_sibling->previous_sibling());
auto new_text_node = realm().create<DOM::Text>(document(), String {});
adjusted_insertion_location.parent->insert_before(*new_text_node, *adjusted_insertion_location.insert_before_sibling);
return new_text_node;
}
if (adjusted_insertion_location.parent->is_document())
return nullptr;
if (is_empty_text_node(adjusted_insertion_location.parent->last_child()))
if (is_text_node(adjusted_insertion_location.parent->last_child()))
return static_cast<DOM::Text*>(adjusted_insertion_location.parent->last_child());
auto new_text_node = realm().create<DOM::Text>(document(), String {});
MUST(adjusted_insertion_location.parent->append_child(*new_text_node));
Expand All @@ -1237,7 +1237,10 @@ void HTMLParser::flush_character_insertions()
{
if (m_character_insertion_builder.is_empty())
return;
m_character_insertion_node->set_data(MUST(m_character_insertion_builder.to_string()));
if (m_character_insertion_node->data().is_empty())
m_character_insertion_node->set_data(MUST(m_character_insertion_builder.to_string()));
else
(void)m_character_insertion_node->append_data(MUST(m_character_insertion_builder.to_string()));
m_character_insertion_builder.clear();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ Harness status: OK

Found 111 tests

94 Pass
17 Fail
Fail html5lib_template.html 010950d55f4eccf16e9c4af1d263bb747294c646
Fail html5lib_template.html a838bd54410cef059a42eea9606356488e16535b
111 Pass
Pass html5lib_template.html 010950d55f4eccf16e9c4af1d263bb747294c646
Pass html5lib_template.html a838bd54410cef059a42eea9606356488e16535b
Pass html5lib_template.html 27fb9111f6675a7e033b867480c0afddcda161a6
Fail html5lib_template.html aee883a65775489399a003b2371d58248a6aff6f
Pass html5lib_template.html aee883a65775489399a003b2371d58248a6aff6f
Pass html5lib_template.html 89b17b54ab343191bf74ef5434f4d2cfac40ea97
Pass html5lib_template.html c4433556c7414cfd71f27b420f1ffc4348774f5e
Fail html5lib_template.html 3dcce7d97108b3e9ea7fa96f240ac62bf280e74b
Pass html5lib_template.html 3dcce7d97108b3e9ea7fa96f240ac62bf280e74b
Pass html5lib_template.html a1f587f7ea85ccfe294bd45bfb501e850cb979e0
Pass html5lib_template.html cd26a7832f13bdc135697321ca6c2fecdca6ef5d
Pass html5lib_template.html e30571d90b0e56864499961eb7be955994cf72e2
Expand Down Expand Up @@ -47,7 +46,7 @@ Pass html5lib_template.html f915e7b3407c24b28c3aad318e5693cc774020f4
Pass html5lib_template.html 3c5eb261787b3d15aff86fa61de773fd7e439b0e
Pass html5lib_template.html 2b57775750c198d4b98b23aed74ff80a866a01f5
Pass html5lib_template.html dc3d016610f3ab532525a6c2871f03d6b62b0168
Fail html5lib_template.html 6a184d71d00580a26a8b6bd97aafe5503339f3f6
Pass html5lib_template.html 6a184d71d00580a26a8b6bd97aafe5503339f3f6
Pass html5lib_template.html ce570a6c4bcee8b72a03e25508c6dd72e3cc6c35
Pass html5lib_template.html e0c3d922f7b1f1654f02f716c3d9b31198ce3385
Pass html5lib_template.html 87e67242bf6debcf3b7dca852d10aa0f7b625b28
Expand All @@ -61,7 +60,7 @@ Pass html5lib_template.html 275060925a844cb51b29bae660301de9780d68c8
Pass html5lib_template.html 9f82f6ec4c0a48c1d4dfbe6803b94abd553aea88
Pass html5lib_template.html f094bf7e94a88b86c80a0643e70c8e5ff3354698
Pass html5lib_template.html 35a07ec3b4bf26ea407dc1ddf52f14195a714059
Fail html5lib_template.html 24faa53b271f994a4ff31d5796c8ff47d6f2c3e6
Pass html5lib_template.html 24faa53b271f994a4ff31d5796c8ff47d6f2c3e6
Pass html5lib_template.html 0f1c491b58c2dd3c402a62e37f833bc1f1db8d21
Pass html5lib_template.html 868d918a7b5d8b5c065c15229492bc2022bfbcba
Pass html5lib_template.html 0538efa44e857596c556033a3821d424378aea3f
Expand All @@ -73,16 +72,16 @@ Pass html5lib_template.html 9bd9687a65f258adc24450fc5cbd781fff6c038a
Pass html5lib_template.html db1baeb846d718c773324746524fbd68f2e9436e
Pass html5lib_template.html 4b0ce46c611dbcc016db272ef007f302bee0c897
Pass html5lib_template.html 1a735e1c7f28f8701f3c7fd5e9404b8911916086
Fail html5lib_template.html 0686eedec06b2db1dc283fac92c1ef1a33114c71
Pass html5lib_template.html 0686eedec06b2db1dc283fac92c1ef1a33114c71
Pass html5lib_template.html d4dfb87ce626f12923056a6cd77448eaf4660ac2
Pass html5lib_template.html 1f295920f2937b2c8023b3761c43a0d4d9e5353c
Pass html5lib_template.html 3b91fa08fad923d387d924cff37fbf6b4c3a5712
Pass html5lib_template.html 45a1c1ad5d99ad67c573096a79253996a664e01b
Pass html5lib_template.html 0fe3a66773c6048c8f6f2c92f2611f65be972ec1
Pass html5lib_template.html be40897ca411e1507197c31ab2a9f9752a05f769
Fail html5lib_template.html dcfb1048ed5c40e406b4fbf0cde24c826713907f
Fail html5lib_template.html 78263aeea68ac97903598682013bae9c0c21d547
Fail html5lib_template.html 5aa177ef1a35bf4502dcb867d8e666288982ba99
Pass html5lib_template.html dcfb1048ed5c40e406b4fbf0cde24c826713907f
Pass html5lib_template.html 78263aeea68ac97903598682013bae9c0c21d547
Pass html5lib_template.html 5aa177ef1a35bf4502dcb867d8e666288982ba99
Pass html5lib_template.html 5d303375907dc4d4380b477e0317c17b660613e9
Pass html5lib_template.html d822f726927c34b92fe102b13e63920850878f6a
Pass html5lib_template.html 07acdcaeb4fa639296d46673cf28823ddf2a6ca7
Expand All @@ -97,21 +96,21 @@ Pass html5lib_template.html 1125967cbbcd404f4cb14d48270b8ec778970d77
Pass html5lib_template.html 32c963e164b9ec82c60e490bb141c1ccc70b992f
Pass html5lib_template.html 574a95fc9c9f2de3aeaa0c9ee1e6967fc3d4770d
Pass html5lib_template.html 332863a7f9e61bff32bd3427ede7a088b790d453
Fail html5lib_template.html 2121db07146781773df9e53b94fa921a805175ce
Fail html5lib_template.html 8675de267cd7e34f2febdee3feb665614d1562fe
Pass html5lib_template.html 2121db07146781773df9e53b94fa921a805175ce
Pass html5lib_template.html 8675de267cd7e34f2febdee3feb665614d1562fe
Pass html5lib_template.html c5d26ad923a2b1e988ddd378ca4fb26eb48353e1
Pass html5lib_template.html eec1542e2fa0e9eafb7f8d4a51eae56b5a31b3c8
Pass html5lib_template.html b79387a54c3b136db0f28ed96555ff683b3947fe
Pass html5lib_template.html c477a29a4deb32d072a415fa809a84a4f2beee0c
Pass html5lib_template.html 26e4480c08e1f5f7b6ac8b8c1832ab0312e3b7c5
Pass html5lib_template.html 24b3b50fdd0bf8d5cf2ebaa6bf502d7bcfde1da4
Fail html5lib_template.html d3704c68528357189eb5826ab66eea071d6137a5
Pass html5lib_template.html d3704c68528357189eb5826ab66eea071d6137a5
Pass html5lib_template.html d958f7d44faf772d1fb60f1a8f186f837ca735d9
Pass html5lib_template.html 3fc4d97fa68fc2658356bdbd4e051c867de8de53
Fail html5lib_template.html 94820107bbf3fab3f82de1f717e8413aead7d3a6
Pass html5lib_template.html 94820107bbf3fab3f82de1f717e8413aead7d3a6
Pass html5lib_template.html ed920bca1fe1f5ad471bbd81adf8a41f3e2d9b06
Pass html5lib_template.html 657c00ebdda37ae060cc69633ed98482ccc29e18
Pass html5lib_template.html 649fc955a4b60ab2a5b881d94c9493eb4a545002
Fail html5lib_template.html 977041956eb9c7b9db73935168aba92f77c079f6
Fail html5lib_template.html fafee395fea124791df59bafeb1136342b64d3c6
Fail html5lib_template.html d5a8beecf5d3c53e947772ad887808d132334aa1
Pass html5lib_template.html 977041956eb9c7b9db73935168aba92f77c079f6
Pass html5lib_template.html fafee395fea124791df59bafeb1136342b64d3c6
Pass html5lib_template.html d5a8beecf5d3c53e947772ad887808d132334aa1
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ Harness status: OK

Found 35 tests

34 Pass
1 Fail
35 Pass
Pass html5lib_tests18.html 7471f6a45872ac6d70f69fc3f4e10b13c7c1ac45
Pass html5lib_tests18.html 0d0085749435e0d0ddb56c9db809bfcbbc995767
Pass html5lib_tests18.html 9052b915187ac505be8958ab5e9f8d4ca0bfde81
Expand All @@ -18,7 +17,7 @@ Pass html5lib_tests18.html 9ac591f40aae947707f7d5e83947712bbeca9574
Pass html5lib_tests18.html d44cf9a5fcf0759fce78497c7f10e3019c361274
Pass html5lib_tests18.html e4eb33f77ae641718853d2cfddbdb2eece6b266b
Pass html5lib_tests18.html 53ce5b102579af9830bf561b634af681bbdb5dfd
Fail html5lib_tests18.html cd24d93d1235e4aabbdcfab1d3acdbe488325666
Pass html5lib_tests18.html cd24d93d1235e4aabbdcfab1d3acdbe488325666
Pass html5lib_tests18.html abae66ad61e145e32fb4fc4946b839f56b16bb3d
Pass html5lib_tests18.html 9d38e0731d08aec061003c7783c70e682221378b
Pass html5lib_tests18.html 9df08923a41bf58c6291f9ce6d9e36a29d336bd6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ Harness status: OK

Found 1 tests

1 Fail
Fail document.write \r\n
1 Pass
Pass document.write \r\n

0 comments on commit f93bb15

Please sign in to comment.