Skip to content

Commit

Permalink
LibWeb: Remember when HTML parser should ignore next line feed character
Browse files Browse the repository at this point in the history
There's a quirk in HTML where the parser should ignore any line feed
character immediately following a `pre` or `textarea` start tag.

This was working fine when we could peek ahead in the input stream and
see the next token, but didn't work in character-at-a-time parsing with
document.write().

This commit adds a "can ignore next line feed character" parser flag
that is maintained across invocations, making the quirk work in this
parsing mode as well.

20 new passes in WPT/html/syntax/parsing/ :^)
  • Loading branch information
awesomekling committed Feb 19, 2025
1 parent f93bb15 commit c632ee6
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 61 deletions.
39 changes: 15 additions & 24 deletions Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,13 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point

dbgln_if(HTML_PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_string());

if (m_next_line_feed_can_be_ignored) {
m_next_line_feed_can_be_ignored = false;
if (token.is_character() && token.code_point() == '\n') {
continue;
}
}

// https://html.spec.whatwg.org/multipage/parsing.html#tree-construction-dispatcher
// As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher:
if (m_stack_of_open_elements.is_empty()
Expand Down Expand Up @@ -2017,20 +2024,13 @@ void HTMLParser::handle_in_body(HTMLToken& token)
// Insert an HTML element for the token.
(void)insert_html_element(token);

// AD-HOC: We move this step before handling LINE FEED below, to ensure the flag is updated before
// we process the next token. This is necessary due to how we implement token reprocessing.
// Set the frameset-ok flag to "not ok".
m_frameset_ok = false;

// If the next token is a U+000A LINE FEED (LF) character token,
// then ignore that token and move on to the next one.
// (Newlines at the start of pre blocks are ignored as an authoring convenience.)
auto next_token = m_tokenizer.next_token();
if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') {
// Ignore it.
} else if (next_token.has_value()) {
process_using_the_rules_for(m_insertion_mode, next_token.value());
}
m_next_line_feed_can_be_ignored = true;

// Set the frameset-ok flag to "not ok".
m_frameset_ok = false;

return;
}
Expand Down Expand Up @@ -2567,16 +2567,14 @@ void HTMLParser::handle_in_body(HTMLToken& token)
// 1. Insert an HTML element for the token.
(void)insert_html_element(token);

// FIXME: 2. If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
// 2. If the next token is a U+000A LINE FEED (LF) character token,
// then ignore that token and move on to the next one.
// (Newlines at the start of textarea elements are ignored as an authoring convenience.)
m_next_line_feed_can_be_ignored = true;

// 3. Switch the tokenizer to the RCDATA state.
m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);

// If the next token is a U+000A LINE FEED (LF) character token,
// then ignore that token and move on to the next one.
// (Newlines at the start of pre blocks are ignored as an authoring convenience.)
auto next_token = m_tokenizer.next_token();

// 4. Let the original insertion mode be the current insertion mode.
m_original_insertion_mode = m_insertion_mode;

Expand All @@ -2585,13 +2583,6 @@ void HTMLParser::handle_in_body(HTMLToken& token)

// 6. Switch the insertion mode to "text".
m_insertion_mode = InsertionMode::Text;

// FIXME: This step is not in the spec.
if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') {
// Ignore it.
} else if (next_token.has_value()) {
process_using_the_rules_for(m_insertion_mode, next_token.value());
}
return;
}

Expand Down
2 changes: 2 additions & 0 deletions Libraries/LibWeb/HTML/Parser/HTMLParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class HTMLParser final : public JS::Cell {

HTMLTokenizer m_tokenizer;

bool m_next_line_feed_can_be_ignored { false };

bool m_foster_parenting { false };
bool m_frameset_ok { true };
bool m_parsing_fragment { false };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Harness status: OK

Found 24 tests

17 Pass
7 Fail
19 Pass
5 Fail
Pass html5lib_html5test-com.html 71bd5e6b9e907e65295b6d670627e0da4a8a65ed
Pass html5lib_html5test-com.html 32cd504d36a6db3584b716b3681ab4b0741423b3
Pass html5lib_html5test-com.html f0bf0506a2d3e5ca4aa5f14a1f260e405882827e
Expand All @@ -18,8 +18,8 @@ Fail html5lib_html5test-com.html bcbeb84f40e56a642b794d514e97e3ec303d4a79
Pass html5lib_html5test-com.html 1cbb987dd0a35af3a5b2e4fc11eba36a60eba03d
Pass html5lib_html5test-com.html 5b5e75eca2f5c80e1c4d5676254b9891090e288e
Pass html5lib_html5test-com.html 93e966e2edad3297ecb159f3983bdd2dc84f829e
Fail html5lib_html5test-com.html 7a02a2d7ab875dbeedc9a34c6c27b6119bd6d1f0
Fail html5lib_html5test-com.html 46615acdb9dd6231e2a65fed5bcce7e19f086d03
Pass html5lib_html5test-com.html 7a02a2d7ab875dbeedc9a34c6c27b6119bd6d1f0
Pass html5lib_html5test-com.html 46615acdb9dd6231e2a65fed5bcce7e19f086d03
Pass html5lib_html5test-com.html 381de12234a699cbfb775b3ca7c679f357e7403e
Pass html5lib_html5test-com.html 72736fc894b2077928559cc4284a102635cac898
Pass html5lib_html5test-com.html 290d5e7fa9684038411e78c4b0e0ade83150eeea
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Harness status: OK

Found 33 tests

30 Pass
3 Fail
32 Pass
1 Fail
Pass html5lib_plain-text-unsafe.html 7e4ca4cb5e73852744a876bf8652dd2c8998d94a
Pass html5lib_plain-text-unsafe.html e552342bdd3ac62316bd91126556512683f3d4a9
Pass html5lib_plain-text-unsafe.html 9112cef60139e6988b66334e522777329051442a
Expand All @@ -28,8 +28,8 @@ Pass html5lib_plain-text-unsafe.html a1e08cb99d89381a1c997fcd60bad23c029c4500
Pass html5lib_plain-text-unsafe.html 26d850208425cc885d4d0143909cf341f61fa1f1
Pass html5lib_plain-text-unsafe.html 68f0365c01dc386c706edd2b18672f9d85caaa2e
Fail html5lib_plain-text-unsafe.html e415a2e7cf090e2c308af905d52c5f8163ae52ce
Fail html5lib_plain-text-unsafe.html 822702de65b80ec8e79da19335ab9d6a49f6ec6a
Fail html5lib_plain-text-unsafe.html ee8b017ab043ff51b593787961626acb4c6488cd
Pass html5lib_plain-text-unsafe.html 822702de65b80ec8e79da19335ab9d6a49f6ec6a
Pass html5lib_plain-text-unsafe.html ee8b017ab043ff51b593787961626acb4c6488cd
Pass html5lib_plain-text-unsafe.html 304960c795639128844445166238350682ba0516
Pass html5lib_plain-text-unsafe.html 275bb0b518ec00b1e64a28cb9088989371fca9d9
Pass html5lib_plain-text-unsafe.html 068ac565b7c7bdad572f26dafb4580483cdbc6f7
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ Harness status: OK

Found 112 tests

111 Pass
1 Fail
112 Pass
Pass html5lib_tests1.html 4235382bf15f93f7dd1096832ae74cc71edef4d7
Pass html5lib_tests1.html ad8515e9db0abd26469d0d2e46b42cebf606d4f3
Pass html5lib_tests1.html 2433aa5c088d78da9e7824e499f639177f56625d
Expand Down Expand Up @@ -92,7 +91,7 @@ Pass html5lib_tests1.html 06ed0f32cfd261010c9d810ff8317ef96b47c04c
Pass html5lib_tests1.html 44ea84c7e4e401c9d3f96d7cc39709e4be81edc8
Pass html5lib_tests1.html 67af290f1b04c4b1a67131edba1ee832c690432c
Pass html5lib_tests1.html 2f1899f72fafcb062418e8ce892188040de4708c
Fail html5lib_tests1.html ed2a4958c832ef6cec993cb52afc808132714d0a
Pass html5lib_tests1.html ed2a4958c832ef6cec993cb52afc808132714d0a
Pass html5lib_tests1.html c7943ccd9d880664b0894a2035e1f2a837f37c7a
Pass html5lib_tests1.html bbc836b1f494223d4eb8982930d693489d135740
Pass html5lib_tests1.html 617fdf08035740698b2f0f4c3874dbb469fd1848
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Harness status: OK

Found 191 tests

183 Pass
8 Fail
185 Pass
6 Fail
Pass html5lib_tests16.html 6d8b9d29f1890d59ef2453cff3f6d57b7e398c5c
Pass html5lib_tests16.html 5d4ac4961f9d52a42f309886d16fbe9c55c198bb
Pass html5lib_tests16.html 132c6e3cd2659e15b69904c67981a04e81fabe78
Expand Down Expand Up @@ -92,7 +92,7 @@ Pass html5lib_tests16.html ae3967a139a3ecf61ecbc59c8c769a2731626fac
Pass html5lib_tests16.html 3586a5a4a1d1d69b139d139b0823af4753bc3e8d
Pass html5lib_tests16.html 0e99e2603bc91553c252713108e30495d71c3f37
Pass html5lib_tests16.html f9858d096fa1e68cce0742d125c551878d2d7020
Fail html5lib_tests16.html 225e87bce5a4518c3e5cd248ef93ebc39dba14e0
Pass html5lib_tests16.html 225e87bce5a4518c3e5cd248ef93ebc39dba14e0
Fail html5lib_tests16.html bb08b00b361470ce18b435c97aff4449dc98cc51
Fail html5lib_tests16.html cd74b727c1c8233f98e325293a2307e882e10f41
Fail html5lib_tests16.html 367bf426c092467300f78e5d7526b5a95b490871
Expand Down Expand Up @@ -186,7 +186,7 @@ Pass html5lib_tests16.html 5423bb28649f37e70a0559cba78c3b253a60c277
Pass html5lib_tests16.html 2c091a50dfd31e766a5a629c0b7c21973e33319d
Pass html5lib_tests16.html 920feb4f9d1032dcec2abc5c526e4996f642968b
Pass html5lib_tests16.html 31b9b446263cd5b7a844d43f2a235ed9b0c53efb
Fail html5lib_tests16.html dbb5127246fee18718bcffc6cf0730674d12b98a
Pass html5lib_tests16.html dbb5127246fee18718bcffc6cf0730674d12b98a
Fail html5lib_tests16.html 3872a4cfeba7651f0671de7e5f3922fd5053837b
Pass html5lib_tests16.html 2fbcb2db61b6416cdf46e0526e1929d146ab3da7
Pass html5lib_tests16.html 999da234e770bbf681a819423d04ea57415d9bbc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ Harness status: OK

Found 63 tests

61 Pass
2 Fail
62 Pass
1 Fail
Pass html5lib_tests2.html e070301fb578bd639ecbc7ec720fa60222d05826
Fail html5lib_tests2.html aaf24dabcb42470e447d241a40def0d136c12b93
Pass html5lib_tests2.html aaf24dabcb42470e447d241a40def0d136c12b93
Pass html5lib_tests2.html b6c1142484570bb90c36e454ee193cca17bb618a
Pass html5lib_tests2.html 1977644a94de1a04245dfef3f3db69c7ac41aa6f
Pass html5lib_tests2.html fb0d7207ed9fbc3fe5d1e0f85ad18c247dfb05a3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,26 @@ Harness status: OK

Found 24 tests

15 Pass
9 Fail
24 Pass
Pass html5lib_tests3.html 9af28bba864ad2e398d95249fdcd40491e91b23f
Pass html5lib_tests3.html be8bf339f25c34d94456b39ceeed74a25167df40
Pass html5lib_tests3.html b77d2b4c52c8d57dae80409a39f5e21cb8e5b3bc
Pass html5lib_tests3.html 7902929c3aa85bf8ffc8d7fa228921acec21808e
Fail html5lib_tests3.html 16dda22403dee14d6d8627d9139b8c5296f24b61
Fail html5lib_tests3.html 7022e121d090113a9b6a1f29e8c620b5b6c9b67c
Fail html5lib_tests3.html 8b5088252a41409e5f5989408f09af986573b007
Fail html5lib_tests3.html 692cef68475bc2c58dc3a1d6fc804ab69df37117
Pass html5lib_tests3.html 16dda22403dee14d6d8627d9139b8c5296f24b61
Pass html5lib_tests3.html 7022e121d090113a9b6a1f29e8c620b5b6c9b67c
Pass html5lib_tests3.html 8b5088252a41409e5f5989408f09af986573b007
Pass html5lib_tests3.html 692cef68475bc2c58dc3a1d6fc804ab69df37117
Pass html5lib_tests3.html 88bbb8a76e9880c09e8ffcd626660106cf27abce
Pass html5lib_tests3.html b43510ea21c96a96255b45aef578af5cbc78475e
Pass html5lib_tests3.html 5227d81a48fc519767baaca384b9687dad7ba1bf
Fail html5lib_tests3.html 5bb12f29d0f7c9c30bc8ceb14578c60df73dca2c
Pass html5lib_tests3.html 5bb12f29d0f7c9c30bc8ceb14578c60df73dca2c
Pass html5lib_tests3.html 9ba44cced626432a79929642154346ab9d01403a
Pass html5lib_tests3.html f9031fcb39c793e24b116a1e041dd93ed638a0f4
Fail html5lib_tests3.html 45ec5c450b3039007112fcb053c2a82ce2e93f17
Pass html5lib_tests3.html 45ec5c450b3039007112fcb053c2a82ce2e93f17
Pass html5lib_tests3.html 6a66abfc230b8cfc93c57210ae370b1d5e744b5a
Fail html5lib_tests3.html ed9cc49cd8a577e1e6343808c328e242b53ee42d
Fail html5lib_tests3.html 32c5a1be682ae34b4195cd0481ee6c53c806abeb
Fail html5lib_tests3.html daf731117bb7cf43f750f187cbb3528f07c9a012
Pass html5lib_tests3.html ed9cc49cd8a577e1e6343808c328e242b53ee42d
Pass html5lib_tests3.html 32c5a1be682ae34b4195cd0481ee6c53c806abeb
Pass html5lib_tests3.html daf731117bb7cf43f750f187cbb3528f07c9a012
Pass html5lib_tests3.html 948e2378d0e6bd68dbc278a993c2774c22b30370
Pass html5lib_tests3.html f7fd80272bb4ab6e3bb871de5f7688912740c39f
Pass html5lib_tests3.html 9eddcf7971dc65d05f25aa4c412acf09a789e5b3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Harness status: OK

Found 16 tests

13 Pass
3 Fail
14 Pass
2 Fail
Pass html5lib_tests5.html c482a88c4feb445945f19c77eda5e460cd6db344
Pass html5lib_tests5.html b28eaef63aeeb165eceb56152d50767327f975fa
Pass html5lib_tests5.html 20c1b55aabcd426fa5975648f21cff40fa3fc2e3
Expand All @@ -12,7 +12,7 @@ Pass html5lib_tests5.html 283a0f4eb33a3ee80f718020268bf1794a758ec9
Pass html5lib_tests5.html bde8b7a035edd6f123f45708ac10b4f60e81edf4
Pass html5lib_tests5.html 6ace30add7690cda74de9830481c95bef1f5976d
Pass html5lib_tests5.html 3c5f82c8db30cc1cce4c7fa8a5d18cf13ce8d007
Fail html5lib_tests5.html 9cac6179dc295f43afd5a41ed98aef3a9d5a08de
Pass html5lib_tests5.html 9cac6179dc295f43afd5a41ed98aef3a9d5a08de
Pass html5lib_tests5.html 021a5fbf8c725781d08dce099d21f7023c9bb26d
Pass html5lib_tests5.html 412eae0c0e6e5da254550debd587ff86cff55c0c
Pass html5lib_tests5.html 410a64500216425d811748b0258c92a49fbad0ff
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ Harness status: OK

Found 33 tests

32 Pass
1 Fail
33 Pass
Pass html5lib_tests7.html 7cb496e242a4dc9aed321252b5ca6ebf4f02ebcd
Pass html5lib_tests7.html c0cffec1e999db2aefb2f6beb679fd9620566dbd
Pass html5lib_tests7.html 7c644a6da21bfd551385b0a5044b82cf7be0a22f
Expand All @@ -19,7 +18,7 @@ Pass html5lib_tests7.html 37b910b755c2df155a3129d5a1150f0c0fdd7934
Pass html5lib_tests7.html 868bff3a23219b836fdc702063d637f817ce65e1
Pass html5lib_tests7.html a33a56f5571b4bcb23138ffb60df3824f5c53773
Pass html5lib_tests7.html facf5e60205451cf740f64628b8608f0aee30f3a
Fail html5lib_tests7.html 8ba11b54fa74a1c229d079b2902d6e33e139f33b
Pass html5lib_tests7.html 8ba11b54fa74a1c229d079b2902d6e33e139f33b
Pass html5lib_tests7.html 84e2152c284f4dfee7d8d12846c08b2c025578a6
Pass html5lib_tests7.html 8e3432411baa59cbef731ab3ba2703cb5d518453
Pass html5lib_tests7.html e2f6144290512430ad25bbf9598eae77288c7b7a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ Harness status: OK

Found 1 tests

1 Fail
Fail document.write
1 Pass
Pass document.write

0 comments on commit c632ee6

Please sign in to comment.