Skip to content

Commit

Permalink
Add a patch to libxml2 to close HTML comments on --!>
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Aug 4, 2020
1 parent dbb539c commit 48e4a33
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
From 15b28d8a99224e16ae8a7e7774ee3b6b6769dca3 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <[email protected]>
Date: Mon, 3 Aug 2020 17:36:05 -0400
Subject: [PATCH] htmlParseComment: treat `--!>` as if it closed the comment

See guidance provided on incorrectly-closed comments here:

https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment

This is a minimal patch to try to avoid merge conflicts.
---
HTMLparser.c | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/HTMLparser.c b/HTMLparser.c
index 7b6d689..0046642 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3301,6 +3301,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
int r, rl;
int cur, l;
xmlParserInputState state;
+ int possiblyIncorrectlyClosed = 0;

/*
* Check that there is a comment right here.
@@ -3346,6 +3347,16 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
buf = tmp;
}
COPY_BUF(ql,buf,len,q);
+
+ if (possiblyIncorrectlyClosed && cur == '>') {
+ htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment incorrectly closed by '--!>'", NULL, NULL);
+ buf[len-2] = 0;
+ goto recover ;
+ }
+
+ possiblyIncorrectlyClosed = (q == '-' && r == '-' && cur == '!') ? 1 : 0 ;
+
q = r;
ql = rl;
r = cur;
@@ -3359,6 +3370,8 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
}
}
buf[len] = 0;
+
+recover:
if (IS_CHAR(cur)) {
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
--
2.25.1

29 changes: 23 additions & 6 deletions test/html/test_comments.rb
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,29 @@ class TestComment < Nokogiri::TestCase
let(:subject) { doc.at_css("div#under-test") }
let(:inner_div) { doc.at_css("div#do-i-exist") }

it "behaves as if the comment encompasses the inner div" do # NON-COMPLIANT
assert_equal 1, subject.children.length
assert subject.children.first.comment?
assert !inner_div
assert_match(/id=do-i-exist/, subject.children.first.content)
assert_equal 0, doc.errors.length
if Nokogiri.uses_libxml? && Nokogiri::VersionInfo.instance.libxml2_using_packaged?
# see patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch
it "behaves as if the comment is normally closed" do # COMPLIANT
assert_equal 3, subject.children.length
assert subject.children[0].comment?
assert_equal "foo", subject.children[0].content
assert inner_div
assert_equal inner_div, subject.children[1]
assert subject.children[2].comment?
assert_equal "bar", subject.children[2].content
assert_equal 1, doc.errors.length
assert_match(/Comment incorrectly closed/, doc.errors.first.to_s)
end
end

if Nokogiri.jruby? || Nokogiri::VersionInfo.instance.libxml2_using_system?
it "behaves as if the comment encompasses the inner div" do # NON-COMPLIANT
assert_equal 1, subject.children.length
assert subject.children.first.comment?
assert !inner_div
assert_match(/id=do-i-exist/, subject.children.first.content)
assert_equal 0, doc.errors.length
end
end
end

Expand Down

0 comments on commit 48e4a33

Please sign in to comment.