-
-
Notifications
You must be signed in to change notification settings - Fork 905
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2294 from sparklemotion/flavorjones-abruptly-clos…
…ed-html-comments feat(cruby): patch libxml2 to handle abruptly-closed HTML comments --- **What problem is this PR intended to solve?** HackerOne user [tehryanx](https://hackerone.com/tehryanx?type=user) reported a method of potentially confusing Loofah sanitizers by taking advantage of the difference between how abruptly-closed comments are handled by libxml2 and how they're handled by WHATWG-guidance-compliant modern browsers. WHATWG advises treating `<!-->` and `<!--->` as empty comments, but libxml2 today treats these as the _start_ of a comment. https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment I've submitted this patch upstream at https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/124 Similar prior art exists for incorrectly-closed comments: - #2058 - https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/82 **Have you included adequate test coverage?** Yes! **Does this change affect the behavior of either the C or the Java implementations?** This is a behavior change to libxml2/CRuby. Note that nekoHTML/JRuby already follow WHATWG guidance and so JRuby behavior is unchanged.
- Loading branch information
Showing
3 changed files
with
91 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
From 3ea8d08da310b645e37940eaae5cc28e251b155b Mon Sep 17 00:00:00 2001 | ||
From: Mike Dalessio <[email protected]> | ||
Date: Sat, 17 Jul 2021 14:36:53 -0400 | ||
Subject: [PATCH] htmlParseComment: handle abruptly-closed comments | ||
|
||
See guidance provided on abruptly-closed comments here: | |
|
||
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment | ||
--- | ||
HTMLparser.c | 11 +++++++++++ | ||
include/libxml/xmlerror.h | 1 + | ||
2 files changed, 12 insertions(+) | ||
|
||
diff --git a/HTMLparser.c b/HTMLparser.c | ||
index b56363a..f0bf294 100644 | ||
--- a/HTMLparser.c | ||
+++ b/HTMLparser.c | ||
@@ -3485,10 +3485,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { | ||
q = CUR_CHAR(ql); | ||
if (q == 0) | ||
goto unfinished; | ||
+ if (q == '>') { | ||
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL); | ||
+ cur = '>'; | ||
+ goto finished; | ||
+ } | ||
NEXTL(ql); | ||
r = CUR_CHAR(rl); | ||
if (r == 0) | ||
goto unfinished; | ||
+ if (q == '-' && r == '>') { | ||
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL); | ||
+ cur = '>'; | ||
+ goto finished; | ||
+ } | ||
NEXTL(rl); | ||
cur = CUR_CHAR(l); | ||
while ((cur != 0) && | ||
@@ -3536,6 +3546,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { | ||
cur = next; | ||
l = nl; | ||
} | ||
+finished: | ||
buf[len] = 0; | ||
if (cur == '>') { | ||
NEXT; | ||
diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h | ||
index c101997..7b68e40 100644 | ||
--- a/include/libxml/xmlerror.h | ||
+++ b/include/libxml/xmlerror.h | ||
@@ -209,6 +209,7 @@ typedef enum { | ||
XML_ERR_VERSION_MISMATCH, /* 109 */ | ||
XML_ERR_NAME_TOO_LONG, /* 110 */ | ||
XML_ERR_USER_STOP, /* 111 */ | ||
+ XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */ | ||
XML_NS_ERR_XML_NAMESPACE = 200, | ||
XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */ | ||
XML_NS_ERR_QNAME, /* 202 */ | ||
-- | ||
2.31.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters