Skip to content

Commit

Permalink
fix(dep): HTML parsing of processing instructions
Browse files Browse the repository at this point in the history
Added test coverage to describe the behavior of our parsers, and updated
nekohtml to 1.9.22.noko2.
  • Loading branch information
flavorjones committed Apr 10, 2022
1 parent db72b90 commit 0feac5a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
Binary file modified lib/nekohtml.jar
Binary file not shown.
29 changes: 21 additions & 8 deletions test/html4/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,17 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
assert_equal(0, doc.errors.length)
end

def test_leaking_dtd_nodes_after_internal_subset_removal
  # Regression check for https://github.com/sparklemotion/nokogiri/issues/1784
  #
  # There is nothing to assert here: the only goal is that repeatedly
  # removing a fresh document's internal subset does not raise a valgrind
  # error. We don't otherwise have any test coverage for removing DTDs.
  100.times { Nokogiri::HTML::Document.new.internal_subset.remove }
end

it "skips encoding for script tags" do
html = Nokogiri::HTML(<<~EOHTML)
<html>
Expand Down Expand Up @@ -763,15 +774,17 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
assert_equal "ISO-8859-1", html.encoding.name
end

def test_leaking_dtd_nodes_after_internal_subset_removal
# see https://github.com/sparklemotion/nokogiri/issues/1784
#
# just checking that this doesn't raise a valgrind error. we
# don't otherwise have any test coverage for removing DTDs.
#
100.times do |_i|
Nokogiri::HTML::Document.new.internal_subset.remove
it "handles ill-formed processing instructions" do
  # Input: an abruptly closed comment ("<!-->") followed by an unterminated
  # processing instruction ("<?a/").
  doc = Nokogiri::HTML4::Document.parse(%{<html><body><!--><?a/})

  # The node types expected under <body> differ by parser backend:
  #   * JRuby: a comment node followed by a processing-instruction node
  #   * libxml2 carrying the abruptly-closed-comment patch: a single comment node
  #   * any other libxml2: no children at all
  comment_patch = "0008-htmlParseComment-handle-abruptly-closed-comments.patch"
  expected_types =
    if Nokogiri.jruby?
      [Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::PI_NODE]
    elsif Nokogiri.libxml2_patches.include?(comment_patch)
      [Nokogiri::XML::Node::COMMENT_NODE]
    else
      []
    end

  actual_types = doc.at_css("body").children.map(&:type)
  assert_equal(expected_types, actual_types)
end

describe ".parse" do
Expand Down
14 changes: 14 additions & 0 deletions test/html5/test_nokogumbo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,20 @@ def test_line_cdata
assert_equal(3, node.line)
end

it "handles ill-formed processing instructions in a document" do
  # Parse a document whose body holds an abruptly closed comment ("<!-->")
  # followed by an unterminated processing instruction ("<?a/").
  doc = Nokogiri::HTML5::Document.parse(%{<html><body><!--><?a/})
  body_child_types = doc.at_css("body").children.map(&:type)

  # The HTML5 parser is expected to yield exactly two comment nodes.
  two_comments = Array.new(2, Nokogiri::XML::Node::COMMENT_NODE)
  assert_equal(two_comments, body_child_types)
end

it "handles ill-formed processing instructions in a fragment" do
  # Parse a fragment whose <div> holds an abruptly closed comment ("<!-->")
  # followed by an unterminated processing instruction ("<?a/").
  frag = Nokogiri::HTML5::DocumentFragment.parse(%{<div><!--><?a/})
  div_child_types = frag.at_css("div").children.map(&:type)

  # The HTML5 fragment parser is expected to yield exactly two comment nodes.
  two_comments = Array.new(2, Nokogiri::XML::Node::COMMENT_NODE)
  assert_equal(two_comments, div_child_types)
end

private

def buffer
Expand Down

0 comments on commit 0feac5a

Please sign in to comment.