Skip to content

Commit

Permalink
fix(jruby): serializing HTML with no save options emits HTML
Browse files Browse the repository at this point in the history
Previously this emitted XML. This brings the implementation into
agreement with the CRuby implementation.
  • Loading branch information
flavorjones committed Feb 28, 2023
1 parent 8934b5c commit 06d64c6
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

### Fixed

* [JRuby] Serializing an HTML4 document with `#write_to` and specifying no save options will properly emit an HTML document anyway, like libxml2 does. Previously JRuby emitted XML in this situation.


### Improved

* `Nokogiri::XML::Node::SaveOptions#inspect` now shows the names of the options set in the bitmask, similar to `ParseOptions`. [[#2767](https://github.com/sparklemotion/nokogiri/issues/2767)]
Expand Down
13 changes: 11 additions & 2 deletions ext/java/nokogiri/XmlNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -1342,12 +1342,21 @@ public class XmlNode extends RubyObject
IRubyObject io = args[0];
IRubyObject encoding = args[1];
IRubyObject indentString = args[2];
IRubyObject options = args[3];
IRubyObject options_rb = args[3];
int options = RubyFixnum.fix2int(options_rb);

String encString = rubyStringToString(encoding);

// similar to behavior of libxml2's xmlSaveTree function
if ((options & SaveContextVisitor.AS_XML) == 0 &&
(options & SaveContextVisitor.AS_XHTML) == 0 &&
(options & SaveContextVisitor.AS_HTML) == 0 &&
isHtmlDoc(context)) {
options |= SaveContextVisitor.DEFAULT_HTML;
}

SaveContextVisitor visitor =
new SaveContextVisitor(RubyFixnum.fix2int(options), rubyStringToString(indentString), encString, isHtmlDoc(context),
new SaveContextVisitor(options, rubyStringToString(indentString), encString, isHtmlDoc(context),
isFragment(), 0);
accept(context, visitor);

Expand Down
1 change: 1 addition & 0 deletions ext/java/nokogiri/internals/SaveContextVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public class SaveContextVisitor
public static final int AS_XML = 32;
public static final int AS_HTML = 64;
public static final int AS_BUILDER = 128;
public static final int DEFAULT_HTML = NO_DECL | NO_EMPTY | AS_HTML;

public static final int CANONICAL = 1;
public static final int INCL_NS = 2;
Expand Down
25 changes: 25 additions & 0 deletions test/html4/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,31 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
assert_equal(expected, doc.at_css("body").children.map(&:type))
end

it "emits HTML even when no save options are specified" do
doc = Nokogiri::HTML4::Document.parse("<html><body><div>hello</div></body></html>")
expected = doc.to_html

assert_equal(
expected,
doc.write_to(StringIO.new, save_with: Nokogiri::XML::Node::SaveOptions::DEFAULT_HTML).tap(&:rewind).read,
)
assert_equal(
expected,
doc.write_to(StringIO.new).tap(&:rewind).read,
)

# but not when the AS_XML or AS_XHTML flag is set
as_xml = doc.write_to(StringIO.new, save_with: Nokogiri::XML::Node::SaveOptions::AS_XML).tap(&:rewind).read
pp as_xml
refute_equal(expected, as_xml)
assert(as_xml.start_with?("<?xml"))

as_xhtml = doc.write_to(StringIO.new, save_with: Nokogiri::XML::Node::SaveOptions::AS_XHTML).tap(&:rewind).read
pp as_xhtml
refute_equal(expected, as_xhtml)
assert(as_xhtml.start_with?("<?xml"))
end

describe ".parse" do
let(:html_strict) do
Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover
Expand Down

0 comments on commit 06d64c6

Please sign in to comment.