From 8934b5c62852975fd667c070ce8d0f2c2c5512e3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 28 Feb 2023 15:24:53 -0500 Subject: [PATCH 1/2] feat: SaveOptions#inspect shows the names of the set options Closes #2767 --- CHANGELOG.md | 3 +++ lib/nokogiri/xml/node/save_options.rb | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92dbae3942c..5ef0165c628 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Improved +* `Nokogiri::XML::Node::SaveOptions#inspect` now shows the names of the options set in the bitmask, similar to `ParseOptions`. [[#2767](https://github.com/sparklemotion/nokogiri/issues/2767)] + + ### Deprecated ### Performance diff --git a/lib/nokogiri/xml/node/save_options.rb b/lib/nokogiri/xml/node/save_options.rb index 2d4a0c4c29e..9e2652c6776 100644 --- a/lib/nokogiri/xml/node/save_options.rb +++ b/lib/nokogiri/xml/node/save_options.rb @@ -62,6 +62,14 @@ def #{constant.downcase}? end alias_method :to_i, :options + + def inspect + options = [] + self.class.constants.each do |k| + options << k.downcase if send(:"#{k.downcase}?") + end + super.sub(/>$/, " " + options.join(", ") + ">") + end end end end From 5242eff91a8c2ed15ac127b665d02f06d6ed2fd7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 28 Feb 2023 15:55:32 -0500 Subject: [PATCH 2/2] fix(jruby): serializing HTML with no save options emits HTML Previously this emitted XML. This brings the implementation into agreement with the CRuby implementation. 
--- CHANGELOG.md | 3 +++ ext/java/nokogiri/XmlNode.java | 13 +++++++++-- .../internals/SaveContextVisitor.java | 1 + test/html4/test_document.rb | 23 +++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ef0165c628..2d2257b741e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed +* [JRuby] Serializing an HTML4 document with `#write_to` and specifying no save options will properly emit an HTML document anyway, like libxml2 does. Previously JRuby emitted XML in this situation. + + ### Improved * `Nokogiri::XML::Node::SaveOptions#inspect` now shows the names of the options set in the bitmask, similar to `ParseOptions`. [[#2767](https://github.com/sparklemotion/nokogiri/issues/2767)] diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 69e67729597..c729f0c441a 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -1342,12 +1342,21 @@ public class XmlNode extends RubyObject IRubyObject io = args[0]; IRubyObject encoding = args[1]; IRubyObject indentString = args[2]; - IRubyObject options = args[3]; + IRubyObject options_rb = args[3]; + int options = RubyFixnum.fix2int(options_rb); String encString = rubyStringToString(encoding); + // similar to behavior of libxml2's xmlSaveTree function + if ((options & SaveContextVisitor.AS_XML) == 0 && + (options & SaveContextVisitor.AS_XHTML) == 0 && + (options & SaveContextVisitor.AS_HTML) == 0 && + isHtmlDoc(context)) { + options |= SaveContextVisitor.DEFAULT_HTML; + } + SaveContextVisitor visitor = - new SaveContextVisitor(RubyFixnum.fix2int(options), rubyStringToString(indentString), encString, isHtmlDoc(context), + new SaveContextVisitor(options, rubyStringToString(indentString), encString, isHtmlDoc(context), isFragment(), 0); accept(context, visitor); diff --git 
a/ext/java/nokogiri/internals/SaveContextVisitor.java b/ext/java/nokogiri/internals/SaveContextVisitor.java index 40708dbbbae..a1ccc348d9b 100644 --- a/ext/java/nokogiri/internals/SaveContextVisitor.java +++ b/ext/java/nokogiri/internals/SaveContextVisitor.java @@ -74,6 +74,7 @@ public class SaveContextVisitor public static final int AS_XML = 32; public static final int AS_HTML = 64; public static final int AS_BUILDER = 128; + public static final int DEFAULT_HTML = NO_DECL | NO_EMPTY | AS_HTML; public static final int CANONICAL = 1; public static final int INCL_NS = 2; diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index df455983434..357c4bcc7b4 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -755,6 +755,29 @@ def test_leaking_dtd_nodes_after_internal_subset_removal assert_equal(expected, doc.at_css("body").children.map(&:type)) end + it "emits HTML even when no save options are specified" do + doc = Nokogiri::HTML4::Document.parse("
hello
") + expected = doc.to_html + + assert_equal( + expected, + doc.write_to(StringIO.new, save_with: Nokogiri::XML::Node::SaveOptions::DEFAULT_HTML).tap(&:rewind).read, + ) + assert_equal( + expected, + doc.write_to(StringIO.new).tap(&:rewind).read, + ) + + # but not when the AS_XML or AS_XHTML flag is set + as_xml = doc.write_to(StringIO.new, save_with: Nokogiri::XML::Node::SaveOptions::AS_XML).tap(&:rewind).read + refute_equal(expected, as_xml) + assert(as_xml.start_with?("