From 28c8827150163a899aa61d2be85f4f6f258a8281 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 14 Nov 2022 15:21:45 -0500 Subject: [PATCH] feat: {Node,NodeSet}#wrap accept a Node argument Duplicating an instantiated Node is significantly faster than re-parsing a string for multiple invocations. Closes #2657 --- CHANGELOG.md | 1 + lib/nokogiri/xml/node.rb | 74 +++++++++++++++++++++++++++++++----- lib/nokogiri/xml/node_set.rb | 67 ++++++++++++++++++++++++++++++-- test/html5/test_api.rb | 9 +++++ test/xml/test_node.rb | 66 ++++++++++++++++++++++++++++---- test/xml/test_node_set.rb | 40 ++++++++++++++++--- 6 files changed, 231 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52857711d57..dc9e11ded1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/ ### Added +* `Node#wrap` and `NodeSet#wrap` now also accept a `Node` type argument, which will be `dup`ed for each wrapper. For cases where many nodes are being wrapped, creating a `Node` once using `Document#create_element` and passing that `Node` multiple times is significantly faster than re-parsing markup on each call. [[#2657](https://github.com/sparklemotion/nokogiri/issues/2657)] * [CRuby] Invocation of custom XPath or CSS handler functions may now use the `nokogiri` namespace prefix. Historically, the JRuby implementation _required_ this namespace but the CRuby implementation did not support it. It's recommended that all XPath and CSS queries use the `nokogiri` namespace going forward. Invocation without the namespace is planned for deprecation in v1.15.0 and removal in a future release. [[#2147](https://github.com/sparklemotion/nokogiri/issues/2147)] diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index 2ff0d246ba3..ca13af98f60 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -176,13 +176,69 @@ def prepend_child(node_or_tags) end end - ### - # Add html around this node + # :call-seq: + # wrap(markup) -> self + # wrap(node) -> self + # + # Wrap this Node with the node parsed from +markup+ or a dup of the +node+. + # + # [Parameters] + # - *markup* (String) + # Markup that is parsed and used as the wrapper. This node's parent, if it exists, is used + # as the context node for parsing; otherwise the associated document is used. If the parsed + # fragment has multiple roots, the first root node is used as the wrapper. + # - *node* (Nokogiri::XML::Node) + # An element that is `#dup`ed and used as the wrapper. + # + # [Returns] +self+, to support chaining. + # + # Also see NodeSet#wrap + # + # *Example* with a +String+ argument: + # + # doc = Nokogiri::HTML5(<<~HTML) + # + # asdf + # + # HTML + # doc.at_css("a").wrap("
") + # doc.to_html + # # => + # #
asdf
+ # # + # + # *Example* with a +Node+ argument: + # + # doc = Nokogiri::HTML5(<<~HTML) + # + # asdf + # + # HTML + # doc.at_css("a").wrap(doc.create_element("wrap")) + # doc.to_html + # # + # # asdf + # # # - # Returns self - def wrap(html) - new_parent = document.parse(html).first - add_next_sibling(new_parent) + def wrap(node_or_tags) + case node_or_tags + when String + new_parent = if parent + parent.coerce(node_or_tags).first + else + coerce(node_or_tags).first + end + when XML::Node + new_parent = node_or_tags.dup + else + raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}" + end + + if parent + add_next_sibling(new_parent) + else + new_parent.unlink + end new_parent.add_child(self) self end @@ -193,7 +249,7 @@ def wrap(html) # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String # containing markup. # - # Returns self, to support chaining of calls (e.g., root << child1 << child2) + # Returns +self+, to support chaining of calls (e.g., root << child1 << child2) # # Also see related method +add_child+. def <<(node_or_tags) @@ -241,7 +297,7 @@ def add_next_sibling(node_or_tags) # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String # containing markup. # - # Returns self, to support chaining of calls. + # Returns +self+, to support chaining of calls. # # Also see related method +add_previous_sibling+. def before(node_or_tags) @@ -255,7 +311,7 @@ def before(node_or_tags) # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String # containing markup. # - # Returns self, to support chaining of calls. + # Returns +self+, to support chaining of calls. # # Also see related method +add_next_sibling+. def after(node_or_tags) diff --git a/lib/nokogiri/xml/node_set.rb b/lib/nokogiri/xml/node_set.rb index 1a202dfd2eb..353faf27694 100644 --- a/lib/nokogiri/xml/node_set.rb +++ b/lib/nokogiri/xml/node_set.rb @@ -1,3 +1,4 @@ +# coding: utf-8 # frozen_string_literal: true module Nokogiri @@ -260,10 +261,68 @@ def inner_html(*args) collect { |j| j.inner_html(*args) }.join("") end - ### - # Wrap this NodeSet with +html+ - def wrap(html) - map { |node| node.wrap(html) } + # :call-seq: + # wrap(markup) -> self + # wrap(node) -> self + # + # Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+. + # + # [Parameters] + # - *markup* (String) + # Markup that is parsed and used as the wrapper. Each node's parent, if it exists, is used + # as the context node for parsing; otherwise the associated document is used. If the parsed + # fragment has multiple roots, the first root node is used as the wrapper. + # - *node* (Nokogiri::XML::Node) + # An element that is `#dup`ed and used as the wrapper. + # + # [Returns] +self+, to support chaining. + # + # Also see Node#wrap + # + # *Example* with a +String+ argument: + # + # doc = Nokogiri::HTML5(<<~HTML) + # + # a + # b + # c + # d + # + # HTML + # doc.css("a").wrap("
") + # doc.to_html + # # => + # #
a
+ # #
b
+ # #
c
+ # #
d
+ # # + # + # *Example* with a +Node+ argument + # + # 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids + # having to reparse the wrapper markup for each node. + # + # doc = Nokogiri::HTML5(<<~HTML) + # + # a + # b + # c + # d + # + # HTML + # doc.css("a").wrap(doc.create_element("div")) + # doc.to_html + # # => + # #
a
+ # #
b
+ # #
c
+ # #
d
+ # # + # + def wrap(node_or_tags) + map { |node| node.wrap(node_or_tags) } + self end ### diff --git a/test/html5/test_api.rb b/test/html5/test_api.rb index eaf72896c48..e9f3db0ed0d 100644 --- a/test/html5/test_api.rb +++ b/test/html5/test_api.rb @@ -213,6 +213,15 @@ def test_html_eh refute_predicate(doc, :xml?) end + def test_node_wrap + doc = Nokogiri.HTML5("
") + div = doc.at_css("div") + div.wrap("
") + + assert_equal("section", div.parent.name) + assert_equal("body", div.parent.parent.name) + end + describe Nokogiri::HTML5::Document do describe "#fragment" do it "parses text nodes in a `body` context" do diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb index 0c855ba67d4..5c763052421 100644 --- a/test/xml/test_node.rb +++ b/test/xml/test_node.rb @@ -1297,13 +1297,65 @@ def test_text_node_robustness_gh1426 end end - def test_wrap - xml = '
important thing
' - doc = Nokogiri::XML(xml) - thing = doc.at_css("thing") - thing.wrap("") - assert_equal("wrapper", thing.parent.name) - assert_equal("thing", doc.at_css("wrapper").children.first.name) + describe "#wrap" do + let(:xml) { "
important thing
" } + let(:doc) { Nokogiri::XML(xml) } + + describe "string markup argument" do + it "parses and wraps" do + thing = doc.at_css("thing") + rval = thing.wrap("") + wrapper = doc.at_css("wrapper") + + assert_equal(rval, thing) + assert_equal(wrapper, thing.parent) + assert_equal("root", wrapper.parent.name) + assert_equal(1, wrapper.children.length) + assert_equal("thing", wrapper.children.first.name) + end + + it "wraps unparented nodes" do + thing = doc.create_element("thing") + thing.wrap("") + + assert_equal("wrapper", thing.parent.name) + assert_nil(thing.parent.parent) + end + end + + describe "Node argument" do + it "wraps using a dup of the node" do + thing = doc.at_css("thing") + wrapper_template = doc.create_element("wrapper") + rval = thing.wrap(wrapper_template) + wrapper = doc.at_css("wrapper") + + assert_equal(rval, thing) + refute_equal(wrapper, wrapper_template) + assert_equal(wrapper, thing.parent) + assert_equal("root", wrapper.parent.name) + assert_equal(1, wrapper.children.length) + assert_equal("thing", wrapper.children.first.name) + end + + it "wraps unparented nodes" do + thing = doc.create_element("thing") + wrapper_template = doc.create_element("wrapper") + thing.wrap(wrapper_template) + + refute_equal(wrapper_template, thing.parent) + assert_equal("wrapper", thing.parent.name) + assert_nil(thing.parent.parent) + end + end + + it "raises an ArgumentError on other types" do + thing = doc.at_css("thing") + + assert_raises(ArgumentError) do + thing.wrap(1) + end + end end describe "#line" do diff --git a/test/xml/test_node_set.rb b/test/xml/test_node_set.rb index 8016e8ca49b..fb81d6ffbe8 100644 --- a/test/xml/test_node_set.rb +++ b/test/xml/test_node_set.rb @@ -540,17 +540,45 @@ def awesome(ns) describe "#wrap" do it "wraps each node within a reified copy of the tag passed" do - employees = (xml / "//employee").wrap("") - assert_equal("wrapper", employees[0].parent.name) - assert_equal("employee", xml.search("//wrapper").first.children[0].name) + employees = xml.css("employee") + rval = employees.wrap("") + wrappers = xml.css("wrapper") + + assert_equal(rval, employees) + assert_equal(employees.length, wrappers.length) + employees.each do |employee| + assert_equal("wrapper", employee.parent.name) + end + wrappers.each do |wrapper| + assert_equal("staff", wrapper.parent.name) + assert_equal(1, wrapper.children.length) + assert_equal("employee", wrapper.children.first.name) + end + end + + it "wraps each node within a dup of the Node argument" do + employees = xml.css("employee") + rval = employees.wrap(xml.create_element("wrapper")) + wrappers = xml.css("wrapper") + + assert_equal(rval, employees) + assert_equal(employees.length, wrappers.length) + employees.each do |employee| + assert_equal("wrapper", employee.parent.name) + end + wrappers.each do |wrapper| + assert_equal("staff", wrapper.parent.name) + assert_equal(1, wrapper.children.length) + assert_equal("employee", wrapper.children.first.name) + end end it "handles various node types and handles recursive reparenting" do - xml = "contents" - doc = Nokogiri::XML(xml) + doc = Nokogiri::XML("contents") nodes = doc.at_css("root").xpath(".//* | .//*/text()") # foo and "contents" nodes.wrap("") wrappers = doc.css("wrapper") + assert_equal("root", wrappers.first.parent.name) assert_equal("foo", wrappers.first.children.first.name) assert_equal("foo", wrappers.last.parent.name) @@ -565,7 +593,7 @@ def awesome(ns) goodbye EOXML - employees = frag.xpath(".//employee") + employees = frag.css("employee") employees.wrap("") assert_equal("wrapper", employees[0].parent.name) assert_equal("employee", frag.at(".//wrapper").children.first.name)