From 37efa57184e7aae8cc068d55ef28f9b02e4bf785 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 29 Aug 2022 21:38:17 -0400 Subject: [PATCH 1/4] deprecate: non-namespaced custom XPath handler functions See #2147 for more context. --- ext/nokogiri/nokogiri.h | 4 ++-- ext/nokogiri/xml_xpath_context.c | 7 +++++++ lib/nokogiri/xml/searchable.rb | 21 +++++++++++++-------- test/xml/test_xpath.rb | 18 ++++++++++++++++++ 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 3313961ca93..ffc1749b91d 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -218,9 +218,9 @@ xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context); #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) #if HAVE_RB_CATEGORY_WARNING -# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) +# define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) #else -# define NOKO_WARN_DEPRECATION(message) rb_warning(message) +# define NOKO_WARN_DEPRECATION(message...) rb_warning(message) #endif void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index d7df29636a4..175f5a2e26a 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -320,6 +320,13 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri) { VALUE rb_handler = (VALUE)data; if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) { + if (c_ns_uri == NULL) { + NOKO_WARN_DEPRECATION( + "A custom XPath or CSS handler function named '%s' is being invoked without a namespace." + " Please update your query to reference this function as 'nokogiri:%s'." + " Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.", + c_name, c_name); + } return method_caller; } diff --git a/lib/nokogiri/xml/searchable.rb b/lib/nokogiri/xml/searchable.rb index d7674ec3da5..52536291f8c 100644 --- a/lib/nokogiri/xml/searchable.rb +++ b/lib/nokogiri/xml/searchable.rb @@ -36,16 +36,19 @@ module Searchable # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'}) # # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom - # functions create a class and implement the function you want to define. The first argument - # to the method will be the current matching NodeSet. Any other arguments are ones that you - # pass in. Note that this class may appear anywhere in the argument list. For example: + # functions create a class and implement the function you want to define, which will be in the + # `nokogiri` namespace in XPath queries. + # + # The first argument to the method will be the current matching NodeSet. Any other arguments + # are ones that you pass in. Note that this class may appear anywhere in the argument + # list. For example: # # handler = Class.new { # def regex node_set, regex # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } # end # }.new - # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler) + # node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler) # # See Searchable#xpath and Searchable#css for further usage help. def search(*args) @@ -160,16 +163,18 @@ def at_css(*args) # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'}) # # 💡 Custom XPath functions may also be defined. To define custom functions create a class and - # implement the function you want to define. The first argument to the method will be the - # current matching NodeSet. Any other arguments are ones that you pass in. Note that this - # class may appear anywhere in the argument list. For example: + # implement the function you want to define, which will be in the `nokogiri` namespace. + # + # The first argument to the method will be the current matching NodeSet. Any other arguments + # are ones that you pass in. Note that this class may appear anywhere in the argument + # list. For example: # # handler = Class.new { # def regex(node_set, regex) # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } # end # }.new - # node.xpath('.//title[regex(., "\w+")]', handler) + # node.xpath('.//title[nokogiri:regex(., "\w+")]', handler) # def xpath(*args) paths, handler, ns, binds = extract_params(args) diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index becca2dc9e2..90bbe9e3c88 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -121,6 +121,24 @@ def test_css_search_with_ambiguous_integer_or_string_attributes refute_nil(doc.at_css("img[width=200]")) end + def test_xpath_with_nonnamespaced_custom_function_is_deprecated_but_works + skip_unless_libxml2("only deprecated in CRuby") + + result = nil + assert_output("", /Invoking custom handler functions without a namespace is deprecated/) do + result = @xml.xpath("anint()", @handler) + end + assert_equal(1230456, result) + end + + def test_xpath_with_namespaced_custom_function_is_not_deprecated + result = nil + assert_silent do + result = @xml.xpath("nokogiri:anint()", @handler) + end + assert_equal(1230456, result) + end + def test_css_search_uses_custom_selectors_with_arguments set = @xml.css('employee > address:my_filter("domestic", "Yes")', @handler) refute_empty(set) From f6eede5d2134ceeb03ce0bd0ca13691c0f9dd4d0 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 28 Apr 2023 11:06:02 -0400 Subject: [PATCH 2/4] test: update tests to use namespaced xpath functions --- test/xml/test_node_set.rb | 4 +- test/xml/test_xpath.rb | 88 ++++++++------------------------------- 2 files changed, 19 insertions(+), 73 deletions(-) diff --git a/test/xml/test_node_set.rb b/test/xml/test_node_set.rb index a327d52bd45..158a92f61f9 100644 --- a/test/xml/test_node_set.rb +++ b/test/xml/test_node_set.rb @@ -173,8 +173,8 @@ class TestNodeSet < Nokogiri::TestCase set = xml.xpath("//staff") [ - [:xpath, "//*[awesome(.)]"], - [:search, "//*[awesome(.)]"], + [:xpath, "//*[nokogiri:awesome(.)]"], + [:search, "//*[nokogiri:awesome(.)]"], [:css, "*:awesome"], [:search, "*:awesome"], ].each do |method, query| diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 90bbe9e3c88..7948cac0514 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -163,21 +163,7 @@ def test_search_with_css_query_uses_custom_selectors_with_arguments end def test_search_with_xpath_query_uses_custom_selectors_with_arguments - set = if Nokogiri.uses_libxml? - @xml.search('//employee/address[my_filter(., "domestic", "Yes")]', @handler) - else - @xml.search('//employee/address[nokogiri:my_filter(., "domestic", "Yes")]', @handler) - end - refute_empty(set) - set.each do |node| - assert_equal("Yes", node["domestic"]) - end - end - - def test_search_with_xpath_query_using_namespaced_custom_function - # Note that invocation with namespaces was not historically supported in CRuby. - # see https://github.com/sparklemotion/nokogiri/issues/2147 for more context. - set = @xml.xpath('//employee/address[nokogiri:my_filter(., "domestic", "Yes")]', @handler) + set = @xml.search('//employee/address[nokogiri:my_filter(., "domestic", "Yes")]', @handler) refute_empty(set) set.each do |node| assert_equal("Yes", node["domestic"]) @@ -185,11 +171,7 @@ def test_search_with_xpath_query_using_namespaced_custom_function end def test_pass_self_to_function - set = if Nokogiri.uses_libxml? - @xml.xpath('//employee/address[my_filter(., "domestic", "Yes")]', @handler) - else - @xml.xpath('//employee/address[nokogiri:my_filter(., "domestic", "Yes")]', @handler) - end + set = @xml.xpath('//employee/address[nokogiri:my_filter(., "domestic", "Yes")]', @handler) refute_empty(set) set.each do |node| assert_equal("Yes", node["domestic"]) @@ -198,11 +180,7 @@ def test_pass_self_to_function def test_custom_xpath_function_gets_strings set = @xml.xpath("//employee") - if Nokogiri.uses_libxml? - @xml.xpath('//employee[thing("asdf")]', @handler) - else - @xml.xpath('//employee[nokogiri:thing("asdf")]', @handler) - end + @xml.xpath('//employee[nokogiri:thing("asdf")]', @handler) assert_equal(set.length, @handler.things.length) assert_equal(["asdf"] * set.length, @handler.things) end @@ -264,65 +242,41 @@ def test_xpath_results_cache_should_get_cleared_on_attr_update end def test_custom_xpath_function_returns_string - result = if Nokogiri.uses_libxml? - @xml.xpath('thing("asdf")', @handler) - else - @xml.xpath('nokogiri:thing("asdf")', @handler) - end + result = @xml.xpath('nokogiri:thing("asdf")', @handler) assert_equal("asdf", result) end def test_custom_xpath_gets_true_booleans set = @xml.xpath("//employee") - if Nokogiri.uses_libxml? - @xml.xpath("//employee[thing(true())]", @handler) - else - @xml.xpath("//employee[nokogiri:thing(true())]", @handler) - end + @xml.xpath("//employee[nokogiri:thing(true())]", @handler) assert_equal(set.length, @handler.things.length) assert_equal([true] * set.length, @handler.things) end def test_custom_xpath_gets_false_booleans set = @xml.xpath("//employee") - if Nokogiri.uses_libxml? - @xml.xpath("//employee[thing(false())]", @handler) - else - @xml.xpath("//employee[nokogiri:thing(false())]", @handler) - end + @xml.xpath("//employee[nokogiri:thing(false())]", @handler) assert_equal(set.length, @handler.things.length) assert_equal([false] * set.length, @handler.things) end def test_custom_xpath_gets_numbers set = @xml.xpath("//employee") - if Nokogiri.uses_libxml? - @xml.xpath("//employee[thing(10)]", @handler) - else - @xml.xpath("//employee[nokogiri:thing(10)]", @handler) - end + @xml.xpath("//employee[nokogiri:thing(10)]", @handler) assert_equal(set.length, @handler.things.length) assert_equal([10] * set.length, @handler.things) end def test_custom_xpath_gets_node_sets set = @xml.xpath("//employee/name") - if Nokogiri.uses_libxml? - @xml.xpath("//employee[thing(name)]", @handler) - else - @xml.xpath("//employee[nokogiri:thing(name)]", @handler) - end + @xml.xpath("//employee[nokogiri:thing(name)]", @handler) assert_equal(set.length, @handler.things.length) assert_equal(set.to_a, @handler.things.flatten) end def test_custom_xpath_gets_node_sets_and_returns_array set = @xml.xpath("//employee/name") - if Nokogiri.uses_libxml? - @xml.xpath("//employee[returns_array(name)]", @handler) - else - @xml.xpath("//employee[nokogiri:returns_array(name)]", @handler) - end + @xml.xpath("//employee[nokogiri:returns_array(name)]", @handler) assert_equal(set.length, @handler.things.length) assert_equal(set.to_a, @handler.things.flatten) end @@ -335,7 +289,7 @@ def awesome!; end assert(@xml.xpath("//employee/name")) - @xml.xpath("//employee[saves_node_set(name)]", @handler) + @xml.xpath("//employee[nokogiri:saves_node_set(name)]", @handler) assert_equal(@xml, @handler.things.document) assert_respond_to(@handler.things, :awesome!) end @@ -370,7 +324,7 @@ def name_equals(nodeset, name, *args) # long list of long arguments, to apply GC pressure during # ruby_funcall argument marshalling - xpath = ["//tool[name_equals(.,'hammer'"] + xpath = ["//tool[nokogiri:name_equals(.,'hammer'"] 500.times { xpath << "'unused argument #{"x" * 1000}'" } xpath << "'unused argument')]" xpath = xpath.join(",") @@ -380,20 +334,12 @@ def name_equals(nodeset, name, *args) end def test_custom_xpath_without_arguments - value = if Nokogiri.uses_libxml? - @xml.xpath("value()", @handler) - else - @xml.xpath("nokogiri:value()", @handler) - end + value = @xml.xpath("nokogiri:value()", @handler) assert_in_delta(123.456, value) end def test_custom_xpath_without_arguments_returning_int - value = if Nokogiri.uses_libxml? - @xml.xpath("anint()", @handler) - else - @xml.xpath("nokogiri:anint()", @handler) - end + value = @xml.xpath("nokogiri:anint()", @handler) assert_equal(1230456, value) end @@ -401,7 +347,7 @@ def test_custom_xpath_with_bullshit_arguments xml = " " doc = Nokogiri::XML.parse(xml) foo = doc.xpath( - "//foo[bool_function(bar/baz)]", + "//foo[nokogiri:bool_function(bar/baz)]", Class.new do def bool_function(value) true @@ -611,21 +557,21 @@ def collision(nodes) it "handles multiple handler function calls" do # test that jruby handles this case identically to C - result = @xml.xpath("//employee[thing(.)]/employeeId[another_thing(.)]", @handler) + result = @xml.xpath("//employee[nokogiri:thing(.)]/employeeId[nokogiri:another_thing(.)]", @handler) assert_equal(5, result.length) assert_equal(10, @handler.things.length) end it "doesn't get confused by an XPath function, flavor 1" do # test that it doesn't get confused by an XPath function - result = @xml.xpath("//employee[thing(.)]/employeeId[last()]", @handler) + result = @xml.xpath("//employee[nokogiri:thing(.)]/employeeId[last()]", @handler) assert_equal(5, result.length) assert_equal(5, @handler.things.length) end it "doesn't get confused by an XPath function, flavor 2" do # test that it doesn't get confused by an XPath function - result = @xml.xpath("//employee[last()]/employeeId[thing(.)]", @handler) + result = @xml.xpath("//employee[last()]/employeeId[nokogiri:thing(.)]", @handler) assert_equal(1, result.length) assert_equal(1, @handler.things.length) end From 892b2c2d6b6d33361ecf4a8575f251a55352c87a Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 28 Apr 2023 11:12:02 -0400 Subject: [PATCH 3/4] feat: xpath generated from css uses custom function namespace --- lib/nokogiri/css/xpath_visitor.rb | 4 ++-- test/css/test_xpath_visitor.rb | 40 +++++++++++++++---------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/nokogiri/css/xpath_visitor.rb b/lib/nokogiri/css/xpath_visitor.rb index c8d45cf9c72..ba63f3c4122 100644 --- a/lib/nokogiri/css/xpath_visitor.rb +++ b/lib/nokogiri/css/xpath_visitor.rb @@ -133,7 +133,7 @@ def visit_function(node) args += node.value[1..-1].map do |n| n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n end - "#{node.value.first}#{args.join(",")})" + "nokogiri:#{node.value.first}#{args.join(",")})" end end @@ -207,7 +207,7 @@ def visit_pseudo_class(node) when "parent" then "node()" when "root" then "not(parent::*)" else - node.value.first + "(.)" + "nokogiri:#{node.value.first}(.)" end end end diff --git a/test/css/test_xpath_visitor.rb b/test/css/test_xpath_visitor.rb index 31f82760a87..4dbd2ff3219 100644 --- a/test/css/test_xpath_visitor.rb +++ b/test/css/test_xpath_visitor.rb @@ -343,27 +343,27 @@ def assert_xpath(expecteds, asts) end it "miscellaneous pseudo-classes are converted into xpath function calls" do - assert_xpath("//a[aaron(.)]", parser.parse("a:aaron")) - assert_xpath("//a[aaron(.)]", parser.parse("a:aaron()")) - assert_xpath("//a[aaron(.,12)]", parser.parse("a:aaron(12)")) - assert_xpath("//a[aaron(.,12,1)]", parser.parse("a:aaron(12, 1)")) + assert_xpath("//a[nokogiri:aaron(.)]", parser.parse("a:aaron")) + assert_xpath("//a[nokogiri:aaron(.)]", parser.parse("a:aaron()")) + assert_xpath("//a[nokogiri:aaron(.,12)]", parser.parse("a:aaron(12)")) + assert_xpath("//a[nokogiri:aaron(.,12,1)]", parser.parse("a:aaron(12, 1)")) - assert_xpath("//a[link(.)]", parser.parse("a:link")) - assert_xpath("//a[visited(.)]", parser.parse("a:visited")) - assert_xpath("//a[hover(.)]", parser.parse("a:hover")) - assert_xpath("//a[active(.)]", parser.parse("a:active")) + assert_xpath("//a[nokogiri:link(.)]", parser.parse("a:link")) + assert_xpath("//a[nokogiri:visited(.)]", parser.parse("a:visited")) + assert_xpath("//a[nokogiri:hover(.)]", parser.parse("a:hover")) + assert_xpath("//a[nokogiri:active(.)]", parser.parse("a:active")) - assert_xpath("//a[foo(.,@href)]", parser.parse("a:foo(@href)")) - assert_xpath("//a[foo(.,@href,@id)]", parser.parse("a:foo(@href, @id)")) - assert_xpath("//a[foo(.,@a,b)]", parser.parse("a:foo(@a, b)")) - assert_xpath("//a[foo(.,a,@b)]", parser.parse("a:foo(a, @b)")) - assert_xpath("//a[foo(.,a,10)]", parser.parse("a:foo(a, 10)")) - assert_xpath("//a[foo(.,42)]", parser.parse("a:foo(42)")) - assert_xpath("//a[foo(.,'bar')]", parser.parse("a:foo('bar')")) + assert_xpath("//a[nokogiri:foo(.,@href)]", parser.parse("a:foo(@href)")) + assert_xpath("//a[nokogiri:foo(.,@href,@id)]", parser.parse("a:foo(@href, @id)")) + assert_xpath("//a[nokogiri:foo(.,@a,b)]", parser.parse("a:foo(@a, b)")) + assert_xpath("//a[nokogiri:foo(.,a,@b)]", parser.parse("a:foo(a, @b)")) + assert_xpath("//a[nokogiri:foo(.,a,10)]", parser.parse("a:foo(a, 10)")) + assert_xpath("//a[nokogiri:foo(.,42)]", parser.parse("a:foo(42)")) + assert_xpath("//a[nokogiri:foo(.,'bar')]", parser.parse("a:foo('bar')")) end it "bare pseudo-class matches any ident" do - assert_xpath("//*[link(.)]", parser.parse(":link")) + assert_xpath("//*[nokogiri:link(.)]", parser.parse(":link")) assert_xpath("//*[not(@id='foo')]", parser.parse(":not(#foo)")) assert_xpath("//*[count(preceding-sibling::*)=0]", parser.parse(":first-child")) end @@ -483,18 +483,18 @@ def visit_pseudo_class_aaron(node) it "handles pseudo-class with class selector" do assert_xpath( - "//a[active(.) and contains(concat(' ',normalize-space(@class),' '),' foo ')]", + "//a[nokogiri:active(.) and contains(concat(' ',normalize-space(@class),' '),' foo ')]", parser.parse("a:active.foo"), ) assert_xpath( - "//a[contains(concat(' ',normalize-space(@class),' '),' foo ') and active(.)]", + "//a[contains(concat(' ',normalize-space(@class),' '),' foo ') and nokogiri:active(.)]", parser.parse("a.foo:active"), ) end it "handles pseudo-class with an id selector" do - assert_xpath("//a[@id='foo' and active(.)]", parser.parse("a#foo:active")) - assert_xpath("//a[active(.) and @id='foo']", parser.parse("a:active#foo")) + assert_xpath("//a[@id='foo' and nokogiri:active(.)]", parser.parse("a#foo:active")) + assert_xpath("//a[nokogiri:active(.) and @id='foo']", parser.parse("a:active#foo")) end it "handles function with pseudo-class" do From 78b24e29ee5ce7f8769a980c50b7b78eaa8e137d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 28 Apr 2023 11:30:01 -0400 Subject: [PATCH 4/4] doc: update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bcc4d19201..60407e155dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ You can read more about this in the decision record at `adr/2023-04-libxml-memor * Passing a `Nokogiri::XML::Node` as the first parameter to `CDATA.new` is deprecated and will generate a warning. This parameter should be a kind of `Nokogiri::XML::Document`. This will become an error in a future version of Nokogiri. * Passing a `Nokogiri::XML::Node` as the first parameter to `Schema.from_document` is deprecated and will generate a warning. This parameter should be a kind of `Nokogiri::XML::Document`. This will become an error in a future version of Nokogiri. * Passing a `Nokogiri::XML::Node` as the second parameter to `Text.new` is deprecated and will generate a warning. This parameter should be a kind of `Nokogiri::XML::Document`. This will become an error in a future version of Nokogiri. +* [CRuby] Calling a custom XPath function without the `nokogiri` namespace is deprecated and will generate a warning. Support for non-namespaced functions will be removed in a future version of Nokogiri. (Note that JRuby has never supported non-namespaced custom XPath functions.) ### Performance