diff --git a/CHANGELOG.md b/CHANGELOG.md index 65d1570b6a5..d862e2dc51d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -299,6 +299,8 @@ This CVE's public notice is [#1915](https://github.com/sparklemotion/nokogiri/is * [MRI] Address a memory leak when unparenting a DTD. [[#1784](https://github.com/sparklemotion/nokogiri/issues/1784)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) * [MRI] Use RbConfig::CONFIG instead of ::MAKEFILE_CONFIG to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!) * [JRuby] Decrease large memory usage when making nested XPath queries. [[#1749](https://github.com/sparklemotion/nokogiri/issues/1749)] +* [JRuby] Fix how custom XPath function namespaces are inferred to be less naive. [#1890, #2148] +* [JRuby] Clarify exception message when custom XPath functions can't be resolved. * [JRuby] Fix failing tests on JRuby 9.2.x * [JRuby] Fix default namespaces in nodes reparented into a different document [[#1774](https://github.com/sparklemotion/nokogiri/issues/1774)] * [JRuby] Fix support for Java 9. [[#1759](https://github.com/sparklemotion/nokogiri/issues/1759)] (Thanks, [@Taywee](https://github.com/Taywee)!) diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index f0aa3213e1b..ad74dd44cbf 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -33,6 +33,8 @@ package nokogiri; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.xml.transform.TransformerException; @@ -68,7 +70,6 @@ */ @JRubyClass(name="Nokogiri::XML::XPathContext") public class XmlXpathContext extends RubyObject { - static { final String DTMManager = "org.apache.xml.dtm.DTMManager"; if (SafePropertyAccessor.getProperty(DTMManager) == null) { @@ -109,25 +110,53 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyO } } + + // see https://en.wikipedia.org/wiki/QName + private static final String NameStartCharStr = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" ; + private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ; + private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*"; + private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())"; + private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr); + @JRubyMethod - public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) { - - String src = expr.convertToString().asJavaString(); - if (!handler.isNil()) { - if (!isContainsPrefix(src)) { - StringBuilder replacement = new StringBuilder(); - Set methodNames = handler.getMetaClass().getMethods().keySet(); - final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX; - for (String name : methodNames) { - replacement.setLength(0); - replacement.ensureCapacity(PREFIX.length() + 1 + name.length()); - replacement.append(PREFIX).append(':').append(name); - src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name) + public IRubyObject evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) { + String query = rbQuery.convertToString().asJavaString(); + + if (!handler.isNil() && !isContainsPrefix(query)) { + // + // The user has passed in a handler, but isn't using the `nokogiri:` prefix as + // instructed in JRuby land, so let's try to be clever and rewrite the query, inserting + // the nokogiri namespace where appropriate. + // + StringBuilder namespacedQuery = new StringBuilder(); + int jchar = 0; + + // Find the methods on the handler object + Set methodNames = handler.getMetaClass().getMethods().keySet(); + + // Find the function calls in the xpath query + Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query); + + while (xpathFunctionCalls.find()) { + namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start())); + jchar = xpathFunctionCalls.start(); + + if (methodNames.contains(xpathFunctionCalls.group())) { + namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX); + namespacedQuery.append(":"); } + + namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end())); + jchar = xpathFunctionCalls.end(); + } + + if (jchar < query.length()-1) { + namespacedQuery.append(query.subSequence(jchar, query.length())); } + query = namespacedQuery.toString(); } - return node_set(context, src, handler); + return node_set(context, query, handler); } @JRubyMethod @@ -162,7 +191,9 @@ private IRubyObject node_set(ThreadContext context, String expr, IRubyObject han return tryGetNodeSet(context, expr, fnResolver); } catch (TransformerException ex) { - throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, expr, ex).toThrowable(); // Nokogiri::XML::XPath::SyntaxError + throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, + (expr + ": " + ex.toString()), + ex).toThrowable(); } } diff --git a/test/files/staff.xml b/test/files/staff.xml index fd635110add..acebb31d99a 100644 --- a/test/files/staff.xml +++ b/test/files/staff.xml @@ -35,7 +35,7 @@ EMP0003 Roger Jones - Department Manager + Department Manager 100,000 &ent4;
PO Box 27 Irving, texas 98553
diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 3bf417fb5e5..5d61b12991c 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -419,7 +419,7 @@ def test_move_root_with_existing_root_gets_gcd def test_validate if Nokogiri.uses_libxml? - assert_equal 44, @xml.validate.length + assert_equal 45, @xml.validate.length else xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| cfg.dtdvalid } assert_equal 40, xml.validate.length diff --git a/test/xml/test_dtd.rb b/test/xml/test_dtd.rb index 93e30d62ee2..4668a1fb984 100644 --- a/test/xml/test_dtd.rb +++ b/test/xml/test_dtd.rb @@ -150,7 +150,7 @@ def test_line def test_validate if Nokogiri.uses_libxml? list = @xml.internal_subset.validate @xml - assert_equal 44, list.length + assert_equal 45, list.length else xml = Nokogiri::XML(File.open(XML_FILE)) {|cfg| cfg.dtdvalid} list = xml.internal_subset.validate xml diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 80c65c81db4..878eae0049e 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -35,6 +35,11 @@ def thing thing thing end + def another_thing thing + @things << thing + thing + end + def returns_array node_set @things << node_set.to_a node_set.to_a @@ -528,6 +533,41 @@ def test_xpath_syntax_error end end end + + describe "jruby inferring XPath functions from the handler methods" do + it "should not get confused simply by a string similarity" do + # https://github.com/sparklemotion/nokogiri/pull/1890 + # this describes a bug where XmlXpathContext naively replaced query substrings using method names + handler = Class.new do + def collision(nodes) + nil + end + end.new + found_by_id = @xml.xpath("//*[@id='partial_collision_id']", handler) + assert_equal 1, found_by_id.length + end + + it "handles multiple handler function calls" do + # test that jruby handles this case identically to C + result = @xml.xpath('//employee[thing(.)]/employeeId[another_thing(.)]', @handler) + assert_equal(5, result.length) + assert_equal(10, @handler.things.length) + end + + it "doesn't get confused by an XPath function, flavor 1" do + # test that it doesn't get confused by an XPath function + result = @xml.xpath('//employee[thing(.)]/employeeId[last()]', @handler) + assert_equal(5, result.length) + assert_equal(5, @handler.things.length) + end + + it "doesn't get confused by an XPath function, flavor 2" do + # test that it doesn't get confused by an XPath function + result = @xml.xpath('//employee[last()]/employeeId[thing(.)]', @handler) + assert_equal(1, result.length) + assert_equal(1, @handler.things.length) + end + end end end end