Skip to content

Commit

Permalink
Merge pull request #2148 from sparklemotion/1890-fix-jruby-xpath-name…
Browse files Browse the repository at this point in the history
…space-inference

fix(jruby): make less-naive inference of xpath custom function ns

---

**What problem is this PR intended to solve?**

Closes #1890
Related to #2147

JRuby's logic for inferring a namespace for the custom XPath function handler has been naive since it was introduced in 468c757. This PR makes it less naive and less likely to break things by looking for function calls in the query via regex, and looking for matches among the handler's declared Ruby methods.

Ideally we should require namespaces in both JRuby and CRuby implementations at some point to avoid having to do this kind of heavy lifting. See #2147 for a proposal.

**Have you included adequate test coverage?**

Sure.


**Does this change affect the behavior of either the C or the Java implementations?**

Only changes the JRuby implementation, to do a better job of matching the CRuby behavior.
  • Loading branch information
flavorjones authored Dec 27, 2020
2 parents a09b007 + e1461ce commit a24d9d4
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 19 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ This CVE's public notice is [#1915](https://github.com/sparklemotion/nokogiri/is
* [MRI] Address a memory leak when unparenting a DTD. [[#1784](https://github.com/sparklemotion/nokogiri/issues/1784)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!)
* [MRI] Use RbConfig::CONFIG instead of ::MAKEFILE_CONFIG to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!)
* [JRuby] Decrease large memory usage when making nested XPath queries. [[#1749](https://github.com/sparklemotion/nokogiri/issues/1749)]
* [JRuby] Fix how custom XPath function namespaces are inferred to be less naive. [#1890, #2148]
* [JRuby] Clarify exception message when custom XPath functions can't be resolved.
* [JRuby] Fix failing tests on JRuby 9.2.x
* [JRuby] Fix default namespaces in nodes reparented into a different document [[#1774](https://github.com/sparklemotion/nokogiri/issues/1774)]
* [JRuby] Fix support for Java 9. [[#1759](https://github.com/sparklemotion/nokogiri/issues/1759)] (Thanks, [@Taywee](https://github.com/Taywee)!)
Expand Down
63 changes: 47 additions & 16 deletions ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
package nokogiri;

import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.transform.TransformerException;

Expand Down Expand Up @@ -68,7 +70,6 @@
*/
@JRubyClass(name="Nokogiri::XML::XPathContext")
public class XmlXpathContext extends RubyObject {

static {
final String DTMManager = "org.apache.xml.dtm.DTMManager";
if (SafePropertyAccessor.getProperty(DTMManager) == null) {
Expand Down Expand Up @@ -109,25 +110,53 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyO
}
}


// see https://en.wikipedia.org/wiki/QName
private static final String NameStartCharStr = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" ;
private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ;
private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*";
private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())";
private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr);

@JRubyMethod
public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) {

String src = expr.convertToString().asJavaString();
if (!handler.isNil()) {
if (!isContainsPrefix(src)) {
StringBuilder replacement = new StringBuilder();
Set<String> methodNames = handler.getMetaClass().getMethods().keySet();
final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX;
for (String name : methodNames) {
replacement.setLength(0);
replacement.ensureCapacity(PREFIX.length() + 1 + name.length());
replacement.append(PREFIX).append(':').append(name);
src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name)
public IRubyObject evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) {
String query = rbQuery.convertToString().asJavaString();

if (!handler.isNil() && !isContainsPrefix(query)) {
//
// The user has passed in a handler, but isn't using the `nokogiri:` prefix as
// instructed in JRuby land, so let's try to be clever and rewrite the query, inserting
// the nokogiri namespace where appropriate.
//
StringBuilder namespacedQuery = new StringBuilder();
int jchar = 0;

// Find the methods on the handler object
Set<String> methodNames = handler.getMetaClass().getMethods().keySet();

// Find the function calls in the xpath query
Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query);

while (xpathFunctionCalls.find()) {
namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start()));
jchar = xpathFunctionCalls.start();

if (methodNames.contains(xpathFunctionCalls.group())) {
namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX);
namespacedQuery.append(":");
}

namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end()));
jchar = xpathFunctionCalls.end();
}

if (jchar < query.length()-1) {
namespacedQuery.append(query.subSequence(jchar, query.length()));
}
query = namespacedQuery.toString();
}

return node_set(context, src, handler);
return node_set(context, query, handler);
}

@JRubyMethod
Expand Down Expand Up @@ -162,7 +191,9 @@ private IRubyObject node_set(ThreadContext context, String expr, IRubyObject han
return tryGetNodeSet(context, expr, fnResolver);
}
catch (TransformerException ex) {
throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, expr, ex).toThrowable(); // Nokogiri::XML::XPath::SyntaxError
throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime,
(expr + ": " + ex.toString()),
ex).toThrowable();
}
}

Expand Down
2 changes: 1 addition & 1 deletion test/files/staff.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<employeeId>EMP0003</employeeId>
<name>Roger
Jones</name>
<position>Department Manager</position>
<position id='partial_collision_id'>Department Manager</position>
<salary>100,000</salary>
<gender>&ent4;</gender>
<address domestic="Yes" street="No">PO Box 27 Irving, texas 98553</address>
Expand Down
2 changes: 1 addition & 1 deletion test/xml/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def test_move_root_with_existing_root_gets_gcd

def test_validate
if Nokogiri.uses_libxml?
assert_equal 44, @xml.validate.length
assert_equal 45, @xml.validate.length
else
xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| cfg.dtdvalid }
assert_equal 40, xml.validate.length
Expand Down
2 changes: 1 addition & 1 deletion test/xml/test_dtd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_line
def test_validate
if Nokogiri.uses_libxml?
list = @xml.internal_subset.validate @xml
assert_equal 44, list.length
assert_equal 45, list.length
else
xml = Nokogiri::XML(File.open(XML_FILE)) {|cfg| cfg.dtdvalid}
list = xml.internal_subset.validate xml
Expand Down
40 changes: 40 additions & 0 deletions test/xml/test_xpath.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def thing thing
thing
end

def another_thing thing
@things << thing
thing
end

def returns_array node_set
@things << node_set.to_a
node_set.to_a
Expand Down Expand Up @@ -528,6 +533,41 @@ def test_xpath_syntax_error
end
end
end

describe "jruby inferring XPath functions from the handler methods" do
it "should not get confused simply by a string similarity" do
# https://github.com/sparklemotion/nokogiri/pull/1890
# this describes a bug where XmlXpathContext naively replaced query substrings using method names
handler = Class.new do
def collision(nodes)
nil
end
end.new
found_by_id = @xml.xpath("//*[@id='partial_collision_id']", handler)
assert_equal 1, found_by_id.length
end

it "handles multiple handler function calls" do
# test that jruby handles this case identically to C
result = @xml.xpath('//employee[thing(.)]/employeeId[another_thing(.)]', @handler)
assert_equal(5, result.length)
assert_equal(10, @handler.things.length)
end

it "doesn't get confused by an XPath function, flavor 1" do
# test that it doesn't get confused by an XPath function
result = @xml.xpath('//employee[thing(.)]/employeeId[last()]', @handler)
assert_equal(5, result.length)
assert_equal(5, @handler.things.length)
end

it "doesn't get confused by an XPath function, flavor 2" do
# test that it doesn't get confused by an XPath function
result = @xml.xpath('//employee[last()]/employeeId[thing(.)]', @handler)
assert_equal(1, result.length)
assert_equal(1, @handler.things.length)
end
end
end
end
end

0 comments on commit a24d9d4

Please sign in to comment.