Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(jruby): make less-naive inference of xpath custom function ns #2148

Merged
merged 3 commits into from
Dec 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ This CVE's public notice is [#1915](https://github.com/sparklemotion/nokogiri/is
* [MRI] Address a memory leak when unparenting a DTD. [[#1784](https://github.com/sparklemotion/nokogiri/issues/1784)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!)
* [MRI] Use RbConfig::CONFIG instead of ::MAKEFILE_CONFIG to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!)
* [JRuby] Decrease large memory usage when making nested XPath queries. [[#1749](https://github.com/sparklemotion/nokogiri/issues/1749)]
* [JRuby] Fix how custom XPath function namespaces are inferred to be less naive. [#1890, #2148]
* [JRuby] Clarify exception message when custom XPath functions can't be resolved.
* [JRuby] Fix failing tests on JRuby 9.2.x
* [JRuby] Fix default namespaces in nodes reparented into a different document [[#1774](https://github.com/sparklemotion/nokogiri/issues/1774)]
* [JRuby] Fix support for Java 9. [[#1759](https://github.com/sparklemotion/nokogiri/issues/1759)] (Thanks, [@Taywee](https://github.com/Taywee)!)
Expand Down
63 changes: 47 additions & 16 deletions ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
package nokogiri;

import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.transform.TransformerException;

Expand Down Expand Up @@ -68,7 +70,6 @@
*/
@JRubyClass(name="Nokogiri::XML::XPathContext")
public class XmlXpathContext extends RubyObject {

static {
final String DTMManager = "org.apache.xml.dtm.DTMManager";
if (SafePropertyAccessor.getProperty(DTMManager) == null) {
Expand Down Expand Up @@ -109,25 +110,53 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyO
}
}


// see https://en.wikipedia.org/wiki/QName
private static final String NameStartCharStr = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" ;
private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ;
private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*";
private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())";
private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr);

@JRubyMethod
public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) {

String src = expr.convertToString().asJavaString();
if (!handler.isNil()) {
if (!isContainsPrefix(src)) {
StringBuilder replacement = new StringBuilder();
Set<String> methodNames = handler.getMetaClass().getMethods().keySet();
final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX;
for (String name : methodNames) {
replacement.setLength(0);
replacement.ensureCapacity(PREFIX.length() + 1 + name.length());
replacement.append(PREFIX).append(':').append(name);
src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name)
public IRubyObject evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) {
String query = rbQuery.convertToString().asJavaString();

if (!handler.isNil() && !isContainsPrefix(query)) {
//
// The user has passed in a handler, but isn't using the `nokogiri:` prefix as
// instructed in JRuby land, so let's try to be clever and rewrite the query, inserting
// the nokogiri namespace where appropriate.
//
StringBuilder namespacedQuery = new StringBuilder();
int jchar = 0;

// Find the methods on the handler object
Set<String> methodNames = handler.getMetaClass().getMethods().keySet();

// Find the function calls in the xpath query
Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query);

while (xpathFunctionCalls.find()) {
namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start()));
jchar = xpathFunctionCalls.start();

if (methodNames.contains(xpathFunctionCalls.group())) {
namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX);
namespacedQuery.append(":");
}

namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end()));
jchar = xpathFunctionCalls.end();
}

if (jchar < query.length()-1) {
namespacedQuery.append(query.subSequence(jchar, query.length()));
}
query = namespacedQuery.toString();
}

return node_set(context, src, handler);
return node_set(context, query, handler);
}

@JRubyMethod
Expand Down Expand Up @@ -162,7 +191,9 @@ private IRubyObject node_set(ThreadContext context, String expr, IRubyObject han
return tryGetNodeSet(context, expr, fnResolver);
}
catch (TransformerException ex) {
throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, expr, ex).toThrowable(); // Nokogiri::XML::XPath::SyntaxError
throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime,
(expr + ": " + ex.toString()),
ex).toThrowable();
}
}

Expand Down
2 changes: 1 addition & 1 deletion test/files/staff.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<employeeId>EMP0003</employeeId>
<name>Roger
Jones</name>
<position>Department Manager</position>
<position id='partial_collision_id'>Department Manager</position>
<salary>100,000</salary>
<gender>&ent4;</gender>
<address domestic="Yes" street="No">PO Box 27 Irving, texas 98553</address>
Expand Down
2 changes: 1 addition & 1 deletion test/xml/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def test_move_root_with_existing_root_gets_gcd

def test_validate
if Nokogiri.uses_libxml?
assert_equal 44, @xml.validate.length
assert_equal 45, @xml.validate.length
else
xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| cfg.dtdvalid }
assert_equal 40, xml.validate.length
Expand Down
2 changes: 1 addition & 1 deletion test/xml/test_dtd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_line
def test_validate
if Nokogiri.uses_libxml?
list = @xml.internal_subset.validate @xml
assert_equal 44, list.length
assert_equal 45, list.length
else
xml = Nokogiri::XML(File.open(XML_FILE)) {|cfg| cfg.dtdvalid}
list = xml.internal_subset.validate xml
Expand Down
40 changes: 40 additions & 0 deletions test/xml/test_xpath.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def thing thing
thing
end

def another_thing thing
@things << thing
thing
end

def returns_array node_set
@things << node_set.to_a
node_set.to_a
Expand Down Expand Up @@ -528,6 +533,41 @@ def test_xpath_syntax_error
end
end
end

describe "jruby inferring XPath functions from the handler methods" do
it "should not get confused simply by a string similarity" do
# https://github.com/sparklemotion/nokogiri/pull/1890
# this describes a bug where XmlXpathContext naively replaced query substrings using method names
handler = Class.new do
def collision(nodes)
nil
end
end.new
found_by_id = @xml.xpath("//*[@id='partial_collision_id']", handler)
assert_equal 1, found_by_id.length
end

it "handles multiple handler function calls" do
# test that jruby handles this case identically to C
result = @xml.xpath('//employee[thing(.)]/employeeId[another_thing(.)]', @handler)
assert_equal(5, result.length)
assert_equal(10, @handler.things.length)
end

it "doesn't get confused by an XPath function, flavor 1" do
# test that it doesn't get confused by an XPath function
result = @xml.xpath('//employee[thing(.)]/employeeId[last()]', @handler)
assert_equal(5, result.length)
assert_equal(5, @handler.things.length)
end

it "doesn't get confused by an XPath function, flavor 2" do
# test that it doesn't get confused by an XPath function
result = @xml.xpath('//employee[last()]/employeeId[thing(.)]', @handler)
assert_equal(1, result.length)
assert_equal(1, @handler.things.length)
end
end
end
end
end