Skip to content

Commit

Permalink
Merge pull request #2137 from sparklemotion/2135-faster-css-class-query
Browse files Browse the repository at this point in the history
speed up CSS class queries
  • Loading branch information
flavorjones authored Dec 18, 2020
2 parents 4cf6209 + 5d0b7fe commit ab7855e
Show file tree
Hide file tree
Showing 13 changed files with 830 additions and 519 deletions.
3 changes: 1 addition & 2 deletions ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ public IRubyObject register_variable(IRubyObject name, IRubyObject value) {
}

private IRubyObject node_set(ThreadContext context, String expr, IRubyObject handler) {
final NokogiriXPathFunctionResolver fnResolver =
handler.isNil() ? null : NokogiriXPathFunctionResolver.create(handler);
final NokogiriXPathFunctionResolver fnResolver = NokogiriXPathFunctionResolver.create(handler);
try {
return tryGetNodeSet(context, expr, fnResolver);
}
Expand Down
11 changes: 9 additions & 2 deletions ext/java/nokogiri/internals/NokogiriNamespaceContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,15 @@
*/
public final class NokogiriNamespaceContext implements NamespaceContext {

public static final String NOKOGIRI_PREFIX = "nokogiri";
/*
* these constants have matching declarations in
* ext/nokogiri/xml_xpath_context.c
*/
public static final String NOKOGIRI_PREFIX = "nokogiri";
public static final String NOKOGIRI_URI = "http://www.nokogiri.org/default_ns/ruby/extensions_functions";
public static final String NOKOGIRI_TEMPORARY_ROOT_TAG = "nokogiri-temporary-root-tag";

public static final String NOKOGIRI_BUILTIN_PREFIX = "nokogiri-builtin";
public static final String NOKOGIRI_BUILTIN_URI = "https://www.nokogiri.org/default_ns/ruby/builtins";

private final Map<String,String> register;

Expand All @@ -63,6 +69,7 @@ public static NokogiriNamespaceContext create() {
private NokogiriNamespaceContext() {
register = new HashMap<String, String>(6, 1);
register.put(NOKOGIRI_PREFIX, NOKOGIRI_URI);
register.put(NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
register.put("xml", "http://www.w3.org/XML/1998/namespace");
register.put("xhtml", "http://www.w3.org/1999/xhtml");
}
Expand Down
68 changes: 62 additions & 6 deletions ext/java/nokogiri/internals/NokogiriXPathFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import javax.xml.xpath.XPathFunction;
import javax.xml.xpath.XPathFunctionException;
import javax.xml.namespace.QName;

import org.jruby.Ruby;
import org.jruby.RubyArray;
Expand Down Expand Up @@ -64,14 +65,14 @@
public class NokogiriXPathFunction implements XPathFunction {

private final IRubyObject handler;
private final String name;
private final QName name;
private final int arity;

public static NokogiriXPathFunction create(IRubyObject handler, String name, int arity) {
public static NokogiriXPathFunction create(IRubyObject handler, QName name, int arity) {
return new NokogiriXPathFunction(handler, name, arity);
}

private NokogiriXPathFunction(IRubyObject handler, String name, int arity) {
private NokogiriXPathFunction(IRubyObject handler, QName name, int arity) {
this.handler = handler;
this.name = name;
this.arity = arity;
Expand All @@ -82,11 +83,20 @@ public Object evaluate(List args) throws XPathFunctionException {
throw new XPathFunctionException("arity does not match");
}

final Ruby runtime = this.handler.getRuntime();
ThreadContext context = runtime.getCurrentContext();
if (name.getNamespaceURI().equals(NokogiriNamespaceContext.NOKOGIRI_BUILTIN_URI)) {
if (name.getLocalPart().equals("css-class")) {
return builtinCssClass(args);
}
}

IRubyObject result = Helpers.invoke(context, this.handler, this.name, fromObjectToRubyArgs(runtime, args));
if (this.handler.isNil()) {
throw new XPathFunctionException("no custom function handler declared for '" + name + "'");
}

final Ruby runtime = this.handler.getRuntime();
ThreadContext context = runtime.getCurrentContext();
IRubyObject result = Helpers.invoke(context, this.handler, this.name.getLocalPart(),
fromObjectToRubyArgs(runtime, args));
return fromRubyToObject(runtime, result);
}

Expand Down Expand Up @@ -121,4 +131,50 @@ private static Object fromRubyToObject(final Ruby runtime, IRubyObject obj) {
}
/*if (o instanceof XmlNode)*/ return ((XmlNode) obj).getNode();
}

/**
 * Builtin XPath function: reports whether a whitespace-separated list of
 * tokens (typically the value of an HTML {@code class} attribute) contains
 * a given token.
 *
 * Both arguments are converted with {@code toString()}. The first is the
 * token list to search, the second is the token to look for. A candidate
 * only counts when it starts at the beginning of a token and is followed
 * by whitespace or the end of the list.
 *
 * @param args the two XPath arguments: token list, then token
 * @return true if the token is present, or if the token is empty
 * @throws XPathFunctionException if the argument count is not exactly two
 */
private static boolean builtinCssClass(List args) throws XPathFunctionException {
  if (args.size() != 2) {
    throw new XPathFunctionException("builtin function nokogiri:css-class takes two arguments");
  }

  final String classList = args.get(0).toString();
  final String className = args.get(1).toString();
  final int nameLength = className.length();

  if (nameLength == 0) {
    return true;
  }

  // No match can begin past this offset: what remains is shorter than the name.
  final int lastStart = classList.length() - nameLength;

  int pos = 0;
  while (pos <= lastStart) {
    if (classList.startsWith(className, pos)) {
      final int end = pos + nameLength;
      // Accept only a whole token: followed by end of input or whitespace.
      if (end == classList.length() || isWhitespace(classList.charAt(end))) {
        return true;
      }
    }

    // Skip the remainder of the current token ...
    while (pos <= lastStart && !isWhitespace(classList.charAt(pos))) {
      pos++;
    }

    // ... then the whitespace run, landing on the next token (or past lastStart).
    while (pos <= lastStart && isWhitespace(classList.charAt(pos))) {
      pos++;
    }
  }

  return false;
}

/**
 * Tells whether a character is one of the four blank characters: tab,
 * line feed, carriage return or space (see libxml2's xmlIsBlank_ch()).
 *
 * @param subject the character to classify
 * @return true for 0x09, 0x0A, 0x0D and 0x20; false for anything else
 */
private static boolean isWhitespace(char subject) {
  switch (subject) {
    case 0x09: // horizontal tab
    case 0x0A: // line feed
    case 0x0D: // carriage return
    case 0x20: // space
      return true;
    default:
      return false;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ public final class NokogiriXPathFunctionResolver implements XPathFunctionResolve

public static NokogiriXPathFunctionResolver create(IRubyObject handler) {
NokogiriXPathFunctionResolver freshResolver = new NokogiriXPathFunctionResolver();
freshResolver.setHandler(handler);
if (!handler.isNil()) {
freshResolver.setHandler(handler);
}
return freshResolver;
}

Expand All @@ -65,6 +67,6 @@ public void setHandler(IRubyObject handler) {
}

public XPathFunction resolveFunction(QName name, int arity) {
return NokogiriXPathFunction.create(handler, name.getLocalPart(), arity);
return NokogiriXPathFunction.create(handler, name, arity);
}
}
84 changes: 81 additions & 3 deletions ext/nokogiri/xml_xpath_context.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,86 @@
#include <xml_xpath_context.h>

/*
* these constants have matching declarations in
* ext/java/nokogiri/internals/NokogiriNamespaceContext.java
*/
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";

/*
 * Free callback for a wrapped XPath context: releases the libxml2
 * xmlXPathContext via xmlXPathFreeContext(). It is the function handed to
 * Data_Wrap_Struct() when the context object is created in this file.
 *
 * NOTE(review): NOKOGIRI_DEBUG_START/END are defined elsewhere; presumably
 * they only emit debug tracing around the free -- confirm.
 */
static void deallocate(xmlXPathContextPtr ctx)
{
NOKOGIRI_DEBUG_START(ctx);
xmlXPathFreeContext(ctx);
NOKOGIRI_DEBUG_END(ctx);
}

/*
 * Find a CSS class in an HTML element's `class` attribute.
 *
 * `str` is the whitespace-separated attribute value and `val` is the class
 * name to look for. A candidate only counts when it begins at the start of
 * a token and is followed by a blank character or the end of the string.
 *
 * Returns a pointer into `str` at the start of the match, `str` itself when
 * `val` is empty, or NULL when either argument is NULL or nothing matches.
 */
const xmlChar* builtin_css_class(const xmlChar* str, const xmlChar *val)
{
  const xmlChar *cursor;
  int needle_len;

  if ((str == NULL) || (val == NULL)) {
    return NULL;
  }

  needle_len = xmlStrlen(val);
  if (needle_len == 0) {
    return str;
  }

  for (cursor = str; *cursor != 0; ) {
    /* cheap first-byte test before the full comparison */
    if ((*cursor == *val) && (xmlStrncmp(cursor, val, needle_len) == 0)) {
      const xmlChar following = cursor[needle_len];

      /* whole-token match only: next byte is end of string or blank */
      if ((following == 0) || IS_BLANK_CH(following)) {
        return cursor;
      }
    }

    /* skip the rest of the current token */
    while ((*cursor != 0) && !IS_BLANK_CH(*cursor)) {
      cursor++;
    }

    /* skip blanks up to the next token or the end of the string */
    while ((*cursor != 0) && IS_BLANK_CH(*cursor)) {
      cursor++;
    }
  }

  return NULL;
}

/*
 * xmlXPathFunction wrapper around builtin_css_class().
 *
 * Expects exactly two arguments on the XPath value stack. The second
 * argument (the class name) is popped first, then the first argument (the
 * `class` attribute value). Each is cast to a string before being popped.
 * Pushes an XPath boolean: true when the class name is found, else false.
 *
 * CHECK_ARITY and XP_ERROR are libxml2 macros that may return from this
 * function early, so every object already popped is freed before XP_ERROR.
 */
static void xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
{
  xmlXPathObjectPtr class_attr;
  xmlXPathObjectPtr class_name;
  int found;

  CHECK_ARITY(2);

  /* top of stack: the class name being searched for */
  CAST_TO_STRING;
  class_name = valuePop(ctxt);
  if ((class_name == NULL) || (class_name->type != XPATH_STRING)) {
    xmlXPathFreeObject(class_name);
    XP_ERROR(XPATH_INVALID_TYPE);
  }

  /* next on stack: the attribute value to search in */
  CAST_TO_STRING;
  class_attr = valuePop(ctxt);
  if ((class_attr == NULL) || (class_attr->type != XPATH_STRING)) {
    xmlXPathFreeObject(class_attr);
    xmlXPathFreeObject(class_name);
    XP_ERROR(XPATH_INVALID_TYPE);
  }

  found = (builtin_css_class(class_attr->stringval, class_name->stringval) != NULL) ? 1 : 0;
  valuePush(ctxt, xmlXPathNewBoolean(found));

  xmlXPathFreeObject(class_attr);
  xmlXPathFreeObject(class_name);
}

/*
* call-seq:
* register_ns(prefix, uri)
Expand Down Expand Up @@ -261,14 +335,18 @@ static VALUE new(VALUE klass, VALUE nodeobj)
xmlXPathContextPtr ctx;
VALUE self;

xmlXPathInit();

Data_Get_Struct(nodeobj, xmlNode, node);

xmlXPathInit();

ctx = xmlXPathNewContext(node->doc);
ctx->node = node;

xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
xpath_builtin_css_class);

self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
/*rb_iv_set(self, "@xpath_handler", Qnil); */
return self;
}

Expand Down
6 changes: 3 additions & 3 deletions lib/nokogiri/css/parser.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true
#
# DO NOT MODIFY!!!!
# This file is automatically generated by Racc 1.4.16
# This file is automatically generated by Racc 1.5.1
# from Racc grammar file "".
#

Expand Down Expand Up @@ -476,7 +476,7 @@ def _reduce_26(val, _values, result)
end

def _reduce_27(val, _values, result)
# Non standard, but hpricot supports it.
# non-standard, from hpricot
result = Node.new(:PSEUDO_CLASS,
[Node.new(:FUNCTION, ['nth-child(', val[1]])]
)
Expand Down Expand Up @@ -558,7 +558,7 @@ def _reduce_40(val, _values, result)
when 'n'
result = Node.new(:NTH, ['1','n','+','0'])
else
# This is not CSS standard. It allows us to support this:
# non-standard to support custom functions:
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
# assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
# assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/css/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ rule
)
}
| LSQUARE NUMBER RSQUARE {
# Non standard, but hpricot supports it.
# non-standard, from hpricot
result = Node.new(:PSEUDO_CLASS,
[Node.new(:FUNCTION, ['nth-child(', val[1]])]
)
Expand Down Expand Up @@ -139,7 +139,7 @@ rule
when 'n'
result = Node.new(:NTH, ['1','n','+','0'])
else
# This is not CSS standard. It allows us to support this:
# non-standard to support custom functions:
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
# assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
# assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
Expand Down
Loading

0 comments on commit ab7855e

Please sign in to comment.