From 32d167be4eff59c1dbf4b1e3cbf5c9c8d8a87ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 7 Nov 2024 14:26:02 +0100 Subject: [PATCH] GH-5182 improve performance of SHACL sh:pattern --- .../PatternConstraintComponent.java | 54 ++++++++++++++++++- .../shacl/ast/planNodes/PatternFilter.java | 51 ++---------------- 2 files changed, 57 insertions(+), 48 deletions(-) diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/constraintcomponents/PatternConstraintComponent.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/constraintcomponents/PatternConstraintComponent.java index 7e1f164a87..8536421133 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/constraintcomponents/PatternConstraintComponent.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/constraintcomponents/PatternConstraintComponent.java @@ -17,6 +17,7 @@ import java.util.Objects; import java.util.Set; import java.util.function.Function; +import java.util.regex.Pattern; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; @@ -30,11 +31,16 @@ import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PatternFilter; import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PlanNode; import org.eclipse.rdf4j.sail.shacl.wrapper.data.ConnectionsGroup; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class PatternConstraintComponent extends AbstractSimpleConstraintComponent { + private static final Logger logger = LoggerFactory.getLogger(PatternConstraintComponent.class); + String pattern; String flags; + private final Pattern compiledPattern; public PatternConstraintComponent(String pattern, String flags) { super(); @@ -44,6 +50,52 @@ public PatternConstraintComponent(String pattern, String flags) { if (flags == null) { this.flags = ""; } + + if (flags != null && !flags.isEmpty()) { + int flag = 0b0; + + if (flags.contains("i")) { + flag = flag | Pattern.CASE_INSENSITIVE; + logger.trace("PatternFilter constructed with case insensitive flag"); + } + + if (flags.contains("d")) { + flag = flag | Pattern.UNIX_LINES; + logger.trace("PatternFilter constructed with UNIX lines flag"); + } + + if (flags.contains("m")) { + flag = flag | Pattern.MULTILINE; + logger.trace("PatternFilter constructed with multiline flag"); + } + + if (flags.contains("s")) { + flag = flag | Pattern.DOTALL; + logger.trace("PatternFilter constructed with dotall flag"); + } + + if (flags.contains("u")) { + flag = flag | Pattern.UNICODE_CASE; + logger.trace("PatternFilter constructed with unicode case flag"); + } + + if (flags.contains("x")) { + flag = flag | Pattern.COMMENTS; + logger.trace("PatternFilter constructed with comments flag"); + } + + if (flags.contains("U")) { + flag = flag | Pattern.UNICODE_CHARACTER_CLASS; + logger.trace("PatternFilter constructed with unicode character class flag"); + } + + this.compiledPattern = Pattern.compile(pattern, flag); + logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags); + + } else { + this.compiledPattern = Pattern.compile(pattern, 0b0); + logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern); + } } @Override @@ -87,7 +139,7 @@ public ConstraintComponent deepClone() { @Override Function getFilterAttacher(ConnectionsGroup connectionsGroup) { - return (parent) -> new PatternFilter(parent, pattern, flags, connectionsGroup); + return (parent) -> new PatternFilter(parent, compiledPattern, connectionsGroup); } @Override diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/PatternFilter.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/PatternFilter.java index c972ae0e3a..404cd983da 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/PatternFilter.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/PatternFilter.java @@ -34,53 +34,9 @@ public class PatternFilter extends FilterPlanNode { private final Pattern pattern; - public PatternFilter(PlanNode parent, String pattern, String flags, ConnectionsGroup connectionsGroup) { + public PatternFilter(PlanNode parent, Pattern pattern, ConnectionsGroup connectionsGroup) { super(parent, connectionsGroup); - if (flags != null && !flags.isEmpty()) { - int flag = 0b0; - - if (flags.contains("i")) { - flag = flag | Pattern.CASE_INSENSITIVE; - logger.trace("PatternFilter constructed with case insensitive flag"); - } - - if (flags.contains("d")) { - flag = flag | Pattern.UNIX_LINES; - logger.trace("PatternFilter constructed with UNIX lines flag"); - } - - if (flags.contains("m")) { - flag = flag | Pattern.MULTILINE; - logger.trace("PatternFilter constructed with multiline flag"); - } - - if (flags.contains("s")) { - flag = flag | Pattern.DOTALL; - logger.trace("PatternFilter constructed with dotall flag"); - } - - if (flags.contains("u")) { - flag = flag | Pattern.UNICODE_CASE; - logger.trace("PatternFilter constructed with unicode case flag"); - } - - if (flags.contains("x")) { - flag = flag | Pattern.COMMENTS; - logger.trace("PatternFilter constructed with comments flag"); - } - - if (flags.contains("U")) { - flag = flag | Pattern.UNICODE_CHARACTER_CLASS; - logger.trace("PatternFilter constructed with unicode character class flag"); - } - - this.pattern = Pattern.compile(pattern, flag); - logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags); - - } else { - this.pattern = Pattern.compile(pattern, 0b0); - logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern); - } + this.pattern = pattern; } private static Literal str(Value argValue, ValueFactory valueFactory) { @@ -104,8 +60,9 @@ boolean checkTuple(Reference t) { Value literal = t.get().getValue(); literal = str(literal, SimpleValueFactory.getInstance()); - if (literal == null) + if (literal == null) { return false; + } if (QueryEvaluationUtility.isStringLiteral(literal)) { boolean result = pattern.matcher(((Literal) literal).getLabel()).find();