Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-5182 improve performance of SHACL sh:pattern #5183

Merged
1 commit merged on Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
Expand All @@ -30,11 +31,16 @@
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PatternFilter;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PlanNode;
import org.eclipse.rdf4j.sail.shacl.wrapper.data.ConnectionsGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PatternConstraintComponent extends AbstractSimpleConstraintComponent {

private static final Logger logger = LoggerFactory.getLogger(PatternConstraintComponent.class);

String pattern;
String flags;
private final Pattern compiledPattern;

public PatternConstraintComponent(String pattern, String flags) {
super();
Expand All @@ -44,6 +50,52 @@ public PatternConstraintComponent(String pattern, String flags) {
if (flags == null) {
this.flags = "";
}

if (flags != null && !flags.isEmpty()) {
int flag = 0b0;

if (flags.contains("i")) {
flag = flag | Pattern.CASE_INSENSITIVE;
logger.trace("PatternFilter constructed with case insensitive flag");
}

if (flags.contains("d")) {
flag = flag | Pattern.UNIX_LINES;
logger.trace("PatternFilter constructed with UNIX lines flag");
}

if (flags.contains("m")) {
flag = flag | Pattern.MULTILINE;
logger.trace("PatternFilter constructed with multiline flag");
}

if (flags.contains("s")) {
flag = flag | Pattern.DOTALL;
logger.trace("PatternFilter constructed with dotall flag");
}

if (flags.contains("u")) {
flag = flag | Pattern.UNICODE_CASE;
logger.trace("PatternFilter constructed with unicode case flag");
}

if (flags.contains("x")) {
flag = flag | Pattern.COMMENTS;
logger.trace("PatternFilter constructed with comments flag");
}

if (flags.contains("U")) {
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
logger.trace("PatternFilter constructed with unicode character class flag");
}

this.compiledPattern = Pattern.compile(pattern, flag);
logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);

} else {
this.compiledPattern = Pattern.compile(pattern, 0b0);
logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
}
}

@Override
Expand Down Expand Up @@ -87,7 +139,7 @@ public ConstraintComponent deepClone() {

@Override
Function<PlanNode, FilterPlanNode> getFilterAttacher(ConnectionsGroup connectionsGroup) {
return (parent) -> new PatternFilter(parent, pattern, flags, connectionsGroup);
return (parent) -> new PatternFilter(parent, compiledPattern, connectionsGroup);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,53 +34,9 @@ public class PatternFilter extends FilterPlanNode {

private final Pattern pattern;

public PatternFilter(PlanNode parent, String pattern, String flags, ConnectionsGroup connectionsGroup) {
public PatternFilter(PlanNode parent, Pattern pattern, ConnectionsGroup connectionsGroup) {
super(parent, connectionsGroup);
if (flags != null && !flags.isEmpty()) {
int flag = 0b0;

if (flags.contains("i")) {
flag = flag | Pattern.CASE_INSENSITIVE;
logger.trace("PatternFilter constructed with case insensitive flag");
}

if (flags.contains("d")) {
flag = flag | Pattern.UNIX_LINES;
logger.trace("PatternFilter constructed with UNIX lines flag");
}

if (flags.contains("m")) {
flag = flag | Pattern.MULTILINE;
logger.trace("PatternFilter constructed with multiline flag");
}

if (flags.contains("s")) {
flag = flag | Pattern.DOTALL;
logger.trace("PatternFilter constructed with dotall flag");
}

if (flags.contains("u")) {
flag = flag | Pattern.UNICODE_CASE;
logger.trace("PatternFilter constructed with unicode case flag");
}

if (flags.contains("x")) {
flag = flag | Pattern.COMMENTS;
logger.trace("PatternFilter constructed with comments flag");
}

if (flags.contains("U")) {
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
logger.trace("PatternFilter constructed with unicode character class flag");
}

this.pattern = Pattern.compile(pattern, flag);
logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);

} else {
this.pattern = Pattern.compile(pattern, 0b0);
logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
}
this.pattern = pattern;
}

private static Literal str(Value argValue, ValueFactory valueFactory) {
Expand All @@ -104,8 +60,9 @@ boolean checkTuple(Reference t) {
Value literal = t.get().getValue();
literal = str(literal, SimpleValueFactory.getInstance());

if (literal == null)
if (literal == null) {
return false;
}

if (QueryEvaluationUtility.isStringLiteral(literal)) {
boolean result = pattern.matcher(((Literal) literal).getLabel()).find();
Expand Down
Loading