From 937c9557fc182b83233669bf812aaec403e1f4c4 Mon Sep 17 00:00:00 2001 From: Chloe Date: Wed, 6 Oct 2021 11:30:53 -0700 Subject: [PATCH] Support match function as filter in SQL and PPL (#204) * supported match in sql and ppl where Signed-off-by: chloe-zh * added legacy syntax in new parser Signed-off-by: chloe-zh * added integration test Signed-off-by: chloe-zh * updated user manual Signed-off-by: chloe-zh * updated user manual Signed-off-by: chloe-zh * update Signed-off-by: chloe-zh * added ppl integ test, updated ppl manual Signed-off-by: chloe-zh * update Signed-off-by: chloe-zh * update Signed-off-by: chloe-zh * update Signed-off-by: chloe-zh --- .../sql/analysis/ExpressionAnalyzer.java | 7 + .../org/opensearch/sql/expression/DSL.java | 10 ++ .../sql/expression/ExpressionNodeVisitor.java | 4 + .../expression/NamedArgumentExpression.java | 48 +++++++ .../expression/config/ExpressionConfig.java | 2 + .../function/BuiltinFunctionName.java | 14 +- .../function/OpenSearchFunctions.java | 116 ++++++++++++++++ .../sql/analysis/ExpressionAnalyzerTest.java | 11 ++ .../expression/ExpressionNodeVisitorTest.java | 1 + .../NamedArgumentExpressionTest.java | 32 +++++ .../function/OpenSearchFunctionsTest.java | 131 ++++++++++++++++++ docs/category.json | 3 +- docs/user/dql/functions.rst | 52 +++++++ docs/user/ppl/functions/relevance.rst | 58 ++++++++ .../opensearch/sql/ppl/WhereCommandIT.java | 12 ++ .../sql/sql/RelevanceFunctionIT.java | 45 ++++++ .../plugin/OpenSearchSQLPluginConfig.java | 11 ++ .../script/filter/FilterQueryBuilder.java | 5 + .../script/filter/lucene/LuceneQuery.java | 20 ++- .../filter/lucene/relevance/MatchQuery.java | 91 ++++++++++++ .../script/filter/FilterQueryBuilderTest.java | 76 ++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 15 ++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 26 ++++ .../sql/ppl/parser/AstExpressionBuilder.java | 24 ++++ .../ppl/parser/AstExpressionBuilderTest.java | 18 +++ sql/src/main/antlr/OpenSearchSQLLexer.g4 | 14 ++ 
sql/src/main/antlr/OpenSearchSQLParser.g4 | 30 ++++ .../sql/sql/parser/AstExpressionBuilder.java | 26 ++++ .../sql/parser/AstExpressionBuilderTest.java | 17 +++ 29 files changed, 916 insertions(+), 3 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/expression/NamedArgumentExpression.java create mode 100644 core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java create mode 100644 core/src/test/java/org/opensearch/sql/expression/NamedArgumentExpressionTest.java create mode 100644 core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java create mode 100644 docs/user/ppl/functions/relevance.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/RelevanceFunctionIT.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MatchQuery.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 933e68085a..510c05f1aa 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.expression.Not; import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedAttribute; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.When; @@ -62,6 +63,7 @@ import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.NamedArgumentExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.AggregationState; 
import org.opensearch.sql.expression.aggregation.Aggregator; @@ -258,6 +260,11 @@ public Expression visitQualifiedName(QualifiedName node, AnalysisContext context return visitIdentifier(qualifierAnalyzer.unqualified(node), context); } + @Override + public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisContext context) { + return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); + } + private Expression visitIdentifier(String ident, AnalysisContext context) { TypeEnvironment typeEnv = context.peek(); ReferenceExpression ref = DSL.ref(ident, diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index af51d0898a..d7bd64a662 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -26,6 +26,7 @@ package org.opensearch.sql.expression; +import com.sun.tools.javac.util.List; import java.util.Arrays; import java.util.Collections; import lombok.RequiredArgsConstructor; @@ -128,6 +129,10 @@ public static NamedAggregator named(String name, Aggregator aggregator) { return new NamedAggregator(name, aggregator); } + public NamedArgumentExpression namedArgument(String argName, Expression value) { + return new NamedArgumentExpression(argName, value); + } + public FunctionExpression abs(Expression... expressions) { return function(BuiltinFunctionName.ABS, expressions); } @@ -650,4 +655,9 @@ public FunctionExpression castDatetime(Expression value) { return (FunctionExpression) repository .compile(BuiltinFunctionName.CAST_TO_DATETIME.getName(), Arrays.asList(value)); } + + public FunctionExpression match(Expression... 
args) { + return (FunctionExpression) repository + .compile(BuiltinFunctionName.MATCH.getName(), Arrays.asList(args.clone())); + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java index b8bf9de3da..96ee478351 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java @@ -106,4 +106,8 @@ public T visitWhen(WhenClause node, C context) { return visitFunction(node, context); } + public T visitNamedArgument(NamedArgumentExpression node, C context) { + return visitNode(node, context); + } + } diff --git a/core/src/main/java/org/opensearch/sql/expression/NamedArgumentExpression.java b/core/src/main/java/org/opensearch/sql/expression/NamedArgumentExpression.java new file mode 100644 index 0000000000..5305922031 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/NamedArgumentExpression.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + * + */ + +package org.opensearch.sql.expression; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.env.Environment; + +/** + * Named argument expression that represents function argument with name. 
+ */ +@RequiredArgsConstructor +@Getter +@EqualsAndHashCode +@ToString +public class NamedArgumentExpression implements Expression { + private final String argName; + private final Expression value; + + @Override + public ExprValue valueOf(Environment valueEnv) { + return value.valueOf(valueEnv); + } + + @Override + public ExprType type() { + return value.type(); + } + + @Override + public T accept(ExpressionNodeVisitor visitor, C context) { + return visitor.visitNamedArgument(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/config/ExpressionConfig.java b/core/src/main/java/org/opensearch/sql/expression/config/ExpressionConfig.java index 42e5283ad0..388e9faf0d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/config/ExpressionConfig.java +++ b/core/src/main/java/org/opensearch/sql/expression/config/ExpressionConfig.java @@ -32,6 +32,7 @@ import org.opensearch.sql.expression.datetime.DateTimeFunction; import org.opensearch.sql.expression.datetime.IntervalClause; import org.opensearch.sql.expression.function.BuiltinFunctionRepository; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.expression.operator.arthmetic.ArithmeticFunction; import org.opensearch.sql.expression.operator.arthmetic.MathematicalFunction; import org.opensearch.sql.expression.operator.convert.TypeCastOperator; @@ -64,6 +65,7 @@ public BuiltinFunctionRepository functionRepository() { WindowFunctions.register(builtinFunctionRepository); TextFunction.register(builtinFunctionRepository); TypeCastOperator.register(builtinFunctionRepository); + OpenSearchFunctions.register(builtinFunctionRepository); return builtinFunctionRepository; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index cd66825567..fadee9dc2d 100644 --- 
a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -187,7 +187,19 @@ public enum BuiltinFunctionName { CAST_TO_DATE(FunctionName.of("cast_to_date")), CAST_TO_TIME(FunctionName.of("cast_to_time")), CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")), - CAST_TO_DATETIME(FunctionName.of("cast_to_datetime")); + CAST_TO_DATETIME(FunctionName.of("cast_to_datetime")), + + /** + * Relevance Function. + */ + MATCH(FunctionName.of("match")), + + /** + * Legacy Relevance Function. + */ + QUERY(FunctionName.of("query")), + MATCH_QUERY(FunctionName.of("match_query")), + MATCHQUERY(FunctionName.of("matchquery")); private final FunctionName name; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java new file mode 100644 index 0000000000..f146f2379f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -0,0 +1,116 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ * + */ + +package org.opensearch.sql.expression.function; + +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.List; +import java.util.stream.Collectors; +import lombok.ToString; +import lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.env.Environment; + +@UtilityClass +public class OpenSearchFunctions { + public void register(BuiltinFunctionRepository repository) { + repository.register(match()); + } + + private static FunctionResolver match() { + FunctionName funcName = BuiltinFunctionName.MATCH.getName(); + return new FunctionResolver(funcName, + ImmutableMap.builder() + .put(new FunctionSignature(funcName, ImmutableList.of(STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList.of(STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList.of(STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, 
STRING, STRING, STRING, STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, + STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, + STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, + STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, + STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .put(new FunctionSignature(funcName, ImmutableList + .of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, + STRING, STRING, STRING, STRING, STRING)), + args -> new OpenSearchFunction(funcName, args)) + .build()); + } + + private static class OpenSearchFunction extends FunctionExpression { + private final FunctionName functionName; + private final List arguments; + + public OpenSearchFunction(FunctionName functionName, List arguments) { + super(functionName, arguments); + this.functionName = functionName; + this.arguments = arguments; + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + throw new UnsupportedOperationException(String.format( + "OpenSearch defined function [%s] is only supported in WHERE and HAVING clause.", + functionName)); + } + + @Override + public ExprType type() { + return ExprCoreType.BOOLEAN; + } + + @Override + 
public String toString() { + List args = arguments.stream() + .map(arg -> String.format("%s=%s", ((NamedArgumentExpression) arg) + .getArgName(), ((NamedArgumentExpression) arg).getValue().toString())) + .collect(Collectors.toList()); + return String.format("%s(%s)", functionName, String.join(", ", args)); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index b0b1e7e773..2486606d68 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -33,10 +33,12 @@ import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import org.junit.jupiter.api.Test; @@ -46,6 +48,7 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprValueUtils; @@ -318,6 +321,14 @@ public void filtered_distinct_count() { ); } + @Test + public void named_argument() { + assertAnalyzeEqual( + dsl.namedArgument("arg_name", DSL.literal("query")), + AstDSL.unresolvedArg("arg_name", 
stringLiteral("query")) + ); + } + protected Expression analyze(UnresolvedExpression unresolvedExpression) { return expressionAnalyzer.analyze(unresolvedExpression, analysisContext); } diff --git a/core/src/test/java/org/opensearch/sql/expression/ExpressionNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/expression/ExpressionNodeVisitorTest.java index 9c1671ae2b..64fb3e506e 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ExpressionNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ExpressionNodeVisitorTest.java @@ -64,6 +64,7 @@ void should_return_null_by_default() { INTEGER)).accept(visitor, null)); assertNull(new CaseClause(ImmutableList.of(), null).accept(visitor, null)); assertNull(new WhenClause(literal("test"), literal(10)).accept(visitor, null)); + assertNull(dsl.namedArgument("field", literal("message")).accept(visitor, null)); } @Test diff --git a/core/src/test/java/org/opensearch/sql/expression/NamedArgumentExpressionTest.java b/core/src/test/java/org/opensearch/sql/expression/NamedArgumentExpressionTest.java new file mode 100644 index 0000000000..a996e8afbb --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/NamedArgumentExpressionTest.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ * + */ + +package org.opensearch.sql.expression; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +public class NamedArgumentExpressionTest extends ExpressionTestBase { + @Test + void name_an_argument() { + LiteralExpression value = DSL.literal("search"); + NamedArgumentExpression namedArgument = dsl.namedArgument("query", value); + + assertEquals("query", namedArgument.getArgName()); + assertEquals(value.type(), namedArgument.type()); + assertEquals(value.valueOf(valueEnv()), namedArgument.valueOf(valueEnv())); + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java new file mode 100644 index 0000000000..cd70031069 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ * + */ + +package org.opensearch.sql.expression.function; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; + +import org.junit.jupiter.api.Test; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.ExpressionTestBase; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.NamedArgumentExpression; + +public class OpenSearchFunctionsTest extends ExpressionTestBase { + private final NamedArgumentExpression field = new NamedArgumentExpression( + "field", DSL.literal("message")); + private final NamedArgumentExpression query = new NamedArgumentExpression( + "query", DSL.literal("search query")); + private final NamedArgumentExpression analyzer = new NamedArgumentExpression( + "analyzer", DSL.literal("keyword")); + private final NamedArgumentExpression autoGenerateSynonymsPhrase = new NamedArgumentExpression( + "auto_generate_synonyms_phrase", DSL.literal("true")); + private final NamedArgumentExpression fuzziness = new NamedArgumentExpression( + "fuzziness", DSL.literal("AUTO")); + private final NamedArgumentExpression maxExpansions = new NamedArgumentExpression( + "max_expansions", DSL.literal("10")); + private final NamedArgumentExpression prefixLength = new NamedArgumentExpression( + "prefix_length", DSL.literal("1")); + private final NamedArgumentExpression fuzzyTranspositions = new NamedArgumentExpression( + "fuzzy_transpositions", DSL.literal("false")); + private final NamedArgumentExpression fuzzyRewrite = new NamedArgumentExpression( + "fuzzy_rewrite", DSL.literal("rewrite method")); + private final NamedArgumentExpression lenient = new NamedArgumentExpression( + "lenient", DSL.literal("true")); + private final NamedArgumentExpression operator = new NamedArgumentExpression( + "operator", DSL.literal("OR")); + private final NamedArgumentExpression 
minimumShouldMatch = new NamedArgumentExpression( + "minimum_should_match", DSL.literal("1")); + private final NamedArgumentExpression zeroTermsQuery = new NamedArgumentExpression( + "zero_terms_query", DSL.literal("ALL")); + private final NamedArgumentExpression boost = new NamedArgumentExpression( + "boost", DSL.literal("2.0")); + + @Test + void match() { + FunctionExpression expr = dsl.match(field, query); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match(field, query, analyzer); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match(field, query, analyzer, autoGenerateSynonymsPhrase); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match(field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match(field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite, lenient); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite, lenient, operator); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite, 
lenient, operator); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch, zeroTermsQuery); + assertEquals(BOOLEAN, expr.type()); + + expr = dsl.match( + field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength, + fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch, zeroTermsQuery, + boost); + assertEquals(BOOLEAN, expr.type()); + } + + @Test + void match_in_memory() { + FunctionExpression expr = dsl.match(field, query); + assertThrows(UnsupportedOperationException.class, + () -> expr.valueOf(valueEnv()), + "OpenSearch defined function [match] is only supported in WHERE and HAVING clause."); + } + + @Test + void match_to_string() { + FunctionExpression expr = dsl.match(field, query); + assertEquals("match(field=\"message\", query=\"search query\")", expr.toString()); + } +} diff --git a/docs/category.json b/docs/category.json index 45732bc47c..93b898f73d 100644 --- a/docs/category.json +++ b/docs/category.json @@ -20,7 +20,8 @@ "user/ppl/functions/math.rst", "user/ppl/functions/datetime.rst", "user/ppl/functions/string.rst", - "user/ppl/functions/condition.rst" + "user/ppl/functions/condition.rst", + "user/ppl/functions/relevance.rst" ], "sql_cli": [ "user/dql/expressions.rst", diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 7c78b7b23a..cd6be34383 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -2141,3 +2141,55 @@ Here are examples for searched case syntax:: | One | Hello | null | +-----------------+------------------+-----------+ + +RELEVANCE +========= 
+ +The relevance based functions enable users to search the index for documents by the relevance of the input query. The functions are built on top of the search queries of the OpenSearch engine, but in-memory execution within the plugin is not supported. These functions are able to perform the global filter of a query, for example the condition expression in a ``WHERE`` clause or in a ``HAVING`` clause. For more details of the relevance based search, check out the design here: `Relevance Based Search With SQL/PPL Query Engine <https://github.com/opensearch-project/sql/issues/182>`_ + +MATCH +----- + +Description +>>>>>>>>>>> + +``match(field_expression, query_expression[, option=<option_value>]*)`` + +The match function maps to the match query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field. Available parameters include: + +- analyzer +- auto_generate_synonyms_phrase_query +- fuzziness +- max_expansions +- prefix_length +- fuzzy_transpositions +- fuzzy_rewrite +- lenient +- operator +- minimum_should_match +- zero_terms_query +- boost + +Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values:: + + os> SELECT lastname, address FROM accounts WHERE match(address, 'Street'); + fetched rows / total rows = 2/2 + +------------+--------------------+ + | lastname | address | + |------------+--------------------| + | Bond | 671 Bristol Street | + | Bates | 789 Madison Street | + +------------+--------------------+ + + + +Another example to show how to set custom values for the optional parameters:: + + os> SELECT lastname FROM accounts WHERE match(firstname, 'Hattie', operator='AND', boost=2.0); + fetched rows / total rows = 1/1 + +------------+ + | lastname | + |------------| + | Bond | + +------------+ + diff --git a/docs/user/ppl/functions/relevance.rst b/docs/user/ppl/functions/relevance.rst new file mode 100644 index 0000000000..0ec4c979b4 --- /dev/null +++ b/docs/user/ppl/functions/relevance.rst @@ -0,0 +1,58 @@ 
+=================== +Relevance Functions +=================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 1 + +The relevance based functions enable users to search the index for documents by the relevance of the input query. The functions are built on top of the search queries of the OpenSearch engine, but in-memory execution within the plugin is not supported. These functions are able to perform the global filter of a query, for example the condition expression in a ``WHERE`` clause or in a ``HAVING`` clause. For more details of the relevance based search, check out the design here: `Relevance Based Search With SQL/PPL Query Engine <https://github.com/opensearch-project/sql/issues/182>`_ + +MATCH +----- + +Description +>>>>>>>>>>> + +``match(field_expression, query_expression[, option=<option_value>]*)`` + +The match function maps to the match query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field. Available parameters include: + +- analyzer +- auto_generate_synonyms_phrase_query +- fuzziness +- max_expansions +- prefix_length +- fuzzy_transpositions +- fuzzy_rewrite +- lenient +- operator +- minimum_should_match +- zero_terms_query +- boost + +Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values:: + + os> source=accounts | where match(address, 'Street') | fields lastname, address; + fetched rows / total rows = 2/2 + +------------+--------------------+ + | lastname | address | + |------------+--------------------| + | Bond | 671 Bristol Street | + | Bates | 789 Madison Street | + +------------+--------------------+ + + + +Another example to show how to set custom values for the optional parameters:: + + os> source=accounts | where match(firstname, 'Hattie', operator='AND', boost=2.0) | fields lastname; + fetched rows / total rows = 1/1 + +------------+ + | lastname | + |------------| + | Bond | + +------------+ + diff --git 
a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 0db090487e..9605cf6b24 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -27,6 +27,7 @@ package org.opensearch.sql.ppl; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -41,6 +42,7 @@ public class WhereCommandIT extends PPLIntegTestCase { public void init() throws IOException { loadIndex(Index.ACCOUNT); loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.BANK); } @Test @@ -118,4 +120,14 @@ public void testIsNotNullFunction() throws IOException { TEST_INDEX_BANK_WITH_NULL_VALUES)); verifyDataRows(result, rows("Amber JOHnny")); } + + @Test + public void testRelevanceFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where match(firstname, 'Hattie') | fields firstname", + TEST_INDEX_BANK)); + verifyDataRows(result, rows("Hattie")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/RelevanceFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/RelevanceFunctionIT.java new file mode 100644 index 0000000000..5977448c73 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/RelevanceFunctionIT.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ * + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class RelevanceFunctionIT extends SQLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.BANK); + } + + @Test + void match_in_where() throws IOException { + JSONObject result = executeQuery("SELECT firstname WHERE match(lastname, 'Bates')"); + verifySchema(result, schema("firstname", "text")); + verifyDataRows(result, rows("Nanette")); + } + + @Test + void match_in_having() throws IOException { + JSONObject result = executeQuery("SELECT lastname HAVING match(firstname, 'Nanette')"); + verifySchema(result, schema("lastname", "keyword")); + verifyDataRows(result, rows("Bates")); + } + +} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/OpenSearchSQLPluginConfig.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/OpenSearchSQLPluginConfig.java index c8b736a635..255f1f8a24 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/OpenSearchSQLPluginConfig.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/OpenSearchSQLPluginConfig.java @@ -32,6 +32,9 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.executor.ExecutionEngine; +import org.opensearch.sql.expression.config.ExpressionConfig; +import org.opensearch.sql.expression.function.BuiltinFunctionRepository; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.monitor.ResourceMonitor; import org.opensearch.sql.opensearch.client.OpenSearchClient; import 
org.opensearch.sql.opensearch.client.OpenSearchNodeClient; @@ -44,10 +47,14 @@ import org.opensearch.sql.storage.StorageEngine; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; /** * OpenSearch Plugin Config for SQL. */ +@Configuration +@Import({ExpressionConfig.class}) public class OpenSearchSQLPluginConfig { @Autowired private ClusterService clusterService; @@ -58,6 +65,9 @@ public class OpenSearchSQLPluginConfig { @Autowired private Settings settings; + @Autowired + private BuiltinFunctionRepository functionRepository; + @Bean public OpenSearchClient client() { return new OpenSearchNodeClient(clusterService, nodeClient); @@ -70,6 +80,7 @@ public StorageEngine storageEngine() { @Bean public ExecutionEngine executionEngine() { + OpenSearchFunctions.register(functionRepository); return new OpenSearchExecutionEngine(client(), protector()); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index 3bf748708b..92f0db3d0e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -50,6 +50,7 @@ import org.opensearch.sql.opensearch.storage.script.filter.lucene.RangeQuery.Comparison; import org.opensearch.sql.opensearch.storage.script.filter.lucene.TermQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.WildcardQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; @RequiredArgsConstructor @@ -71,6 +72,10 @@ public class 
FilterQueryBuilder extends ExpressionNodeVisitor analyzer = + (b, v) -> b.analyzer(v.stringValue()); + private final BiFunction synonymsPhrase = + (b, v) -> b.autoGenerateSynonymsPhraseQuery(Boolean.parseBoolean(v.stringValue())); + private final BiFunction fuzziness = + (b, v) -> b.fuzziness(v.stringValue()); + private final BiFunction maxExpansions = + (b, v) -> b.maxExpansions(Integer.parseInt(v.stringValue())); + private final BiFunction prefixLength = + (b, v) -> b.prefixLength(Integer.parseInt(v.stringValue())); + private final BiFunction fuzzyTranspositions = + (b, v) -> b.fuzzyTranspositions(Boolean.parseBoolean(v.stringValue())); + private final BiFunction fuzzyRewrite = + (b, v) -> b.fuzzyRewrite(v.stringValue()); + private final BiFunction lenient = + (b, v) -> b.lenient(Boolean.parseBoolean(v.stringValue())); + private final BiFunction operator = + (b, v) -> b.operator(Operator.fromString(v.stringValue())); + private final BiFunction minimumShouldMatch = + (b, v) -> b.minimumShouldMatch(v.stringValue()); + private final BiFunction zeroTermsQuery = + (b, v) -> b.zeroTermsQuery(org.opensearch.index.search.MatchQuery.ZeroTermsQuery.valueOf( + v.stringValue().toUpperCase(java.util.Locale.ROOT))); + private final BiFunction boost = + (b, v) -> b.boost(Float.parseFloat(v.stringValue())); + + ImmutableMap argAction = ImmutableMap.builder() + .put("analyzer", analyzer) + .put("auto_generate_synonyms_phrase_query", synonymsPhrase) + .put("fuzziness", fuzziness) + .put("max_expansions", maxExpansions) + .put("prefix_length", prefixLength) + .put("fuzzy_transpositions", fuzzyTranspositions) + .put("fuzzy_rewrite", fuzzyRewrite) + .put("lenient", lenient) + .put("operator", operator) + .put("minimum_should_match", minimumShouldMatch) + .put("zero_terms_query", zeroTermsQuery) + .put("boost", boost) + .build(); + + @Override + public QueryBuilder build(FunctionExpression func) { + Iterator iterator = func.getArguments().iterator(); + NamedArgumentExpression field = (NamedArgumentExpression)
iterator.next(); + NamedArgumentExpression query = (NamedArgumentExpression) iterator.next(); + MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery( + field.getValue().valueOf(null).stringValue(), + query.getValue().valueOf(null).stringValue()); + while (iterator.hasNext()) { + NamedArgumentExpression arg = (NamedArgumentExpression) iterator.next(); + if (!argAction.containsKey(arg.getArgName())) { + throw new SemanticCheckException(String + .format("Parameter %s is invalid for match function.", arg.getArgName())); + } + ((BiFunction) argAction + .get(arg.getArgName())) + .apply(queryBuilder, arg.getValue().valueOf(null)); + } + return queryBuilder; + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 658e10332e..ffbbb5feda 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -27,6 +27,7 @@ package org.opensearch.sql.opensearch.storage.script.filter; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; @@ -47,6 +48,7 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.FunctionExpression; @@ -276,6 +278,80 @@ void should_use_keyword_for_multi_field_in_like_expression() { ref("name", OPENSEARCH_TEXT_KEYWORD), literal("John%")))); } + @Test + void 
should_build_match_query_with_default_parameters() { + assertJsonEquals( + "{\n" + + " \"match\" : {\n" + + " \"message\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"operator\" : \"OR\",\n" + + " \"prefix_length\" : 0,\n" + + " \"max_expansions\" : 50,\n" + + " \"fuzzy_transpositions\" : true,\n" + + " \"lenient\" : false,\n" + + " \"zero_terms_query\" : \"NONE\",\n" + + " \"auto_generate_synonyms_phrase_query\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery( + dsl.match( + dsl.namedArgument("field", literal("message")), + dsl.namedArgument("query", literal("search query"))))); + } + + @Test + void should_build_match_query_with_custom_parameters() { + assertJsonEquals( + "{\n" + + " \"match\" : {\n" + + " \"message\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"operator\" : \"AND\",\n" + + " \"analyzer\" : \"keyword\"," + + " \"fuzziness\" : \"AUTO\"," + + " \"prefix_length\" : 0,\n" + + " \"max_expansions\" : 50,\n" + + " \"minimum_should_match\" : \"3\"," + + " \"fuzzy_rewrite\" : \"top_terms_N\"," + + " \"fuzzy_transpositions\" : false,\n" + + " \"lenient\" : false,\n" + + " \"zero_terms_query\" : \"ALL\",\n" + + " \"auto_generate_synonyms_phrase_query\" : true,\n" + + " \"boost\" : 2.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery( + dsl.match( + dsl.namedArgument("field", literal("message")), + dsl.namedArgument("query", literal("search query")), + dsl.namedArgument("operator", literal("AND")), + dsl.namedArgument("analyzer", literal("keyword")), + dsl.namedArgument("auto_generate_synonyms_phrase_query", literal("true")), + dsl.namedArgument("fuzziness", literal("AUTO")), + dsl.namedArgument("max_expansions", literal("50")), + dsl.namedArgument("prefix_length", literal("0")), + dsl.namedArgument("fuzzy_transpositions", literal("false")), + dsl.namedArgument("fuzzy_rewrite", literal("top_terms_N")), + dsl.namedArgument("lenient", literal("false")), + dsl.namedArgument("minimum_should_match", literal("3")), + 
dsl.namedArgument("zero_terms_query", literal("ALL")), + dsl.namedArgument("boost", literal("2.0"))))); + } + + @Test + void match_invalid_parameter() { + FunctionExpression expr = dsl.match( + dsl.namedArgument("field", literal("message")), + dsl.namedArgument("query", literal("search query")), + dsl.namedArgument("invalid_parameter", literal("invalid_value"))); + assertThrows(SemanticCheckException.class, () -> buildQuery(expr), + "Parameter invalid_parameter is invalid for match function."); + } + private static void assertJsonEquals(String expected, String actual) { assertTrue(new JSONObject(expected).similar(new JSONObject(actual)), StringUtils.format("Expected: %s, actual: %s", expected, actual)); diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index cb665f6c88..0e9fdd46cb 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -255,6 +255,21 @@ IFNULL: 'IFNULL'; NULLIF: 'NULLIF'; IF: 'IF'; +// RELEVANCE FUNCTIONS AND PARAMETERS +MATCH: 'MATCH'; +ANALYZER: 'ANALYZER'; +FUZZINESS: 'FUZZINESS'; +AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'; +MAX_EXPANSIONS: 'MAX_EXPANSIONS'; +PREFIX_LENGTH: 'PREFIX_LENGTH'; +FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS'; +FUZZY_REWRITE: 'FUZZY_REWRITE'; +LENIENT: 'LENIENT'; +OPERATOR: 'OPERATOR'; +MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH'; +ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY'; +BOOST: 'BOOST'; + // LITERALS AND VALUES //STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index fa3c5b64b7..b6c59b8057 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -161,6 +161,7 @@ logicalExpression | left=logicalExpression (AND)? 
right=logicalExpression #logicalAnd | left=logicalExpression XOR right=logicalExpression #logicalXor | booleanExpression #booleanExpr + | relevanceExpression #relevanceExpr ; comparisonExpression @@ -185,6 +186,12 @@ booleanExpression : booleanFunctionCall ; +relevanceExpression + : relevanceFunctionName LT_PRTHS + field=relevanceArgValue COMMA query=relevanceArgValue + (COMMA relevanceArg)* RT_PRTHS + ; + /** tables */ tableSource : qualifiedName @@ -245,6 +252,21 @@ functionArg : valueExpression ; +relevanceArg + : relevanceArgName EQUAL relevanceArgValue + ; + +relevanceArgName + : ANALYZER | FUZZINESS | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY | MAX_EXPANSIONS | PREFIX_LENGTH + | FUZZY_TRANSPOSITIONS | FUZZY_REWRITE | LENIENT | OPERATOR | MINIMUM_SHOULD_MATCH | ZERO_TERMS_QUERY + | BOOST + ; + +relevanceArgValue + : qualifiedName + | literalValue + ; + mathematicalFunctionBase : ABS | CEIL | CEILING | CONV | CRC32 | E | EXP | FLOOR | LN | LOG | LOG10 | LOG2 | MOD | PI |POW | POWER | RAND | ROUND | SIGN | SQRT | TRUNCATE @@ -281,6 +303,10 @@ binaryOperator : PLUS | MINUS | STAR | DIVIDE | MODULE ; +relevanceFunctionName + : MATCH + ; + /** literals and values*/ literalValue : intervalLiteral diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 7da4f90cf0..419b4ded07 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -50,12 +50,14 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.LogicalXorContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ParentheticBinaryArithmeticContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.PercentileAggFunctionContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RelevanceExpressionContext; import static 
org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsFunctionCallContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StringLiteralContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TableSourceContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.WcFieldExpressionContext; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.Collections; @@ -80,6 +82,7 @@ import org.opensearch.sql.ast.expression.Not; import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.Xor; import org.opensearch.sql.common.utils.StringUtils; @@ -245,6 +248,13 @@ public UnresolvedExpression visitEvalFunctionCall(EvalFunctionCallContext ctx) { .collect(Collectors.toList())); } + @Override + public UnresolvedExpression visitRelevanceExpression(RelevanceExpressionContext ctx) { + return new Function( + ctx.relevanceFunctionName().getText().toLowerCase(), + relevanceArguments(ctx)); + } + @Override public UnresolvedExpression visitTableSource(TableSourceContext ctx) { return visitIdentifiers(Arrays.asList(ctx)); @@ -303,4 +313,18 @@ private QualifiedName visitIdentifiers(List ctx) { ); } + private List relevanceArguments(RelevanceExpressionContext ctx) { + // all the arguments are defaulted to string values + // to skip environment resolving and function signature resolving + ImmutableList.Builder builder = ImmutableList.builder(); + builder.add(new UnresolvedArgument("field", + new Literal(StringUtils.unquoteText(ctx.field.getText()), DataType.STRING))); + builder.add(new UnresolvedArgument("query", + new 
Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); + ctx.relevanceArg().forEach(v -> builder.add(new UnresolvedArgument( + v.relevanceArgName().getText().toLowerCase(), new Literal(StringUtils.unquoteText( + v.relevanceArgValue().getText()), DataType.STRING)))); + return builder.build(); + } + } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index bfb975ba74..402023f68b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -30,6 +30,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.agg; import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static org.opensearch.sql.ast.dsl.AstDSL.allFields; import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.argument; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; @@ -58,6 +59,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.relation; import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; import static org.opensearch.sql.ast.dsl.AstDSL.xor; import org.junit.Ignore; @@ -645,4 +647,20 @@ public void canBuildKeywordsAsIdentInQualifiedName() { ) ); } + + @Test + public void canBuildRelevanceFunctionWithArguments() { + assertEqual( + "source=test | where match(message, 'test query', analyzer='keyword')", + filter( + relation("test"), + function( + "match", + unresolvedArg("field", stringLiteral("message")), + unresolvedArg("query", stringLiteral("test query")), + unresolvedArg("analyzer", stringLiteral("keyword")) + ) + ) + ); + } } diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 
b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 426c77cf06..2dc4ed9fb5 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -315,6 +315,20 @@ STRCMP: 'STRCMP'; // DATE AND TIME FUNCTIONS ADDDATE: 'ADDDATE'; +// RELEVANCE FUNCTIONS AND PARAMETERS +ANALYZER: 'ANALYZER'; +FUZZINESS: 'FUZZINESS'; +AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'; +MAX_EXPANSIONS: 'MAX_EXPANSIONS'; +PREFIX_LENGTH: 'PREFIX_LENGTH'; +FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS'; +FUZZY_REWRITE: 'FUZZY_REWRITE'; +LENIENT: 'LENIENT'; +OPERATOR: 'OPERATOR'; +MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH'; +ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY'; +BOOST: 'BOOST'; + // Operators // Operators. Arithmetics diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 61d2f5990f..f2f004396c 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -300,6 +300,7 @@ functionCall | windowFunctionClause #windowFunctionCall | aggregateFunction #aggregateFunctionCall | aggregateFunction (orderByClause)? 
filterClause #filteredAggregationFunctionCall + | relevanceFunction #relevanceFunctionCall ; scalarFunctionName @@ -317,6 +318,12 @@ specificFunction | CAST '(' expression AS convertedDataType ')' #dataTypeFunctionCall ; +relevanceFunction + : relevanceFunctionName LR_BRACKET + field=relevanceArgValue COMMA query=relevanceArgValue + (COMMA relevanceArg)* RR_BRACKET + ; + convertedDataType : typeName=DATE | typeName=TIME @@ -376,6 +383,14 @@ flowControlFunctionName : IF | IFNULL | NULLIF | ISNULL ; +relevanceFunctionName + : MATCH + ; + +legacyRelevanceFunctionName + : QUERY | MATCH_QUERY | MATCHQUERY + ; + functionArgs : functionArg (COMMA functionArg)* ; @@ -384,3 +399,18 @@ functionArg : expression ; +relevanceArg + : relevanceArgName EQUAL_SYMBOL relevanceArgValue + ; + +relevanceArgName + : ANALYZER | FUZZINESS | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY | MAX_EXPANSIONS | PREFIX_LENGTH + | FUZZY_TRANSPOSITIONS | FUZZY_REWRITE | LENIENT | OPERATOR | MINIMUM_SHOULD_MATCH | ZERO_TERMS_QUERY + | BOOST + ; + +relevanceArgValue + : qualifiedName + | constant + ; + diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 8dda63b750..db95805e79 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -53,6 +53,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.QualifiedNameContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RegexpPredicateContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RegularAggregateFunctionCallContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarFunctionCallContext; import static 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarWindowFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ShowDescribePatternContext; @@ -66,6 +67,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.WindowFunctionClauseContext; import static org.opensearch.sql.sql.parser.ParserUtils.createSortOption; +import com.google.common.collect.ImmutableList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -79,12 +81,15 @@ import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Cast; +import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.Interval; import org.opensearch.sql.ast.expression.IntervalUnit; +import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.Not; import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; @@ -363,6 +368,13 @@ public UnresolvedExpression visitConvertedDataType( return AstDSL.stringLiteral(ctx.getText()); } + @Override + public UnresolvedExpression visitRelevanceFunction(RelevanceFunctionContext ctx) { + return new Function( + ctx.relevanceFunctionName().getText().toLowerCase(), + relevanceArguments(ctx)); + } + private Function visitFunction(String functionName, FunctionArgsContext args) { if (args == null) { return new Function(functionName, Collections.emptyList()); @@ -385,4 +397,18 @@ private QualifiedName visitIdentifiers(List identifiers) { ); } + private List relevanceArguments(RelevanceFunctionContext ctx) { + // all the arguments are defaulted to string values + // to 
skip environment resolving and function signature resolving + ImmutableList.Builder builder = ImmutableList.builder(); + builder.add(new UnresolvedArgument("field", + new Literal(StringUtils.unquoteText(ctx.field.getText()), DataType.STRING))); + builder.add(new UnresolvedArgument("query", + new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); + ctx.relevanceArg().forEach(v -> builder.add(new UnresolvedArgument( + v.relevanceArgName().getText().toLowerCase(), new Literal(StringUtils.unquoteText( + v.relevanceArgValue().getText()), DataType.STRING)))); + return builder.build(); + } + } diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index e101eb9404..091c763e28 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -45,6 +45,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.timeLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.timestampLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; import static org.opensearch.sql.ast.dsl.AstDSL.when; import static org.opensearch.sql.ast.dsl.AstDSL.window; import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST; @@ -448,6 +449,22 @@ public void filteredDistinctCount() { ); } + @Test + public void relevanceMatch() { + assertEquals(AstDSL.function("match", + unresolvedArg("field", stringLiteral("message")), + unresolvedArg("query", stringLiteral("search query"))), + buildExprAst("match(message, 'search query')") + ); + + assertEquals(AstDSL.function("match", + unresolvedArg("field", stringLiteral("message")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("analyzer", stringLiteral("keyword")), + unresolvedArg("operator", 
stringLiteral("AND"))), + buildExprAst("match(message, 'search query', analyzer='keyword', operator='AND')")); + } + private Node buildExprAst(String expr) { OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(expr)); OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer));