diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index fdf52688bf..2fcf5e7c6b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -662,6 +662,10 @@ public FunctionExpression multi_match(Expression... args) { return compile(BuiltinFunctionName.MULTI_MATCH, args); } + public FunctionExpression simple_query_string(Expression... args) { + return compile(BuiltinFunctionName.SIMPLE_QUERY_STRING, args); + } + private FunctionExpression compile(BuiltinFunctionName bfn, Expression... args) { return (FunctionExpression) repository.compile(bfn.getName(), Arrays.asList(args.clone())); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index de5d094f5a..b59c767828 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -187,6 +187,7 @@ public enum BuiltinFunctionName { * Relevance Function. 
*/ MATCH(FunctionName.of("match")), + SIMPLE_QUERY_STRING(FunctionName.of("simple_query_string")), MATCH_PHRASE(FunctionName.of("match_phrase")), MATCHPHRASE(FunctionName.of("matchphrase")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index d944b66b9e..6f41075224 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -30,6 +30,7 @@ public class OpenSearchFunctions { public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 5; public static final int MIN_NUM_PARAMETERS = 2; public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17; + public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14; /** * Add functions specific to OpenSearch to repository. @@ -37,6 +38,7 @@ public class OpenSearchFunctions { public void register(BuiltinFunctionRepository repository) { repository.register(match()); repository.register(multi_match()); + repository.register(simple_query_string()); // Register MATCHPHRASE as MATCH_PHRASE as well for backwards // compatibility. 
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE)); @@ -58,6 +60,11 @@ private static FunctionResolver multi_match() { return getRelevanceFunctionResolver(funcName, MULTI_MATCH_MAX_NUM_PARAMETERS, STRUCT); } + private static FunctionResolver simple_query_string() { + FunctionName funcName = BuiltinFunctionName.SIMPLE_QUERY_STRING.getName(); + return getRelevanceFunctionResolver(funcName, SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS, STRUCT); + } + private static FunctionResolver getRelevanceFunctionResolver( FunctionName funcName, int maxNumParameters, ExprCoreType firstArgType) { return new FunctionResolver(funcName, diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index 9a39a82874..f6fe679328 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -410,6 +410,51 @@ void multi_match_expression_two_fields() { AstDSL.unresolvedArg("query", stringLiteral("sample query")))); } + @Test + void simple_query_string_expression() { + assertAnalyzeEqual( + dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "field", ExprValueUtils.floatValue(1.F)))))), + dsl.namedArgument("query", DSL.literal("sample query"))), + AstDSL.function("simple_query_string", + AstDSL.unresolvedArg("fields", new RelevanceFieldList(Map.of( + "field", 1.F))), + AstDSL.unresolvedArg("query", stringLiteral("sample query")))); + } + + @Test + void simple_query_string_expression_with_params() { + assertAnalyzeEqual( + dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "field", ExprValueUtils.floatValue(1.F)))))), + dsl.namedArgument("query", DSL.literal("sample query")), + dsl.namedArgument("analyzer", 
DSL.literal("keyword"))), + AstDSL.function("simple_query_string", + AstDSL.unresolvedArg("fields", new RelevanceFieldList(Map.of( + "field", 1.F))), + AstDSL.unresolvedArg("query", stringLiteral("sample query")), + AstDSL.unresolvedArg("analyzer", stringLiteral("keyword")))); + } + + @Test + void simple_query_string_expression_two_fields() { + assertAnalyzeEqual( + dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "field1", ExprValueUtils.floatValue(1.F), + "field2", ExprValueUtils.floatValue(.3F)))))), + dsl.namedArgument("query", DSL.literal("sample query"))), + AstDSL.function("simple_query_string", + AstDSL.unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( + "field1", 1.F, "field2", .3F))), + AstDSL.unresolvedArg("query", stringLiteral("sample query")))); + } + protected Expression analyze(UnresolvedExpression unresolvedExpression) { return expressionAnalyzer.analyze(unresolvedExpression, analysisContext); } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java index 8559fd47b3..1c958819d3 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java @@ -161,4 +161,12 @@ void multi_match() { fields.getValue().toString(), query.getValue().toString()), expr.toString()); } + + @Test + void simple_query_string() { + FunctionExpression expr = dsl.simple_query_string(fields, query); + assertEquals(String.format("simple_query_string(fields=%s, query=%s)", + fields.getValue().toString(), query.getValue().toString()), + expr.toString()); + } } diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 5039d8ee00..4194dd15e9 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ 
-2283,3 +2283,53 @@ Another example to show how to set custom values for the optional parameters:: | firstname | lastname | city | address | |-------------+------------+--------+-----------| +-------------+------------+--------+-----------+ + +SIMPLE_QUERY_STRING +------------------- + +Description +>>>>>>>>>>> + +``simple_query_string([field_expression+], query_expression[, option=<option_value>]*)`` + +The simple_query_string function maps to the simple_query_string query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields. +The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows fields to be specified in double quotes, single quotes, backticks, or without any quoting. An all-fields search using the star ``"*"`` is also available (the star symbol should be wrapped in quotes). The weight is optional and should be specified after the field name; it can be delimited by the `caret` character or by whitespace.
Please refer to the examples below: + +| ``simple_query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)`` +| ``simple_query_string(["*"], ...)`` + +Available parameters include: + +- analyze_wildcard +- analyzer +- auto_generate_synonyms_phrase_query +- flags +- fuzziness +- fuzzy_max_expansions +- fuzzy_prefix_length +- fuzzy_transpositions +- lenient +- default_operator +- minimum_should_match +- quote_field_suffix +- boost + +Example with only ``fields`` and ``query`` expressions, with all other parameters set to their default values:: + + os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber | Nogal'); + fetched rows / total rows = 2/2 + +-------------+------------+--------+--------------------+ + | firstname | lastname | city | address | + |-------------+------------+--------+--------------------| + | Amber | Duke | Brogan | 880 Holmes Lane | + | Nanette | Bates | Nogal | 789 Madison Street | + +-------------+------------+--------+--------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND'); + fetched rows / total rows = 0/0 + +-------------+------------+--------+-----------+ + | firstname | lastname | city | address | + |-------------+------------+--------+-----------| + +-------------+------------+--------+-----------+ diff --git a/docs/user/ppl/functions/relevance.rst b/docs/user/ppl/functions/relevance.rst index 9841fedfc1..557ef19246 100644 --- a/docs/user/ppl/functions/relevance.rst +++ b/docs/user/ppl/functions/relevance.rst @@ -151,6 +151,57 @@ Another example to show how to set custom values for the optional parameters:: +-------------+------------+--------+-----------+ +SIMPLE_QUERY_STRING +------------------- + +Description +>>>>>>>>>>> +
+``simple_query_string([field_expression+], query_expression[, option=<option_value>]*)`` + +The simple_query_string function maps to the simple_query_string query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields. +The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows fields to be specified in double quotes, single quotes, backticks, or without any quoting. An all-fields search using the star ``"*"`` is also available (the star symbol should be wrapped in quotes). The weight is optional and should be specified after the field name; it can be delimited by the `caret` character or by whitespace. Please refer to the examples below: + +| ``simple_query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)`` +| ``simple_query_string(["*"], ...)`` + + +Available parameters include: + +- analyze_wildcard +- analyzer +- auto_generate_synonyms_phrase_query +- flags +- fuzziness +- fuzzy_max_expansions +- fuzzy_prefix_length +- fuzzy_transpositions +- lenient +- default_operator +- minimum_should_match +- quote_field_suffix +- boost + +Example with only ``fields`` and ``query`` expressions, with all other parameters set to their default values:: + + os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber | Nogal') | fields firstname, lastname, city, address; + fetched rows / total rows = 2/2 + +-------------+------------+--------+--------------------+ + | firstname | lastname | city | address | + |-------------+------------+--------+--------------------| + | Amber | Duke | Brogan | 880 Holmes Lane | + | Nanette | Bates | Nogal | 789 Madison Street | + +-------------+------------+--------+--------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', 
analyzer=keyword, default_operator='AND') | fields firstname, lastname, city, address; + fetched rows / total rows = 0/0 + +-------------+------------+--------+-----------+ + | firstname | lastname | city | address | + |-------------+------------+--------+-----------| + +-------------+------------+--------+-----------+ + Limitations >>>>>>>>>>> diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java index 53c6a933f3..4e34daf4d5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java @@ -22,7 +22,7 @@ public void init() throws IOException { } @Test - public void test1() throws IOException { + public void multi_match() throws IOException { String query = "SOURCE=" + TEST_INDEX_BEER + " | WHERE multi_match([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')"; var result = executeQuery(query); @@ -30,7 +30,15 @@ public void test1() throws IOException { } @Test - public void verify_wildcard_test() throws IOException { + public void simple_query_string() throws IOException { + String query = "SOURCE=" + TEST_INDEX_BEER + + " | WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')"; + var result = executeQuery(query); + assertNotEquals(0, result.getInt("total")); + } + + @Test + public void verify_wildcard_multi_match() throws IOException { String query1 = "SOURCE=" + TEST_INDEX_BEER + " | WHERE multi_match(['Tags'], 'taste')"; var result1 = executeQuery(query1); @@ -39,4 +47,15 @@ public void verify_wildcard_test() throws IOException { var result2 = executeQuery(query2); assertNotEquals(result2.getInt("total"), result1.getInt("total")); } + + @Test + public void verify_wildcard_simple_query_string() throws IOException { + String query1 = "SOURCE=" + TEST_INDEX_BEER + + " | WHERE simple_query_string(['Tags'], 'taste')"; + var result1 = 
executeQuery(query1); + String query2 = "SOURCE=" + TEST_INDEX_BEER + + " | WHERE simple_query_string(['T*'], 'taste')"; + var result2 = executeQuery(query2); + assertNotEquals(result2.getInt("total"), result1.getInt("total")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/SimpleQueryStringIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/SimpleQueryStringIT.java new file mode 100644 index 0000000000..ec1f804a43 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/SimpleQueryStringIT.java @@ -0,0 +1,47 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class SimpleQueryStringIT extends SQLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.BEER); + } + + /* + The 'beer.stackexchange' index is a dump of beer.stackexchange.com converted to the format which might be ingested by OpenSearch. + This is a forum like StackOverflow with questions about beer brewing. The dump contains both questions, answers and comments. + The reference query is: + select count(Id) from beer.stackexchange where simple_query_string(["Tags" ^ 1.5, Title, `Body` 4.2], 'taste') and Tags like '% % %' and Title like '%'; + It filters out empty `Tags` and `Title`. 
+ */ + + @Test + public void test1() throws IOException { + String query = "SELECT count(*) FROM " + + TEST_INDEX_BEER + " WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')"; + var result = new JSONObject(executeQuery(query, "jdbc")); + assertNotEquals(0, result.getInt("total")); + } + + @Test + public void verify_wildcard_test() throws IOException { + String query1 = "SELECT count(*) FROM " + + TEST_INDEX_BEER + " WHERE simple_query_string(['Tags'], 'taste')"; + var result1 = new JSONObject(executeQuery(query1, "jdbc")); + String query2 = "SELECT count(*) FROM " + + TEST_INDEX_BEER + " WHERE simple_query_string(['T*'], 'taste')"; + var result2 = new JSONObject(executeQuery(query2, "jdbc")); + assertNotEquals(result2.getInt("total"), result1.getInt("total")); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index 55f1e84019..3bba48d24c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -32,6 +32,7 @@ import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MultiMatchQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.SimpleQueryStringQuery; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; @RequiredArgsConstructor @@ -60,6 +61,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor { + /** + * Default constructor for SimpleQueryString configures how RelevanceQuery.build() handles + * named arguments. 
+ */ + public SimpleQueryStringQuery() { + super(ImmutableMap.>builder() + .put("analyze_wildcard", (b, v) -> b.analyzeWildcard(Boolean.parseBoolean(v.stringValue()))) + .put("analyzer", (b, v) -> b.analyzer(v.stringValue())) + .put("auto_generate_synonyms_phrase_query", (b, v) -> + b.autoGenerateSynonymsPhraseQuery(Boolean.parseBoolean(v.stringValue()))) + .put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue()))) + .put("default_operator", (b, v) -> b.defaultOperator(Operator.fromString(v.stringValue()))) + .put("flags", (b, v) -> b.flags(SimpleQueryStringFlag.valueOf(v.stringValue()))) + .put("fuzzy_max_expansions", (b, v) -> + b.fuzzyMaxExpansions(Integer.parseInt(v.stringValue()))) + .put("fuzzy_prefix_length", (b, v) -> + b.fuzzyPrefixLength(Integer.parseInt(v.stringValue()))) + .put("fuzzy_transpositions", (b, v) -> + b.fuzzyTranspositions(Boolean.parseBoolean(v.stringValue()))) + .put("lenient", (b, v) -> b.lenient(Boolean.parseBoolean(v.stringValue()))) + .put("minimum_should_match", (b, v) -> b.minimumShouldMatch(v.stringValue())) + .put("quote_field_suffix", (b, v) -> b.quoteFieldSuffix(v.stringValue())) + .build()); + } + + @Override + public QueryBuilder build(FunctionExpression func) { + if (func.getArguments().size() < 2) { + throw new SemanticCheckException("'simple_query_string' must have at least two arguments"); + } + Iterator iterator = func.getArguments().iterator(); + var fields = (NamedArgumentExpression) iterator.next(); + var query = (NamedArgumentExpression) iterator.next(); + // Fields is a map already, but we need to convert types. 
+ var fieldsAndWeights = fields + .getValue() + .valueOf(null) + .tupleValue() + .entrySet() + .stream() + .collect(ImmutableMap.toImmutableMap(e -> e.getKey(), e -> e.getValue().floatValue())); + + SimpleQueryStringBuilder queryBuilder = createQueryBuilder(null, + query.getValue().valueOf(null).stringValue()) + .fields(fieldsAndWeights); + while (iterator.hasNext()) { + NamedArgumentExpression arg = (NamedArgumentExpression) iterator.next(); + if (!queryBuildActions.containsKey(arg.getArgName())) { + throw new SemanticCheckException( + String.format("Parameter %s is invalid for %s function.", + arg.getArgName(), queryBuilder.getWriteableName())); + } + (Objects.requireNonNull( + queryBuildActions + .get(arg.getArgName()))) + .apply(queryBuilder, arg.getValue().valueOf(null)); + } + return queryBuilder; + } + + @Override + protected SimpleQueryStringBuilder createQueryBuilder(String field, String query) { + return QueryBuilders.simpleQueryStringQuery(query); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 1501f38a74..4904490fd7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -532,6 +532,121 @@ void should_build_match_phrase_query_with_custom_parameters() { dsl.namedArgument("zero_terms_query", literal("ALL"))))); } + @Test + // Notes for following three tests: + // 1) OpenSearch (not the plugin) might change order of fields + // 2) `flags` are printed by OpenSearch as an integer + // 3) `minimum_should_match` printed as a string + void should_build_simple_query_string_query_with_default_parameters_single_field() { + assertJsonEquals("{\n" + + " \"simple_query_string\" : {\n" + + " \"query\" : 
\"search query\",\n" + + " \"fields\" : [\n" + + " \"field1^1.0\"\n" + + " ],\n" + + " \"default_operator\" : \"or\",\n" + + " \"analyze_wildcard\" : false,\n" + + " \"auto_generate_synonyms_phrase_query\" : true,\n" + + " \"flags\" : -1,\n" + + " \"fuzzy_max_expansions\" : 50,\n" + + " \"fuzzy_prefix_length\" : 0,\n" + + " \"fuzzy_transpositions\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery(dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal(new ExprTupleValue( + new LinkedHashMap<>(ImmutableMap.of( + "field1", ExprValueUtils.floatValue(1.F)))))), + dsl.namedArgument("query", literal("search query"))))); + } + + @Test + void should_build_simple_query_string_query_with_default_parameters_multiple_fields() { + var expected = "{\n" + + " \"simple_query_string\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"fields\" : [%s],\n" + + " \"default_operator\" : \"or\",\n" + + " \"analyze_wildcard\" : false,\n" + + " \"auto_generate_synonyms_phrase_query\" : true,\n" + + " \"flags\" : -1,\n" + + " \"fuzzy_max_expansions\" : 50,\n" + + " \"fuzzy_prefix_length\" : 0,\n" + + " \"fuzzy_transpositions\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}"; + var actual = buildQuery(dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal(new ExprTupleValue( + new LinkedHashMap<>(ImmutableMap.of( + "field1", ExprValueUtils.floatValue(1.F), + "field2", ExprValueUtils.floatValue(.3F)))))), + dsl.namedArgument("query", literal("search query")))); + + var ex1 = String.format(expected, "\"field1^1.0\", \"field2^0.3\""); + var ex2 = String.format(expected, "\"field2^0.3\", \"field1^1.0\""); + assertTrue(new JSONObject(ex1).similar(new JSONObject(actual)) + || new JSONObject(ex2).similar(new JSONObject(actual)), + StringUtils.format("Actual %s doesn't match neither expected %s nor %s", actual, ex1, ex2)); + } + + @Test + void should_build_simple_query_string_query_with_custom_parameters() { + var expected = "{\n" + + " 
\"simple_query_string\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"fields\" : [%s],\n" + + " \"analyze_wildcard\" : true,\n" + + " \"analyzer\" : \"keyword\",\n" + + " \"auto_generate_synonyms_phrase_query\" : false,\n" + + " \"default_operator\" : \"and\",\n" + + " \"flags\" : 1,\n" + + " \"fuzzy_max_expansions\" : 10,\n" + + " \"fuzzy_prefix_length\" : 2,\n" + + " \"fuzzy_transpositions\" : false,\n" + + " \"lenient\" : false,\n" + + " \"minimum_should_match\" : \"3\",\n" + + " \"boost\" : 2.0\n" + + " }\n" + + "}"; + var actual = buildQuery( + dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal( + ExprValueUtils.tupleValue(ImmutableMap.of("field1", 1.F, "field2", .3F)))), + dsl.namedArgument("query", literal("search query")), + dsl.namedArgument("analyze_wildcard", literal("true")), + dsl.namedArgument("analyzer", literal("keyword")), + dsl.namedArgument("auto_generate_synonyms_phrase_query", literal("false")), + dsl.namedArgument("default_operator", literal("AND")), + dsl.namedArgument("flags", literal("AND")), + dsl.namedArgument("fuzzy_max_expansions", literal("10")), + dsl.namedArgument("fuzzy_prefix_length", literal("2")), + dsl.namedArgument("fuzzy_transpositions", literal("false")), + dsl.namedArgument("lenient", literal("false")), + dsl.namedArgument("minimum_should_match", literal("3")), + dsl.namedArgument("boost", literal("2.0")))); + + var ex1 = String.format(expected, "\"field1^1.0\", \"field2^0.3\""); + var ex2 = String.format(expected, "\"field2^0.3\", \"field1^1.0\""); + assertTrue(new JSONObject(ex1).similar(new JSONObject(actual)) + || new JSONObject(ex2).similar(new JSONObject(actual)), + StringUtils.format("Actual %s doesn't match neither expected %s nor %s", actual, ex1, ex2)); + } + + @Test + void simple_query_string_invalid_parameter() { + FunctionExpression expr = dsl.simple_query_string( + dsl.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "field1", 
ExprValueUtils.floatValue(1.F), + "field2", ExprValueUtils.floatValue(.3F)))))), + dsl.namedArgument("query", literal("search query")), + dsl.namedArgument("invalid_parameter", literal("invalid_value"))); + assertThrows(SemanticCheckException.class, () -> buildQuery(expr), + "Parameter invalid_parameter is invalid for match function."); + } + @Test void match_phrase_invalid_parameter() { FunctionExpression expr = dsl.match_phrase( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/SimpleQueryStringTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/SimpleQueryStringTest.java new file mode 100644 index 0000000000..746d8e5c75 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/SimpleQueryStringTest.java @@ -0,0 +1,173 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.google.common.collect.ImmutableMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.LiteralExpression; 
+import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.config.ExpressionConfig; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.SimpleQueryStringQuery; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class SimpleQueryStringTest { + private static final DSL dsl = new ExpressionConfig() + .dsl(new ExpressionConfig().functionRepository()); + private final SimpleQueryStringQuery simpleQueryStringQuery = new SimpleQueryStringQuery(); + private final FunctionName simpleQueryString = FunctionName.of("simple_query_string"); + private static final LiteralExpression fields_value = DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "title", ExprValueUtils.floatValue(1.F), + "body", ExprValueUtils.floatValue(.3F))))); + private static final LiteralExpression query_value = DSL.literal("query_value"); + + static Stream> generateValidData() { + return Stream.of( + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("analyze_wildcard", DSL.literal("true")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("analyzer", DSL.literal("standard")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("auto_generate_synonyms_phrase_query", DSL.literal("true")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("flags", DSL.literal("PREFIX")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + 
dsl.namedArgument("fuzzy_max_expansions", DSL.literal("42")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("fuzzy_prefix_length", DSL.literal("42")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("fuzzy_transpositions", DSL.literal("42")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("lenient", DSL.literal("true")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("default_operator", DSL.literal("AND")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("minimum_should_match", DSL.literal("4")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("quote_field_suffix", DSL.literal(".exact")) + ), + List.of( + dsl.namedArgument("fields", fields_value), + dsl.namedArgument("query", query_value), + dsl.namedArgument("boost", DSL.literal("1")) + ) + ); + } + + /** Runs once per argument list supplied by the generateValidData method source and asserts that building a query from it returns a non-null result. */ @ParameterizedTest + @MethodSource("generateValidData") + public void test_valid_parameters(List validArgs) { + Assertions.assertNotNull(simpleQueryStringQuery.build( + new SimpleQueryStringExpression(validArgs))); + } + + /** Expects build to throw SemanticCheckException when the function expression carries an empty argument list. */ @Test + public void test_SemanticCheckException_when_no_arguments() { + List arguments = List.of(); + assertThrows(SemanticCheckException.class, + () -> simpleQueryStringQuery.build(new SimpleQueryStringExpression(arguments))); + } + + /** Expects build to throw SemanticCheckException when only the fields argument is supplied. */ @Test + public void test_SemanticCheckException_when_one_argument() { + List arguments = List.of(namedArgument("fields", fields_value)); + assertThrows(SemanticCheckException.class, + () -> simpleQueryStringQuery.build(new SimpleQueryStringExpression(arguments))); + } + + /** Expects build to throw SemanticCheckException when an argument named unsupported follows the fields and query arguments. */ @Test + public void 
test_SemanticCheckException_when_invalid_parameter() { + List arguments = List.of( + namedArgument("fields", fields_value), + namedArgument("query", query_value), + namedArgument("unsupported", "unsupported_value")); + Assertions.assertThrows(SemanticCheckException.class, + () -> simpleQueryStringQuery.build(new SimpleQueryStringExpression(arguments))); + } + + /** Convenience overload: wraps the string value with DSL.literal and delegates to dsl.namedArgument. */ private NamedArgumentExpression namedArgument(String name, String value) { + return dsl.namedArgument(name, DSL.literal(value)); + } + + /** Delegates to dsl.namedArgument with a literal the caller has already built. */ private NamedArgumentExpression namedArgument(String name, LiteralExpression value) { + return dsl.namedArgument(name, value); + } + + /** FunctionExpression stub used by the tests above. It is a non-static inner class because its constructor reads the enclosing test's simpleQueryString field; valueOf and type always throw UnsupportedOperationException. NOTE(review): List and Environment appear as raw types here - confirm whether type arguments were intended. */ private class SimpleQueryStringExpression extends FunctionExpression { + public SimpleQueryStringExpression(List arguments) { + super(SimpleQueryStringTest.this.simpleQueryString, arguments); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + throw new UnsupportedOperationException("Invalid function call, " + + "valueOf function need implementation only to support Expression interface"); + } + + @Override + public ExprType type() { + throw new UnsupportedOperationException("Invalid function call, " + + "type function need implementation only to support Expression interface"); + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 3ca30bb0e3..41e2168d2f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -112,5 +112,38 @@ public void can_parse_multi_match_relevance_function() { "SOURCE=test | WHERE multi_match([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query'," + "analyzer=keyword, quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); } + + /** Checks that PPLSyntaxParser.analyzeSyntax returns a non-null result for simple_query_string calls: plain field lists in several quoting styles, fields with numeric weights, and a call with trailing named options. */ @Test + public void can_parse_simple_query_string_relevance_function() { + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | 
WHERE simple_query_string(['address'], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string(['address', 'notes'], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string([\"*\"], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string([\"address\"], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string([`address`], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string([address], 'query')")); + + /* Fields followed by a numeric weight, written both with and without the ^ separator. */ assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string(['address' ^ 1.0, 'notes' ^ 2.2], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string(['address' ^ 1.1, 'notes'], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string(['address', 'notes' ^ 1.5], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string(['address', 'notes' 3], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string(['address' ^ .3, 'notes' 3], 'query')")); + + /* Mixed quoting styles in one field list, then the same list followed by named options. */ assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query')")); + assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + "SOURCE=test | WHERE simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query'," + + "analyzer=keyword, quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java 
b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 95e398e8f0..033eb509dc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -676,4 +676,22 @@ public void canBuildMulti_matchRelevanceFunctionWithArguments() { ) ); } + + /** Checks that a PPL where clause calling simple_query_string is built into a filter over relation test whose condition is a simple_query_string function with unresolved fields, query and analyzer arguments. field1 carries no weight in the query text and is expected with 1.F; field2 is expected with its written weight 3.2F. */ @Test + public void canBuildSimple_query_stringRelevanceFunctionWithArguments() { + assertEqual( + "source=test | where simple_query_string(['field1', 'field2' ^ 3.2]," + + "'test query', analyzer='keyword')", + filter( + relation("test"), + function( + "simple_query_string", + unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( + "field1", 1.F, "field2", 3.2F))), + unresolvedArg("query", stringLiteral("test query")), + unresolvedArg("analyzer", stringLiteral("keyword")) + ) + ) + ); + } } diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 5bd80589d2..28b1cea9fd 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -279,6 +279,7 @@ INCLUDE: 'INCLUDE'; IN_TERMS: 'IN_TERMS'; MATCHPHRASE: 'MATCHPHRASE'; MATCH_PHRASE: 'MATCH_PHRASE'; +SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; MATCHQUERY: 'MATCHQUERY'; MATCH_QUERY: 'MATCH_QUERY'; MINUTE_OF_DAY: 'MINUTE_OF_DAY'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 148cb57166..603ea50e77 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -399,6 +399,7 @@ singleFieldRelevanceFunctionName multiFieldRelevanceFunctionName : MULTI_MATCH + | SIMPLE_QUERY_STRING ; legacyRelevanceFunctionName diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 7f30fcc369..1969f845ef 100644 --- 
a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -190,6 +190,42 @@ public void can_parse_multi_match_relevance_function() { + "operator='AND', tie_breaker=0.3, type = \"most_fields\", fuzziness = 4)")); } + /** Checks that parser.parse returns a non-null result for SQL WHERE clauses calling simple_query_string: plain field lists in several quoting styles, fields with numeric weights written with and without the ^ separator, and a call with trailing named options. */ @Test + public void can_parse_simple_query_string_relevance_function() { + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string(['address'], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string(['address', 'notes'], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string([\"*\"], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string([\"address\"], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string([`address`], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string([address], 'query')")); + + assertNotNull(parser.parse( + "SELECT id FROM test WHERE" + + " simple_query_string(['address' ^ 1.0, 'notes' ^ 2.2], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string(['address' ^ 1.1, 'notes'], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string(['address', 'notes' ^ 1.5], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string(['address', 'notes' 3], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE simple_query_string(['address' ^ .3, 'notes' 3], 'query')")); + + assertNotNull(parser.parse( + "SELECT id FROM test WHERE" + + " simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query')")); + assertNotNull(parser.parse( + "SELECT id FROM test WHERE" + + " simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query', analyzer=keyword," + + "flags='AND', quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); 
+ } + @Test public void can_parse_match_relevance_function() { assertNotNull(parser.parse("SELECT * FROM test WHERE match(column, \"this is a test\")")); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index ee94409fee..65d48ac8b7 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -466,6 +466,25 @@ public void relevanceMulti_match() { + "analyzer='keyword', operator='AND')")); } + /** Checks that buildExprAst turns a simple_query_string call into a function AST with unresolved fields and query arguments, and, in the second case, additional analyzer and operator arguments. NOTE(review): the expected map lists field2 before field1 while the query lists field1 first - presumably the comparison ignores entry order; verify. */ @Test + public void relevanceSimple_query_string() { + assertEquals(AstDSL.function("simple_query_string", + unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( + "field2", 3.2F, "field1", 1.F))), + unresolvedArg("query", stringLiteral("search query"))), + buildExprAst("simple_query_string(['field1', 'field2' ^ 3.2], 'search query')") + ); + + assertEquals(AstDSL.function("simple_query_string", + unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( + "field2", 3.2F, "field1", 1.F))), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("analyzer", stringLiteral("keyword")), + unresolvedArg("operator", stringLiteral("AND"))), + buildExprAst("simple_query_string(['field1', 'field2' ^ 3.2], 'search query'," + + "analyzer='keyword', operator='AND')")); + } + @Test public void canBuildInClause() { assertEquals(