Skip to content

Commit

Permalink
Merge branch 'integ-multi_match-#188' into dev-multi_match-#188
Browse files Browse the repository at this point in the history
Signed-off-by: Yury Fridlyand <[email protected]>
  • Loading branch information
Yury-Fridlyand committed Jun 15, 2022
2 parents 88799ad + d81ef73 commit e781a94
Show file tree
Hide file tree
Showing 19 changed files with 718 additions and 2 deletions.
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,10 @@ public FunctionExpression multi_match(Expression... args) {
return compile(BuiltinFunctionName.MULTI_MATCH, args);
}

/**
 * Builds the {@code simple_query_string} relevance function expression.
 *
 * @param args arguments handed over unchanged to {@code compile}
 * @return the function expression compiled for {@code SIMPLE_QUERY_STRING}
 */
public FunctionExpression simple_query_string(Expression... args) {
  final BuiltinFunctionName functionId = BuiltinFunctionName.SIMPLE_QUERY_STRING;
  return compile(functionId, args);
}

/**
 * Compiles a built-in function through the function repository.
 *
 * <p>The argument array is cloned before it is wrapped in a list, so the list handed to the
 * repository is not backed by the caller's array. The repository result is cast to
 * {@code FunctionExpression}.
 *
 * @param bfn  built-in function whose name is looked up in the repository
 * @param args function arguments
 * @return the compiled function expression
 */
private FunctionExpression compile(BuiltinFunctionName bfn, Expression... args) {
return (FunctionExpression) repository.compile(bfn.getName(), Arrays.asList(args.clone()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ public enum BuiltinFunctionName {
* Relevance Function.
*/
MATCH(FunctionName.of("match")),
SIMPLE_QUERY_STRING(FunctionName.of("simple_query_string")),
MATCH_PHRASE(FunctionName.of("match_phrase")),
MATCHPHRASE(FunctionName.of("matchphrase")),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ public class OpenSearchFunctions {
public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 5;
public static final int MIN_NUM_PARAMETERS = 2;
// Passed as maxNumParameters when building the multi_match resolver.
public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17;
// Passed as maxNumParameters when building the simple_query_string resolver.
// NOTE(review): docs/user/dql/functions.rst lists 13 optional parameters for
// simple_query_string; together with fields and query that is 15 — confirm 14 is intended.
public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14;

/**
* Add functions specific to OpenSearch to repository.
*/
public void register(BuiltinFunctionRepository repository) {
repository.register(match());
repository.register(multi_match());
repository.register(simple_query_string());
// Register MATCHPHRASE as MATCH_PHRASE as well for backwards
// compatibility.
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
Expand All @@ -58,6 +60,11 @@ private static FunctionResolver multi_match() {
return getRelevanceFunctionResolver(funcName, MULTI_MATCH_MAX_NUM_PARAMETERS, STRUCT);
}

/**
 * Creates the resolver for {@code simple_query_string}: a relevance function whose first
 * argument type is STRUCT and whose argument count is capped by
 * {@code SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS}.
 */
private static FunctionResolver simple_query_string() {
  return getRelevanceFunctionResolver(
      BuiltinFunctionName.SIMPLE_QUERY_STRING.getName(),
      SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS,
      STRUCT);
}

private static FunctionResolver getRelevanceFunctionResolver(
FunctionName funcName, int maxNumParameters, ExprCoreType firstArgType) {
return new FunctionResolver(funcName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,51 @@ void multi_match_expression_two_fields() {
AstDSL.unresolvedArg("query", stringLiteral("sample query"))));
}

@Test
void simple_query_string_expression() {
  // Resolved form: one field with weight 1.0 plus the query text.
  Expression expected = dsl.simple_query_string(
      dsl.namedArgument("fields", DSL.literal(
          new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of(
              "field", ExprValueUtils.floatValue(1.F)))))),
      dsl.namedArgument("query", DSL.literal("sample query")));

  // Unresolved AST form that is handed to the analyzer.
  UnresolvedExpression unresolved = AstDSL.function("simple_query_string",
      AstDSL.unresolvedArg("fields", new RelevanceFieldList(Map.of(
          "field", 1.F))),
      AstDSL.unresolvedArg("query", stringLiteral("sample query")));

  assertAnalyzeEqual(expected, unresolved);
}

@Test
void simple_query_string_expression_with_params() {
  // Resolved form: one weighted field, the query text and an extra "analyzer" option.
  Expression expected = dsl.simple_query_string(
      dsl.namedArgument("fields", DSL.literal(
          new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of(
              "field", ExprValueUtils.floatValue(1.F)))))),
      dsl.namedArgument("query", DSL.literal("sample query")),
      dsl.namedArgument("analyzer", DSL.literal("keyword")));

  // Unresolved AST form carrying the same three arguments.
  UnresolvedExpression unresolved = AstDSL.function("simple_query_string",
      AstDSL.unresolvedArg("fields", new RelevanceFieldList(Map.of(
          "field", 1.F))),
      AstDSL.unresolvedArg("query", stringLiteral("sample query")),
      AstDSL.unresolvedArg("analyzer", stringLiteral("keyword")));

  assertAnalyzeEqual(expected, unresolved);
}

@Test
void simple_query_string_expression_two_fields() {
  // Resolved form: two fields with weights 1.0 and 0.3 plus the query text.
  Expression expected = dsl.simple_query_string(
      dsl.namedArgument("fields", DSL.literal(
          new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of(
              "field1", ExprValueUtils.floatValue(1.F),
              "field2", ExprValueUtils.floatValue(.3F)))))),
      dsl.namedArgument("query", DSL.literal("sample query")));

  // Unresolved AST form with the same two weighted fields.
  UnresolvedExpression unresolved = AstDSL.function("simple_query_string",
      AstDSL.unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of(
          "field1", 1.F, "field2", .3F))),
      AstDSL.unresolvedArg("query", stringLiteral("sample query")));

  assertAnalyzeEqual(expected, unresolved);
}

/**
 * Runs the expression analyzer on an unresolved expression using this test's analysis context.
 *
 * @param unresolvedExpression AST expression to analyze
 * @return the expression returned by the analyzer
 */
protected Expression analyze(UnresolvedExpression unresolvedExpression) {
  final Expression resolved =
      expressionAnalyzer.analyze(unresolvedExpression, analysisContext);
  return resolved;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,12 @@ void multi_match() {
fields.getValue().toString(), query.getValue().toString()),
expr.toString());
}

@Test
void simple_query_string() {
  // The expression's string form should list both named arguments with their literal values.
  FunctionExpression expr = dsl.simple_query_string(fields, query);
  String fieldsText = fields.getValue().toString();
  String queryText = query.getValue().toString();
  String expected =
      String.format("simple_query_string(fields=%s, query=%s)", fieldsText, queryText);
  assertEquals(expected, expr.toString());
}
}
50 changes: 50 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2283,3 +2283,53 @@ Another example to show how to set custom values for the optional parameters::
| firstname | lastname | city | address |
|-------------+------------+--------+-----------|
+-------------+------------+--------+-----------+

SIMPLE_QUERY_STRING
-------------------

Description
>>>>>>>>>>>

``simple_query_string([field_expression+], query_expression[, option=<option_value>]*)``

The simple_query_string function maps to the simple_query_string query used in the search engine and returns the documents that match the provided text, number, date or boolean value in the given field or fields.
The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. Field names may be written in double quotes, in single quotes, in backticks, or without any quoting. Searching all fields with the star ``"*"`` is also supported (the star symbol must be quoted). The weight is optional and is specified after the field name; it may be delimited by the `caret` character or by whitespace. Please refer to the examples below:

| ``simple_query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
| ``simple_query_string(["*"], ...)``

Available parameters include:

- analyze_wildcard
- analyzer
- auto_generate_synonyms_phrase
- flags
- fuzziness
- fuzzy_max_expansions
- fuzzy_prefix_length
- fuzzy_transpositions
- lenient
- default_operator
- minimum_should_match
- quote_field_suffix
- boost

Example with only the ``fields`` and ``query`` expressions; all other parameters are set to their default values::

os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber | Nogal');
fetched rows / total rows = 2/2
+-------------+------------+--------+--------------------+
| firstname | lastname | city | address |
|-------------+------------+--------+--------------------|
| Amber | Duke | Brogan | 880 Holmes Lane |
| Nanette | Bates | Nogal | 789 Madison Street |
+-------------+------------+--------+--------------------+

Another example to show how to set custom values for the optional parameters::

os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND');
fetched rows / total rows = 0/0
+-------------+------------+--------+-----------+
| firstname | lastname | city | address |
|-------------+------------+--------+-----------|
+-------------+------------+--------+-----------+
51 changes: 51 additions & 0 deletions docs/user/ppl/functions/relevance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,57 @@ Another example to show how to set custom values for the optional parameters::
+-------------+------------+--------+-----------+


SIMPLE_QUERY_STRING
-------------------

Description
>>>>>>>>>>>

``simple_query_string([field_expression+], query_expression[, option=<option_value>]*)``

The simple_query_string function maps to the simple_query_string query used in the search engine and returns the documents that match the provided text, number, date or boolean value in the given field or fields.
The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. Field names may be written in double quotes, in single quotes, in backticks, or without any quoting. Searching all fields with the star ``"*"`` is also supported (the star symbol must be quoted). The weight is optional and is specified after the field name; it may be delimited by the `caret` character or by whitespace. Please refer to the examples below:

| ``simple_query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
| ``simple_query_string(["*"], ...)``

Available parameters include:

- analyze_wildcard
- analyzer
- auto_generate_synonyms_phrase
- flags
- fuzziness
- fuzzy_max_expansions
- fuzzy_prefix_length
- fuzzy_transpositions
- lenient
- default_operator
- minimum_should_match
- quote_field_suffix
- boost

Example with only the ``fields`` and ``query`` expressions; all other parameters are set to their default values::

os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber | Nogal') | fields firstname, lastname, city, address;
fetched rows / total rows = 2/2
+-------------+------------+--------+--------------------+
| firstname | lastname | city | address |
|-------------+------------+--------+--------------------|
| Amber | Duke | Brogan | 880 Holmes Lane |
| Nanette | Bates | Nogal | 789 Madison Street |
+-------------+------------+--------+--------------------+

Another example to show how to set custom values for the optional parameters::

os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND') | fields firstname, lastname, city, address;
fetched rows / total rows = 0/0
+-------------+------------+--------+-----------+
| firstname | lastname | city | address |
|-------------+------------+--------+-----------|
+-------------+------------+--------+-----------+

Limitations
>>>>>>>>>>>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,23 @@ public void init() throws IOException {
}

@Test
public void test1() throws IOException {
public void multi_match() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE multi_match([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')";
var result = executeQuery(query);
assertNotEquals(0, result.getInt("total"));
}

@Test
public void verify_wildcard_test() throws IOException {
public void simple_query_string() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')";
var result = executeQuery(query);
assertNotEquals(0, result.getInt("total"));
}

@Test
public void verify_wildcard_multi_match() throws IOException {
String query1 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE multi_match(['Tags'], 'taste')";
var result1 = executeQuery(query1);
Expand All @@ -39,4 +47,15 @@ public void verify_wildcard_test() throws IOException {
var result2 = executeQuery(query2);
assertNotEquals(result2.getInt("total"), result1.getInt("total"));
}

@Test
public void verify_wildcard_simple_query_string() throws IOException {
  // Search the single field 'Tags' first ...
  final String exactFieldQuery = "SOURCE=" + TEST_INDEX_BEER
      + " | WHERE simple_query_string(['Tags'], 'taste')";
  var exactFieldResult = executeQuery(exactFieldQuery);

  // ... then every field matching the 'T*' pattern.
  final String wildcardQuery = "SOURCE=" + TEST_INDEX_BEER
      + " | WHERE simple_query_string(['T*'], 'taste')";
  var wildcardResult = executeQuery(wildcardQuery);

  // The two searches are expected to report different totals.
  int wildcardTotal = wildcardResult.getInt("total");
  int exactFieldTotal = exactFieldResult.getInt("total");
  assertNotEquals(wildcardTotal, exactFieldTotal);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;

/**
 * Integration tests for the {@code simple_query_string} relevance function in SQL queries,
 * executed against the beer.stackexchange test index.
 */
public class SimpleQueryStringIT extends SQLIntegTestCase {
  @Override
  public void init() throws IOException {
    loadIndex(Index.BEER);
  }

  /*
  The 'beer.stackexchange' index is a dump of beer.stackexchange.com converted to a format which can be ingested by OpenSearch.
  It is a forum like StackOverflow with questions about beer brewing. The dump contains questions, answers and comments.
  The reference query is:
    select count(Id) from beer.stackexchange where simple_query_string(["Tags" ^ 1.5, Title, `Body` 4.2], 'taste') and Tags like '% % %' and Title like '%';
  It filters out empty `Tags` and `Title`.
  */

  /** A weighted multi-field search for 'taste' must match at least one document. */
  @Test
  public void test1() throws IOException {
    String query = "SELECT count(*) FROM "
        + TEST_INDEX_BEER + " WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')";
    assertNotEquals(0, countOf(query));
  }

  /** Searching the 'T*' field pattern must match a different number of documents than 'Tags' alone. */
  @Test
  public void verify_wildcard_test() throws IOException {
    String query1 = "SELECT count(*) FROM "
        + TEST_INDEX_BEER + " WHERE simple_query_string(['Tags'], 'taste')";
    int exactFieldCount = countOf(query1);
    String query2 = "SELECT count(*) FROM "
        + TEST_INDEX_BEER + " WHERE simple_query_string(['T*'], 'taste')";
    int wildcardCount = countOf(query2);
    assertNotEquals(wildcardCount, exactFieldCount);
  }

  /**
   * Executes a {@code SELECT count(*)} query in JDBC format and returns the counted value.
   *
   * <p>The value is read from the first cell of the first data row. The response's
   * {@code total} field is the number of returned rows, which is 1 for a single aggregate
   * row regardless of how many documents matched, so it cannot be used to compare counts.
   *
   * @param countQuery SQL query whose only selected column is a count
   * @return the count reported by the query
   * @throws IOException if executing the query fails
   */
  private int countOf(String countQuery) throws IOException {
    var response = new JSONObject(executeQuery(countQuery, "jdbc"));
    return response.getJSONArray("datarows").getJSONArray(0).getInt(0);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MultiMatchQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.SimpleQueryStringQuery;
import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer;

@RequiredArgsConstructor
Expand Down Expand Up @@ -60,6 +61,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor<QueryBuilder, Obje
.put(BuiltinFunctionName.MATCH_QUERY.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCHQUERY.getName(), new MatchQuery())
.put(BuiltinFunctionName.MULTI_MATCH.getName(), new MultiMatchQuery())
.put(BuiltinFunctionName.SIMPLE_QUERY_STRING.getName(), new SimpleQueryStringQuery())
.build();

/**
Expand Down
Loading

0 comments on commit e781a94

Please sign in to comment.