Skip to content

Commit

Permalink
Add support for wildcard_query function (#156)
Browse files Browse the repository at this point in the history
* Implemented wildcard_query and added tests in core

Signed-off-by: Guian Gumpac <[email protected]>

* Implemented and added tests for sql

Signed-off-by: Guian Gumpac <[email protected]>

* Implemented and added tests for ppl

Signed-off-by: Guian Gumpac <[email protected]>

* Implemented and added tests for lucene

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed test for like expression

Signed-off-by: Guian Gumpac <[email protected]>

* Added parameters to wildcard_query

Signed-off-by: Guian Gumpac <[email protected]>

* Added integration tests for ppl and sql

Signed-off-by: Guian Gumpac <[email protected]>

* Added docs for doctests

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed issues introduced during merging

Signed-off-by: Guian Gumpac <[email protected]>

* Addressed PR comment

Signed-off-by: Guian Gumpac <[email protected]>

* Added annotation that was deleted from merging

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed merge conflict issues

Signed-off-by: Guian Gumpac <[email protected]>

* Addressed some PR comments and handled escaping wildcards

Signed-off-by: Guian Gumpac <[email protected]>

* Added tests for wildcard conversion and created data for testing

Signed-off-by: Guian Gumpac <[email protected]>

* Added javadoc

Signed-off-by: Guian Gumpac <[email protected]>

* Changed index name

Signed-off-by: Guian Gumpac <[email protected]>

* Temporarily changed jackson_version to run GH actions

Signed-off-by: Guian Gumpac <[email protected]>

* Added comparison test for wildcard conversion

Signed-off-by: Guian Gumpac <[email protected]>

* Removed PPL implementation of wildcard_query

Signed-off-by: Guian Gumpac <[email protected]>

* Reverted ppl docs change

Signed-off-by: Guian Gumpac <[email protected]>

* Made namedArgument a static function

Signed-off-by: Guian Gumpac <[email protected]>

* Removed extra space

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed LIKE query

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed LIKE tests and added more tests

Signed-off-by: Guian Gumpac <[email protected]>

* Addressed PR comments

Signed-off-by: Guian Gumpac <[email protected]>

* Implemented converting text field to keyword. Still needs testing

Signed-off-by: Guian Gumpac <[email protected]>

* Added test cases for LIKE in sql and ppl

Signed-off-by: Guian Gumpac <[email protected]>

* Addressed PR comments regarding docs

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed backslashes in docs

Signed-off-by: Guian Gumpac <[email protected]>

* Added missed backticks in docs

Signed-off-by: Guian Gumpac <[email protected]>

* Moved escaping wildcard test to common/utils

Signed-off-by: Guian Gumpac <[email protected]>

* Fixed checkstyle error

Signed-off-by: Guian Gumpac <[email protected]>

Signed-off-by: Guian Gumpac <[email protected]>
  • Loading branch information
Guian Gumpac authored Nov 25, 2022
1 parent 50669eb commit 2cd6921
Show file tree
Hide file tree
Showing 29 changed files with 1,000 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,48 @@ public static String format(final String format, Object... args) {
return String.format(Locale.ROOT, format, args);
}

/**
 * Converts the SQL wildcard characters {@code %} and {@code _} to their Lucene
 * counterparts {@code *} and {@code ?}.
 *
 * <p>Escaping rules:
 * <ul>
 *   <li>{@code \%} and {@code \_} become the literal characters {@code %} and {@code _};
 *       the escaping backslash is dropped.</li>
 *   <li>A backslash in front of any other character is copied through unchanged, so
 *       escapes such as {@code \*} and {@code \?} survive the conversion.</li>
 *   <li>Two consecutive backslashes form an escaped backslash and do not escape the
 *       character that follows them, so {@code \\%} becomes {@code \\*}.</li>
 * </ul>
 *
 * @param text string to be converted
 * @return converted string
 */
public static String convertSqlWildcardToLucene(String text) {
  StringBuilder convertedString = new StringBuilder(text.length());
  // True only while the previously copied character is a backslash that has not itself
  // been escaped by another backslash.
  boolean escaped = false;

  for (char currentChar : text.toCharArray()) {
    switch (currentChar) {
      case '\\':
        // Toggle instead of always setting the flag: the second backslash of a pair is
        // the escaped character, not the start of a new escape sequence.
        escaped = !escaped;
        convertedString.append(currentChar);
        break;
      case '%':
      case '_':
        if (escaped) {
          // Overwrite the escaping backslash with the literal SQL wildcard character.
          convertedString.setCharAt(convertedString.length() - 1, currentChar);
        } else {
          convertedString.append(currentChar == '%' ? '*' : '?');
        }
        escaped = false;
        break;
      default:
        convertedString.append(currentChar);
        escaped = false;
        break;
    }
  }
  return convertedString.toString();
}

/**
 * Tells whether {@code text} is a non-empty string that both starts and ends
 * with {@code mark}.
 *
 * @param text string to inspect; {@code null} or empty yields {@code false}
 * @param mark quote mark expected at both ends
 * @return {@code true} if {@code text} is wrapped in {@code mark}
 */
private static boolean isQuoted(String text, String mark) {
  if (Strings.isNullOrEmpty(text)) {
    return false;
  }
  return text.startsWith(mark) && text.endsWith(mark);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.common.utils;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

/**
 * Unit tests for {@link StringUtils#convertSqlWildcardToLucene(String)}.
 */
public class ConvertSQLWildcardTest {
  @Test
  public void test_escaping_sql_wildcards() {
    // A single escaped character: SQL wildcards lose their backslash, other
    // characters keep it.
    assertConverted("%", "\\%");
    assertConverted("\\*", "\\*");
    assertConverted("_", "\\_");
    assertConverted("\\?", "\\?");

    // Escaped and unescaped '%' next to each other.
    assertConverted("%*", "\\%%");
    assertConverted("*%", "%\\%");
    assertConverted("%*%", "\\%%\\%");
    assertConverted("*%*", "%\\%%");

    // Escaped and unescaped '_' next to each other.
    assertConverted("_?", "\\__");
    assertConverted("?_", "_\\_");
    assertConverted("_?_", "\\__\\_");
    assertConverted("?_?", "_\\__");

    // All four escaped characters in one pattern.
    assertConverted("%\\*_\\?", "\\%\\*\\_\\?");
  }

  /** Asserts that converting {@code sqlPattern} produces {@code expected}. */
  private static void assertConverted(String expected, String sqlPattern) {
    assertEquals(expected, StringUtils.convertSqlWildcardToLucene(sqlPattern));
  }
}
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,10 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
return compile(BuiltinFunctionName.MATCH_BOOL_PREFIX, args);
}

/**
 * Builds a {@code wildcard_query} function expression by compiling
 * {@link BuiltinFunctionName#WILDCARD_QUERY} with the given arguments.
 *
 * @param args arguments passed through to the compiled function
 * @return the compiled function expression
 */
public static FunctionExpression wildcard_query(Expression... args) {
return compile(BuiltinFunctionName.WILDCARD_QUERY, args);
}

/**
 * Builds a function expression by compiling {@link BuiltinFunctionName#NOW}
 * with the given arguments.
 *
 * @param args arguments passed through to the compiled function
 * @return the compiled function expression
 */
public static FunctionExpression now(Expression... args) {
return compile(BuiltinFunctionName.NOW, args);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,9 @@ public enum BuiltinFunctionName {
QUERY(FunctionName.of("query")),
MATCH_QUERY(FunctionName.of("match_query")),
MATCHQUERY(FunctionName.of("matchquery")),
MULTI_MATCH(FunctionName.of("multi_match"));
MULTI_MATCH(FunctionName.of("multi_match")),
WILDCARDQUERY(FunctionName.of("wildcardquery")),
WILDCARD_QUERY(FunctionName.of("wildcard_query"));

private final FunctionName name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY));
repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY));
}

private static FunctionResolver match_bool_prefix() {
Expand Down Expand Up @@ -79,6 +81,11 @@ private static FunctionResolver query_string() {
return new RelevanceFunctionResolver(funcName, STRUCT);
}

/**
 * Creates the resolver for a wildcard query function under the given builtin name.
 * Taking the name as a parameter lets the same resolver be created for more than
 * one function name.
 *
 * @param wildcardQuery builtin function name the resolver is created for
 * @return a {@link RelevanceFunctionResolver} for that name
 */
private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) {
FunctionName funcName = wildcardQuery.getName();
// NOTE(review): STRING is presumably the expected type of the first (field) argument;
// confirm against RelevanceFunctionResolver.
return new RelevanceFunctionResolver(funcName, STRING);
}

public static class OpenSearchFunction extends FunctionExpression {
private final FunctionName functionName;
private final List<Expression> arguments;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,34 @@ void query_string_expression_two_fields() {
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));
}

// Checks via assertAnalyzeEqual that an AST wildcard_query call carrying only the
// "field" and "query" arguments corresponds to the DSL.wildcard_query expression
// built from the same named arguments.
@Test
void wildcard_query_expression() {
assertAnalyzeEqual(
DSL.wildcard_query(
DSL.namedArgument("field", DSL.literal("test")),
DSL.namedArgument("query", DSL.literal("query_value*"))),
AstDSL.function("wildcard_query",
unresolvedArg("field", stringLiteral("test")),
unresolvedArg("query", stringLiteral("query_value*"))));
}

// Same check as the two-argument case, but with the optional "boost",
// "case_insensitive" and "rewrite" arguments supplied as well. All option values
// are passed as string literals on both sides.
@Test
void wildcard_query_expression_all_params() {
assertAnalyzeEqual(
DSL.wildcard_query(
DSL.namedArgument("field", DSL.literal("test")),
DSL.namedArgument("query", DSL.literal("query_value*")),
DSL.namedArgument("boost", DSL.literal("1.5")),
DSL.namedArgument("case_insensitive", DSL.literal("true")),
DSL.namedArgument("rewrite", DSL.literal("scoring_boolean"))),
AstDSL.function("wildcard_query",
unresolvedArg("field", stringLiteral("test")),
unresolvedArg("query", stringLiteral("query_value*")),
unresolvedArg("boost", stringLiteral("1.5")),
unresolvedArg("case_insensitive", stringLiteral("true")),
unresolvedArg("rewrite", stringLiteral("scoring_boolean"))));
}

@Test
public void match_phrase_prefix_all_params() {
assertAnalyzeEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,12 @@ void query_string() {
fields.getValue(), query.getValue()),
expr.toString());
}

// The string form of a wildcard_query expression lists its field and query arguments.
@Test
void wildcard_query() {
  FunctionExpression actual = DSL.wildcard_query(field, query);
  String expected = String.format("wildcard_query(field=%s, query=%s)",
      field.getValue(), query.getValue());
  assertEquals(expected, actual.toString());
}
}
54 changes: 54 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3118,6 +3118,58 @@ Example searching for field Tags::
| [Winnie-the-<em>Pooh</em>] |
+----------------------------------------------+

WILDCARD_QUERY
--------------

Description
>>>>>>>>>>>

``wildcard_query(field_expression, query_expression[, option=<option_value>]*)``

The ``wildcard_query`` function maps to the ``wildcard`` query used in the search engine. It returns documents that match the provided text in the specified field.
OpenSearch supports wildcard characters ``*`` and ``?``. See the full description here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards.
You may include a backslash ``\`` to escape SQL wildcard characters ``\%`` and ``\_``.

Available parameters include:

- boost
- case_insensitive
- rewrite

For backward compatibility, ``wildcardquery`` is also supported and mapped to ``wildcard_query`` query as well.

Example with only the ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*');
fetched rows / total rows = 7/7
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
+-------------------------------------------+

Another example to show how to set custom values for the optional parameters::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score');
fetched rows / total rows = 7/7
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
+-------------------------------------------+

System Functions
================

Expand All @@ -3142,3 +3194,5 @@ Example::
|----------------+---------------+-----------------+------------------|
| DATE | INTEGER | DATETIME | STRUCT |
+----------------+---------------+-----------------+------------------+


20 changes: 20 additions & 0 deletions doctest/test_data/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{"index":{"_id":"0"}}
{"Body":"test wildcard"}
{"index":{"_id":"1"}}
{"Body":"test wildcard in the end of the text%"}
{"index":{"_id":"2"}}
{"Body":"%test wildcard in the beginning of the text"}
{"index":{"_id":"3"}}
{"Body":"test wildcard in % the middle of the text"}
{"index":{"_id":"4"}}
{"Body":"test wildcard %% beside each other"}
{"index":{"_id":"5"}}
{"Body":"test wildcard in the end of the text_"}
{"index":{"_id":"6"}}
{"Body":"_test wildcard in the beginning of the text"}
{"index":{"_id":"7"}}
{"Body":"test wildcard in _ the middle of the text"}
{"index":{"_id":"8"}}
{"Body":"test wildcard __ beside each other"}
{"index":{"_id":"9"}}
{"Body":"test backslash wildcard \\_"}
4 changes: 3 additions & 1 deletion doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
NYC_TAXI = "nyc_taxi"
BOOKS = "books"
APACHE = "apache"
WILDCARD = "wildcard"


class DocTestConnection(OpenSearchConnection):
Expand Down Expand Up @@ -92,6 +93,7 @@ def set_up_test_indices(test):
load_file("nyc_taxi.json", index_name=NYC_TAXI)
load_file("books.json", index_name=BOOKS)
load_file("apache.json", index_name=APACHE)
load_file("wildcard.json", index_name=WILDCARD)


def load_file(filename, index_name):
Expand Down Expand Up @@ -120,7 +122,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True)


docsuite = partial(doctest.DocFileSuite,
Expand Down
9 changes: 9 additions & 0 deletions doctest/test_mapping/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"mappings" : {
"properties" : {
"Body" : {
"type" : "keyword"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,11 @@ public enum Index {
CALCS(TestsConstants.TEST_INDEX_CALCS,
"calcs",
getMappingFile("calcs_index_mappings.json"),
"src/test/resources/calcs.json"),;
"src/test/resources/calcs.json"),
WILDCARD(TestsConstants.TEST_INDEX_WILDCARD,
"wildcard",
getMappingFile("wildcard_index_mappings.json"),
"src/test/resources/wildcard.json"),;

private final String name;
private final String type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public class TestsConstants {
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard";

public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
Expand Down
Loading

0 comments on commit 2cd6921

Please sign in to comment.