diff --git a/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/ElasticsearchMetadata.java b/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/ElasticsearchMetadata.java index cc3aa9f72c44..adcf2a1e0bce 100644 --- a/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/ElasticsearchMetadata.java +++ b/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/ElasticsearchMetadata.java @@ -86,6 +86,7 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Iterators.singletonIterator; import static io.airlift.slice.SliceUtf8.getCodePointAt; +import static io.airlift.slice.SliceUtf8.lengthOfCodePoint; import static io.trino.plugin.elasticsearch.ElasticsearchTableHandle.Type.QUERY; import static io.trino.plugin.elasticsearch.ElasticsearchTableHandle.Type.SCAN; import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; @@ -592,7 +593,7 @@ protected static String likeToRegexp(Slice pattern, Optional escape) int position = 0; while (position < pattern.length()) { int currentChar = getCodePointAt(pattern, position); - position += 1; + position += lengthOfCodePoint(currentChar); checkEscape(!escaped || currentChar == '%' || currentChar == '_' || currentChar == escapeChar.get()); if (!escaped && escapeChar.isPresent() && currentChar == escapeChar.get()) { escaped = true; diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java index 8fe0d4c3b93d..a7a05c55f544 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java @@ -1053,6 +1053,30 @@ public void testLike() .put("text_column", "soome%text") .buildOrThrow()); + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "中文") + .put("text_column", "中文") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "こんにちは") + .put("text_column", "こんにちは") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "안녕하세요") + .put("text_column", "안녕하세요") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "Привет") + .put("text_column", "Привет") + .buildOrThrow()); + assertThat(query("" + "SELECT " + "keyword_column " + @@ -1075,6 +1099,38 @@ public void testLike() "WHERE keyword_column LIKE 'soome$%%' ESCAPE '$'")) .matches("VALUES VARCHAR 'soome%text'") .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE '中%'")) + .matches("VALUES VARCHAR '中文'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE 'こんに%'")) + .matches("VALUES VARCHAR 'こんにちは'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE '안녕하%'")) + .matches("VALUES VARCHAR '안녕하세요'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE 'При%'")) + .matches("VALUES VARCHAR 'Привет'") + .isFullyPushedDown(); } @Test diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchMetadata.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchMetadata.java index 8a6b5795b9b1..9ab47e1d93d6 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchMetadata.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchMetadata.java @@ -34,6 +34,10 @@ public void testLikeToRegexp() assertEquals(likeToRegexp("s_.m%ex\\t", Optional.of("$")), "s.\\.m.*ex\\\\t"); assertEquals(likeToRegexp("\000%", Optional.empty()), "\000.*"); assertEquals(likeToRegexp("\000%", Optional.of("\000")), "%"); + assertEquals(likeToRegexp("中文%", Optional.empty()), "中文.*"); + assertEquals(likeToRegexp("こんにちは%", Optional.empty()), "こんにちは.*"); + assertEquals(likeToRegexp("안녕하세요%", Optional.empty()), "안녕하세요.*"); + assertEquals(likeToRegexp("Привет%", Optional.empty()), "Привет.*"); } private static String likeToRegexp(String pattern, Optional escapeChar)