From 8d2c1f48cd64ae3ace764e21e5609743c51b380f Mon Sep 17 00:00:00 2001 From: Mingshi Liu <113382730+mingshl@users.noreply.github.com> Date: Mon, 9 Jan 2023 04:19:15 -0800 Subject: [PATCH] Fix Graph Filter Error in Search (#5665) * fix graph filter out of bound error Signed-off-by: Mingshi Liu * add changelog Signed-off-by: Mingshi Liu * run gradle spotlessApply Signed-off-by: Mingshi Liu * reproduce error in unit test Signed-off-by: Mingshi Liu * format to pass spotlessApply Signed-off-by: Mingshi Liu * organize package Signed-off-by: Mingshi Liu Signed-off-by: Mingshi Liu (cherry picked from commit 6a7a9a1b2472f8d4a496d5b976ae16be87893b0e) --- CHANGELOG.md | 1 + .../opensearch/index/search/MatchQuery.java | 9 ++++- .../index/query/MatchQueryBuilderTests.java | 35 +++++++++++++++++-- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c1aa64335796..4c9f428dc8e2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - `opensearch-service.bat start` and `opensearch-service.bat manager` failing to run ([#4289](https://github.com/opensearch-project/OpenSearch/pull/4289)) - PR reference to checkout code for changelog verifier ([#4296](https://github.com/opensearch-project/OpenSearch/pull/4296)) - `opensearch.bat` and `opensearch-service.bat install` failing to run, missing logs directory ([#4305](https://github.com/opensearch-project/OpenSearch/pull/4305)) +- Fix graph filter error in search ([#5665](https://github.com/opensearch-project/OpenSearch/pull/5665)) ### Security ## [1.x] ### Added diff --git a/server/src/main/java/org/opensearch/index/search/MatchQuery.java b/server/src/main/java/org/opensearch/index/search/MatchQuery.java index 75f8d9aa6ba9f..f9dd45bb4fe6c 100644 --- a/server/src/main/java/org/opensearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MatchQuery.java @@ -70,7 +70,6 @@ import org.opensearch.index.mapper.TextFieldMapper; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.support.QueryParsers; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -743,6 +742,14 @@ private Query analyzeGraphBoolean(String field, TokenStream source, BooleanClaus lastState = end; final Query queryPos; boolean usePrefix = isPrefix && end == -1; + /** + * check if the GraphTokenStreamFiniteStrings graph is empty + * return empty BooleanQuery result + */ + Iterator graphIt = graph.getFiniteStrings(); + if (!graphIt.hasNext()) { + return builder.build(); + } if (graph.hasSidePath(start)) { final Iterator it = graph.getFiniteStrings(start, end); Iterator queries = new Iterator() { diff --git a/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java index c4aba907f4f40..e5fc911373de2 100644 --- a/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/MatchQueryBuilderTests.java @@ -33,8 +33,9 @@ package org.opensearch.index.query; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CannedBinaryTokenStream; -import org.apache.lucene.analysis.MockSynonymAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.tests.analysis.CannedBinaryTokenStream; +import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; @@ -52,6 +53,7 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest; import org.opensearch.common.ParsingException; import org.opensearch.common.Strings; @@ -72,6 +74,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Iterator; import static org.hamcrest.CoreMatchers.either; import static org.hamcrest.CoreMatchers.instanceOf; @@ -326,6 +329,20 @@ public void testFuzzinessOnNonStringField() throws Exception { query.toQuery(context); // no exception } + public void testMatchQueryWithNoSidePath() throws Exception { + QueryShardContext testContext = createShardContext(); + final MatchQuery testMatchQuery = new MatchQuery(testContext); + MockGraphAnalyzer mockAnalyzer = new MockGraphAnalyzer(createGiantGraphWithNoSide()); + testMatchQuery.setAnalyzer(mockAnalyzer); + testMatchQuery.setAutoGenerateSynonymsPhraseQuery(true); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(mockAnalyzer.getTokenStream()); + Iterator graphIt = graph.getFiniteStrings(); + assertEquals(graphIt.hasNext(), false); + String testField = "Gas Lift Storage Bed Frame with Arched Bed Head in King"; + String testValue = "head board, bed head, bedhead, headboard"; + testMatchQuery.parse(Type.BOOLEAN, testField, testValue); // no exception + } + public void testExactOnUnsupportedField() throws Exception { MatchQueryBuilder query = new MatchQueryBuilder(GEO_POINT_FIELD_NAME, "2,3"); QueryShardContext context = createShardContext(); @@ -543,12 +560,18 @@ private static class MockGraphAnalyzer extends Analyzer { MockGraphAnalyzer(CannedBinaryTokenStream.BinaryToken[] tokens) { this.tokenStream = new CannedBinaryTokenStream(tokens); + } @Override protected TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(r -> {}, tokenStream); } + + public CannedBinaryTokenStream getTokenStream() { + return this.tokenStream; + } + } /** @@ -571,6 +594,14 @@ private static CannedBinaryTokenStream.BinaryToken[] createGiantGraph(int numPos return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]); } + /** + * Creates a graph token stream with no side path. + **/ + private static CannedBinaryTokenStream.BinaryToken[] createGiantGraphWithNoSide() { + List tokens = new ArrayList<>(); + return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]); + } + /** * Creates a graph token stream with {@link BooleanQuery#getMaxClauseCount()} * expansions at the last position.