Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
Signed-off-by: zhichao-aws <[email protected]>
  • Loading branch information
zhichao-aws committed Apr 9, 2024
2 parents 9332a21 + cb83e20 commit 6657c9e
Show file tree
Hide file tree
Showing 18 changed files with 804 additions and 137 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# This should match the owning team set up in https://github.com/orgs/opensearch-project/teams
* @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @sean-zheng-amazon @model-collapse @zane-neo @ylwu-amzn @jngz-es @vibrantvarun
* @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @sean-zheng-amazon @model-collapse @zane-neo @ylwu-amzn @jngz-es @vibrantvarun @zhichao-aws
4 changes: 2 additions & 2 deletions .github/workflows/backwards_compatibility_tests_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
matrix:
java: [ 11, 17, 21 ]
os: [ubuntu-latest,windows-latest]
bwc_version : ["2.9.0","2.10.0","2.11.0","2.12.0","2.13.0-SNAPSHOT"]
bwc_version : ["2.9.0","2.10.0","2.11.0","2.12.0","2.13.0","2.14.0-SNAPSHOT"]
opensearch_version : [ "3.0.0-SNAPSHOT" ]

name: NeuralSearch Restart-Upgrade BWC Tests
Expand All @@ -42,7 +42,7 @@ jobs:
matrix:
java: [ 11, 17, 21 ]
os: [ubuntu-latest,windows-latest]
bwc_version: [ "2.13.0-SNAPSHOT" ]
bwc_version: [ "2.14.0-SNAPSHOT" ]
opensearch_version: [ "3.0.0-SNAPSHOT" ]

name: NeuralSearch Rolling-Upgrade BWC Tests
Expand Down
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.13...2.x)
### Features
### Enhancements
- Update bwc tests for neural_query_enricher neural_sparse search ([#652](https://github.com/opensearch-project/neural-search/pull/652))
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
### Bug Fixes
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
### Infrastructure
### Documentation
### Maintenance
- Update bwc tests for neural_query_enricher neural_sparse search ([#652](https://github.com/opensearch-project/neural-search/pull/652))
### Refactoring
1 change: 1 addition & 0 deletions MAINTAINERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje
| Naveen Tatikonda | [naveentatikonda](https://github.com/naveentatikonda) | Amazon |
| Vijayan Balasubramanian | [VijayanB](https://github.com/VijayanB) | Amazon |
| Varun Jain | [vibrantvarun](https://github.com/vibrantvarun) | Amazon |
| Zhichao Geng | [zhichao-aws](https://github.com/zhichao-aws) | Amazon |

## Emeritus

Expand Down
4 changes: 2 additions & 2 deletions gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# https://github.com/opensearch-project/OpenSearch/blob/main/libs/core/src/main/java/org/opensearch/Version.java .
# Wire compatibility of OpenSearch works such that a 3.x version is compatible with the 2.(latest-major) version.
# Therefore, to run rolling-upgrade BWC Test on local machine the BWC version here should be set 2.(latest-major).
systemProp.bwc.version=2.13.0-SNAPSHOT
systemProp.bwc.bundle.version=2.13.0
systemProp.bwc.version=2.14.0-SNAPSHOT
systemProp.bwc.bundle.version=2.14.0

# For fixing Spotless check with Java 17
org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,13 @@ private void updateOriginalFetchResults(
// 3. update original scores to normalized and combined values
// 4. order scores based on normalized and combined values
FetchSearchResult fetchSearchResult = fetchSearchResultOptional.get();
SearchHit[] searchHitArray = getSearchHits(docIds, fetchSearchResult);
// checking case when results are cached
boolean requestCache = Objects.nonNull(querySearchResults)
&& !querySearchResults.isEmpty()
&& Objects.nonNull(querySearchResults.get(0).getShardSearchRequest().requestCache())
&& querySearchResults.get(0).getShardSearchRequest().requestCache();

SearchHit[] searchHitArray = getSearchHits(docIds, fetchSearchResult, requestCache);

// create map of docId to index of search hits. This solves (2), duplicates are from
// delimiter and start/stop elements, they all have same valid doc_id. For this map
Expand Down Expand Up @@ -168,7 +174,7 @@ private void updateOriginalFetchResults(
fetchSearchResult.hits(updatedSearchHits);
}

private SearchHit[] getSearchHits(final List<Integer> docIds, final FetchSearchResult fetchSearchResult) {
private SearchHit[] getSearchHits(final List<Integer> docIds, final FetchSearchResult fetchSearchResult, final boolean requestCache) {
SearchHits searchHits = fetchSearchResult.hits();
SearchHit[] searchHitArray = searchHits.getHits();
// validate the both collections are of the same size
Expand All @@ -177,7 +183,9 @@ private SearchHit[] getSearchHits(final List<Integer> docIds, final FetchSearchR
"score normalization processor cannot produce final query result, fetch query phase returns empty results"
);
}
if (searchHitArray.length != docIds.size()) {
// in case of a cached request the results of the fetch and query phases may differ; the only
// restriction is that the number of query results is greater than or equal to the number of fetch results
if ((!requestCache && searchHitArray.length != docIds.size()) || requestCache && docIds.size() < searchHitArray.length) {
throw new IllegalStateException(
String.format(
Locale.ROOT,
Expand Down
24 changes: 22 additions & 2 deletions src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,32 @@ public final class HybridQuery extends Query implements Iterable<Query> {

private final List<Query> subQueries;

public HybridQuery(Collection<Query> subQueries) {
/**
* Create new instance of hybrid query object based on collection of sub queries and filter query
* @param subQueries collection of queries that are executed individually and contribute to a final list of combined scores
* @param filterQueries list of filters that will be applied to each sub query. Each filter from the list is added as bool "filter" clause. If this is null sub queries will be executed as is
*/
public HybridQuery(final Collection<Query> subQueries, final List<Query> filterQueries) {
Objects.requireNonNull(subQueries, "collection of queries must not be null");
if (subQueries.isEmpty()) {
throw new IllegalArgumentException("collection of queries must not be empty");
}
this.subQueries = new ArrayList<>(subQueries);
if (Objects.isNull(filterQueries) || filterQueries.isEmpty()) {
this.subQueries = new ArrayList<>(subQueries);
} else {
List<Query> modifiedSubQueries = new ArrayList<>();
for (Query subQuery : subQueries) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(subQuery, BooleanClause.Occur.MUST);
filterQueries.forEach(filterQuery -> builder.add(filterQuery, BooleanClause.Occur.FILTER));
modifiedSubQueries.add(builder.build());
}
this.subQueries = modifiedSubQueries;
}
}

/**
 * Create new instance of hybrid query object based on a collection of sub queries only.
 * No filter queries are applied, so sub queries are executed as-is.
 * @param subQueries collection of queries that are executed individually and contribute to a final list of combined scores
 */
public HybridQuery(final Collection<Query> subQueries) {
    this(subQueries, List.of());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;

import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.search.NestedHelper;
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.aggregations.AggregationProcessor;
import org.opensearch.search.internal.ContextIndexSearcher;
Expand All @@ -25,6 +25,8 @@

import lombok.extern.log4j.Log4j2;

import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasAliasFilter;
import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasNestedFieldOrNestedDocs;
import static org.opensearch.neuralsearch.util.HybridQueryUtil.isHybridQuery;

/**
Expand All @@ -51,26 +53,27 @@ public boolean searchWith(
}
}

private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
}

private static boolean isWrappedHybridQuery(final Query query) {
return query instanceof BooleanQuery
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
}

@VisibleForTesting
protected Query extractHybridQuery(final SearchContext searchContext, final Query query) {
if (hasNestedFieldOrNestedDocs(query, searchContext)
if ((hasAliasFilter(query, searchContext) || hasNestedFieldOrNestedDocs(query, searchContext))
&& isWrappedHybridQuery(query)
&& ((BooleanQuery) query).clauses().size() > 0) {
// extract hybrid query and replace bool with hybrid query
&& !((BooleanQuery) query).clauses().isEmpty()) {
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
if (booleanClauses.isEmpty() || booleanClauses.get(0).getQuery() instanceof HybridQuery == false) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level bool query");
if (!(booleanClauses.get(0).getQuery() instanceof HybridQuery)) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level query");
}
return booleanClauses.get(0).getQuery();
HybridQuery hybridQuery = (HybridQuery) booleanClauses.stream().findFirst().get().getQuery();
List<Query> filterQueries = booleanClauses.stream()
.filter(clause -> BooleanClause.Occur.FILTER == clause.getOccur())
.map(BooleanClause::getQuery)
.collect(Collectors.toList());
HybridQuery hybridQueryWithFilter = new HybridQuery(hybridQuery.getSubQueries(), filterQueries);
return hybridQueryWithFilter;
}
return query;
}
Expand Down
46 changes: 21 additions & 25 deletions src/main/java/org/opensearch/neuralsearch/util/HybridQueryUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@

import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.Query;
import org.opensearch.index.mapper.SeqNoFieldMapper;
import org.opensearch.index.search.NestedHelper;
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.internal.SearchContext;

import java.util.Objects;

/**
* Utility class for anything related to hybrid query
*/
Expand All @@ -24,7 +23,7 @@ public class HybridQueryUtil {
public static boolean isHybridQuery(final Query query, final SearchContext searchContext) {
if (query instanceof HybridQuery) {
return true;
} else if (isWrappedHybridQuery(query) && hasNestedFieldOrNestedDocs(query, searchContext)) {
} else if (isWrappedHybridQuery(query)) {
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
main reason for that is performance optimization; at the time of writing we are ok with losing some performance if that unblocks
Expand All @@ -34,38 +33,35 @@ public static boolean isHybridQuery(final Query query, final SearchContext searc
below is sample structure of such query:
Boolean {
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
}
}
filter: {
exists: {
field: "_primary_term"
}
}
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
}
}
filter: {
exists: {
field: "_primary_term"
}
}
}
TODO Need to add logic for passing hybrid sub-queries through the same logic in core to ensure there is no latency regression */
*/
// we have already checked if query in instance of Boolean in higher level else if condition
return ((BooleanQuery) query).clauses()
.stream()
.filter(clause -> clause.getQuery() instanceof HybridQuery == false)
.allMatch(clause -> {
return clause.getOccur() == BooleanClause.Occur.FILTER
&& clause.getQuery() instanceof FieldExistsQuery
&& SeqNoFieldMapper.PRIMARY_TERM_NAME.equals(((FieldExistsQuery) clause.getQuery()).getField());
});
return hasNestedFieldOrNestedDocs(query, searchContext) || hasAliasFilter(query, searchContext);
}
return false;
}

private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
public static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
}

/**
 * Checks whether the given top-level query is a Boolean query that wraps a hybrid query
 * in one of its clauses.
 * @param query top-level query to inspect
 * @return true if the query is a BooleanQuery and at least one of its clauses is a HybridQuery
 */
private static boolean isWrappedHybridQuery(final Query query) {
    if (!(query instanceof BooleanQuery)) {
        return false;
    }
    BooleanQuery booleanQuery = (BooleanQuery) query;
    return booleanQuery.clauses().stream().map(clause -> clause.getQuery()).anyMatch(clauseQuery -> clauseQuery instanceof HybridQuery);
}

/**
 * Checks if the current search request has an alias filter attached to its context.
 * @param query top-level query of the request (currently unused; kept for signature symmetry with other helpers)
 * @param searchContext search context of the request, provides the alias filter
 * @return true when the search context carries a non-null alias filter
 */
public static boolean hasAliasFilter(final Query query, final SearchContext searchContext) {
    return searchContext.aliasFilter() != null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,10 @@ public void testNeuralQueryEnricherProcessor_whenNoModelIdPassedInNeuralSparseQu

@SneakyThrows
public void testNeuralQueryEnricherProcessor_whenGetEmptyQueryBody_thenSuccess() {
String modelId = null;
try {
initializeIndexIfNotExist(index);
modelId = prepareModel();
createSearchRequestProcessor(modelId, search_pipeline);
createPipelineProcessor(modelId, ingest_pipeline, ProcessorType.TEXT_EMBEDDING);
createSearchRequestProcessor(null, search_pipeline);
createPipelineProcessor(null, ingest_pipeline, ProcessorType.TEXT_EMBEDDING);
updateIndexSettings(index, Settings.builder().put("index.search.default_pipeline", search_pipeline));
Request request = new Request("POST", "/" + index + "/_search");
Response response = client().performRequest(request);
Expand All @@ -101,7 +99,7 @@ public void testNeuralQueryEnricherProcessor_whenGetEmptyQueryBody_thenSuccess()
assertFalse(responseInMap.isEmpty());
assertEquals(3, ((Map) responseInMap.get("hits")).size());
} finally {
wipeOfTestResources(index, ingest_pipeline, modelId, search_pipeline);
wipeOfTestResources(index, ingest_pipeline, null, search_pipeline);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,8 @@ public void testResultProcessor_whenMultipleShardsAndQueryMatches_thenSuccessful

@SneakyThrows
public void testResultProcessor_whenMultipleShardsAndNoMatches_thenSuccessful() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);

HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
Expand All @@ -249,16 +247,14 @@ public void testResultProcessor_whenMultipleShardsAndNoMatches_thenSuccessful()
);
assertQueryResults(searchResponseAsMap, 0, true);
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testResultProcessor_whenMultipleShardsAndPartialMatches_thenSuccessful() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);

HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
Expand All @@ -275,7 +271,7 @@ public void testResultProcessor_whenMultipleShardsAndPartialMatches_thenSuccessf
);
assertQueryResults(searchResponseAsMap, 4, true, Range.between(0.33f, 1.0f));
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, null, SEARCH_PIPELINE);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.opensearch.search.aggregations.pipeline.PipelineAggregator;
import org.opensearch.search.fetch.FetchSearchResult;
import org.opensearch.search.fetch.QueryFetchSearchResult;
import org.opensearch.search.internal.ShardSearchRequest;
import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.threadpool.TestThreadPool;
Expand Down Expand Up @@ -401,6 +402,9 @@ public void testResultTypes_whenQueryAndFetchPresentAndSizeSame_thenCallNormaliz

QueryFetchSearchResult queryFetchSearchResult = new QueryFetchSearchResult(querySearchResult, fetchSearchResult);
queryFetchSearchResult.setShardIndex(shardId);
ShardSearchRequest shardSearchRequest = mock(ShardSearchRequest.class);
when(shardSearchRequest.requestCache()).thenReturn(Boolean.TRUE);
querySearchResult.setShardSearchRequest(shardSearchRequest);

queryPhaseResultConsumer.consumeResult(queryFetchSearchResult, partialReduceLatch::countDown);

Expand Down Expand Up @@ -485,6 +489,9 @@ public void testResultTypes_whenQueryAndFetchPresentButSizeDifferent_thenFail()

QueryFetchSearchResult queryFetchSearchResult = new QueryFetchSearchResult(querySearchResult, fetchSearchResult);
queryFetchSearchResult.setShardIndex(shardId);
ShardSearchRequest shardSearchRequest = mock(ShardSearchRequest.class);
when(shardSearchRequest.requestCache()).thenReturn(Boolean.FALSE);
querySearchResult.setShardSearchRequest(shardSearchRequest);

queryPhaseResultConsumer.consumeResult(queryFetchSearchResult, partialReduceLatch::countDown);

Expand Down
Loading

0 comments on commit 6657c9e

Please sign in to comment.