Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.13] Allowing execution of hybrid query on index alias with filters #676

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.13...2.x)
### Features
### Enhancements
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
### Bug Fixes
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
### Infrastructure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,32 @@ public final class HybridQuery extends Query implements Iterable<Query> {

private final List<Query> subQueries;

public HybridQuery(Collection<Query> subQueries) {
/**
* Create new instance of hybrid query object based on collection of sub queries and filter query
* @param subQueries collection of queries that are executed individually and contribute to a final list of combined scores
* @param filterQueries list of filters that will be applied to each sub query. Each filter from the list is added as bool "filter" clause. If this is null sub queries will be executed as is
*/
public HybridQuery(final Collection<Query> subQueries, final List<Query> filterQueries) {
Objects.requireNonNull(subQueries, "collection of queries must not be null");
if (subQueries.isEmpty()) {
throw new IllegalArgumentException("collection of queries must not be empty");
}
this.subQueries = new ArrayList<>(subQueries);
if (Objects.isNull(filterQueries) || filterQueries.isEmpty()) {
this.subQueries = new ArrayList<>(subQueries);
} else {
List<Query> modifiedSubQueries = new ArrayList<>();
for (Query subQuery : subQueries) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(subQuery, BooleanClause.Occur.MUST);
filterQueries.forEach(filterQuery -> builder.add(filterQuery, BooleanClause.Occur.FILTER));
modifiedSubQueries.add(builder.build());
}
this.subQueries = modifiedSubQueries;
}
}

public HybridQuery(final Collection<Query> subQueries) {
this(subQueries, List.of());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;

import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.search.NestedHelper;
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.aggregations.AggregationProcessor;
import org.opensearch.search.internal.ContextIndexSearcher;
Expand All @@ -25,6 +25,8 @@

import lombok.extern.log4j.Log4j2;

import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasAliasFilter;
import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasNestedFieldOrNestedDocs;
import static org.opensearch.neuralsearch.util.HybridQueryUtil.isHybridQuery;

/**
Expand All @@ -51,26 +53,27 @@ public boolean searchWith(
}
}

private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
}

private static boolean isWrappedHybridQuery(final Query query) {
return query instanceof BooleanQuery
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
}

@VisibleForTesting
protected Query extractHybridQuery(final SearchContext searchContext, final Query query) {
if (hasNestedFieldOrNestedDocs(query, searchContext)
if ((hasAliasFilter(query, searchContext) || hasNestedFieldOrNestedDocs(query, searchContext))
&& isWrappedHybridQuery(query)
&& ((BooleanQuery) query).clauses().size() > 0) {
// extract hybrid query and replace bool with hybrid query
&& !((BooleanQuery) query).clauses().isEmpty()) {
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
if (booleanClauses.isEmpty() || booleanClauses.get(0).getQuery() instanceof HybridQuery == false) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level bool query");
if (!(booleanClauses.get(0).getQuery() instanceof HybridQuery)) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level query");
}
return booleanClauses.get(0).getQuery();
HybridQuery hybridQuery = (HybridQuery) booleanClauses.stream().findFirst().get().getQuery();
List<Query> filterQueries = booleanClauses.stream()
.filter(clause -> BooleanClause.Occur.FILTER == clause.getOccur())
.map(BooleanClause::getQuery)
.collect(Collectors.toList());
HybridQuery hybridQueryWithFilter = new HybridQuery(hybridQuery.getSubQueries(), filterQueries);
return hybridQueryWithFilter;
}
return query;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@

import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.Query;
import org.opensearch.index.mapper.SeqNoFieldMapper;
import org.opensearch.index.search.NestedHelper;
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.internal.SearchContext;

import java.util.Objects;

/**
* Utility class for anything related to hybrid query
*/
Expand All @@ -24,7 +23,7 @@ public class HybridQueryUtil {
public static boolean isHybridQuery(final Query query, final SearchContext searchContext) {
if (query instanceof HybridQuery) {
return true;
} else if (isWrappedHybridQuery(query) && hasNestedFieldOrNestedDocs(query, searchContext)) {
} else if (isWrappedHybridQuery(query)) {
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
main reason for that is performance optimization, at time of writing we are ok with loosing on performance if that's unblocks
Expand All @@ -34,38 +33,35 @@ public static boolean isHybridQuery(final Query query, final SearchContext searc
below is sample structure of such query:

Boolean {
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
}
}
filter: {
exists: {
field: "_primary_term"
}
}
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
}
}
filter: {
exists: {
field: "_primary_term"
}
}
}
TODO Need to add logic for passing hybrid sub-queries through the same logic in core to ensure there is no latency regression */
*/
// we have already checked if query in instance of Boolean in higher level else if condition
return ((BooleanQuery) query).clauses()
.stream()
.filter(clause -> clause.getQuery() instanceof HybridQuery == false)
.allMatch(clause -> {
return clause.getOccur() == BooleanClause.Occur.FILTER
&& clause.getQuery() instanceof FieldExistsQuery
&& SeqNoFieldMapper.PRIMARY_TERM_NAME.equals(((FieldExistsQuery) clause.getQuery()).getField());
});
return hasNestedFieldOrNestedDocs(query, searchContext) || hasAliasFilter(query, searchContext);
}
return false;
}

private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
public static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
}

private static boolean isWrappedHybridQuery(final Query query) {
return query instanceof BooleanQuery
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
}

public static boolean hasAliasFilter(final Query query, final SearchContext searchContext) {
return Objects.nonNull(searchContext.aliasFilter());
}
}
128 changes: 122 additions & 6 deletions src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.opensearch.index.query.BoolQueryBuilder;
import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.index.query.NestedQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.index.query.RangeQueryBuilder;
import org.opensearch.index.query.TermQueryBuilder;
Expand All @@ -40,6 +41,7 @@
public class HybridQueryIT extends BaseNeuralSearchIT {
private static final String TEST_BASIC_INDEX_NAME = "test-hybrid-basic-index";
private static final String TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME = "test-hybrid-vector-doc-field-index";
private static final String TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME = "test-hybrid-multi-doc-nested-fields-index";
private static final String TEST_MULTI_DOC_INDEX_NAME = "test-hybrid-multi-doc-index";
private static final String TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD = "test-hybrid-multi-doc-single-shard-index";
private static final String TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD =
Expand Down Expand Up @@ -256,7 +258,7 @@ public void testComplexQuery_whenMultipleIdenticalSubQueries_thenSuccessful() {
public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
initializeIndexIfNotExist(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT);
Expand All @@ -266,7 +268,7 @@ public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult(
hybridQueryBuilderOnlyTerm.add(termQuery2Builder);

Map<String, Object> searchResponseAsMap = search(
TEST_MULTI_DOC_INDEX_NAME,
TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME,
hybridQueryBuilderOnlyTerm,
null,
10,
Expand All @@ -283,7 +285,7 @@ public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult(
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
}
}

Expand Down Expand Up @@ -578,6 +580,102 @@ public void testRequestCache_whenMultipleShardsQueryReturnResults_thenSuccessful
}
}

@SneakyThrows
public void testWrappedQueryWithFilter_whenIndexAliasHasFilterAndIndexWithNestedFields_thenSuccess() {
String modelId = null;
String alias = "alias_with_filter";
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
// create alias for index
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
createIndexAlias(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, alias, aliasFilter);

NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
TEST_KNN_VECTOR_FIELD_NAME_1,
TEST_QUERY_TEXT,
"",
modelId,
5,
null,
null
);
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(neuralQueryBuilder);

Map<String, Object> searchResponseAsMap = search(
alias,
hybridQueryBuilder,
null,
10,
Map.of("search_pipeline", SEARCH_PIPELINE)
);

assertEquals(2, getHitCount(searchResponseAsMap));
assertTrue(getMaxScore(searchResponseAsMap).isPresent());
assertEquals(1.0f, getMaxScore(searchResponseAsMap).get(), DELTA_FOR_SCORE_ASSERTION);

Map<String, Object> total = getTotalHits(searchResponseAsMap);
assertNotNull(total.get("value"));
assertEquals(2, total.get("value"));
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
deleteIndexAlias(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, alias);
wipeOfTestResources(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testWrappedQueryWithFilter_whenIndexAliasHasFilters_thenSuccess() {
String modelId = null;
String alias = "alias_with_filter";
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
// create alias for index
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
createIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias, aliasFilter);

NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
TEST_KNN_VECTOR_FIELD_NAME_1,
TEST_QUERY_TEXT,
"",
modelId,
5,
null,
null
);
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(neuralQueryBuilder);

Map<String, Object> searchResponseAsMap = search(
alias,
hybridQueryBuilder,
null,
10,
Map.of("search_pipeline", SEARCH_PIPELINE)
);

assertEquals(2, getHitCount(searchResponseAsMap));
assertTrue(getMaxScore(searchResponseAsMap).isPresent());
assertEquals(1.0f, getMaxScore(searchResponseAsMap).get(), DELTA_FOR_SCORE_ASSERTION);

Map<String, Object> total = getTotalHits(searchResponseAsMap);
assertNotNull(total.get("value"));
assertEquals(2, total.get("value"));
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
deleteIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
}
}

@SneakyThrows
private void initializeIndexIfNotExist(String indexName) throws IOException {
if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(TEST_BASIC_INDEX_NAME)) {
Expand Down Expand Up @@ -628,10 +726,28 @@ private void initializeIndexIfNotExist(String indexName) throws IOException {
assertEquals(3, getDocCount(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME));
}

if (TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME.equals(indexName) && !indexExists(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME)) {
createIndexWithConfiguration(
indexName,
buildIndexConfiguration(
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
List.of(TEST_NESTED_TYPE_FIELD_NAME_1),
1
),
""
);
addDocsToIndex(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
}

if (TEST_MULTI_DOC_INDEX_NAME.equals(indexName) && !indexExists(TEST_MULTI_DOC_INDEX_NAME)) {
prepareKnnIndex(
TEST_MULTI_DOC_INDEX_NAME,
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE))
createIndexWithConfiguration(
indexName,
buildIndexConfiguration(
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
List.of(),
1
),
""
);
addDocsToIndex(TEST_MULTI_DOC_INDEX_NAME);
}
Expand Down
Loading
Loading