Skip to content

Commit

Permalink
Validate Disjuction query in HybridQueryPhaseSearcher
Browse files Browse the repository at this point in the history
Signed-off-by: Owais <[email protected]>
  • Loading branch information
owaiskazi19 committed Jan 21, 2025
1 parent 84e6306 commit 61bc2e3
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
- Optimize ML inference connection retry logic ([#1054](https://github.com/opensearch-project/neural-search/pull/1054))
- Support for builder constructor in Neural Query Builder ([#1047](https://github.com/opensearch-project/neural-search/pull/1047))
- Validate Disjunction query to avoid having nested hybrid query ([#1127](https://github.com/opensearch-project/neural-search/pull/1127))
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import lombok.NoArgsConstructor;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.MapperService;
Expand Down Expand Up @@ -104,7 +105,7 @@ protected Query extractHybridQuery(final SearchContext searchContext, final Quer
* }
* ]
* }
* TODO add similar validation for other compound type queries like dis_max, constant_score etc.
* TODO add similar validation for other compound type queries like constant_score, function_score etc.
*
* @param query query to validate
*/
Expand All @@ -114,6 +115,10 @@ private void validateQuery(final SearchContext searchContext, final Query query)
for (BooleanClause booleanClause : booleanClauses) {
validateNestedBooleanQuery(booleanClause.getQuery(), getMaxDepthLimit(searchContext));
}
} else if (query instanceof DisjunctionMaxQuery) {
for (Query disjunct : (DisjunctionMaxQuery) query) {
validateNestedDisJunctionQuery(disjunct, getMaxDepthLimit(searchContext));
}
}
}

Expand All @@ -135,6 +140,24 @@ private void validateNestedBooleanQuery(final Query query, final int level) {
}
}

private void validateNestedDisJunctionQuery(final Query query, final int level) {
if (query instanceof HybridQuery) {
throw new IllegalArgumentException("hybrid query must be a top level query and cannot be wrapped into other queries");
}
if (level <= 0) {
// ideally we should throw an error here but this code is on the main search workflow path and that might block
// execution of some queries. Instead, we're silently exit and allow such query to execute and potentially produce incorrect
// results in case hybrid query is wrapped into such dis_max query
log.error("reached max nested query limit, cannot process dis_max query with that many nested clauses");
return;
}
if (query instanceof DisjunctionMaxQuery) {
for (Query disjunct : (DisjunctionMaxQuery) query) {
validateNestedDisJunctionQuery(disjunct, level - 1);
}
}
}

private int getMaxDepthLimit(final SearchContext searchContext) {
Settings indexSettings = searchContext.getQueryShardContext().getIndexSettings().getSettings();
return MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING.get(indexSettings).intValue();
Expand Down

0 comments on commit 61bc2e3

Please sign in to comment.