Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search): make graphql query autoCompleteForMultiple to show exact matches first #11586

Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.index.query.*;
Expand All @@ -46,7 +47,7 @@
@Slf4j
public class AutocompleteRequestHandler {

private final List<String> _defaultAutocompleteFields;
private final List<Pair> _defaultAutocompleteFields;
private final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes;

private static final Map<EntitySpec, AutocompleteRequestHandler>
Expand All @@ -69,8 +70,12 @@ public AutocompleteRequestHandler(
fieldSpecs.stream()
.map(SearchableFieldSpec::getSearchableAnnotation)
.filter(SearchableAnnotation::isEnableAutocomplete)
.map(SearchableAnnotation::getFieldName),
Stream.of("urn"))
.map(
searchableAnnotation ->
Pair.of(
searchableAnnotation.getFieldName(),
Double.toString(searchableAnnotation.getBoostScore()))),
Stream.of(Pair.of("urn", PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore"))))
.collect(Collectors.toList());
searchableFieldTypes =
fieldSpecs.stream()
Expand Down Expand Up @@ -169,7 +174,7 @@ private BoolQueryBuilder getQuery(
public BoolQueryBuilder getQuery(
@Nonnull ObjectMapper objectMapper,
@Nullable AutocompleteConfiguration customAutocompleteConfig,
List<String> autocompleteFields,
List<Pair> autocompleteFields,
@Nonnull String query) {

BoolQueryBuilder finalQuery =
Expand All @@ -189,7 +194,7 @@ public BoolQueryBuilder getQuery(

private Optional<QueryBuilder> getAutocompleteQuery(
@Nullable AutocompleteConfiguration customConfig,
List<String> autocompleteFields,
List<Pair> autocompleteFields,
@Nonnull String query) {
Optional<QueryBuilder> result = Optional.empty();

Expand All @@ -201,27 +206,27 @@ private Optional<QueryBuilder> getAutocompleteQuery(
}

private static BoolQueryBuilder defaultQuery(
List<String> autocompleteFields, @Nonnull String query) {
List<Pair> autocompleteFields, @Nonnull String query) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1);

// Search for exact matches with higher boost and ngram matches
MultiMatchQueryBuilder autocompleteQueryBuilder =
QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.BOOL_PREFIX);
QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.PHRASE);

final float urnBoost =
Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore"));
autocompleteFields.forEach(
fieldName -> {
pair -> {
final String fieldName = (String) pair.getLeft();
final float boostScore = Float.parseFloat((String) pair.getRight());
if ("urn".equals(fieldName)) {
autocompleteQueryBuilder.field(fieldName + ".ngram", urnBoost);
autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", urnBoost);
autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", urnBoost);
autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", urnBoost);
autocompleteQueryBuilder.field(fieldName + ".ngram", boostScore);
autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", boostScore);
autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", boostScore);
autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", boostScore);
} else {
autocompleteQueryBuilder.field(fieldName + ".ngram");
autocompleteQueryBuilder.field(fieldName + ".ngram._2gram");
autocompleteQueryBuilder.field(fieldName + ".ngram._3gram");
autocompleteQueryBuilder.field(fieldName + ".ngram._4gram");
autocompleteQueryBuilder.field(fieldName + ".ngram", boostScore);
autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", boostScore);
autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", boostScore);
autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", boostScore);
}
autocompleteQueryBuilder.field(fieldName + ".delimited");
finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(fieldName + ".delimited", query));
Expand All @@ -241,12 +246,14 @@ private HighlightBuilder getHighlights(@Nullable String field) {
// Check for each field name and any subfields
getAutocompleteFields(field)
.forEach(
fieldName ->
highlightBuilder
.field(fieldName)
.field(fieldName + ".*")
.field(fieldName + ".ngram")
.field(fieldName + ".delimited"));
pair -> {
final String fieldName = (String) pair.getLeft();
highlightBuilder
.field(fieldName)
.field(fieldName + ".*")
.field(fieldName + ".ngram")
.field(fieldName + ".delimited");
});

// set field match req false for ngram
highlightBuilder.fields().stream()
Expand All @@ -256,9 +263,9 @@ private HighlightBuilder getHighlights(@Nullable String field) {
return highlightBuilder;
}

private List<String> getAutocompleteFields(@Nullable String field) {
private List<Pair> getAutocompleteFields(@Nullable String field) {
if (field != null && !field.isEmpty()) {
return ImmutableList.of(field);
return ImmutableList.of(Pair.of(field, "2.0"));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is hardcoded to 2.0 and looks like it treats URN matches and non-URN matches the same. Previously it weighted URNs higher which probably doesn't make sense for auto-complete purposes, but now the conditional on line 220 is the same for both conditions. So we should probably simplify the conditional and likely remove the boostScore entirely?

Copy link
Contributor Author

@deepgarg-visa deepgarg-visa Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I checked, we can't remove the boost score entirely. We need a higher boost score on the ngram subfields of non-urn fields in the "multi_match" clause to get the expected result. I would suggest that non-urn fields take their boost score from their respective searchable spec (which I have already done) and that the urn field use only the default boost score (1.0) since, as you have rightly mentioned, a higher URN weight doesn't make sense for auto-complete purposes.

Please find the expected multi_match clause

{ "multi_match": { "query": "aucdl", "fields": [ "name.delimited^1.0", "name.ngram^10.0", "name.ngram._2gram^10.0", "name.ngram._3gram^10.0", "name.ngram._4gram^10.0", "urn.delimited^1.0", "urn.ngram^1.0", "urn.ngram._2gram^1.0", "urn.ngram._3gram^1.0", "urn.ngram._4gram^1.0" ], "type": "phrase", "operator": "OR", "slop": 0, "prefix_length": 0, "max_expansions": 50, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "fuzzy_transpositions": true, "boost": 1.0 } }

In the case of the function at line 266, could we add a condition that uses a default boost score of 10.0 for a non-urn field and a boost score of 1.0 for the urn field?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, there are two things to consider here: the relative weights between the fields being used for the query AND the relative weights of the subfields (delimited, ngram, etc). Most likely we should be applying a multiplier. The URN, while important as a unique identifier for general search, is not as important here, so the urn field weight is likely 1.0.

Next, let's consider a multiplier for ngram. I am not seeing these being applied ->

The field weights for ngrams should be impacted by that configuration, and they don't appear to be, based on the example query above.

Generally the weight formula for ngram subfields should be something like:
<field annotation weight> * <configuration ngram factor>

Copy link
Contributor Author

@deepgarg-visa deepgarg-visa Oct 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @david-leifker, as suggested, I have made the following changes:

  • add "match" clause for exact matches
  • make URN field default boostscore to 1.0
  • make ngram subfields weight as per the formula suggested
  • add a testcase in class "SampleDataFixtureTestBase.java"
  • add testdata in containerindex_v2.json.gz
  • reverted to "bool_prefix" type in "multi_match" clause
  • simplify the condition at line 222

Please find the final query

{ "multi_match": { "query": "container", "fields": [ "name.delimited^1.0", "name.ngram^10.0", "name.ngram._2gram^12.0", "name.ngram._3gram^15.0", "name.ngram._4gram^18.0", "urn.delimited^1.0", "urn.ngram^1.0", "urn.ngram._2gram^1.0", "urn.ngram._3gram^1.0", "urn.ngram._4gram^1.0" ], "type": "bool_prefix", "operator": "OR", "slop": 0, "prefix_length": 0, "max_expansions": 50, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "fuzzy_transpositions": true, "boost": 1 } }, { "match": { "name.keyword": { "query": "container", "operator": "OR", "prefix_length": 0, "max_expansions": 50, "fuzzy_transpositions": true, "lenient": false, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "boost": 10 } } }

}
return _defaultAutocompleteFields;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public void testDefaultAutocompleteRequest() {
assertTrue(queryFields.containsKey("keyPart1.ngram._2gram"));
assertTrue(queryFields.containsKey("keyPart1.ngram._3gram"));
assertTrue(queryFields.containsKey("keyPart1.ngram._4gram"));
assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX);
assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.PHRASE);

MatchPhrasePrefixQueryBuilder prefixQuery =
(MatchPhrasePrefixQueryBuilder) query.should().get(0);
Expand Down Expand Up @@ -138,7 +138,7 @@ public void testAutocompleteRequestWithField() {
assertTrue(queryFields.containsKey("field.ngram._2gram"));
assertTrue(queryFields.containsKey("field.ngram._3gram"));
assertTrue(queryFields.containsKey("field.ngram._4gram"));
assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX);
assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.PHRASE);

MatchPhrasePrefixQueryBuilder prefixQuery =
(MatchPhrasePrefixQueryBuilder) query.should().get(0);
Expand Down
Loading