Use ValueFetcher when loading text snippets to highlight (elastic#63572)

HighlighterUtils.loadFieldValues() loads values directly from the source, and then callers have to deal with filtering out values that would have been removed by an ignore_above filter on keyword fields. Instead, we can use the ValueFetcher for the relevant field, which handles all this logic for us. Closes elastic#59931.
romseygeek · Nov 24, 2020 · f5e92be · f5e92be
1 parent 65293df
commit f5e92be
Show file tree

Hide file tree

Showing 9 changed files with 81 additions and 44 deletions.
diff --git a/docs/reference/migration/migrate_7_11.asciidoc b/docs/reference/migration/migrate_7_11.asciidoc
@@ -0,0 +1,34 @@
+[[breaking-changes-7.11]]
+== Breaking changes in 7.11
+++++
+<titleabbrev>7.11</titleabbrev>
+++++
+
+This section discusses the changes that you need to be aware of when migrating
+your application to {es} 7.11.
+
+See also <<release-highlights>> and <<es-release-notes>>.
+
+// * <<breaking_710_blah_changes>>
+// * <<breaking_710_blah_changes>>
+
+//NOTE: The notable-breaking-changes tagged regions are re-used in the
+//Installation and Upgrade Guide
+
+//tag::notable-breaking-changes[]
+
+[discrete]
+[[breaking_710_search_changes]]
+=== Search changes
+
+[[highlight-normalization]]
+.Keyword fields with a custom normalizer will use the normalized form when highlighting
+[%collapsible]
+====
+*Details* +
+Highlighters now use the same framework to load their values as the
+`fields` section of a search response.  This means that normalization
+will be applied to the values of a keyword field; for example, a
+field configured with a lowercase normalizer will return highlighted
+snippets in lower case.
+====
diff --git a/...main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java b/...main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java
@@ -26,8 +26,8 @@
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
+import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
-import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -41,12 +41,12 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
     @Override
     protected List<Object> loadFieldValues(
         CustomUnifiedHighlighter highlighter,
+        QueryShardContext qsc,
         MappedFieldType fieldType,
-        Field field,
         HitContext hitContext,
         boolean forceSource
     ) throws IOException {
-        List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource);
+        List<Object> fieldValues = super.loadFieldValues(highlighter, qsc, fieldType, hitContext, forceSource);
 
         List<Object> strings = new ArrayList<>(fieldValues.size());
         AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()];

diff --git a/...java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java b/...java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java
@@ -118,7 +118,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
             noMatchSize,
             expectedPassages.length,
             name -> "text".equals(name),
-            Integer.MAX_VALUE,
             Integer.MAX_VALUE
         );
         highlighter.setFieldMatcher((name) -> "text".equals(name));

diff --git a/...usterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/...usterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
@@ -2775,6 +2775,34 @@ public void testKeywordFieldHighlighting() throws IOException {
                 equalTo("<em>some text</em>"));
     }
 
+    public void testCopyToFields() throws Exception {
+        XContentBuilder b = jsonBuilder().startObject().startObject("properties");
+        b.startObject("foo");
+        {
+            b.field("type", "text");
+            b.field("copy_to", "foo_copy");
+        }
+        b.endObject();
+        b.startObject("foo_copy").field("type", "text").endObject();
+        b.endObject().endObject();
+        prepareCreate("test").setMapping(b).get();
+
+        client().prepareIndex("test").setId("1")
+            .setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject())
+            .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
+            .get();
+
+        SearchResponse response = client().prepareSearch()
+            .setQuery(matchQuery("foo_copy", "brown"))
+            .highlighter(new HighlightBuilder().field(new Field("foo_copy")))
+            .get();
+
+        assertHitCount(response, 1);
+        HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy");
+        assertThat(field.getFragments().length, equalTo(1));
+        assertThat(field.getFragments()[0].string(), equalTo("how now <em>brown</em> cow"));
+    }
+
     public void testACopyFieldWithNestedQuery() throws Exception {
         String mapping = Strings.toString(jsonBuilder().startObject().startObject("type").startObject("properties")
                     .startObject("foo")
@@ -2986,7 +3014,7 @@ public void testWithNormalizer() throws Exception {
             assertHitCount(searchResponse, 1);
             HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("keyword");
             assertThat(field.getFragments().length, equalTo(1));
-            assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
+            assertThat(field.getFragments()[0].string(), equalTo("<em>hello world</em>"));
         }
     }
 

diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java
@@ -64,7 +64,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
     private final Locale breakIteratorLocale;
     private final int noMatchSize;
     private final FieldHighlighter fieldHighlighter;
-    private final int keywordIgnoreAbove;
     private final int maxAnalyzedOffset;
 
     /**
@@ -84,7 +83,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
      * @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
      * @param maxPassages the maximum number of passes to highlight
      * @param fieldMatcher decides which terms should be highlighted
-     * @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
      * @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
      *                          offset source for it because it'd be super slow
      */
@@ -98,7 +96,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
                                     int noMatchSize,
                                     int maxPassages,
                                     Predicate<String> fieldMatcher,
-                                    int keywordIgnoreAbove,
                                     int maxAnalyzedOffset) throws IOException {
         super(searcher, analyzer);
         this.offsetSource = offsetSource;
@@ -109,7 +106,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
         this.field = field;
         this.noMatchSize = noMatchSize;
         this.setFieldMatcher(fieldMatcher);
-        this.keywordIgnoreAbove = keywordIgnoreAbove;
         this.maxAnalyzedOffset = maxAnalyzedOffset;
         fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
     }
@@ -127,9 +123,6 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
             return null;
         }
         int fieldValueLength = fieldValue.length();
-        if (fieldValueLength > keywordIgnoreAbove) {
-            return null; // skip highlighting keyword terms that were ignored during indexing
-        }
         if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
             throw new IllegalArgumentException(
                 "The length of ["

diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java
@@ -23,12 +23,14 @@
 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
 import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
 import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.ValueFetcher;
+import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchSubPhase;
-import org.elasticsearch.search.lookup.SourceLookup;
 
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
+import java.util.Objects;
 
 import static java.util.Collections.singleton;
 
@@ -46,24 +48,17 @@ private HighlightUtils() {
      * Load field values for highlighting.
      */
     public static List<Object> loadFieldValues(MappedFieldType fieldType,
+                                               QueryShardContext qsc,
                                                FetchSubPhase.HitContext hitContext,
                                                boolean forceSource) throws IOException {
-        //percolator needs to always load from source, thus it sets the global force source to true
-        List<Object> textsToHighlight;
         if (forceSource == false && fieldType.isStored()) {
             CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
             hitContext.reader().document(hitContext.docId(), fieldVisitor);
-            textsToHighlight = fieldVisitor.fields().get(fieldType.name());
-            if (textsToHighlight == null) {
-                // Can happen if the document doesn't have the field to highlight
-                textsToHighlight = Collections.emptyList();
-            }
-        } else {
-            SourceLookup sourceLookup = hitContext.sourceLookup();
-            textsToHighlight = sourceLookup.extractRawValues(fieldType.name());
+            List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
+            return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
         }
-        assert textsToHighlight != null;
-        return textsToHighlight;
+        ValueFetcher fetcher = fieldType.valueFetcher(qsc, null);
+        return fetcher.fetchValues(hitContext.sourceLookup());
     }
 
     public static class Encoders {

diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -36,7 +36,6 @@
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.fetch.FetchContext;
 import org.elasticsearch.search.fetch.FetchSubPhase;
@@ -101,20 +100,14 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
         ArrayList<TextFragment> fragsList = new ArrayList<>();
         List<Object> textsToHighlight;
         Analyzer analyzer = context.getQueryShardContext().getFieldNameIndexAnalyzer();
-        Integer keywordIgnoreAbove = null;
-        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
-            keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldType).ignoreAbove();
-        }
         final int maxAnalyzedOffset = context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
 
-        textsToHighlight = HighlightUtils.loadFieldValues(fieldType, hitContext, fieldContext.forceSource);
+        textsToHighlight
+            = HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource);
 
         for (Object textToHighlight : textsToHighlight) {
             String text = convertFieldValue(fieldType, textToHighlight);
             int textLength = text.length();
-            if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
-                continue; // skip highlighting keyword terms that were ignored during indexing
-            }
             if (textLength > maxAnalyzedOffset) {
                 throw new IllegalArgumentException(
                     "The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() +

diff --git a/...r/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/...r/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
@@ -34,9 +34,9 @@
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.mapper.IdFieldMapper;
-import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.TextSearchInfo;
+import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchSubPhase;
 import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 
@@ -72,7 +72,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
         FetchSubPhase.HitContext hitContext = fieldContext.hitContext;
 
         CheckedSupplier<String, IOException> loadFieldValues = () -> {
-            List<Object> fieldValues = loadFieldValues(highlighter, fieldType, field, hitContext, fieldContext.forceSource);
+            List<Object> fieldValues = loadFieldValues(highlighter, fieldContext.context.getQueryShardContext(), fieldType,
+                hitContext, fieldContext.forceSource);
             if (fieldValues.size() == 0) {
                 return null;
             }
@@ -111,10 +112,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
             ? HighlightUtils.Encoders.HTML
             : HighlightUtils.Encoders.DEFAULT;
         int maxAnalyzedOffset = fieldContext.context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
-        int keywordIgnoreAbove = Integer.MAX_VALUE;
-        if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
-            keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldContext.fieldType).ignoreAbove();
-        }
         int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
         Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getFieldNameIndexAnalyzer());
         PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
@@ -151,7 +148,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
             fieldContext.field.fieldOptions().noMatchSize(),
             higlighterNumberOfFragments,
             fieldMatcher(fieldContext),
-            keywordIgnoreAbove,
             maxAnalyzedOffset
         );
     }
@@ -167,12 +163,12 @@ protected Analyzer wrapAnalyzer(Analyzer analyzer) {
 
     protected List<Object> loadFieldValues(
         CustomUnifiedHighlighter highlighter,
+        QueryShardContext qsc,
         MappedFieldType fieldType,
-        SearchHighlightContext.Field field,
         FetchSubPhase.HitContext hitContext,
         boolean forceSource
     ) throws IOException {
-        List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, hitContext, forceSource);
+        List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, qsc, hitContext, forceSource);
         fieldValues = fieldValues.stream()
             .map((s) -> convertFieldValue(fieldType, s))
             .collect(Collectors.toList());

diff --git a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -92,7 +92,6 @@ private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer a
             noMatchSize,
             expectedPassages.length,
             name -> "text".equals(name),
-            Integer.MAX_VALUE,
             Integer.MAX_VALUE
         );
         final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);