Skip to content

Commit

Permalink
Use ValueFetcher when loading text snippets to highlight (elastic#63572)
Browse files Browse the repository at this point in the history
HighlighterUtils.loadFieldValues() loads values directly from the source, and
then callers have to deal with filtering out values that would have been removed
by an ignore_above filter on keyword fields. Instead, we can use the
ValueFetcher for the relevant field, which handles all this logic for us.

Closes elastic#59931.
  • Loading branch information
romseygeek committed Nov 24, 2020
1 parent 65293df commit f5e92be
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 44 deletions.
34 changes: 34 additions & 0 deletions docs/reference/migration/migrate_7_11.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[[breaking-changes-7.11]]
== Breaking changes in 7.11
++++
<titleabbrev>7.11</titleabbrev>
++++

This section discusses the changes that you need to be aware of when migrating
your application to {es} 7.11.

See also <<release-highlights>> and <<es-release-notes>>.

// * <<breaking_710_blah_changes>>
// * <<breaking_710_blah_changes>>

//NOTE: The notable-breaking-changes tagged regions are re-used in the
//Installation and Upgrade Guide

//tag::notable-breaking-changes[]

[discrete]
[[breaking_710_search_changes]]
=== Search changes

[[highlight-normalization]]
.Keyword fields with a custom normalizer will use the normalized form when highlighting
[%collapsible]
====
*Details* +
Highlighters now use the same framework to load their values as the
`fields` section of a search response. This means that normalization
will be applied to the values of a keyword field; for example, a
field configured with a lowercase normalizer will return highlighted
snippets in lower case.
====
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -41,12 +41,12 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
@Override
protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
QueryShardContext qsc,
MappedFieldType fieldType,
Field field,
HitContext hitContext,
boolean forceSource
) throws IOException {
List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource);
List<Object> fieldValues = super.loadFieldValues(highlighter, qsc, fieldType, hitContext, forceSource);

List<Object> strings = new ArrayList<>(fieldValues.size());
AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
highlighter.setFieldMatcher((name) -> "text".equals(name));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2775,6 +2775,34 @@ public void testKeywordFieldHighlighting() throws IOException {
equalTo("<em>some text</em>"));
}

public void testCopyToFields() throws Exception {
XContentBuilder b = jsonBuilder().startObject().startObject("properties");
b.startObject("foo");
{
b.field("type", "text");
b.field("copy_to", "foo_copy");
}
b.endObject();
b.startObject("foo_copy").field("type", "text").endObject();
b.endObject().endObject();
prepareCreate("test").setMapping(b).get();

client().prepareIndex("test").setId("1")
.setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject())
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.get();

SearchResponse response = client().prepareSearch()
.setQuery(matchQuery("foo_copy", "brown"))
.highlighter(new HighlightBuilder().field(new Field("foo_copy")))
.get();

assertHitCount(response, 1);
HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("how now <em>brown</em> cow"));
}

public void testACopyFieldWithNestedQuery() throws Exception {
String mapping = Strings.toString(jsonBuilder().startObject().startObject("type").startObject("properties")
.startObject("foo")
Expand Down Expand Up @@ -2986,7 +3014,7 @@ public void testWithNormalizer() throws Exception {
assertHitCount(searchResponse, 1);
HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("keyword");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
assertThat(field.getFragments()[0].string(), equalTo("<em>hello world</em>"));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private final FieldHighlighter fieldHighlighter;
private final int keywordIgnoreAbove;
private final int maxAnalyzedOffset;

/**
Expand All @@ -84,7 +83,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
* @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
* @param maxPassages the maximum number of passes to highlight
* @param fieldMatcher decides which terms should be highlighted
* @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
* @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
* offset source for it because it'd be super slow
*/
Expand All @@ -98,7 +96,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
int noMatchSize,
int maxPassages,
Predicate<String> fieldMatcher,
int keywordIgnoreAbove,
int maxAnalyzedOffset) throws IOException {
super(searcher, analyzer);
this.offsetSource = offsetSource;
Expand All @@ -109,7 +106,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
this.field = field;
this.noMatchSize = noMatchSize;
this.setFieldMatcher(fieldMatcher);
this.keywordIgnoreAbove = keywordIgnoreAbove;
this.maxAnalyzedOffset = maxAnalyzedOffset;
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}
Expand All @@ -127,9 +123,6 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of ["
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

import static java.util.Collections.singleton;

Expand All @@ -46,24 +48,17 @@ private HighlightUtils() {
* Load field values for highlighting.
*/
public static List<Object> loadFieldValues(MappedFieldType fieldType,
QueryShardContext qsc,
FetchSubPhase.HitContext hitContext,
boolean forceSource) throws IOException {
//percolator needs to always load from source, thus it sets the global force source to true
List<Object> textsToHighlight;
if (forceSource == false && fieldType.isStored()) {
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
hitContext.reader().document(hitContext.docId(), fieldVisitor);
textsToHighlight = fieldVisitor.fields().get(fieldType.name());
if (textsToHighlight == null) {
// Can happen if the document doesn't have the field to highlight
textsToHighlight = Collections.emptyList();
}
} else {
SourceLookup sourceLookup = hitContext.sourceLookup();
textsToHighlight = sourceLookup.extractRawValues(fieldType.name());
List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
}
assert textsToHighlight != null;
return textsToHighlight;
ValueFetcher fetcher = fieldType.valueFetcher(qsc, null);
return fetcher.fetchValues(hitContext.sourceLookup());
}

public static class Encoders {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -101,20 +100,14 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getQueryShardContext().getFieldNameIndexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldType).ignoreAbove();
}
final int maxAnalyzedOffset = context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();

textsToHighlight = HighlightUtils.loadFieldValues(fieldType, hitContext, fieldContext.forceSource);
textsToHighlight
= HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource);

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

Expand Down Expand Up @@ -72,7 +72,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
FetchSubPhase.HitContext hitContext = fieldContext.hitContext;

CheckedSupplier<String, IOException> loadFieldValues = () -> {
List<Object> fieldValues = loadFieldValues(highlighter, fieldType, field, hitContext, fieldContext.forceSource);
List<Object> fieldValues = loadFieldValues(highlighter, fieldContext.context.getQueryShardContext(), fieldType,
hitContext, fieldContext.forceSource);
if (fieldValues.size() == 0) {
return null;
}
Expand Down Expand Up @@ -111,10 +112,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;
int maxAnalyzedOffset = fieldContext.context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
int keywordIgnoreAbove = Integer.MAX_VALUE;
if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldContext.fieldType).ignoreAbove();
}
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getFieldNameIndexAnalyzer());
PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
Expand Down Expand Up @@ -151,7 +148,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
fieldContext.field.fieldOptions().noMatchSize(),
higlighterNumberOfFragments,
fieldMatcher(fieldContext),
keywordIgnoreAbove,
maxAnalyzedOffset
);
}
Expand All @@ -167,12 +163,12 @@ protected Analyzer wrapAnalyzer(Analyzer analyzer) {

protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
QueryShardContext qsc,
MappedFieldType fieldType,
SearchHighlightContext.Field field,
FetchSubPhase.HitContext hitContext,
boolean forceSource
) throws IOException {
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, hitContext, forceSource);
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, qsc, hitContext, forceSource);
fieldValues = fieldValues.stream()
.map((s) -> convertFieldValue(fieldType, s))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer a
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
Expand Down

0 comments on commit f5e92be

Please sign in to comment.