Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ValueFetcher when loading text snippets to highlight #63572

Merged
merged 10 commits into from
Nov 24, 2020
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -43,11 +43,11 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
MappedFieldType fieldType,
Field field,
MapperService mapperService,
HitContext hitContext,
boolean forceSource
boolean storedFieldsAvailable
) throws IOException {
List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource);
List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, mapperService, hitContext, storedFieldsAvailable);

List<Object> strings = new ArrayList<>(fieldValues.size());
AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
highlighter.setFieldMatcher((name) -> "text".equals(name));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private final FieldHighlighter fieldHighlighter;
private final int keywordIgnoreAbove;
private final int maxAnalyzedOffset;

/**
Expand All @@ -84,7 +83,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
* @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
* @param maxPassages the maximum number of passes to highlight
* @param fieldMatcher decides which terms should be highlighted
* @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
* @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
* offset source for it because it'd be super slow
*/
Expand All @@ -98,7 +96,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
int noMatchSize,
int maxPassages,
Predicate<String> fieldMatcher,
int keywordIgnoreAbove,
int maxAnalyzedOffset) throws IOException {
super(searcher, analyzer);
this.offsetSource = offsetSource;
Expand All @@ -109,7 +106,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
this.field = field;
this.noMatchSize = noMatchSize;
this.setFieldMatcher(fieldMatcher);
this.keywordIgnoreAbove = keywordIgnoreAbove;
this.maxAnalyzedOffset = maxAnalyzedOffset;
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}
Expand All @@ -127,9 +123,6 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of ["
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

import static java.util.Collections.singleton;

Expand All @@ -46,24 +48,17 @@ private HighlightUtils() {
* Load field values for highlighting.
*/
public static List<Object> loadFieldValues(MappedFieldType fieldType,
MapperService mapperService,
FetchSubPhase.HitContext hitContext,
boolean forceSource) throws IOException {
//percolator needs to always load from source, thus it sets the global force source to true
List<Object> textsToHighlight;
if (forceSource == false && fieldType.isStored()) {
romseygeek marked this conversation as resolved.
Show resolved Hide resolved
boolean storedFieldsAvailable) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me it'd be best to keep the original forceSource name and value -- it matches the force_source REST option that this value comes and avoids having mixed concepts in the code.

if (storedFieldsAvailable && fieldType.isStored()) {
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
hitContext.reader().document(hitContext.docId(), fieldVisitor);
textsToHighlight = fieldVisitor.fields().get(fieldType.name());
if (textsToHighlight == null) {
// Can happen if the document doesn't have the field to highlight
textsToHighlight = Collections.emptyList();
}
} else {
SourceLookup sourceLookup = hitContext.sourceLookup();
textsToHighlight = sourceLookup.extractRawValues(fieldType.name());
List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
}
assert textsToHighlight != null;
return textsToHighlight;
ValueFetcher fetcher = fieldType.valueFetcher(mapperService, null, null);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

null looks is going to cause this to fail on runtime fields. Do we filter those out other places?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant to highlight this with a comment of 'not sure about this' - I'm pretty sure that we filter out non-text fields further up the stack, but I need to double-check.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do try to highlight runtime fields. I'm not sure how we manage not to fail here. Runtime fields really need the SearchLookup to do anything.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I vaguely remember looking at this too, and finding that for runtime fields we simply don't return anything highlighted rather than failing, as they are not in _source nor stored.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that runtime fields can't be highlighted anyway, I just added a null check for SearchLookup inside the value fetcher in AbstractScriptFieldType.

return fetcher.fetchValues(hitContext.sourceLookup());
}

public static class Encoders {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -101,22 +100,14 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.mapperService().documentMapper().mappers().indexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.mapperService().documentMapper()
.mappers().getMapper(fieldContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();

textsToHighlight = HighlightUtils.loadFieldValues(fieldType, hitContext, fieldContext.forceSource);
textsToHighlight
= HighlightUtils.loadFieldValues(fieldType, context.mapperService(), hitContext, fieldContext.forceSource == false);

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
Expand Down Expand Up @@ -73,7 +73,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
FetchSubPhase.HitContext hitContext = fieldContext.hitContext;

CheckedSupplier<String, IOException> loadFieldValues = () -> {
List<Object> fieldValues = loadFieldValues(highlighter, fieldType, field, hitContext, fieldContext.forceSource);
List<Object> fieldValues = loadFieldValues(highlighter, fieldType,
fieldContext.context.mapperService(), hitContext, fieldContext.forceSource == false);
if (fieldValues.size() == 0) {
return null;
}
Expand Down Expand Up @@ -112,12 +113,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;
int maxAnalyzedOffset = fieldContext.context.getIndexSettings().getHighlightMaxAnalyzedOffset();
int keywordIgnoreAbove = Integer.MAX_VALUE;
if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) fieldContext.context.mapperService().documentMapper()
.mappers().getMapper(fieldContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Analyzer analyzer = getAnalyzer(fieldContext.context.mapperService().documentMapper());
PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
Expand Down Expand Up @@ -154,7 +149,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
fieldContext.field.fieldOptions().noMatchSize(),
higlighterNumberOfFragments,
fieldMatcher(fieldContext),
keywordIgnoreAbove,
maxAnalyzedOffset
);
}
Expand All @@ -172,11 +166,11 @@ protected Analyzer getAnalyzer(DocumentMapper docMapper) {
protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
MappedFieldType fieldType,
SearchHighlightContext.Field field,
MapperService mapperService,
FetchSubPhase.HitContext hitContext,
boolean forceSource
boolean storedFieldsAvailable
) throws IOException {
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, hitContext, forceSource);
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, mapperService, hitContext, storedFieldsAvailable);
fieldValues = fieldValues.stream()
.map((s) -> convertFieldValue(fieldType, s))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer a
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
Expand Down