Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ValueFetcher when loading text snippets to highlight (#63572) #65441

Merged
merged 4 commits into from
Nov 25, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions docs/reference/migration/migrate_7_11.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[[breaking-changes-7.11]]
== Breaking changes in 7.11
++++
<titleabbrev>7.11</titleabbrev>
++++

This section discusses the changes that you need to be aware of when migrating
your application to {es} 7.11.

See also <<release-highlights>> and <<es-release-notes>>.

// * <<breaking_711_blah_changes>>
// * <<breaking_711_blah_changes>>

//NOTE: The notable-breaking-changes tagged regions are re-used in the
//Installation and Upgrade Guide

//tag::notable-breaking-changes[]

[discrete]
[[breaking_711_search_changes]]
=== Search changes

[[highlight-normalization]]
.Keyword fields with a custom normalizer will use the normalized form when highlighting
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we mention the change to copy_to as well? This is technically a breaking behavioral change, and it's nice to alert users about it because they could be able to stopping setting store: true on some fields.

[%collapsible]
====
*Details* +
Highlighters now use the same framework to load their values as the
`fields` section of a search response. This means that normalization
will be applied to the values of a keyword field; for example, a
field configured with a lowercase normalizer will return highlighted
snippets in lower case.
====
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -41,12 +41,12 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
@Override
protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
QueryShardContext qsc,
MappedFieldType fieldType,
Field field,
HitContext hitContext,
boolean forceSource
) throws IOException {
List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource);
List<Object> fieldValues = super.loadFieldValues(highlighter, qsc, fieldType, hitContext, forceSource);

List<Object> strings = new ArrayList<>(fieldValues.size());
AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
highlighter.setFieldMatcher((name) -> "text".equals(name));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2775,6 +2775,34 @@ public void testKeywordFieldHighlighting() throws IOException {
equalTo("<em>some text</em>"));
}

public void testCopyToFields() throws Exception {
XContentBuilder b = jsonBuilder().startObject().startObject("properties");
b.startObject("foo");
{
b.field("type", "text");
b.field("copy_to", "foo_copy");
}
b.endObject();
b.startObject("foo_copy").field("type", "text").endObject();
b.endObject().endObject();
prepareCreate("test").addMapping("_doc", b).get();

client().prepareIndex("test", "_doc").setId("1")
.setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject())
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.get();

SearchResponse response = client().prepareSearch()
.setQuery(matchQuery("foo_copy", "brown"))
.highlighter(new HighlightBuilder().field(new Field("foo_copy")))
.get();

assertHitCount(response, 1);
HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("how now <em>brown</em> cow"));
}

public void testACopyFieldWithNestedQuery() throws Exception {
String mapping = Strings.toString(jsonBuilder().startObject().startObject("type").startObject("properties")
.startObject("foo")
Expand Down Expand Up @@ -2986,7 +3014,7 @@ public void testWithNormalizer() throws Exception {
assertHitCount(searchResponse, 1);
HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("keyword");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
assertThat(field.getFragments()[0].string(), equalTo("<em>hello world</em>"));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private final FieldHighlighter fieldHighlighter;
private final int keywordIgnoreAbove;
private final int maxAnalyzedOffset;

/**
Expand All @@ -84,7 +83,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
* @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
* @param maxPassages the maximum number of passes to highlight
* @param fieldMatcher decides which terms should be highlighted
* @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
* @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
* offset source for it because it'd be super slow
*/
Expand All @@ -98,7 +96,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
int noMatchSize,
int maxPassages,
Predicate<String> fieldMatcher,
int keywordIgnoreAbove,
int maxAnalyzedOffset) throws IOException {
super(searcher, analyzer);
this.offsetSource = offsetSource;
Expand All @@ -109,7 +106,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
this.field = field;
this.noMatchSize = noMatchSize;
this.setFieldMatcher(fieldMatcher);
this.keywordIgnoreAbove = keywordIgnoreAbove;
this.maxAnalyzedOffset = maxAnalyzedOffset;
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}
Expand All @@ -127,9 +123,6 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of ["
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.Collections;
Expand All @@ -46,24 +47,20 @@ private HighlightUtils() {
* Load field values for highlighting.
*/
public static List<Object> loadFieldValues(MappedFieldType fieldType,
QueryShardContext qsc,
FetchSubPhase.HitContext hitContext,
boolean forceSource) throws IOException {
//percolator needs to always load from source, thus it sets the global force source to true
List<Object> textsToHighlight;
if (forceSource == false && fieldType.isStored()) {
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
hitContext.reader().document(hitContext.docId(), fieldVisitor);
textsToHighlight = fieldVisitor.fields().get(fieldType.name());
List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
if (textsToHighlight == null) {
// Can happen if the document doesn't have the field to highlight
textsToHighlight = Collections.emptyList();
return Collections.emptyList();
}
} else {
SourceLookup sourceLookup = hitContext.sourceLookup();
textsToHighlight = sourceLookup.extractRawValues(fieldType.name());
return textsToHighlight;
}
assert textsToHighlight != null;
return textsToHighlight;
ValueFetcher fetcher = fieldType.valueFetcher(qsc, null);
return fetcher.fetchValues(hitContext.sourceLookup());
}

public static class Encoders {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -101,20 +100,14 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getQueryShardContext().getFieldNameIndexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldType).ignoreAbove();
}
final int maxAnalyzedOffset = context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();

textsToHighlight = HighlightUtils.loadFieldValues(fieldType, hitContext, fieldContext.forceSource);
textsToHighlight
= HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource);

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

Expand Down Expand Up @@ -72,7 +72,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
FetchSubPhase.HitContext hitContext = fieldContext.hitContext;

CheckedSupplier<String, IOException> loadFieldValues = () -> {
List<Object> fieldValues = loadFieldValues(highlighter, fieldType, field, hitContext, fieldContext.forceSource);
List<Object> fieldValues = loadFieldValues(highlighter, fieldContext.context.getQueryShardContext(), fieldType,
hitContext, fieldContext.forceSource);
if (fieldValues.size() == 0) {
return null;
}
Expand Down Expand Up @@ -111,10 +112,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;
int maxAnalyzedOffset = fieldContext.context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
int keywordIgnoreAbove = Integer.MAX_VALUE;
if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldContext.fieldType).ignoreAbove();
}
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getFieldNameIndexAnalyzer());
PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
Expand Down Expand Up @@ -151,7 +148,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
fieldContext.field.fieldOptions().noMatchSize(),
higlighterNumberOfFragments,
fieldMatcher(fieldContext),
keywordIgnoreAbove,
maxAnalyzedOffset
);
}
Expand All @@ -167,12 +163,12 @@ protected Analyzer wrapAnalyzer(Analyzer analyzer) {

protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
QueryShardContext qsc,
MappedFieldType fieldType,
SearchHighlightContext.Field field,
FetchSubPhase.HitContext hitContext,
boolean forceSource
) throws IOException {
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, hitContext, forceSource);
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, qsc, hitContext, forceSource);
fieldValues = fieldValues.stream()
.map((s) -> convertFieldValue(fieldType, s))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer a
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
Expand Down