Bug fix for AnnotatedTextHighlighter (#39525)
Bug fix for AnnotatedTextHighlighter.
Added thread safety by moving parsed state into a per-request context rather than holding it in the AnnotatedTextHighlighter singleton.
Added a YAML test that reproduces the error.
Refactored to pull the formatting logic from AnnotatedHighlighterAnalyzer into AnnotatedPassageFormatter.

Closes #39395
markharwood authored Mar 6, 2019
1 parent 3d3506b commit 4b5d658
Showing 7 changed files with 157 additions and 65 deletions.
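For orientation before the diff: the change replaces mutable per-request fields on shared highlighter/analyzer objects with values stored in the per-hit context's cache and looked up where needed. Below is a minimal, standalone sketch of that pattern, assuming hypothetical stand-in types (ParsedAnnotations, RequestContext, PerRequestStateSketch); these are not the real AnnotatedText, FetchSubPhase.HitContext, or AnnotatedTextHighlighter classes shown in the diff that follows.

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the thread-safety fix described in the commit message: keep shared
// components stateless and stash per-request parsed state in a per-request cache.
public class PerRequestStateSketch {

    // Stand-in for the parsed form of one marked-up field value.
    static final class ParsedAnnotations {
        final String textMinusMarkup;
        ParsedAnnotations(String textMinusMarkup) {
            this.textMinusMarkup = textMinusMarkup;
        }
    }

    // Stand-in for a per-hit context offering a scratch cache for the duration of one request.
    static final class RequestContext {
        private final Map<String, Object> cache = new HashMap<>();
        Map<String, Object> cache() {
            return cache;
        }
    }

    // BEFORE (the bug): a component shared across search requests keeps mutable per-request
    // state in an instance field, so concurrent requests can overwrite each other's data.
    static final class SharedHighlighterBefore {
        private ParsedAnnotations[] annotations; // shared mutable state - not thread safe

        void init(String[] markedUpValues) {
            this.annotations = parse(markedUpValues);
        }

        ParsedAnnotations[] annotations() {
            return annotations;
        }
    }

    // AFTER (the fix): the shared component stays stateless; parsed values live in the
    // per-request context and are looked up by a well-known key when needed.
    static final class SharedHighlighterAfter {
        private static final String KEY = ParsedAnnotations.class.getName();

        void loadFieldValues(String[] markedUpValues, RequestContext ctx) {
            ctx.cache().put(KEY, parse(markedUpValues)); // state scoped to this request only
        }

        ParsedAnnotations[] annotations(RequestContext ctx) {
            return (ParsedAnnotations[]) ctx.cache().get(KEY);
        }
    }

    private static ParsedAnnotations[] parse(String[] markedUpValues) {
        ParsedAnnotations[] parsed = new ParsedAnnotations[markedUpValues.length];
        for (int i = 0; i < markedUpValues.length; i++) {
            // Trivial stand-in for AnnotatedText.parse(): the real code strips the markup.
            parsed[i] = new ParsedAnnotations(markedUpValues[i]);
        }
        return parsed;
    }

    public static void main(String[] args) {
        SharedHighlighterAfter highlighter = new SharedHighlighterAfter();
        RequestContext requestA = new RequestContext();
        RequestContext requestB = new RequestContext();
        highlighter.loadFieldValues(new String[] { "[A](~MARK0)" }, requestA);
        highlighter.loadFieldValues(new String[] { "[B](~MARK1)" }, requestB);
        // Each request sees only its own parsed state, however the calls interleave.
        System.out.println(highlighter.annotations(requestA)[0].textMinusMarkup);
        System.out.println(highlighter.annotations(requestB)[0].textMinusMarkup);
    }
}

In the actual diff, AnnotatedTextHighlighter.loadFieldValues puts the parsed AnnotatedText[] into hitContext.cache(), and AnnotatedHighlighterAnalyzer and AnnotatedPassageFormatter read it back from there, so no per-request state survives on the shared instances.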
@@ -57,6 +57,7 @@
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

import java.io.IOException;
import java.io.Reader;
@@ -317,46 +318,13 @@ public AnnotationToken getAnnotation(int index) {
// When asked to tokenize plain-text versions by the highlighter it tokenizes the
// original markup form in order to inject annotations.
public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
private Analyzer delegate;
private AnnotatedText[] annotations;
public AnnotatedHighlighterAnalyzer(Analyzer delegate){
private final Analyzer delegate;
private final HitContext hitContext;
public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
super(delegate.getReuseStrategy());
this.delegate = delegate;
this.hitContext = hitContext;
}

public void init(String[] markedUpFieldValues) {
this.annotations = new AnnotatedText[markedUpFieldValues.length];
for (int i = 0; i < markedUpFieldValues.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
}
}

public String [] getPlainTextValuesForHighlighter(){
String [] result = new String[annotations.length];
for (int i = 0; i < annotations.length; i++) {
result[i] = annotations[i].textMinusMarkup;
}
return result;
}

public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
int fieldValueOffset =0;
for (AnnotatedText fieldValueAnnotations : this.annotations) {
//This is called from a highlighter where all of the field values are concatenated
// so each annotation offset will need to be adjusted so that it takes into account
// the previous values AND the MULTIVAL delimiter
for (AnnotationToken token : fieldValueAnnotations.annotations) {
if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
token.endOffset + fieldValueOffset, token.value));
}
}
//add 1 for the fieldvalue separator character
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
}
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
}

@Override
public Analyzer getWrappedAnalyzer(String fieldName) {
@@ -370,7 +338,8 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp
return components;
}
AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, this.annotations);
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, annotations);
}
}
private static final class AnnotatedHighlighterTokenStreamComponents extends TokenStreamComponents{
@@ -23,7 +23,7 @@
import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.Snippet;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;

import java.io.UnsupportedEncodingException;
@@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {

public static final String SEARCH_HIT_TYPE = "_hit_term";
private final Encoder encoder;
private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
AnnotatedText[] annotations;

public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
this.encoder = encoder;
this.annotations = annotations;
}

static class MarkupPassage {
@@ -158,7 +158,7 @@ public Snippet[] format(Passage[] passages, String content) {
int pos;
int j = 0;
for (Passage passage : passages) {
AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(),
AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(),
passage.getEndOffset());
MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);

@@ -194,6 +194,27 @@ public Snippet[] format(Passage[] passages, String content) {
}
return snippets;
}

public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
int fieldValueOffset =0;
for (AnnotatedText fieldValueAnnotations : this.annotations) {
//This is called from a highlighter where all of the field values are concatenated
// so each annotation offset will need to be adjusted so that it takes into account
// the previous values AND the MULTIVAL delimiter
for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
intersectingAnnotations
.add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset +
fieldValueOffset, token.value));
}
}
//add 1 for the fieldvalue separator character
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
}
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
}

private void append(StringBuilder dest, String content, int start, int end) {
dest.append(encoder.encodeText(content.substring(start, end)));
@@ -25,24 +25,22 @@
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;

public class AnnotatedTextHighlighter extends UnifiedHighlighter {

public static final String NAME = "annotated";

AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;

@Override
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
return annotatedHighlighterAnalyzer;
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
}

// Convert the marked-up values held on-disk to plain-text versions for highlighting
@@ -51,14 +49,26 @@ protected List<Object> loadFieldValues(MappedFieldType fieldType, Field field, S
throws IOException {
List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
annotatedHighlighterAnalyzer.init(fieldValuesAsString);
return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());

AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
for (int i = 0; i < fieldValuesAsString.length; i++) {
annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
}
// Store the annotations in the hitContext
hitContext.cache().put(AnnotatedText.class.getName(), annotations);

ArrayList<Object> result = new ArrayList<>(annotations.length);
for (int i = 0; i < annotations.length; i++) {
result.add(annotations[i].textMinusMarkup);
}
return result;
}

@Override
protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);

protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
// Retrieve the annotations from the hitContext
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
return new AnnotatedPassageFormatter(annotations, encoder);
}

}
@@ -40,36 +40,50 @@
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.Snippet;
import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
import org.elasticsearch.test.ESTestCase;

import java.net.URLEncoder;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;

import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
import static org.hamcrest.CoreMatchers.equalTo;

public class AnnotatedTextHighlighterTests extends ESTestCase {


private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
Query query, Locale locale, BreakIterator breakIterator,
int noMatchSize, String[] expectedPassages) throws Exception {


// Annotated fields wrap the usual analyzer with one that injects extra tokens
Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.init(markedUpInputs);
PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
HitContext mockHitContext = new HitContext();
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);

AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);

AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());

ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
for (int i = 0; i < annotations.length; i++) {
plainTextForHighlighter.add(annotations[i].textMinusMarkup);
}

Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
@@ -94,7 +108,7 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
iw.close();
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1L));
String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));

CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
passageFormatter, locale,
@@ -42,3 +42,80 @@
body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }

- match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}

---
"issue 39395 thread safety issue -requires multiple calls to reveal":
- skip:
version: " - 6.4.99"
reason: Annotated text type introduced in 6.5.0

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "5"
number_of_replicas: "0"
mappings:
doc:
properties:
my_field:
type: annotated_text

- do:
index:
index: annotated
type: doc
id: 1
body:
"my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
- do:
index:
index: annotated
type: doc
id: 2
body:
"my_field" : "[A](~MARK0) [C](~MARK2)"
refresh: true
- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

@@ -74,7 +74,6 @@ public Map<String, Object> cache() {
}
return cache;
}

}

/**
@@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.index.IndexSettings;

@@ -70,12 +71,13 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
int numberOfFragments;
try {

final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
if (fieldValues.size() == 0) {
return null;
}
final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@@ -155,14 +157,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
return null;
}

protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
field.fieldOptions().postTags()[0], encoder);
return passageFormatter;
}


protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return HighlightUtils.getAnalyzer(docMapper, type);
}
