From 4b5d65834a51d28762ab5494068b96b4b3b124bb Mon Sep 17 00:00:00 2001 From: markharwood Date: Wed, 6 Mar 2019 13:45:54 +0000 Subject: [PATCH] Bug fix for AnnotatedTextHighlighter (#39525) Bug fix for AnnotatedTextHighlighter. Added thread safety by moving parsed state to per-request context rather than holding in AnnotatedTextHighlighter singleton. Added YAML test that reproduced the error. Refactored to pull formatting logic from AnnotatedHighlighterAnalyzer into AnnotatedPassageFormatter Closes #39395 --- .../AnnotatedTextFieldMapper.java | 45 ++--------- .../highlight/AnnotatedPassageFormatter.java | 31 ++++++-- .../highlight/AnnotatedTextHighlighter.java | 32 +++++--- .../AnnotatedTextHighlighterTests.java | 26 +++++-- .../test/mapper_annotatedtext/10_basic.yml | 77 +++++++++++++++++++ .../search/fetch/FetchSubPhase.java | 1 - .../highlight/UnifiedHighlighter.java | 10 ++- 7 files changed, 157 insertions(+), 65 deletions(-) diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index a4a58d0c9946a..835003521f2d9 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -57,6 +57,7 @@ import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import java.io.IOException; import java.io.Reader; @@ -317,46 +318,13 @@ public AnnotationToken getAnnotation(int index) { // When asked to tokenize plain-text versions by the highlighter it tokenizes the // original markup form in order to inject annotations. public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper { - private Analyzer delegate; - private AnnotatedText[] annotations; - public AnnotatedHighlighterAnalyzer(Analyzer delegate){ + private final Analyzer delegate; + private final HitContext hitContext; + public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){ super(delegate.getReuseStrategy()); this.delegate = delegate; + this.hitContext = hitContext; } - - public void init(String[] markedUpFieldValues) { - this.annotations = new AnnotatedText[markedUpFieldValues.length]; - for (int i = 0; i < markedUpFieldValues.length; i++) { - annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]); - } - } - - public String [] getPlainTextValuesForHighlighter(){ - String [] result = new String[annotations.length]; - for (int i = 0; i < annotations.length; i++) { - result[i] = annotations[i].textMinusMarkup; - } - return result; - } - - public AnnotationToken[] getIntersectingAnnotations(int start, int end) { - List intersectingAnnotations = new ArrayList<>(); - int fieldValueOffset =0; - for (AnnotatedText fieldValueAnnotations : this.annotations) { - //This is called from a highlighter where all of the field values are concatenated - // so each annotation offset will need to be adjusted so that it takes into account - // the previous values AND the MULTIVAL delimiter - for (AnnotationToken token : fieldValueAnnotations.annotations) { - if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) { - intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset, - token.endOffset + fieldValueOffset, token.value)); - } - } - //add 1 for the fieldvalue separator character - fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1; - } - return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]); - } @Override public Analyzer getWrappedAnalyzer(String fieldName) { @@ -370,7 +338,8 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp return components; } AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream()); - return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, this.annotations); + AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName()); + return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, annotations); } } private static final class AnnotatedHighlighterTokenStreamComponents extends TokenStreamComponents{ diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java index ad1acc85031dd..7d360dd0b9bac 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java @@ -23,7 +23,7 @@ import org.apache.lucene.search.uhighlight.Passage; import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.Snippet; -import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; +import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken; import java.io.UnsupportedEncodingException; @@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter { public static final String SEARCH_HIT_TYPE = "_hit_term"; private final Encoder encoder; - private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer; + AnnotatedText[] annotations; - public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) { - this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer; + public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) { this.encoder = encoder; + this.annotations = annotations; } static class MarkupPassage { @@ -158,7 +158,7 @@ public Snippet[] format(Passage[] passages, String content) { int pos; int j = 0; for (Passage passage : passages) { - AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(), + AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(), passage.getEndOffset()); MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage); @@ -194,6 +194,27 @@ public Snippet[] format(Passage[] passages, String content) { } return snippets; } + + public AnnotationToken[] getIntersectingAnnotations(int start, int end) { + List intersectingAnnotations = new ArrayList<>(); + int fieldValueOffset =0; + for (AnnotatedText fieldValueAnnotations : this.annotations) { + //This is called from a highlighter where all of the field values are concatenated + // so each annotation offset will need to be adjusted so that it takes into account + // the previous values AND the MULTIVAL delimiter + for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) { + AnnotationToken token = fieldValueAnnotations.getAnnotation(i); + if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) { + intersectingAnnotations + .add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset + + fieldValueOffset, token.value)); + } + } + //add 1 for the fieldvalue separator character + fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1; + } + return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]); + } private void append(StringBuilder dest, String content, int start, int end) { dest.append(encoder.encodeText(content.substring(start, end))); diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java index d93316c78921a..2ba7838b90950 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java @@ -25,24 +25,22 @@ import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; +import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; public class AnnotatedTextHighlighter extends UnifiedHighlighter { public static final String NAME = "annotated"; - - AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null; @Override - protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) { - annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type)); - return annotatedHighlighterAnalyzer; + protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) { + return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext); } // Convert the marked-up values held on-disk to plain-text versions for highlighting @@ -51,14 +49,26 @@ protected List loadFieldValues(MappedFieldType fieldType, Field field, S throws IOException { List fieldValues = super.loadFieldValues(fieldType, field, context, hitContext); String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]); - annotatedHighlighterAnalyzer.init(fieldValuesAsString); - return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter()); + + AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length]; + for (int i = 0; i < fieldValuesAsString.length; i++) { + annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]); + } + // Store the annotations in the hitContext + hitContext.cache().put(AnnotatedText.class.getName(), annotations); + + ArrayList result = new ArrayList<>(annotations.length); + for (int i = 0; i < annotations.length; i++) { + result.add(annotations[i].textMinusMarkup); + } + return result; } @Override - protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) { - return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder); - + protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) { + // Retrieve the annotations from the hitContext + AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName()); + return new AnnotatedPassageFormatter(annotations, encoder); } } diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java index 1710b46fab115..e462d25426531 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java @@ -40,36 +40,50 @@ import org.apache.lucene.search.highlight.DefaultEncoder; import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator; import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter; -import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.Snippet; import org.apache.lucene.search.uhighlight.SplittingBreakIterator; import org.apache.lucene.store.Directory; import org.elasticsearch.common.Strings; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; +import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper; +import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter; import org.elasticsearch.test.ESTestCase; import java.net.URLEncoder; import java.text.BreakIterator; +import java.util.ArrayList; import java.util.Locale; import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; import static org.hamcrest.CoreMatchers.equalTo; public class AnnotatedTextHighlighterTests extends ESTestCase { + private void assertHighlightOneDoc(String fieldName, String []markedUpInputs, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception { + // Annotated fields wrap the usual analyzer with one that injects extra tokens Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer()); - AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer); - hiliteAnalyzer.init(markedUpInputs); - PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder()); - String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter(); + HitContext mockHitContext = new HitContext(); + AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext); + + AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length]; + for (int i = 0; i < markedUpInputs.length; i++) { + annotations[i] = AnnotatedText.parse(markedUpInputs[i]); + } + mockHitContext.cache().put(AnnotatedText.class.getName(), annotations); + AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder()); + + ArrayList plainTextForHighlighter = new ArrayList<>(annotations.length); + for (int i = 0; i < annotations.length; i++) { + plainTextForHighlighter.add(annotations[i].textMinusMarkup); + } Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer); @@ -94,7 +108,7 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs, iw.close(); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER); assertThat(topDocs.totalHits, equalTo(1L)); - String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR)); + String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR)); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null, passageFormatter, locale, diff --git a/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml b/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml index d55ee0ff15b9a..f24e2e3d0fc35 100644 --- a/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml +++ b/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml @@ -42,3 +42,80 @@ body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } } - match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."} + +--- +"issue 39395 thread safety issue -requires multiple calls to reveal": + - skip: + version: " - 6.4.99" + reason: Annotated text type introduced in 6.5.0 + + - do: + indices.create: + index: annotated + body: + settings: + number_of_shards: "5" + number_of_replicas: "0" + mappings: + doc: + properties: + my_field: + type: annotated_text + + - do: + index: + index: annotated + type: doc + id: 1 + body: + "my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)" + - do: + index: + index: annotated + type: doc + id: 2 + body: + "my_field" : "[A](~MARK0) [C](~MARK2)" + refresh: true + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java index 84154926bf665..8a8e4e8d77ff6 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java @@ -74,7 +74,6 @@ public Map cache() { } return cache; } - } /** diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index d957300f98dba..e4cbf28486e8e 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.index.IndexSettings; @@ -70,12 +71,13 @@ public HighlightField highlight(HighlighterContext highlighterContext) { int numberOfFragments; try { - final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType); + final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType, + hitContext); List fieldValues = loadFieldValues(fieldType, field, context, hitContext); if (fieldValues.size() == 0) { return null; } - final PassageFormatter passageFormatter = getPassageFormatter(field, encoder); + final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder); final IndexSearcher searcher = new IndexSearcher(hitContext.reader()); final CustomUnifiedHighlighter highlighter; final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR); @@ -155,14 +157,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) { return null; } - protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) { + protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) { CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder); return passageFormatter; } - protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) { + protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) { return HighlightUtils.getAnalyzer(docMapper, type); }