Bug fix for AnnotatedTextHighlighter (#39525)
Bug fix for AnnotatedTextHighlighter.
Added thread safety by moving parsed state into a per-request context rather than holding it in the AnnotatedTextHighlighter singleton.
Added a YAML test that reproduces the error.
Refactored to pull the formatting logic from AnnotatedHighlighterAnalyzer into AnnotatedPassageFormatter.

Closes #39395
markharwood authored Mar 6, 2019
1 parent 3d3506b commit 4b5d658
Showing 7 changed files with 157 additions and 65 deletions.
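For orientation before the diff: the change replaces mutable per-request fields on shared highlighter/analyzer objects with values stored in the per-hit context's cache and looked up where needed. Below is a minimal, standalone sketch of that pattern, assuming hypothetical stand-in types (ParsedAnnotations, RequestContext, PerRequestStateSketch); these are not the real AnnotatedText, FetchSubPhase.HitContext, or AnnotatedTextHighlighter classes shown in the diff that follows.

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the thread-safety fix described in the commit message: keep shared
// components stateless and stash per-request parsed state in a per-request cache.
public class PerRequestStateSketch {

    // Stand-in for the parsed form of one marked-up field value.
    static final class ParsedAnnotations {
        final String textMinusMarkup;
        ParsedAnnotations(String textMinusMarkup) {
            this.textMinusMarkup = textMinusMarkup;
        }
    }

    // Stand-in for a per-hit context offering a scratch cache for the duration of one request.
    static final class RequestContext {
        private final Map<String, Object> cache = new HashMap<>();
        Map<String, Object> cache() {
            return cache;
        }
    }

    // BEFORE (the bug): a component shared across search requests keeps mutable per-request
    // state in an instance field, so concurrent requests can overwrite each other's data.
    static final class SharedHighlighterBefore {
        private ParsedAnnotations[] annotations; // shared mutable state - not thread safe

        void init(String[] markedUpValues) {
            this.annotations = parse(markedUpValues);
        }

        ParsedAnnotations[] annotations() {
            return annotations;
        }
    }

    // AFTER (the fix): the shared component stays stateless; parsed values live in the
    // per-request context and are looked up by a well-known key when needed.
    static final class SharedHighlighterAfter {
        private static final String KEY = ParsedAnnotations.class.getName();

        void loadFieldValues(String[] markedUpValues, RequestContext ctx) {
            ctx.cache().put(KEY, parse(markedUpValues)); // state scoped to this request only
        }

        ParsedAnnotations[] annotations(RequestContext ctx) {
            return (ParsedAnnotations[]) ctx.cache().get(KEY);
        }
    }

    private static ParsedAnnotations[] parse(String[] markedUpValues) {
        ParsedAnnotations[] parsed = new ParsedAnnotations[markedUpValues.length];
        for (int i = 0; i < markedUpValues.length; i++) {
            // Trivial stand-in for AnnotatedText.parse(): the real code strips the markup.
            parsed[i] = new ParsedAnnotations(markedUpValues[i]);
        }
        return parsed;
    }

    public static void main(String[] args) {
        SharedHighlighterAfter highlighter = new SharedHighlighterAfter();
        RequestContext requestA = new RequestContext();
        RequestContext requestB = new RequestContext();
        highlighter.loadFieldValues(new String[] { "[A](~MARK0)" }, requestA);
        highlighter.loadFieldValues(new String[] { "[B](~MARK1)" }, requestB);
        // Each request sees only its own parsed state, however the calls interleave.
        System.out.println(highlighter.annotations(requestA)[0].textMinusMarkup);
        System.out.println(highlighter.annotations(requestB)[0].textMinusMarkup);
    }
}

In the actual diff, AnnotatedTextHighlighter.loadFieldValues puts the parsed AnnotatedText[] into hitContext.cache(), and AnnotatedHighlighterAnalyzer and AnnotatedPassageFormatter read it back from there, so no per-request state survives on the shared instances.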
@@ -57,6 +57,7 @@
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

import java.io.IOException;
import java.io.Reader;
@@ -317,46 +318,13 @@ public AnnotationToken getAnnotation(int index) {
// When asked to tokenize plain-text versions by the highlighter it tokenizes the
// original markup form in order to inject annotations.
public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
private Analyzer delegate;
private AnnotatedText[] annotations;
public AnnotatedHighlighterAnalyzer(Analyzer delegate){
private final Analyzer delegate;
private final HitContext hitContext;
public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
super(delegate.getReuseStrategy());
this.delegate = delegate;
this.hitContext = hitContext;
}

public void init(String[] markedUpFieldValues) {
this.annotations = new AnnotatedText[markedUpFieldValues.length];
for (int i = 0; i < markedUpFieldValues.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
}
}

public String [] getPlainTextValuesForHighlighter(){
String [] result = new String[annotations.length];
for (int i = 0; i < annotations.length; i++) {
result[i] = annotations[i].textMinusMarkup;
}
return result;
}

public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
int fieldValueOffset =0;
for (AnnotatedText fieldValueAnnotations : this.annotations) {
//This is called from a highlighter where all of the field values are concatenated
// so each annotation offset will need to be adjusted so that it takes into account
// the previous values AND the MULTIVAL delimiter
for (AnnotationToken token : fieldValueAnnotations.annotations) {
if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
token.endOffset + fieldValueOffset, token.value));
}
}
//add 1 for the fieldvalue separator character
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
}
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
}

@Override
public Analyzer getWrappedAnalyzer(String fieldName) {
@@ -370,7 +338,8 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp
return components;
}
AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, this.annotations);
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, annotations);
}
}
private static final class AnnotatedHighlighterTokenStreamComponents extends TokenStreamComponents{
@@ -23,7 +23,7 @@
import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.Snippet;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;

import java.io.UnsupportedEncodingException;
@@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {

public static final String SEARCH_HIT_TYPE = "_hit_term";
private final Encoder encoder;
private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
AnnotatedText[] annotations;

public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
this.encoder = encoder;
this.annotations = annotations;
}

static class MarkupPassage {
@@ -158,7 +158,7 @@ public Snippet[] format(Passage[] passages, String content) {
int pos;
int j = 0;
for (Passage passage : passages) {
AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(),
AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(),
passage.getEndOffset());
MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);

@@ -194,6 +194,27 @@ public Snippet[] format(Passage[] passages, String content) {
}
return snippets;
}

public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
int fieldValueOffset =0;
for (AnnotatedText fieldValueAnnotations : this.annotations) {
//This is called from a highlighter where all of the field values are concatenated
// so each annotation offset will need to be adjusted so that it takes into account
// the previous values AND the MULTIVAL delimiter
for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
intersectingAnnotations
.add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset +
fieldValueOffset, token.value));
}
}
//add 1 for the fieldvalue separator character
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
}
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
}

private void append(StringBuilder dest, String content, int start, int end) {
dest.append(encoder.encodeText(content.substring(start, end)));
@@ -25,24 +25,22 @@
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;

public class AnnotatedTextHighlighter extends UnifiedHighlighter {

public static final String NAME = "annotated";

AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;

@Override
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
return annotatedHighlighterAnalyzer;
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
}

// Convert the marked-up values held on-disk to plain-text versions for highlighting
@@ -51,14 +49,26 @@ protected List<Object> loadFieldValues(MappedFieldType fieldType, Field field, S
throws IOException {
List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
annotatedHighlighterAnalyzer.init(fieldValuesAsString);
return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());

AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
for (int i = 0; i < fieldValuesAsString.length; i++) {
annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
}
// Store the annotations in the hitContext
hitContext.cache().put(AnnotatedText.class.getName(), annotations);

ArrayList<Object> result = new ArrayList<>(annotations.length);
for (int i = 0; i < annotations.length; i++) {
result.add(annotations[i].textMinusMarkup);
}
return result;
}

@Override
protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);

protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
// Retrieve the annotations from the hitContext
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
return new AnnotatedPassageFormatter(annotations, encoder);
}

}
@@ -40,36 +40,50 @@
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.Snippet;
import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
import org.elasticsearch.test.ESTestCase;

import java.net.URLEncoder;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;

import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
import static org.hamcrest.CoreMatchers.equalTo;

public class AnnotatedTextHighlighterTests extends ESTestCase {


private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
Query query, Locale locale, BreakIterator breakIterator,
int noMatchSize, String[] expectedPassages) throws Exception {


// Annotated fields wrap the usual analyzer with one that injects extra tokens
Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.init(markedUpInputs);
PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
HitContext mockHitContext = new HitContext();
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);

AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);

AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());

ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
for (int i = 0; i < annotations.length; i++) {
plainTextForHighlighter.add(annotations[i].textMinusMarkup);
}

Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
@@ -94,7 +108,7 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
iw.close();
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1L));
String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));

CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
passageFormatter, locale,
@@ -42,3 +42,80 @@
body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }

- match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}

---
"issue 39395 thread safety issue -requires multiple calls to reveal":
- skip:
version: " - 6.4.99"
reason: Annotated text type introduced in 6.5.0

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "5"
number_of_replicas: "0"
mappings:
doc:
properties:
my_field:
type: annotated_text

- do:
index:
index: annotated
type: doc
id: 1
body:
"my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
- do:
index:
index: annotated
type: doc
id: 2
body:
"my_field" : "[A](~MARK0) [C](~MARK2)"
refresh: true
- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

@@ -74,7 +74,6 @@ public Map<String, Object> cache() {
}
return cache;
}

}

/**
@@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.index.IndexSettings;

@@ -70,12 +71,13 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
int numberOfFragments;
try {

final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
if (fieldValues.size() == 0) {
return null;
}
final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@@ -155,14 +157,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
return null;
}

protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
field.fieldOptions().postTags()[0], encoder);
return passageFormatter;
}


protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return HighlightUtils.getAnalyzer(docMapper, type);
}
