diff --git a/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml
index b09bc8418c98a..15778393959e5 100644
--- a/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml
+++ b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml
@@ -165,7 +165,7 @@ setup:
- match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
- match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
- match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] }
- - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
+ - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
- match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] }
- match: { hits.hits.0.highlight.a_field\._4gram: null }
@@ -197,6 +197,6 @@ setup:
- match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
- match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
- match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] }
- - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
- - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] }
+ - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
+ - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] }
- match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] }
diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java
index 52eee559c6888..723c30f10dc61 100644
--- a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java
+++ b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java
@@ -49,17 +49,23 @@ public Snippet[] format(Passage[] passages, String content) {
pos = passage.getStartOffset();
for (int i = 0; i < passage.getNumMatches(); i++) {
int start = passage.getMatchStarts()[i];
+ assert start >= pos && start < passage.getEndOffset();
+ // append content before this start
+ append(sb, content, pos, start);
+
int end = passage.getMatchEnds()[i];
- // its possible to have overlapping terms
- if (start > pos) {
- append(sb, content, pos, start);
- }
- if (end > pos) {
- sb.append(preTag);
- append(sb, content, Math.max(pos, start), end);
- sb.append(postTag);
- pos = end;
+ assert end > start;
+ // Look ahead to expand 'end' past all overlapping:
+ while (i + 1 < passage.getNumMatches() && passage.getMatchStarts()[i + 1] < end) {
+ end = passage.getMatchEnds()[++i];
}
+ end = Math.min(end, passage.getEndOffset()); // in case match straddles past passage
+
+ sb.append(preTag);
+ append(sb, content, start, end);
+ sb.append(postTag);
+
+ pos = end;
}
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
diff --git a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
index 3c24dc2d42b82..4504cfbee64c7 100644
--- a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
+++ b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -20,6 +20,8 @@
package org.apache.lucene.search.uhighlight;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -32,6 +34,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -224,4 +227,33 @@ public void testGroupSentences() throws Exception {
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 20), 0, outputs);
}
+ public void testOverlappingTerms() throws Exception {
+ final String[] inputs = {
+ "bro",
+ "brown",
+ "brownie",
+ "browser"
+ };
+ final String[] outputs = {
+ "bro",
+ "brown",
+ "brownie",
+ "browser"
+ };
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new FuzzyQuery(new Term("text", "brow")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("text", "b")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("text", "br")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("text", "bro")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("text", "brown")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("text", "browni")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("text", "browser")), BooleanClause.Occur.SHOULD)
+ .build();
+ Analyzer analyzer = CustomAnalyzer.builder()
+ .withTokenizer(EdgeNGramTokenizerFactory.class, "minGramSize", "1", "maxGramSize", "7")
+ .build();
+ assertHighlightOneDoc("text", inputs,
+ analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs);
+ }
+
}