Use index-prefix fields for terms of length min_chars - 1 (#36703)
The default index_prefixes settings index prefixes of between 2 and 5 characters in length.
Currently, if a prefix search falls outside this range at either end, we fall back to a standard
prefix expansion, which is still very expensive for single-character prefixes. However, we can
instead use a wildcard expansion rather than a prefix expansion, so that a query of a* gets
remapped to a? against the _index_prefix field - likely to be a very small set of terms, and
certain to be much smaller than a* against the whole index.

This commit adds this extra level of mapping for any prefix term whose length is one less than
the min_chars parameter of the index_prefixes field.
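
As a rough illustration (not part of the commit itself), the remapping described above builds a
small automaton: the literal prefix followed by one "any character" transition per missing
character, executed against the _index_prefix field. The sketch below mirrors the new
PrefixFieldType.prefixQuery added in this change; the field name "field._index_prefix", the
min_chars value and the helper class are illustrative only.

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

class PrefixRemapSketch {
    // Build the query that a prefix search for `value` is remapped to when the term is
    // shorter than min_chars, e.g. "a" with min_chars = 2 becomes "a?" against the
    // _index_prefix field instead of "a*" against the whole index.
    static AutomatonQuery remap(String prefixField, String value, int minChars) {
        List<Automaton> automata = new ArrayList<>();
        automata.add(Automata.makeString(value));      // the literal prefix, e.g. "a"
        for (int i = value.length(); i < minChars; i++) {
            automata.add(Automata.makeAnyChar());      // one wildcard per missing character
        }
        Automaton automaton = Operations.concatenate(automata);
        // The Term here only labels the query; the automaton does the matching.
        return new AutomatonQuery(new Term(prefixField, value + "*"), automaton);
    }
}

For example, remap("field._index_prefix", "a", 2) matches exactly the two-character prefix terms
starting with "a", a far smaller set than every term starting with "a" in the main field.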
romseygeek authored Dec 19, 2018
1 parent 132ccbe commit dd540ef
Showing 4 changed files with 69 additions and 38 deletions.
(YAML REST search test)
@@ -58,6 +58,21 @@ setup:
  - match: {hits.max_score: 2}
  - match: {hits.hits.0._score: 2}

+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            query_string:
+              default_field: text
+              query: s*
+              boost: 2
+
+  - match: {hits.total: 1}
+  - match: {hits.max_score: 2}
+  - match: {hits.hits.0._score: 2}
+
  - do:
      search:
        rest_total_hits_as_int: true
TextFieldMapper.java
@@ -32,6 +32,7 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
@@ -40,6 +41,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.intervals.IntervalsSource;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.settings.Settings;
@@ -360,7 +364,7 @@ PrefixFieldType setAnalyzer(NamedAnalyzer delegate) {
        }

        boolean accept(int length) {
-            return length >= minChars && length <= maxChars;
+            return length >= minChars - 1 && length <= maxChars;
        }

        void doXContent(XContentBuilder builder) throws IOException {
@@ -370,6 +374,22 @@ void doXContent(XContentBuilder builder) throws IOException {
            builder.endObject();
        }

+        @Override
+        public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
+            if (value.length() >= minChars) {
+                return super.termQuery(value, context);
+            }
+            List<Automaton> automata = new ArrayList<>();
+            automata.add(Automata.makeString(value));
+            for (int i = value.length(); i < minChars; i++) {
+                automata.add(Automata.makeAnyChar());
+            }
+            Automaton automaton = Operations.concatenate(automata);
+            AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), automaton);
+            query.setRewriteMethod(method);
+            return query;
+        }
+
        @Override
        public PrefixFieldType clone() {
            return new PrefixFieldType(name(), minChars, maxChars);
@@ -402,7 +422,6 @@ public boolean equals(Object o) {

        @Override
        public int hashCode() {
-
            return Objects.hash(super.hashCode(), minChars, maxChars);
        }
    }
@@ -564,7 +583,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
            if (prefixFieldType == null || prefixFieldType.accept(value.length()) == false) {
                return super.prefixQuery(value, method, context);
            }
-            Query tq = prefixFieldType.termQuery(value, context);
+            Query tq = prefixFieldType.prefixQuery(value, method, context);
            if (method == null || method == MultiTermQuery.CONSTANT_SCORE_REWRITE
                || method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) {
                return new ConstantScoreQuery(tq);
TextFieldMapperTests.java
@@ -31,10 +31,8 @@
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
@@ -71,10 +69,8 @@
import java.util.HashMap;
import java.util.Map;

-import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class TextFieldMapperTests extends ESSingleNodeTestCase {
@@ -817,18 +813,13 @@ public void testFastPhraseMapping() throws IOException {

    public void testIndexPrefixMapping() throws IOException {

-        QueryShardContext queryShardContext = indexService.newQueryShardContext(
-            randomInt(20), null, () -> {
-                throw new UnsupportedOperationException();
-            }, null);
-
        {
            String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                    .startObject("properties").startObject("field")
                    .field("type", "text")
                    .field("analyzer", "standard")
                    .startObject("index_prefixes")
-                    .field("min_chars", 1)
+                    .field("min_chars", 2)
                    .field("max_chars", 10)
                    .endObject()
                    .endObject().endObject()
@@ -837,16 +828,7 @@ public void testIndexPrefixMapping() throws IOException {
            DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
            assertEquals(mapping, mapper.mappingSource().toString());

-            assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=1:10"));
-
-            FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
-            MappedFieldType fieldType = fieldMapper.fieldType;
-
-            Query q = fieldType.prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext);
-
-            assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
-            q = fieldType.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext);
-            assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);
+            assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:10"));

            ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
                .bytes(XContentFactory.jsonBuilder()
@@ -870,17 +852,8 @@ public void testIndexPrefixMapping() throws IOException {
            CompressedXContent json = new CompressedXContent(mapping);
            DocumentMapper mapper = parser.parse("type", json);

-            FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
-            MappedFieldType fieldType = fieldMapper.fieldType;
-
-            Query q1 = fieldType.prefixQuery("g", CONSTANT_SCORE_REWRITE, queryShardContext);
-            assertThat(q1, instanceOf(PrefixQuery.class));
-            Query q2 = fieldType.prefixQuery("go", CONSTANT_SCORE_REWRITE, queryShardContext);
-            assertThat(q2, instanceOf(ConstantScoreQuery.class));
-            Query q5 = fieldType.prefixQuery("going", CONSTANT_SCORE_REWRITE, queryShardContext);
-            assertThat(q5, instanceOf(ConstantScoreQuery.class));
-            Query q6 = fieldType.prefixQuery("goings", CONSTANT_SCORE_REWRITE, queryShardContext);
-            assertThat(q6, instanceOf(PrefixQuery.class));
+            assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:5"));
+
        }

        {
@@ -898,10 +871,8 @@ public void testIndexPrefixMapping() throws IOException {
                .endObject().endObject()
                .endObject().endObject());

-            IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
-                indexService.mapperService()
-                    .merge("type", new CompressedXContent(illegalMapping), MergeReason.MAPPING_UPDATE);
-            });
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
+                indexService.mapperService().merge("type", new CompressedXContent(illegalMapping), MergeReason.MAPPING_UPDATE));
            assertThat(e.getMessage(), containsString("Field [field._index_prefix] is defined twice in [type]"));

        }
TextFieldTypeTests.java
@@ -20,18 +20,27 @@

import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.unit.Fuzziness;
import org.junit.Before;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

+import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
+
public class TextFieldTypeTests extends FieldTypeTestCase {
    @Override
    protected MappedFieldType createDefaultFieldType() {
@@ -143,4 +152,21 @@ public void testFuzzyQuery() {
                () -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
    }
+
+    public void testIndexPrefixes() {
+        TextFieldMapper.TextFieldType ft = new TextFieldMapper.TextFieldType();
+        ft.setName("field");
+        ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field._index_prefix", 2, 10));
+
+        Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, null);
+        assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
+
+        q = ft.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, null);
+        assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);
+
+        q = ft.prefixQuery("g", CONSTANT_SCORE_REWRITE, null);
+        Automaton automaton
+            = Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
+        assertEquals(new ConstantScoreQuery(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton)), q);
+    }
}
