Use index-prefix fields for terms of length min_chars - 1 #36703

Merged · 8 commits · Dec 19, 2018

Changes from 5 commits
@@ -58,6 +58,21 @@ setup:
- match: {hits.max_score: 2}
- match: {hits.hits.0._score: 2}

- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
query_string:
default_field: text
query: s*
boost: 2

- match: {hits.total: 1}
- match: {hits.max_score: 2}
- match: {hits.hits.0._score: 2}

- do:
search:
rest_total_hits_as_int: true
@@ -32,14 +32,19 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.settings.Settings;
@@ -360,7 +365,7 @@ PrefixFieldType setAnalyzer(NamedAnalyzer delegate) {
}

boolean accept(int length) {
- return length >= minChars && length <= maxChars;
+ return length >= minChars - 1 && length <= maxChars;
Contributor: Let's go even further, change this to just length <= maxChars, and then append minChars - prefixTerm.length wildcards to the automaton that is used for querying?
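A minimal sketch of that suggestion, assuming the same Lucene Automata/Operations helpers this PR already imports; the accept(int) check would then only test length <= maxChars, and the class and method names below are hypothetical, not part of the PR:

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

// Sketch only: accept any prefix up to max_chars and pad the query automaton
// with one any-character transition per character missing up to min_chars, so
// it still lines up with the terms stored in the _index_prefix field.
class ShortPrefixQuerySketch {
    static Query shortPrefixQuery(String prefixField, String prefix, int minChars) {
        List<Automaton> automata = new ArrayList<>();
        automata.add(Automata.makeString(prefix));    // the literal prefix
        for (int i = prefix.length(); i < minChars; i++) {
            automata.add(Automata.makeAnyChar());     // one wildcard per missing character
        }
        Automaton automaton = Operations.concatenate(automata);
        // The Term supplies the target field and the query's toString();
        // the automaton drives the actual matching.
        return new AutomatonQuery(new Term(prefixField, prefix + "*"), automaton);
    }
}
```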

}

void doXContent(XContentBuilder builder) throws IOException {
@@ -370,6 +375,23 @@ void doXContent(XContentBuilder builder) throws IOException {
builder.endObject();
}

public Query termQuery(Object value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
Contributor: Maybe we should rename this method, now that it is not a pure term query (for example prefixQuery could make more sense)?

Author: I made this just override prefixQuery, it makes much more sense!

assert value instanceof String;
String strValue = (String) value;
if (strValue.length() >= minChars) {
return super.termQuery(value, context);
}
List<Automaton> automata = new ArrayList<>();
automata.add(Automata.makeString(strValue));
for (int i = strValue.length(); i < minChars; i++) {
automata.add(Automata.makeAnyChar());
}
Automaton automaton = Operations.concatenate(automata);
AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), automaton);
query.setRewriteMethod(method);
return query;
}
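To make the behaviour concrete (this mirrors the assertion added to the mapper test further down in this diff): with min_chars = 2, a one-character prefix such as "g" is rewritten into an automaton matching 'g' followed by exactly one arbitrary character, run against the ._index_prefix sub-field. Illustrative fragment only; "field" is the field name used in the test mapping:

```java
// Expected query for prefix "g" when min_chars = 2.
Automaton expected = Operations.concatenate(Arrays.asList(
    Automata.makeChar('g'),     // the literal prefix character
    Automata.makeAnyChar()));   // padding up to min_chars
Query q = new AutomatonQuery(new Term("field._index_prefix", "g*"), expected);
```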

@Override
public PrefixFieldType clone() {
return new PrefixFieldType(name(), minChars, maxChars);
@@ -402,7 +424,6 @@ public boolean equals(Object o) {

@Override
public int hashCode() {

return Objects.hash(super.hashCode(), minChars, maxChars);
}
}
@@ -564,7 +585,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
if (prefixFieldType == null || prefixFieldType.accept(value.length()) == false) {
return super.prefixQuery(value, method, context);
}
- Query tq = prefixFieldType.termQuery(value, context);
+ Query tq = prefixFieldType.termQuery(value, method, context);
if (method == null || method == MultiTermQuery.CONSTANT_SCORE_REWRITE
|| method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) {
return new ConstantScoreQuery(tq);
@@ -31,13 +31,18 @@
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.Strings;
@@ -828,7 +833,7 @@ public void testIndexPrefixMapping() throws IOException {
.field("type", "text")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 1)
.field("min_chars", 2)
.field("max_chars", 10)
.endObject()
.endObject().endObject()
@@ -837,17 +842,22 @@
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());

- assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=1:10"));
+ assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:10"));

FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
MappedFieldType fieldType = fieldMapper.fieldType;

Query q = fieldType.prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext);

assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);

q = fieldType.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext);
assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);

q = fieldType.prefixQuery("g", CONSTANT_SCORE_REWRITE, queryShardContext);
Contributor: It seems like these detailed query construction tests would fit better in a unit test like TextFieldTypeTests.

Author: ++

Automaton automaton
= Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
assertEquals(new ConstantScoreQuery(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton)), q);

ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
.startObject()
@@ -874,7 +884,8 @@ public void testIndexPrefixMapping() throws IOException {
MappedFieldType fieldType = fieldMapper.fieldType;

Query q1 = fieldType.prefixQuery("g", CONSTANT_SCORE_REWRITE, queryShardContext);
- assertThat(q1, instanceOf(PrefixQuery.class));
+ assertThat(q1, instanceOf(ConstantScoreQuery.class));
+ assertThat(((ConstantScoreQuery)q1).getQuery(), instanceOf(AutomatonQuery.class));
Query q2 = fieldType.prefixQuery("go", CONSTANT_SCORE_REWRITE, queryShardContext);
assertThat(q2, instanceOf(ConstantScoreQuery.class));
Query q5 = fieldType.prefixQuery("going", CONSTANT_SCORE_REWRITE, queryShardContext);