From 23395a9b9fd40f83a3fd5b8a5a7f3a05030a6743 Mon Sep 17 00:00:00 2001 From: Andy Bristol Date: Wed, 27 Mar 2019 13:29:13 -0700 Subject: [PATCH] search as you type fieldmapper (#35600) Adds the search_as_you_type field type that acts like a text field optimized for as-you-type search completion. It creates a couple subfields that analyze the indexed terms as shingles, against which full terms are queried, and a prefix subfield that analyze terms as the largest shingle size used and edge-ngrams, against which partial terms are queried Adds a match_bool_prefix query type that creates a boolean clause of a term query for each term except the last, for which a boolean clause with a prefix query is created. The match_bool_prefix query is the recommended way of querying a search as you type field, which will boil down to term queries for each shingle of the input text on the appropriate shingle field, and the final (possibly partial) term as a term query on the prefix field. This field type also supports phrase and phrase prefix queries however --- docs/reference/mapping/types.asciidoc | 3 + .../mapping/types/search-as-you-type.asciidoc | 258 ++++ .../query-dsl/full-text-queries.asciidoc | 9 +- .../match-bool-prefix-query.asciidoc | 85 ++ .../match-phrase-prefix-query.asciidoc | 2 +- docs/reference/query-dsl/match-query.asciidoc | 3 +- .../query-dsl/multi-match-query.asciidoc | 37 + .../suggesters/completion-suggest.asciidoc | 4 +- .../index/mapper/MapperExtrasPlugin.java | 1 + .../mapper/SearchAsYouTypeFieldMapper.java | 826 +++++++++++ .../mapper/SearchAsYouTypeAnalyzerTests.java | 197 +++ .../SearchAsYouTypeFieldMapperTests.java | 758 ++++++++++ .../mapper/SearchAsYouTypeFieldTypeTests.java | 113 ++ .../test/search-as-you-type/10_basic.yml | 1249 +++++++++++++++++ .../search-as-you-type/20_highlighting.yml | 202 +++ .../AnnotatedTextFieldMapper.java | 2 +- .../test/search/310_match_bool_prefix.yml | 363 +++++ .../index/mapper/TextFieldMapper.java | 127 +- .../query/MatchBoolPrefixQueryBuilder.java | 393 ++++++ .../index/query/MultiMatchQueryBuilder.java | 17 +- .../index/search/MatchQuery.java | 194 ++- .../index/search/MultiMatchQuery.java | 16 +- .../elasticsearch/search/SearchModule.java | 3 + .../MatchBoolPrefixQueryBuilderTests.java | 284 ++++ .../index/query/MatchQueryBuilderTests.java | 72 + .../query/MultiMatchQueryBuilderTests.java | 79 +- .../search/SearchModuleTests.java | 1 + 27 files changed, 5198 insertions(+), 100 deletions(-) create mode 100644 docs/reference/mapping/types/search-as-you-type.asciidoc create mode 100644 docs/reference/query-dsl/match-bool-prefix-query.asciidoc create mode 100644 modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java create mode 100644 modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java create mode 100644 modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java create mode 100644 modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java create mode 100644 modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml create mode 100644 modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml create mode 100644 server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java 
create mode 100644 server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index 76b832a529fb4..c0db156dc3a1c 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -52,6 +52,7 @@ string:: <> and <> <>:: Record sparse vectors of float values. +<>:: A text-like field optimized for queries to implement as-you-type completion [float] === Multi-fields @@ -110,3 +111,5 @@ include::types/rank-features.asciidoc[] include::types/dense-vector.asciidoc[] include::types/sparse-vector.asciidoc[] + +include::types/search-as-you-type.asciidoc[] diff --git a/docs/reference/mapping/types/search-as-you-type.asciidoc b/docs/reference/mapping/types/search-as-you-type.asciidoc new file mode 100644 index 0000000000000..aec21f2e3ca6c --- /dev/null +++ b/docs/reference/mapping/types/search-as-you-type.asciidoc @@ -0,0 +1,258 @@ +[[search-as-you-type]] +=== Search as you type datatype + +experimental[] + +The `search_as_you_type` field type is a text-like field that is optimized to +provide out-of-the-box support for queries that serve an as-you-type completion +use case. It creates a series of subfields that are analyzed to index terms +that can be efficiently matched by a query that partially matches the entire +indexed text value. Both prefix completion (i.e matching terms starting at the +beginning of the input) and infix completion (i.e. matching terms at any +position within the input) are supported. + +When adding a field of this type to a mapping + +[source,js] +-------------------------------------------------- +PUT my_index +{ + "mappings": { + "properties": { + "my_field": { + "type": "search_as_you_type" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +This creates the following fields + +[horizontal] + +`my_field`:: + + Analyzed as configured in the mapping. If an analyzer is not configured, + the default analyzer for the index is used + +`my_field._2gram`:: + + Wraps the analyzer of `my_field` with a shingle token filter of shingle + size 2 + +`my_field._3gram`:: + + Wraps the analyzer of `my_field` with a shingle token filter of shingle + size 3 + +`my_field._index_prefix`:: + + Wraps the analyzer of `my_field._3gram` with an edge ngram token filter + + +The size of shingles in subfields can be configured with the `max_shingle_size` +mapping parameter. The default is 3, and valid values for this parameter are +integer values 2 - 4 inclusive. Shingle subfields will be created for each +shingle size from 2 up to and including the `max_shingle_size`. The +`my_field._index_prefix` subfield will always use the analyzer from the shingle +subfield with the `max_shingle_size` when constructing its own analyzer. + +Increasing the `max_shingle_size` will improve matches for queries with more +consecutive terms, at the cost of larger index size. The default +`max_shingle_size` should usually be sufficient. + +The same input text is indexed into each of these fields automatically, with +their differing analysis chains, when an indexed document has a value for the +root field `my_field`. 
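+
+To see the terms that each of these subfields actually produces for a piece of
+text, you can point the analyze API at a subfield. The request below is only an
+illustrative sketch; it assumes the `my_index` mapping created above and is not
+required for the examples that follow.
+
+[source,js]
+--------------------------------------------------
+GET my_index/_analyze
+{
+  "field": "my_field._3gram",
+  "text": "quick brown fox jump lazy dog"
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[continued]
+
+With the index's default analyzer, such a request returns shingles like
+`quick brown fox` and `brown fox jump`. The request below indexes a document
+with a value for `my_field`, which the search examples that follow rely on: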
+ +[source,js] +-------------------------------------------------- +PUT my_index/_doc/1?refresh +{ + "my_field": "quick brown fox jump lazy dog" +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +The most efficient way of querying to serve a search-as-you-type use case is +usually a <> query of type +<> that targets the root +`search_as_you_type` field and its shingle subfields. This can match the query +terms in any order, but will score documents higher if they contain the terms +in order in a shingle subfield. + +[source,js] +-------------------------------------------------- +GET my_index/_search +{ + "query": { + "multi_match": { + "query": "brown f", + "type": "bool_prefix", + "fields": [ + "my_field", + "my_field._2gram", + "my_field._3gram" + ] + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +[source,js] +-------------------------------------------------- +{ + "took" : 44, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : 0.8630463, + "hits" : [ + { + "_index" : "my_index", + "_type" : "_doc", + "_id" : "1", + "_score" : 0.8630463, + "_source" : { + "my_field" : "quick brown fox jump lazy dog" + } + } + ] + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"took" : 44/"took" : $body.took/] +// TESTRESPONSE[s/"max_score" : 0.8630463/"max_score" : $body.hits.max_score/] +// TESTRESPONSE[s/"_score" : 0.8630463/"_score" : $body.hits.hits.0._score/] + +To search for documents that strictly match the query terms in order, or to +search using other properties of phrase queries, use a +<> on the root +field. A <> can also be used +if the last term should be matched exactly, and not as a prefix. Using phrase +queries may be less efficient than using the `match_bool_prefix` query. + +[source,js] +-------------------------------------------------- +GET my_index/_search +{ + "query": { + "match_phrase_prefix": { + "my_field": "brown f" + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +[[specific-params]] +==== Parameters specific to the `search_as_you_type` field + +The following parameters are accepted in a mapping for the `search_as_you_type` +field and are specific to this field type + +[horizontal] + +`max_shingle_size`:: + + The largest shingle size to index the input with and create subfields for, + creating one subfield for each shingle size between 2 and + `max_shingle_size`. Accepts integer values between 2 and 4 inclusive. This + option defaults to 3. + + +[[general-params]] +==== Parameters of the field type as a text field + +The following parameters are accepted in a mapping for the `search_as_you_type` +field due to its nature as a text-like field, and behave similarly to their +behavior when configuring a field of the <> datatype. Unless +otherwise noted, these options configure the root fields subfields in +the same way. + +<>:: + + The <> which should be used for + <> string fields, both at index-time and at + search-time (unless overridden by the + <>). Defaults to the default index + analyzer, or the <>. + +<>:: + + Should the field be searchable? Accepts `true` (default) or `false`. + +<>:: + + What information should be stored in the index, for search and highlighting + purposes. Defaults to `positions`. 
+ +<>:: + + Whether field-length should be taken into account when scoring queries. + Accepts `true` or `false`. This option configures the root field + and shingle subfields, where its default is `true`. It does not configure + the prefix subfield, where it is `false`. + +<>:: + + Whether the field value should be stored and retrievable separately from + the <> field. Accepts `true` or `false` + (default). This option only configures the root field, and does not + configure any subfields. + +<>:: + + The <> that should be used at search time on + <> fields. Defaults to the `analyzer` setting. + +<>:: + + The <> that should be used at search time when a + phrase is encountered. Defaults to the `search_analyzer` setting. + +<>:: + + Which scoring algorithm or _similarity_ should be used. Defaults + to `BM25`. + +<>:: + + Whether term vectors should be stored for an <> + field. Defaults to `no`. This option configures the root field and shingle + subfields, but not the prefix subfield. + + +[[prefix-queries]] +==== Optimization of prefix queries + +When making a <> query to the root field or +any of its subfields, the query will be rewritten to a +<> query on the `._index_prefix` subfield. This +matches more efficiently than is typical of `prefix` queries on text fields, +as prefixes up to a certain length of each shingle are indexed directly as +terms in the `._index_prefix` subfield. + +The analyzer of the `._index_prefix` subfield slightly modifies the +shingle-building behavior to also index prefixes of the terms at the end of the +field's value that normally would not be produced as shingles. For example, if +the value `quick brown fox` is indexed into a `search_as_you_type` field with +`max_shingle_size` of 3, prefixes for `brown fox` and `fox` are also indexed +into the `._index_prefix` subfield even though they do not appear as terms in +the `._3gram` subfield. This allows for completion of all the terms in the +field's input. diff --git a/docs/reference/query-dsl/full-text-queries.asciidoc b/docs/reference/query-dsl/full-text-queries.asciidoc index 5fb5447dbb79a..0af99b61f194f 100644 --- a/docs/reference/query-dsl/full-text-queries.asciidoc +++ b/docs/reference/query-dsl/full-text-queries.asciidoc @@ -18,7 +18,12 @@ The queries in this group are: <>:: - The poor man's _search-as-you-type_. Like the `match_phrase` query, but does a wildcard search on the final word. + Like the `match_phrase` query, but does a wildcard search on the final word. + +<>:: + + Creates a `bool` query that matches each term as a `term` query, except for + the last term, which is matched as a `prefix` query <>:: @@ -50,6 +55,8 @@ include::match-phrase-query.asciidoc[] include::match-phrase-prefix-query.asciidoc[] +include::match-bool-prefix-query.asciidoc[] + include::multi-match-query.asciidoc[] include::common-terms-query.asciidoc[] diff --git a/docs/reference/query-dsl/match-bool-prefix-query.asciidoc b/docs/reference/query-dsl/match-bool-prefix-query.asciidoc new file mode 100644 index 0000000000000..623f2423d8055 --- /dev/null +++ b/docs/reference/query-dsl/match-bool-prefix-query.asciidoc @@ -0,0 +1,85 @@ +[[query-dsl-match-bool-prefix-query]] +=== Match Bool Prefix Query + +A `match_bool_prefix` query analyzes its input and constructs a +<> from the terms. Each term except the last +is used in a `term` query. The last term is used in a `prefix` query. 
A +`match_bool_prefix` query such as + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "match_bool_prefix" : { + "message" : "quick brown f" + } + } +} +-------------------------------------------------- +// CONSOLE + +where analysis produces the terms `quick`, `brown`, and `f` is similar to the +following `bool` query + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "bool" : { + "should": [ + { "term": { "message": "quick" }}, + { "term": { "message": "brown" }}, + { "prefix": { "message": "f"}} + ] + } + } +} +-------------------------------------------------- +// CONSOLE + +An important difference between the `match_bool_prefix` query and +<> is that the +`match_phrase_prefix` query matches its terms as a phrase, but the +`match_bool_prefix` query can match its terms in any position. The example +`match_bool_prefix` query above could match a field containing +`quick brown fox`, but it could also match `brown fox quick`. It could also +match a field containing the term `quick`, the term `brown` and a term +starting with `f`, appearing in any position. + +==== Parameters + +By default, `match_bool_prefix` queries' input text will be analyzed using the +analyzer from the queried field's mapping. A different search analyzer can be +configured with the `analyzer` parameter + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "match_bool_prefix" : { + "message": { + "query": "quick brown f", + "analyzer": "keyword" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +`match_bool_prefix` queries support the +<> and `operator` +parameters as described for the +<>, applying the setting to the +constructed `bool` query. The number of clauses in the constructed `bool` +query will in most cases be the number of terms produced by analysis of the +query text. + +The <>, `prefix_length`, +`max_expansions`, `fuzzy_transpositions`, and `fuzzy_rewrite` parameters can +be applied to the `term` subqueries constructed for all terms but the final +term. They do not have any effect on the prefix query constructed for the +final term. diff --git a/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc b/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc index 73f1be9143cf2..304eaf9a5b4f0 100644 --- a/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc +++ b/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc @@ -59,6 +59,6 @@ for appears. For better solutions for _search-as-you-type_ see the <> and -{defguide}/_index_time_search_as_you_type.html[Index-Time Search-as-You-Type]. +the <>. =================================================== diff --git a/docs/reference/query-dsl/match-query.asciidoc b/docs/reference/query-dsl/match-query.asciidoc index 5c397d603bef3..b4e90e7765084 100644 --- a/docs/reference/query-dsl/match-query.asciidoc +++ b/docs/reference/query-dsl/match-query.asciidoc @@ -186,7 +186,6 @@ process. It does not support field name prefixes, wildcard characters, or other "advanced" features. For this reason, chances of it failing are very small / non existent, and it provides an excellent behavior when it comes to just analyze and run that text as a query behavior (which is -usually what a text search box does). Also, the <> -type can provide a great "as you type" behavior to automatically load search results. +usually what a text search box does). 
************************************************** diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index 512eee4900b41..b8fbb61a950d0 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -91,6 +91,10 @@ parameter, which can be set to: `phrase_prefix`:: Runs a `match_phrase_prefix` query on each field and combines the `_score` from each field. See <>. +`bool_prefix`:: Creates a `match_bool_prefix` query on each field and + combines the `_score` from each field. See + <>. + [[type-best-fields]] ==== `best_fields` @@ -516,3 +520,36 @@ per-term `blended` queries. It accepts: =================================================== The `fuzziness` parameter cannot be used with the `cross_fields` type. =================================================== + +[[type-bool-prefix]] +==== `bool_prefix` + +The `bool_prefix` type's scoring behaves like <>, but using a +<> instead of a +`match` query. + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "multi_match" : { + "query": "quick brown f", + "type": "bool_prefix", + "fields": [ "subject", "message" ] + } + } +} +-------------------------------------------------- +// CONSOLE + +The `analyzer`, `boost`, `operator`, `minimum_should_match`, `lenient`, +`zero_terms_query`, and `auto_generate_synonyms_phrase_query` parameters as +explained in <> are supported. The +`fuzziness`, `prefix_length`, `max_expansions`, `rewrite`, and +`fuzzy_transpositions` parameters are supported for the terms that are used to +construct term queries, but do not have an effect on the prefix query +constructed from the final term. + +The `slop` and `cutoff_frequency` parameters are not supported by this query +type. diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index b27e6f0ef0b54..c89dce3d24160 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -2,7 +2,9 @@ === Completion Suggester NOTE: In order to understand the format of suggestions, please -read the <> page first. +read the <> page first. For more flexible +search-as-you-type searches that do not use suggesters, see the +<>. The `completion` suggester provides auto-complete/search-as-you-type functionality. 
This is a navigational feature to guide users to diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java index cbafd0fd1efff..45a067d7994d2 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java @@ -41,6 +41,7 @@ public Map getMappers() { mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, new RankFeaturesFieldMapper.TypeParser()); mappers.put(DenseVectorFieldMapper.CONTENT_TYPE, new DenseVectorFieldMapper.TypeParser()); mappers.put(SparseVectorFieldMapper.CONTENT_TYPE, new SparseVectorFieldMapper.TypeParser()); + mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, new SearchAsYouTypeFieldMapper.TypeParser()); return Collections.unmodifiableMap(mappers); } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java new file mode 100644 index 0000000000000..69948bf98a6ac --- /dev/null +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java @@ -0,0 +1,826 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.apache.lucene.analysis.shingle.FixedShingleFilter; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.NormsFieldExistsQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.query.QueryShardContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeIntegerValue; +import static org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType.hasGaps; +import static org.elasticsearch.index.mapper.TypeParsers.parseTextField; + +/** + * Mapper for a text field that optimizes itself for as-you-type completion by indexing its content into subfields. Each subfield + * modifies the analysis chain of the root field to index terms the user would create as they type out the value in the root field + * + * The structure of these fields is + * + *
+ *     [ SearchAsYouTypeFieldMapper, SearchAsYouTypeFieldType, unmodified analysis ]
+ *     ├── [ ShingleFieldMapper, ShingleFieldType, analysis wrapped with 2-shingles ]
+ *     ├── ...
+ *     ├── [ ShingleFieldMapper, ShingleFieldType, analysis wrapped with max_shingle_size-shingles ]
+ *     └── [ PrefixFieldMapper, PrefixFieldType, analysis wrapped with max_shingle_size-shingles and edge-ngrams ]
+ * 
+ */ +public class SearchAsYouTypeFieldMapper extends FieldMapper { + + public static final String CONTENT_TYPE = "search_as_you_type"; + private static final int MAX_SHINGLE_SIZE_LOWER_BOUND = 2; + private static final int MAX_SHINGLE_SIZE_UPPER_BOUND = 4; + private static final String PREFIX_FIELD_SUFFIX = "._index_prefix"; + + public static class Defaults { + + public static final int MIN_GRAM = 1; + public static final int MAX_GRAM = 20; + public static final int MAX_SHINGLE_SIZE = 3; + + public static final MappedFieldType FIELD_TYPE = new SearchAsYouTypeFieldType(); + + static { + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + FIELD_TYPE.freeze(); + } + } + + public static class TypeParser implements Mapper.TypeParser { + + @Override + public Mapper.Builder parse(String name, + Map node, + ParserContext parserContext) throws MapperParsingException { + + final Builder builder = new Builder(name); + + builder.fieldType().setIndexAnalyzer(parserContext.getIndexAnalyzers().getDefaultIndexAnalyzer()); + builder.fieldType().setSearchAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchAnalyzer()); + builder.fieldType().setSearchQuoteAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer()); + parseTextField(builder, name, node, parserContext); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + final Map.Entry entry = iterator.next(); + final String fieldName = entry.getKey(); + final Object fieldNode = entry.getValue(); + + if (fieldName.equals("max_shingle_size")) { + builder.maxShingleSize(nodeIntegerValue(fieldNode)); + iterator.remove(); + } + // TODO should we allow to configure the prefix field + } + return builder; + } + } + + public static class Builder extends FieldMapper.Builder { + private int maxShingleSize = Defaults.MAX_SHINGLE_SIZE; + + public Builder(String name) { + super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); + this.builder = this; + } + + public Builder maxShingleSize(int maxShingleSize) { + if (maxShingleSize < MAX_SHINGLE_SIZE_LOWER_BOUND || maxShingleSize > MAX_SHINGLE_SIZE_UPPER_BOUND) { + throw new MapperParsingException("[max_shingle_size] must be at least [" + MAX_SHINGLE_SIZE_LOWER_BOUND + "] and at most " + + "[" + MAX_SHINGLE_SIZE_UPPER_BOUND + "], got [" + maxShingleSize + "]"); + } + this.maxShingleSize = maxShingleSize; + return builder; + } + + @Override + public SearchAsYouTypeFieldType fieldType() { + return (SearchAsYouTypeFieldType) this.fieldType; + } + + @Override + public SearchAsYouTypeFieldMapper build(Mapper.BuilderContext context) { + setupFieldType(context); + + final NamedAnalyzer indexAnalyzer = fieldType().indexAnalyzer(); + final NamedAnalyzer searchAnalyzer = fieldType().searchAnalyzer(); + final NamedAnalyzer searchQuoteAnalyzer = fieldType().searchQuoteAnalyzer(); + + // set up the prefix field + final String prefixFieldName = name() + PREFIX_FIELD_SUFFIX; + final PrefixFieldType prefixFieldType = new PrefixFieldType(name(), prefixFieldName, Defaults.MIN_GRAM, Defaults.MAX_GRAM); + prefixFieldType.setIndexOptions(fieldType().indexOptions()); + // wrap the root field's index analyzer with shingles and edge ngrams + final SearchAsYouTypeAnalyzer prefixIndexWrapper = + SearchAsYouTypeAnalyzer.withShingleAndPrefix(indexAnalyzer.analyzer(), maxShingleSize); + // wrap the root field's search analyzer with only shingles + final SearchAsYouTypeAnalyzer prefixSearchWrapper = + SearchAsYouTypeAnalyzer.withShingle(searchAnalyzer.analyzer(), maxShingleSize); + 
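+            // note that the search analyzer is wrapped with shingles only: prefixes are expanded at index time, so
+            // the query text must not be edge-ngrammed again at search time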
// don't wrap the root field's search quote analyzer as prefix field doesn't support phrase queries + prefixFieldType.setIndexAnalyzer(new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, prefixIndexWrapper)); + prefixFieldType.setSearchAnalyzer(new NamedAnalyzer(searchAnalyzer.name(), AnalyzerScope.INDEX, prefixSearchWrapper)); + final PrefixFieldMapper prefixFieldMapper = new PrefixFieldMapper(prefixFieldType, context.indexSettings()); + + // set up the shingle fields + final ShingleFieldMapper[] shingleFieldMappers = new ShingleFieldMapper[maxShingleSize - 1]; + final ShingleFieldType[] shingleFieldTypes = new ShingleFieldType[maxShingleSize - 1]; + for (int i = 0; i < shingleFieldMappers.length; i++) { + final int shingleSize = i + 2; + final ShingleFieldType shingleFieldType = new ShingleFieldType(fieldType(), shingleSize); + shingleFieldType.setName(getShingleFieldName(name(), shingleSize)); + // wrap the root field's index, search, and search quote analyzers with shingles + final SearchAsYouTypeAnalyzer shingleIndexWrapper = + SearchAsYouTypeAnalyzer.withShingle(indexAnalyzer.analyzer(), shingleSize); + final SearchAsYouTypeAnalyzer shingleSearchWrapper = + SearchAsYouTypeAnalyzer.withShingle(searchAnalyzer.analyzer(), shingleSize); + final SearchAsYouTypeAnalyzer shingleSearchQuoteWrapper = + SearchAsYouTypeAnalyzer.withShingle(searchQuoteAnalyzer.analyzer(), shingleSize); + shingleFieldType.setIndexAnalyzer(new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, shingleIndexWrapper)); + shingleFieldType.setSearchAnalyzer(new NamedAnalyzer(searchAnalyzer.name(), AnalyzerScope.INDEX, shingleSearchWrapper)); + shingleFieldType.setSearchQuoteAnalyzer( + new NamedAnalyzer(searchQuoteAnalyzer.name(), AnalyzerScope.INDEX, shingleSearchQuoteWrapper)); + shingleFieldType.setPrefixFieldType(prefixFieldType); + shingleFieldTypes[i] = shingleFieldType; + shingleFieldMappers[i] = new ShingleFieldMapper(shingleFieldType, context.indexSettings()); + } + fieldType().setPrefixField(prefixFieldType); + fieldType().setShingleFields(shingleFieldTypes); + return new SearchAsYouTypeFieldMapper(name, fieldType(), context.indexSettings(), copyTo, + maxShingleSize, prefixFieldMapper, shingleFieldMappers); + } + } + + private static int countPosition(TokenStream stream) throws IOException { + assert stream instanceof CachingTokenFilter; + PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); + stream.reset(); + int positionCount = 0; + while (stream.incrementToken()) { + if (posIncAtt.getPositionIncrement() != 0) { + positionCount += posIncAtt.getPositionIncrement(); + } + } + return positionCount; + } + + /** + * The root field type, which most queries should target as it will delegate queries to subfields better optimized for the query. When + * handling phrase queries, it analyzes the query text to find the appropriate sized shingle subfield to delegate to. 
When handling + * prefix or phrase prefix queries, it delegates to the prefix subfield + */ + static class SearchAsYouTypeFieldType extends StringFieldType { + + PrefixFieldType prefixField; + ShingleFieldType[] shingleFields = new ShingleFieldType[0]; + + SearchAsYouTypeFieldType() { + setTokenized(true); + } + + SearchAsYouTypeFieldType(SearchAsYouTypeFieldType other) { + super(other); + + if (other.prefixField != null) { + this.prefixField = other.prefixField.clone(); + } + if (other.shingleFields != null) { + this.shingleFields = new ShingleFieldType[other.shingleFields.length]; + for (int i = 0; i < this.shingleFields.length; i++) { + if (other.shingleFields[i] != null) { + this.shingleFields[i] = other.shingleFields[i].clone(); + } + } + } + } + + public void setPrefixField(PrefixFieldType prefixField) { + checkIfFrozen(); + this.prefixField = prefixField; + } + + public void setShingleFields(ShingleFieldType[] shingleFields) { + checkIfFrozen(); + this.shingleFields = shingleFields; + } + + @Override + public MappedFieldType clone() { + return new SearchAsYouTypeFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + private ShingleFieldType shingleFieldForPositions(int positions) { + final int indexFromShingleSize = Math.max(positions - 2, 0); + return shingleFields[Math.min(indexFromShingleSize, shingleFields.length - 1)]; + } + + @Override + public Query existsQuery(QueryShardContext context) { + if (omitNorms()) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } else { + return new NormsFieldExistsQuery(name()); + } + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (prefixField == null || prefixField.termLengthWithinBounds(value.length()) == false) { + return super.prefixQuery(value, method, context); + } else { + final Query query = prefixField.prefixQuery(value, method, context); + if (method == null + || method == MultiTermQuery.CONSTANT_SCORE_REWRITE + || method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) { + return new ConstantScoreQuery(query); + } else { + return query; + } + } + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + int numPos = countPosition(stream); + if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + final ShingleFieldType shingleField = shingleFieldForPositions(numPos); + stream = new FixedShingleFilter(stream, shingleField.shingleSize); + return shingleField.phraseQuery(stream, 0, true); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + int numPos = countPosition(stream); + if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + final ShingleFieldType shingleField = shingleFieldForPositions(numPos); + stream = new FixedShingleFilter(stream, shingleField.shingleSize); + return shingleField.multiPhraseQuery(stream, 0, true); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + int numPos = countPosition(stream); + if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { + return 
TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, + null, null); + } + final ShingleFieldType shingleField = shingleFieldForPositions(numPos); + stream = new FixedShingleFilter(stream, shingleField.shingleSize); + return shingleField.phrasePrefixQuery(stream, 0, maxExpansions); + } + + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + if (prefixField != null && prefixField.termLengthWithinBounds(value.length())) { + return new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixField.name(), indexedValueForSearch(value))), name()); + } else { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + } + + @Override + public void checkCompatibility(MappedFieldType other, List conflicts) { + super.checkCompatibility(other, conflicts); + final SearchAsYouTypeFieldType otherFieldType = (SearchAsYouTypeFieldType) other; + if (this.shingleFields.length != otherFieldType.shingleFields.length) { + conflicts.add("mapper [" + name() + "] has a different [max_shingle_size]"); + } else if (Arrays.equals(this.shingleFields, otherFieldType.shingleFields) == false) { + conflicts.add("mapper [" + name() + "] has shingle subfields that are configured differently"); + } + + if (Objects.equals(this.prefixField, otherFieldType.prefixField) == false) { + conflicts.add("mapper [" + name() + "] has different [index_prefixes] settings"); + } + } + + @Override + public boolean equals(Object otherObject) { + if (this == otherObject) { + return true; + } + if (otherObject == null || getClass() != otherObject.getClass()) { + return false; + } + if (!super.equals(otherObject)) { + return false; + } + final SearchAsYouTypeFieldType other = (SearchAsYouTypeFieldType) otherObject; + return Objects.equals(prefixField, other.prefixField) && + Arrays.equals(shingleFields, other.shingleFields); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), prefixField, Arrays.hashCode(shingleFields)); + } + } + + /** + * The prefix field type handles prefix and phrase prefix queries that are delegated to it by the other field types in a + * search_as_you_type structure + */ + static final class PrefixFieldType extends StringFieldType { + + final int minChars; + final int maxChars; + final String parentField; + + PrefixFieldType(String parentField, String name, int minChars, int maxChars) { + setTokenized(true); + setOmitNorms(true); + setStored(false); + setName(name); + this.minChars = minChars; + this.maxChars = maxChars; + this.parentField = parentField; + } + + PrefixFieldType(PrefixFieldType other) { + super(other); + this.minChars = other.minChars; + this.maxChars = other.maxChars; + this.parentField = other.parentField; + } + + boolean termLengthWithinBounds(int length) { + return length >= minChars - 1 && length <= maxChars; + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (value.length() >= minChars) { + return super.termQuery(value, context); + } + List automata = new ArrayList<>(); + automata.add(Automata.makeString(value)); + for (int i = value.length(); i < minChars; i++) { + automata.add(Automata.makeAnyChar()); + } + Automaton automaton = Operations.concatenate(automata); + AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), 
automaton); + query.setRewriteMethod(method); + return new BooleanQuery.Builder() + .add(query, BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term(parentField, value)), BooleanClause.Occur.SHOULD) + .build(); + } + + @Override + public PrefixFieldType clone() { + return new PrefixFieldType(this); + } + + @Override + public String typeName() { + return "prefix"; + } + + @Override + public String toString() { + return super.toString() + ",prefixChars=" + minChars + ":" + maxChars; + } + + @Override + public Query existsQuery(QueryShardContext context) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + PrefixFieldType that = (PrefixFieldType) o; + return minChars == that.minChars && + maxChars == that.maxChars; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), minChars, maxChars); + } + } + + static final class PrefixFieldMapper extends FieldMapper { + + PrefixFieldMapper(PrefixFieldType fieldType, Settings indexSettings) { + super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); + } + + @Override + public PrefixFieldType fieldType() { + return (PrefixFieldType) super.fieldType(); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + return "prefix"; + } + + @Override + public String toString() { + return fieldType().toString(); + } + } + + static final class ShingleFieldMapper extends FieldMapper { + + ShingleFieldMapper(ShingleFieldType fieldType, Settings indexSettings) { + super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); + } + + @Override + public ShingleFieldType fieldType() { + return (ShingleFieldType) super.fieldType(); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + } + + /** + * The shingle field type handles phrase queries and delegates prefix and phrase prefix queries to the prefix field + */ + static class ShingleFieldType extends StringFieldType { + final int shingleSize; + PrefixFieldType prefixFieldType; + + ShingleFieldType(MappedFieldType other, int shingleSize) { + super(other); + this.shingleSize = shingleSize; + this.setStored(false); + } + + ShingleFieldType(ShingleFieldType other) { + super(other); + this.shingleSize = other.shingleSize; + if (other.prefixFieldType != null) { + this.prefixFieldType = other.prefixFieldType.clone(); + } + } + + void setPrefixFieldType(PrefixFieldType prefixFieldType) { + checkIfFrozen(); + this.prefixFieldType = prefixFieldType; + } + + @Override + public ShingleFieldType clone() { + return new ShingleFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public Query existsQuery(QueryShardContext context) { + if (omitNorms()) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } else { + return new NormsFieldExistsQuery(name()); + } + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (prefixFieldType == null || 
prefixFieldType.termLengthWithinBounds(value.length()) == false) { + return super.prefixQuery(value, method, context); + } else { + final Query query = prefixFieldType.prefixQuery(value, method, context); + if (method == null + || method == MultiTermQuery.CONSTANT_SCORE_REWRITE + || method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) { + return new ConstantScoreQuery(query); + } else { + return query; + } + } + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + final String prefixFieldName = slop > 0 + ? null + : prefixFieldType.name(); + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, + prefixFieldName, prefixFieldType::termLengthWithinBounds); + } + + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + if (prefixFieldType != null && prefixFieldType.termLengthWithinBounds(value.length())) { + return new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), indexedValueForSearch(value))), name()); + } else { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + } + + @Override + public void checkCompatibility(MappedFieldType other, List conflicts) { + super.checkCompatibility(other, conflicts); + ShingleFieldType ft = (ShingleFieldType) other; + if (ft.shingleSize != this.shingleSize) { + conflicts.add("mapper [" + name() + "] has different [shingle_size] values"); + } + if (Objects.equals(this.prefixFieldType, ft.prefixFieldType) == false) { + conflicts.add("mapper [" + name() + "] has different [index_prefixes] settings"); + } + } + + @Override + public boolean equals(Object otherObject) { + if (this == otherObject) { + return true; + } + if (otherObject == null || getClass() != otherObject.getClass()) { + return false; + } + if (!super.equals(otherObject)) { + return false; + } + final ShingleFieldType other = (ShingleFieldType) otherObject; + return shingleSize == other.shingleSize + && Objects.equals(prefixFieldType, other.prefixFieldType); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), shingleSize, prefixFieldType); + } + } + + private final int maxShingleSize; + private PrefixFieldMapper prefixField; + private final ShingleFieldMapper[] shingleFields; + + public SearchAsYouTypeFieldMapper(String simpleName, + SearchAsYouTypeFieldType fieldType, + Settings indexSettings, + CopyTo copyTo, + int maxShingleSize, + PrefixFieldMapper prefixField, + ShingleFieldMapper[] shingleFields) { + super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, MultiFields.empty(), copyTo); + this.prefixField = prefixField; + this.shingleFields = shingleFields; + this.maxShingleSize = maxShingleSize; + } + + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + final String value = context.externalValueSet() ? 
context.externalValue().toString() : context.parser().textOrNull(); + if (value == null) { + return; + } + + List newFields = new ArrayList<>(); + newFields.add(new Field(fieldType().name(), value, fieldType())); + for (ShingleFieldMapper subFieldMapper : shingleFields) { + fields.add(new Field(subFieldMapper.fieldType().name(), value, subFieldMapper.fieldType())); + } + newFields.add(new Field(prefixField.fieldType().name(), value, prefixField.fieldType())); + if (fieldType().omitNorms()) { + createFieldNamesField(context, newFields); + } + fields.addAll(newFields); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + protected void doMerge(Mapper mergeWith) { + super.doMerge(mergeWith); + SearchAsYouTypeFieldMapper mw = (SearchAsYouTypeFieldMapper) mergeWith; + if (mw.maxShingleSize != maxShingleSize) { + throw new IllegalArgumentException("mapper [" + name() + "] has different maxShingleSize setting, current [" + + this.maxShingleSize + "], merged [" + mw.maxShingleSize + "]"); + } + this.prefixField = (PrefixFieldMapper) this.prefixField.merge(mw); + + ShingleFieldMapper[] shingleFieldMappers = new ShingleFieldMapper[mw.shingleFields.length]; + for (int i = 0; i < shingleFieldMappers.length; i++) { + this.shingleFields[i] = (ShingleFieldMapper) this.shingleFields[i].merge(mw.shingleFields[i]); + } + } + + public static String getShingleFieldName(String parentField, int shingleSize) { + return parentField + "._" + shingleSize + "gram"; + } + + @Override + public SearchAsYouTypeFieldType fieldType() { + return (SearchAsYouTypeFieldType) super.fieldType(); + } + + public int maxShingleSize() { + return maxShingleSize; + } + + public PrefixFieldMapper prefixField() { + return prefixField; + } + + public ShingleFieldMapper[] shingleFields() { + return shingleFields; + } + + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + doXContentAnalyzers(builder, includeDefaults); + builder.field("max_shingle_size", maxShingleSize); + } + + @Override + public Iterator iterator() { + List subIterators = new ArrayList<>(); + subIterators.add(prefixField); + subIterators.addAll(Arrays.asList(shingleFields)); + @SuppressWarnings("unchecked") Iterator concat = Iterators.concat(super.iterator(), subIterators.iterator()); + return concat; + } + + /** + * An analyzer wrapper to add a shingle token filter, an edge ngram token filter or both to its wrapped analyzer. 
When adding an edge + * ngrams token filter, it also adds a {@link TrailingShingleTokenFilter} to add extra position increments at the end of the stream + * to induce the shingle token filter to create tokens at the end of the stream smaller than the shingle size + */ + static class SearchAsYouTypeAnalyzer extends AnalyzerWrapper { + + private final Analyzer delegate; + private final int shingleSize; + private final boolean indexPrefixes; + + private SearchAsYouTypeAnalyzer(Analyzer delegate, + int shingleSize, + boolean indexPrefixes) { + + super(delegate.getReuseStrategy()); + this.delegate = Objects.requireNonNull(delegate); + this.shingleSize = shingleSize; + this.indexPrefixes = indexPrefixes; + } + + static SearchAsYouTypeAnalyzer withShingle(Analyzer delegate, int shingleSize) { + return new SearchAsYouTypeAnalyzer(delegate, shingleSize, false); + } + + static SearchAsYouTypeAnalyzer withShingleAndPrefix(Analyzer delegate, int shingleSize) { + return new SearchAsYouTypeAnalyzer(delegate, shingleSize, true); + } + + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return delegate; + } + + @Override + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + TokenStream tokenStream = components.getTokenStream(); + if (indexPrefixes) { + tokenStream = new TrailingShingleTokenFilter(tokenStream, shingleSize - 1); + } + tokenStream = new FixedShingleFilter(tokenStream, shingleSize, " ", ""); + if (indexPrefixes) { + tokenStream = new EdgeNGramTokenFilter(tokenStream, Defaults.MIN_GRAM, Defaults.MAX_GRAM, true); + } + return new TokenStreamComponents(components.getSource(), tokenStream); + } + + public int shingleSize() { + return shingleSize; + } + + public boolean indexPrefixes() { + return indexPrefixes; + } + + @Override + public String toString() { + return "<" + getClass().getCanonicalName() + " shingleSize=[" + shingleSize + "] indexPrefixes=[" + indexPrefixes + "]>"; + } + + private static class TrailingShingleTokenFilter extends TokenFilter { + + private final int extraPositionIncrements; + private final PositionIncrementAttribute positionIncrementAttribute; + + TrailingShingleTokenFilter(TokenStream input, int extraPositionIncrements) { + super(input); + this.extraPositionIncrements = extraPositionIncrements; + this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + return input.incrementToken(); + } + + @Override + public void end() throws IOException { + super.end(); + positionIncrementAttribute.setPositionIncrement(extraPositionIncrements); + } + } + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java new file mode 100644 index 0000000000000..6cf0dc83d9070 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java @@ -0,0 +1,197 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeAnalyzer; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; +import java.util.stream.IntStream; + +import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static java.util.stream.Collectors.toList; +import static org.hamcrest.Matchers.equalTo; + +public class SearchAsYouTypeAnalyzerTests extends ESTestCase { + + private static final Analyzer SIMPLE = new SimpleAnalyzer(); + + public static List analyze(SearchAsYouTypeAnalyzer analyzer, String text) throws IOException { + final List tokens = new ArrayList<>(); + try (TokenStream tokenStream = analyzer.tokenStream("field", text)) { + final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + tokens.add(charTermAttribute.toString()); + } + } + return tokens; + } + + private void testCase(String text, + Function analyzerFunction, + Function> expectedTokensFunction) throws IOException { + + for (int shingleSize = 2; shingleSize <= 4; shingleSize++) { + final SearchAsYouTypeAnalyzer analyzer = analyzerFunction.apply(shingleSize); + final List expectedTokens = expectedTokensFunction.apply(shingleSize); + final List actualTokens = analyze(analyzer, text); + assertThat("analyzed correctly with " + analyzer, actualTokens, equalTo(expectedTokens)); + } + } + + public void testSingleTermShingles() throws IOException { + testCase( + "quick", + shingleSize -> SearchAsYouTypeAnalyzer.withShingle(SIMPLE, shingleSize), + shingleSize -> emptyList() + ); + } + + public void testMultiTermShingles() throws IOException { + testCase( + "quick brown fox jump lazy", + shingleSize -> SearchAsYouTypeAnalyzer.withShingle(SIMPLE, shingleSize), + shingleSize -> { + if (shingleSize == 2) { + return asList("quick brown", "brown fox", "fox jump", "jump lazy"); + } else if (shingleSize == 3) { + return asList("quick brown fox", "brown fox jump", "fox jump lazy"); + } else if (shingleSize == 4) { + return asList("quick brown fox jump", "brown fox jump lazy"); + } + throw new IllegalArgumentException(); + } + ); + } + + public void testSingleTermPrefix() throws IOException { + testCase( + "quick", + shingleSize -> SearchAsYouTypeAnalyzer.withShingleAndPrefix(SIMPLE, shingleSize), + shingleSize -> { + final List tokens = new ArrayList<>(asList("q", "qu", "qui", "quic", "quick")); + tokens.addAll(tokenWithSpaces("quick", shingleSize)); + return tokens; + } + ); + } + + public void testMultiTermPrefix() throws IOException { + testCase( + //"quick red fox lazy brown", + "quick brown fox jump lazy", + shingleSize -> 
SearchAsYouTypeAnalyzer.withShingleAndPrefix(SIMPLE, shingleSize), + shingleSize -> { + if (shingleSize == 2) { + final List tokens = new ArrayList<>(); + tokens.addAll(asList( + "q", "qu", "qui", "quic", "quick", "quick ", "quick b", "quick br", "quick bro", "quick brow", "quick brown" + )); + tokens.addAll(asList( + "b", "br", "bro", "brow", "brown", "brown ", "brown f", "brown fo", "brown fox" + )); + tokens.addAll(asList( + "f", "fo", "fox", "fox ", "fox j", "fox ju", "fox jum", "fox jump" + )); + tokens.addAll(asList( + "j", "ju", "jum", "jump", "jump ", "jump l", "jump la", "jump laz", "jump lazy" + )); + tokens.addAll(asList( + "l", "la", "laz", "lazy" + )); + tokens.addAll(tokenWithSpaces("lazy", shingleSize)); + return tokens; + } else if (shingleSize == 3) { + final List tokens = new ArrayList<>(); + tokens.addAll(asList( + "q", "qu", "qui", "quic", "quick", "quick ", "quick b", "quick br", "quick bro", "quick brow", "quick brown", + "quick brown ", "quick brown f", "quick brown fo", "quick brown fox" + )); + tokens.addAll(asList( + "b", "br", "bro", "brow", "brown", "brown ", "brown f", "brown fo", "brown fox", "brown fox ", "brown fox j", + "brown fox ju", "brown fox jum", "brown fox jump" + )); + tokens.addAll(asList( + "f", "fo", "fox", "fox ", "fox j", "fox ju", "fox jum", "fox jump", "fox jump ", "fox jump l", "fox jump la", + "fox jump laz", "fox jump lazy" + )); + tokens.addAll(asList( + "j", "ju", "jum", "jump", "jump ", "jump l", "jump la", "jump laz", "jump lazy" + )); + tokens.addAll(tokenWithSpaces("jump lazy", shingleSize - 1)); + tokens.addAll(asList( + "l", "la", "laz", "lazy" + )); + tokens.addAll(tokenWithSpaces("lazy", shingleSize)); + return tokens; + } else if (shingleSize == 4) { + final List tokens = new ArrayList<>(); + tokens.addAll(asList( + "q", "qu", "qui", "quic", "quick", "quick ", "quick b", "quick br", "quick bro", "quick brow", "quick brown", + "quick brown ", "quick brown f", "quick brown fo", "quick brown fox", "quick brown fox ", "quick brown fox j", + "quick brown fox ju", "quick brown fox jum", "quick brown fox jump" + )); + tokens.addAll(asList( + "b", "br", "bro", "brow", "brown", "brown ", "brown f", "brown fo", "brown fox", "brown fox ", "brown fox j", + "brown fox ju", "brown fox jum", "brown fox jump", "brown fox jump ", "brown fox jump l", "brown fox jump la", + "brown fox jump laz", "brown fox jump lazy" + )); + tokens.addAll(asList( + "f", "fo", "fox", "fox ", "fox j", "fox ju", "fox jum", "fox jump", "fox jump ", "fox jump l", "fox jump la", + "fox jump laz", "fox jump lazy" + )); + tokens.addAll(tokenWithSpaces("fox jump lazy", shingleSize - 2)); + tokens.addAll(asList( + "j", "ju", "jum", "jump", "jump ", "jump l", "jump la", "jump laz", "jump lazy" + )); + tokens.addAll(tokenWithSpaces("jump lazy", shingleSize - 1)); + tokens.addAll(asList( + "l", "la", "laz", "lazy" + )); + tokens.addAll(tokenWithSpaces("lazy", shingleSize)); + return tokens; + } + + throw new IllegalArgumentException(); + } + ); + } + + private static List tokenWithSpaces(String text, int maxShingleSize) { + return IntStream.range(1, maxShingleSize).mapToObj(i -> text + spaces(i)).collect(toList()); + } + + private static String spaces(int count) { + final StringBuilder builder = new StringBuilder(); + for (int i = 0; i < count; i++) { + builder.append(" "); + } + return builder.toString(); + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java 
b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java new file mode 100644 index 0000000000000..9ed43a9505624 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java @@ -0,0 +1,758 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.PrefixFieldMapper; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.PrefixFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeAnalyzer; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.ShingleFieldMapper; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.ShingleFieldType; +import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import 
java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.Arrays.asList;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasProperty;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.hamcrest.collection.IsArrayContainingInAnyOrder.arrayContainingInAnyOrder;
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+
+public class SearchAsYouTypeFieldMapperTests extends ESSingleNodeTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        return pluginList(MapperExtrasPlugin.class);
+    }
+
+    public void testIndexing() throws IOException {
+        final String mapping = Strings.toString(XContentFactory.jsonBuilder()
+            .startObject()
+            .startObject("_doc")
+            .startObject("properties")
+            .startObject("a_field")
+            .field("type", "search_as_you_type")
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject());
+
+        final DocumentMapper mapper = createIndex("test")
+            .mapperService()
+            .documentMapperParser()
+            .parse("_doc", new CompressedXContent(mapping));
+        ParsedDocument doc = mapper.parse(new SourceToParse("test", "_doc", "1", BytesReference
+            .bytes(XContentFactory.jsonBuilder()
+                .startObject()
+                .field("a_field", "new york city")
+                .endObject()),
+            XContentType.JSON));
+
+        for (String field : new String[] { "a_field", "a_field._index_prefix", "a_field._2gram", "a_field._3gram"}) {
+            IndexableField[] fields = doc.rootDoc().getFields(field);
+            assertEquals(1, fields.length);
+            assertEquals("new york city", fields[0].stringValue());
+        }
+    }
+
+    public void testDefaultConfiguration() throws IOException {
+        final String mapping = Strings.toString(XContentFactory.jsonBuilder()
+            .startObject()
+            .startObject("_doc")
+            .startObject("properties")
+            .startObject("a_field")
+            .field("type", "search_as_you_type")
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject());
+
+        final DocumentMapper defaultMapper = createIndex("test")
+            .mapperService()
+            .documentMapperParser()
+            .parse("_doc", new CompressedXContent(mapping));
+
+        final SearchAsYouTypeFieldMapper rootMapper = getRootFieldMapper(defaultMapper, "a_field");
+        assertRootFieldMapper(rootMapper, 3, "default");
+
+
+        final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix");
+        assertPrefixFieldType(prefixFieldMapper.fieldType(), 3, "default");
+
+        assertShingleFieldType(
+            getShingleFieldMapper(defaultMapper, "a_field._2gram").fieldType(), 2, "default", prefixFieldMapper.fieldType());
+        assertShingleFieldType(
+            getShingleFieldMapper(defaultMapper, "a_field._3gram").fieldType(), 3, "default", prefixFieldMapper.fieldType());
+    }
+
+    public void testConfiguration() throws IOException {
+        final int maxShingleSize = 4;
+        final String analyzerName = "simple";
+
+        final String mapping = Strings.toString(XContentFactory.jsonBuilder()
+            .startObject()
+            .startObject("_doc")
+            .startObject("properties")
+            .startObject("a_field")
+            .field("type", "search_as_you_type")
+            .field("analyzer", analyzerName)
+            .field("max_shingle_size", maxShingleSize)
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject());
+
+        final DocumentMapper defaultMapper = createIndex("test")
+            .mapperService()
+            .documentMapperParser()
+            .parse("_doc", new CompressedXContent(mapping));
+
+        final SearchAsYouTypeFieldMapper rootMapper =
getRootFieldMapper(defaultMapper, "a_field"); + assertRootFieldMapper(rootMapper, maxShingleSize, analyzerName); + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertPrefixFieldType(prefixFieldMapper.fieldType(), maxShingleSize, analyzerName); + + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._2gram").fieldType(), 2, analyzerName, prefixFieldMapper.fieldType()); + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._3gram").fieldType(), 3, analyzerName, prefixFieldMapper.fieldType()); + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._4gram").fieldType(), 4, analyzerName, prefixFieldMapper.fieldType()); + } + + public void testIndexOptions() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("index_options", "offsets") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertThat("for " + mapper.name(), + mapper.fieldType().indexOptions(), equalTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS))); + } + + public void testStore() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("store", "true") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + assertTrue(getRootFieldMapper(defaultMapper, "a_field").fieldType().stored()); + Stream.of( + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertFalse("for " + mapper.name(), mapper.fieldType().stored())); + } + + public void testIndex() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("index", "false") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertThat("for " + mapper.name(), mapper.fieldType().indexOptions(), equalTo(IndexOptions.NONE))); + } + + public void testTermVectors() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() 
+ .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("term_vector", "yes") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertTrue("for " + mapper.name(), mapper.fieldType().storeTermVectors())); + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertFalse(prefixFieldMapper.fieldType().storeTermVectors()); + } + + public void testNorms() throws IOException { + // default setting + { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test-1") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertFalse("for " + mapper.name(), mapper.fieldType().omitNorms())); + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertTrue(prefixFieldMapper.fieldType().omitNorms()); + } + + // can disable them on shingle fields + { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("norms", "false") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test-2") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertTrue("for " + mapper.name(), mapper.fieldType().omitNorms())); + } + } + + + public void testDocumentParsingSingleValue() throws IOException { + documentParsingTestCase(Collections.singleton(randomAlphaOfLengthBetween(5, 20))); + } + + public void testDocumentParsingMultipleValues() throws IOException { + documentParsingTestCase(randomUnique(() -> randomAlphaOfLengthBetween(3, 20), randomIntBetween(2, 10))); + } + + public void testMatchPhrasePrefix() throws IOException { + IndexService indexService = createIndex("test", Settings.EMPTY); + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject().endObject()); + + queryShardContext.getMapperService().merge("type", new 
CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expected = new SynonymQuery(new Term("field._index_prefix", "two words")); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + Query expected = new SynonymQuery(new Term("field._index_prefix", "three words here")); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); + mpq.setSlop(1); + mpq.add(new Term("field", "two")); + mpq.add(new Term("field", "words")); + assertThat(q, equalTo(mpq)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "more than three words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field._3gram", true) + .addClause(new SpanTermQuery(new Term("field._3gram", "more than three"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "than three words")), "field._3gram") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field._3gram", "more than three words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field._3gram", true) + .addClause(new SpanTermQuery(new Term("field._3gram", "more than three"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "than three words")), "field._3gram") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field._3gram", "two words").toQuery(queryShardContext); + Query expected = new MatchNoDocsQuery(); + assertThat(q, equalTo(expected)); + } + + { + Query actual = new MatchPhrasePrefixQueryBuilder("field._3gram", "one two three four") + .slop(1) + .toQuery(queryShardContext); + MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field._3gram"); + expected.setSlop(1); + expected.add(new Term("field._3gram", "one two three")); + expected.add(new Term("field._3gram", "two three four")); + assertThat(actual, equalTo(expected)); + } + + } + + public void testMatchPhrase() throws IOException { + final IndexService indexService = createIndex("test", Settings.EMPTY); + final QueryShardContext queryShardContext = indexService.newQueryShardContext(randomInt(20), null, + () -> { throw new UnsupportedOperationException(); }, null); + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject() + .endObject()); + + queryShardContext.getMapperService().merge("_doc", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field", "one")); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._2gram", "one two")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new 
MatchPhraseQueryBuilder("a_field", "one two three") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._3gram", "one two three")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two three four") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._3gram", "one two three")) + .add(new Term("a_field._3gram", "two three four")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two") + .slop(1) + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field", "one")) + .add(new Term("a_field", "two")) + .setSlop(1) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._2gram", "one two") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field._2gram", "one two")); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._2gram", "one two three") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._2gram", "one two")) + .add(new Term("a_field._2gram", "two three")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._3gram", "one two three") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field._3gram", "one two three")); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._3gram", "one two three four") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._3gram", "one two three")) + .add(new Term("a_field._3gram", "two three four")) + .build(); + assertThat(actual, equalTo(expected)); + } + + // todo are these queries generated for the prefix field right? 
+        {
+            final Query actual = new MatchPhraseQueryBuilder("a_field._index_prefix", "one two")
+                .toQuery(queryShardContext);
+            final Query expected = new MatchNoDocsQuery("Matching no documents because no terms present");
+            assertThat(actual, equalTo(expected));
+        }
+
+        {
+            final Query actual = new MatchPhraseQueryBuilder("a_field._index_prefix", "one two three")
+                .toQuery(queryShardContext);
+            final Query expected = new TermQuery(new Term("a_field._index_prefix", "one two three"));
+            assertThat(actual, equalTo(expected));
+        }
+
+        {
+            expectThrows(IllegalArgumentException.class,
+                () -> new MatchPhraseQueryBuilder("a_field._index_prefix", "one two three four").toQuery(queryShardContext));
+        }
+    }
+
+    private static BooleanQuery buildBoolPrefixQuery(String shingleFieldName, String prefixFieldName, List<String> terms) {
+        final BooleanQuery.Builder builder = new BooleanQuery.Builder();
+        for (int i = 0; i < terms.size() - 1; i++) {
+            final String term = terms.get(i);
+            builder.add(new BooleanClause(new TermQuery(new Term(shingleFieldName, term)), BooleanClause.Occur.SHOULD));
+        }
+        final String finalTerm = terms.get(terms.size() - 1);
+        builder.add(new BooleanClause(
+            new ConstantScoreQuery(new TermQuery(new Term(prefixFieldName, finalTerm))), BooleanClause.Occur.SHOULD));
+        return builder.build();
+    }
+
+    public void testMultiMatchBoolPrefix() throws IOException {
+        final IndexService indexService = createIndex("test", Settings.EMPTY);
+        final QueryShardContext queryShardContext = indexService.newQueryShardContext(randomInt(20), null,
+            () -> { throw new UnsupportedOperationException(); }, null);
+        final String mapping = Strings.toString(XContentFactory.jsonBuilder()
+            .startObject()
+            .startObject("_doc")
+            .startObject("properties")
+            .startObject("a_field")
+            .field("type", "search_as_you_type")
+            .field("max_shingle_size", 4)
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject());
+
+        queryShardContext.getMapperService().merge("_doc", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+
+        final MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder(
+            "quick brown fox jump lazy dog",
+            "a_field",
+            "a_field._2gram",
+            "a_field._3gram",
+            "a_field._4gram"
+        );
+        builder.type(MultiMatchQueryBuilder.Type.BOOL_PREFIX);
+
+        final Query actual = builder.toQuery(queryShardContext);
+        assertThat(actual, instanceOf(DisjunctionMaxQuery.class));
+        final DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) actual;
+        assertThat(disMaxQuery.getDisjuncts(), hasSize(4));
+        assertThat(disMaxQuery.getDisjuncts(), containsInAnyOrder(
+            buildBoolPrefixQuery(
+                "a_field", "a_field._index_prefix", asList("quick", "brown", "fox", "jump", "lazy", "dog")),
+            buildBoolPrefixQuery("a_field._2gram", "a_field._index_prefix",
+                asList("quick brown", "brown fox", "fox jump", "jump lazy", "lazy dog")),
+            buildBoolPrefixQuery("a_field._3gram", "a_field._index_prefix",
+                asList("quick brown fox", "brown fox jump", "fox jump lazy", "jump lazy dog")),
+            buildBoolPrefixQuery("a_field._4gram", "a_field._index_prefix",
+                asList("quick brown fox jump", "brown fox jump lazy", "fox jump lazy dog"))));
+    }
+
+    private void documentParsingTestCase(Collection<String> values) throws IOException {
+        final String mapping = Strings.toString(XContentFactory.jsonBuilder()
+            .startObject()
+            .startObject("_doc")
+            .startObject("properties")
+            .startObject("a_field")
+            .field("type", "search_as_you_type")
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject());
+
+        final DocumentMapper defaultMapper = createIndex("test")
+            .mapperService()
+            .documentMapperParser()
+            .parse("_doc", new CompressedXContent(mapping));
+
+        final XContentBuilder builder = XContentFactory.jsonBuilder();
+        builder.startObject();
+        if (values.size() > 1) {
+            builder.array("a_field", values.toArray(new String[0]));
+        } else {
+            builder.field("a_field", values.iterator().next());
+        }
+        builder.endObject();
+        final ParsedDocument parsedDocument = defaultMapper.parse(
+            new SourceToParse("test", "_doc", "1", BytesReference.bytes(builder), XContentType.JSON));
+
+
+        final Set<Matcher<IndexableField>> rootFieldMatchers = values.stream()
+            .map(value -> indexableFieldMatcher(value, SearchAsYouTypeFieldType.class))
+            .collect(Collectors.toSet());
+        final Set<Matcher<IndexableField>> shingleFieldMatchers = values.stream()
+            .map(value -> indexableFieldMatcher(value, ShingleFieldType.class))
+            .collect(Collectors.toSet());
+        final Set<Matcher<IndexableField>> prefixFieldMatchers = values.stream()
+            .map(value -> indexableFieldMatcher(value, PrefixFieldType.class))
+            .collect(Collectors.toSet());
+
+        // the use of new ArrayList<>() here is to avoid the varargs form of arrayContainingInAnyOrder
+        assertThat(
+            parsedDocument.rootDoc().getFields("a_field"),
+            arrayContainingInAnyOrder(new ArrayList<>(rootFieldMatchers)));
+
+        assertThat(
+            parsedDocument.rootDoc().getFields("a_field._index_prefix"),
+            arrayContainingInAnyOrder(new ArrayList<>(prefixFieldMatchers)));
+
+        for (String name : asList("a_field._2gram", "a_field._3gram")) {
+            assertThat(parsedDocument.rootDoc().getFields(name), arrayContainingInAnyOrder(new ArrayList<>(shingleFieldMatchers)));
+        }
+    }
+
+    private static Matcher<IndexableField> indexableFieldMatcher(String value, Class<? extends FieldType> fieldTypeClass) {
+        return Matchers.allOf(
+            hasProperty(IndexableField::stringValue, equalTo(value)),
+            hasProperty(IndexableField::fieldType, instanceOf(fieldTypeClass))
+        );
+    }
+
+    private static void assertRootFieldMapper(SearchAsYouTypeFieldMapper mapper,
+                                              int maxShingleSize,
+                                              String analyzerName) {
+
+        assertThat(mapper.maxShingleSize(), equalTo(maxShingleSize));
+        assertThat(mapper.fieldType(), notNullValue());
+        assertSearchAsYouTypeFieldType(mapper.fieldType(), maxShingleSize, analyzerName, mapper.prefixField().fieldType());
+
+        assertThat(mapper.prefixField(), notNullValue());
+        assertThat(mapper.prefixField().fieldType().parentField, equalTo(mapper.name()));
+        assertPrefixFieldType(mapper.prefixField().fieldType(), maxShingleSize, analyzerName);
+
+
+        for (int shingleSize = 2; shingleSize <= maxShingleSize; shingleSize++) {
+            final ShingleFieldMapper shingleFieldMapper = mapper.shingleFields()[shingleSize - 2];
+            assertThat(shingleFieldMapper, notNullValue());
+            assertShingleFieldType(shingleFieldMapper.fieldType(), shingleSize, analyzerName, mapper.prefixField().fieldType());
+        }
+
+        final int numberOfShingleSubfields = (maxShingleSize - 2) + 1;
+        assertThat(mapper.shingleFields().length, equalTo(numberOfShingleSubfields));
+    }
+
+    private static void assertSearchAsYouTypeFieldType(SearchAsYouTypeFieldType fieldType, int maxShingleSize,
+                                                       String analyzerName,
+                                                       PrefixFieldType prefixFieldType) {
+
+        assertThat(fieldType.shingleFields.length, equalTo(maxShingleSize-1));
+        for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.searchAnalyzer())) {
+            assertThat(analyzer.name(), equalTo(analyzerName));
+        }
+        int shingleSize = 2;
+        for (ShingleFieldType shingleField : fieldType.shingleFields) {
+            assertShingleFieldType(shingleField, shingleSize++, analyzerName, prefixFieldType);
+        }
+
+        assertThat(fieldType.prefixField,
equalTo(prefixFieldType)); + } + + private static void assertShingleFieldType(ShingleFieldType fieldType, + int shingleSize, + String analyzerName, + PrefixFieldType prefixFieldType) { + + assertThat(fieldType.shingleSize, equalTo(shingleSize)); + + for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.searchAnalyzer())) { + assertThat(analyzer.name(), equalTo(analyzerName)); + if (shingleSize > 1) { + final SearchAsYouTypeAnalyzer wrappedAnalyzer = (SearchAsYouTypeAnalyzer) analyzer.analyzer(); + assertThat(wrappedAnalyzer.shingleSize(), equalTo(shingleSize)); + assertThat(wrappedAnalyzer.indexPrefixes(), equalTo(false)); + } + } + + assertThat(fieldType.prefixFieldType, equalTo(prefixFieldType)); + + } + + private static void assertPrefixFieldType(PrefixFieldType fieldType, int shingleSize, String analyzerName) { + for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.searchAnalyzer())) { + assertThat(analyzer.name(), equalTo(analyzerName)); + } + + final SearchAsYouTypeAnalyzer wrappedIndexAnalyzer = (SearchAsYouTypeAnalyzer) fieldType.indexAnalyzer().analyzer(); + final SearchAsYouTypeAnalyzer wrappedSearchAnalyzer = (SearchAsYouTypeAnalyzer) fieldType.searchAnalyzer().analyzer(); + for (SearchAsYouTypeAnalyzer analyzer : asList(wrappedIndexAnalyzer, wrappedSearchAnalyzer)) { + assertThat(analyzer.shingleSize(), equalTo(shingleSize)); + } + assertThat(wrappedIndexAnalyzer.indexPrefixes(), equalTo(true)); + assertThat(wrappedSearchAnalyzer.indexPrefixes(), equalTo(false)); + } + + private static SearchAsYouTypeFieldMapper getRootFieldMapper(DocumentMapper defaultMapper, String fieldName) { + final Mapper mapper = defaultMapper.mappers().getMapper(fieldName); + assertThat(mapper, instanceOf(SearchAsYouTypeFieldMapper.class)); + return (SearchAsYouTypeFieldMapper) mapper; + } + + private static ShingleFieldMapper getShingleFieldMapper(DocumentMapper defaultMapper, String fieldName) { + final Mapper mapper = defaultMapper.mappers().getMapper(fieldName); + assertThat(mapper, instanceOf(ShingleFieldMapper.class)); + return (ShingleFieldMapper) mapper; + } + + private static PrefixFieldMapper getPrefixFieldMapper(DocumentMapper defaultMapper, String fieldName) { + final Mapper mapper = defaultMapper.mappers().getMapper(fieldName); + assertThat(mapper, instanceOf(PrefixFieldMapper.class)); + return (PrefixFieldMapper) mapper; + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java new file mode 100644 index 0000000000000..523de91809145 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java @@ -0,0 +1,113 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.Defaults; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.PrefixFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.ShingleFieldType; +import org.junit.Before; + +import static java.util.Arrays.asList; +import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE; +import static org.hamcrest.Matchers.equalTo; + +public class SearchAsYouTypeFieldTypeTests extends FieldTypeTestCase { + + private static final String NAME = "a_field"; + private static final String PREFIX_NAME = NAME + "._index_prefix"; + + @Before + public void setupProperties() { + addModifier(new Modifier("max_shingle_size", false) { + @Override + public void modify(MappedFieldType ft) { + SearchAsYouTypeFieldType fieldType = (SearchAsYouTypeFieldType) ft; + fieldType.setShingleFields(new ShingleFieldType[] { + new ShingleFieldType(fieldType, 2), + new ShingleFieldType(fieldType, 3) + }); + } + }); + addModifier(new Modifier("index_prefixes", false) { + @Override + public void modify(MappedFieldType ft) { + SearchAsYouTypeFieldType fieldType = (SearchAsYouTypeFieldType) ft; + fieldType.setPrefixField(new PrefixFieldType(NAME, PREFIX_NAME, 1, 10)); + } + }); + } + + @Override + protected SearchAsYouTypeFieldType createDefaultFieldType() { + final SearchAsYouTypeFieldType fieldType = new SearchAsYouTypeFieldType(); + fieldType.setName(NAME); + fieldType.setPrefixField(new PrefixFieldType(NAME, PREFIX_NAME, Defaults.MIN_GRAM, Defaults.MAX_GRAM)); + fieldType.setShingleFields(new ShingleFieldType[] { new ShingleFieldType(fieldType, 2) }); + return fieldType; + } + + public void testTermQuery() { + final MappedFieldType fieldType = createDefaultFieldType(); + + fieldType.setIndexOptions(IndexOptions.DOCS); + assertThat(fieldType.termQuery("foo", null), equalTo(new TermQuery(new Term(NAME, "foo")))); + + fieldType.setIndexOptions(IndexOptions.NONE); + final IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> fieldType.termQuery("foo", null)); + assertThat(e.getMessage(), equalTo("Cannot search on field [" + NAME + "] since it is not indexed.")); + } + + public void testTermsQuery() { + final MappedFieldType fieldType = createDefaultFieldType(); + + fieldType.setIndexOptions(IndexOptions.DOCS); + assertThat(fieldType.termsQuery(asList("foo", "bar"), null), + equalTo(new TermInSetQuery(NAME, asList(new BytesRef("foo"), new BytesRef("bar"))))); + + fieldType.setIndexOptions(IndexOptions.NONE); + final IllegalArgumentException e = + expectThrows(IllegalArgumentException.class, () -> fieldType.termsQuery(asList("foo", "bar"), null)); + assertThat(e.getMessage(), equalTo("Cannot search on field [" + NAME + "] since it is not indexed.")); + } + + public void testPrefixQuery() { + final SearchAsYouTypeFieldType fieldType = createDefaultFieldType(); + + // this term should be a length that can be rewriteable to a term query on the 
prefix field + final String withinBoundsTerm = "foo"; + assertThat(fieldType.prefixQuery(withinBoundsTerm, CONSTANT_SCORE_REWRITE, null), + equalTo(new ConstantScoreQuery(new TermQuery(new Term(PREFIX_NAME, withinBoundsTerm))))); + + // our defaults don't allow a situation where a term can be too small + + // this term should be too long to be rewriteable to a term query on the prefix field + final String longTerm = "toolongforourprefixfieldthistermis"; + assertThat(fieldType.prefixQuery(longTerm, CONSTANT_SCORE_REWRITE, null), + equalTo(new PrefixQuery(new Term(NAME, longTerm)))); + } +} diff --git a/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml new file mode 100644 index 0000000000000..f9b76a7399a37 --- /dev/null +++ b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml @@ -0,0 +1,1249 @@ +setup: + - skip: + version: " - 7.1.0" + reason: "added in 7.1.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + + - do: + index: + index: test + type: _doc + id: 1 + body: + a_field: "quick brown fox jump lazy dog" + + # this document should not be matched + - do: + index: + index: test + type: _doc + id: 2 + body: + a_field: "xylophone xylophone xylophone" + + - do: + indices.refresh: {} + +--- +"get document": + - do: + get: + index: test + type: _doc + id: 1 + + - is_true: found + - match: { _source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on root field": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field: "quick" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + +# these "search on Xgram" tests repeat the same search for each term we expect to generate +--- +"term query on 2gram": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "quick brown" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on 3gram": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "quick brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + 
a_field._3gram: "brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "jump lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on 4gram": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._4gram: "quick brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._4gram: "brown fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._4gram: "fox jump lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +# we won't check all the terms that this field generates because there are many +--- +"term query on prefix field with prefix term": + + # search term as prefix + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._index_prefix: "quick br" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on prefix field with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._index_prefix: "jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on prefix field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._index_prefix: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 2gram with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._2gram: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 2gram with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._2gram: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 3gram with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + 
a_field._3gram: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 3gram with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._3gram: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 4gram with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._4gram: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 4gram with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._4gram: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quick b" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quick brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quick brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fox jum" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fox jump lazy do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 1 prefix term": + + - do: + search: + 
rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 5 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox jump lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "qui" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + 
match_phrase_prefix: + a_field: "quick b" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "quick brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "quick brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "quick brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "br" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 5 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown fox jump lazy d" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "qui" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick b" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 3 prefix terms": + + - do: + search: + 
rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "br" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fox j" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 5 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fox jump lazy d" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field out of order partial trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "fox jump brown do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field out of order partial leading term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "fox jump brown qui" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "qui" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - 
match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick br" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick brown f" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "br" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown f" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "do" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy 
dog" } + +--- +"multi match bool prefix query out of order with partial trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "fox jump brown do" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query out of order with partial leading term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "fox jump lazy qui" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } diff --git a/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml new file mode 100644 index 0000000000000..5a96a11a47586 --- /dev/null +++ b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml @@ -0,0 +1,202 @@ +setup: + - skip: + version: " - 7.1.0" + reason: "added in 7.1.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + text_field: + type: text + analyzer: simple + + - do: + index: + index: test + type: _doc + id: 1 + body: + a_field: "quick brown fox jump lazy dog" + text_field: "quick brown fox jump lazy dog" + + - do: + indices.refresh: {} + +--- +"phrase query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"bool prefix query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fo" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query 1 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fo" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: null } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool 
prefix query 2 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 3 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 4 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy d" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] } diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 9777174563626..7b195bdc7b434 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -591,7 +591,7 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi @Override public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { - return 
TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions); + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, null, null); } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml new file mode 100644 index 0000000000000..957d26036b4a8 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml @@ -0,0 +1,363 @@ +setup: + - skip: + version: " - 7.1.0" + reason: "added in 7.1.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + my_field1: + type: text + my_field2: + type: text + + - do: + index: + index: test + id: 1 + body: + my_field1: "brown fox jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 2 + body: + my_field1: "brown emu jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 3 + body: + my_field1: "jumparound" + my_field2: "emu" + + - do: + index: + index: test + id: 4 + body: + my_field1: "dog" + my_field2: "brown fox jump lazy" + + - do: + indices.refresh: {} + +--- +"scoring complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox jump" + + - match: { hits.total: 3 } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"scoring partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox ju" + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"minimum should match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + minimum_should_match: 3 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "BROWN dog" + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + +--- +"operator": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + operator: AND + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field2: + query: "xylophoen foo" + fuzziness: 1 + prefix_length: 1 + max_expansions: 10 + fuzzy_transpositions: true + fuzzy_rewrite: constant_score + + - match: { hits.total: 2 } + - match: { hits.hits.0._source.my_field2: "xylophone" } + - match: { hits.hits.1._source.my_field2: "xylophone" } + +--- +"multi_match single field complete term": + + - do: + search: + rest_total_hits_as_int: true + 
index: test + body: + query: + multi_match: + query: "brown fox jump" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match single field partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match multiple fields complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump laz" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields with analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "BROWN FOX JUMP dog" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with minimum_should_match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + minimum_should_match: 4 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "dob nomatch" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + fuzziness: 1 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - 
match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with boost": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown emu" + type: bool_prefix + fields: [ "my_field1", "my_field2^10" ] + fuzziness: 1 + + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0._source.my_field2: "emu" } + +--- +"multi_match multiple fields with slop throws exception": + + - do: + catch: /\[slop\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + slop: 1 + +--- +"multi_match multiple fields with cutoff_frequency throws exception": + + - do: + catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + cutoff_frequency: 0.001 diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 805b50e628bb1..5790248ead807 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -74,6 +74,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.IntPredicate; import static org.elasticsearch.index.mapper.TypeParsers.parseTextField; @@ -687,69 +688,12 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) } private Query analyzePhrasePrefix(TokenStream stream, int slop, int maxExpansions) throws IOException { - final MultiPhrasePrefixQuery query = createPhrasePrefixQuery(stream, name(), slop, maxExpansions); - - if (slop > 0 - || prefixFieldType == null - || prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { - return query; - } - - int lastPos = query.getTerms().length - 1; - final Term[][] terms = query.getTerms(); - final int[] positions = query.getPositions(); - for (Term term : terms[lastPos]) { - String value = term.text(); - if (value.length() < prefixFieldType.minChars || value.length() > prefixFieldType.maxChars) { - return query; - } - } - - if (terms.length == 1) { - Term[] newTerms = Arrays.stream(terms[0]) - .map(term -> new Term(prefixFieldType.name(), term.bytes())) - .toArray(Term[]::new); - return new SynonymQuery(newTerms); - } - - SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(name(), true); - spanQuery.setSlop(slop); - int previousPos = -1; - for (int i = 0; i < terms.length; i++) { - Term[] posTerms = terms[i]; - int posInc = positions[i] - previousPos; - previousPos = positions[i]; - if (posInc > 1) { - spanQuery.addGap(posInc - 1); - } - if (i == lastPos) { - if (posTerms.length == 1) { - FieldMaskingSpanQuery fieldMask = - new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), posTerms[0].bytes())), name()); - spanQuery.addClause(fieldMask); - } else { - SpanQuery[] queries = Arrays.stream(posTerms) - .map(term -> new FieldMaskingSpanQuery( - new SpanTermQuery(new Term(prefixFieldType.name(), term.bytes())), name()) - ) - .toArray(SpanQuery[]::new); - spanQuery.addClause(new SpanOrQuery(queries)); - } - } else { - if (posTerms.length == 1) { - spanQuery.addClause(new SpanTermQuery(posTerms[0])); - } else { - 
SpanTermQuery[] queries = Arrays.stream(posTerms) - .map(SpanTermQuery::new) - .toArray(SpanTermQuery[]::new); - spanQuery.addClause(new SpanOrQuery(queries)); - } - } - } - return spanQuery.build(); + String prefixField = prefixFieldType == null || slop > 0 ? null : prefixFieldType.name(); + IntPredicate usePrefix = (len) -> len >= prefixFieldType.minChars && len <= prefixFieldType.maxChars; + return createPhrasePrefixQuery(stream, name(), slop, maxExpansions, prefixField, usePrefix); } - private static boolean hasGaps(TokenStream stream) throws IOException { + public static boolean hasGaps(TokenStream stream) throws IOException { assert stream instanceof CachingTokenFilter; PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); @@ -963,8 +907,8 @@ public static Query createPhraseQuery(TokenStream stream, String field, int slop return mpqb.build(); } - public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, String field, - int slop, int maxExpansions) throws IOException { + public static Query createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, + String prefixField, IntPredicate usePrefixField) throws IOException { MultiPhrasePrefixQuery builder = new MultiPhrasePrefixQuery(field); builder.setSlop(slop); builder.setMaxExpansions(maxExpansions); @@ -987,6 +931,61 @@ public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, currentTerms.add(new Term(field, termAtt.getBytesRef())); } builder.add(currentTerms.toArray(new Term[0]), position); - return builder; + if (prefixField == null) { + return builder; + } + + int lastPos = builder.getTerms().length - 1; + final Term[][] terms = builder.getTerms(); + final int[] positions = builder.getPositions(); + for (Term term : terms[lastPos]) { + String value = term.text(); + if (usePrefixField.test(value.length()) == false) { + return builder; + } + } + + if (terms.length == 1) { + Term[] newTerms = Arrays.stream(terms[0]) + .map(term -> new Term(prefixField, term.bytes())) + .toArray(Term[]::new); + return new SynonymQuery(newTerms); + } + + SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(field, true); + spanQuery.setSlop(slop); + int previousPos = -1; + for (int i = 0; i < terms.length; i++) { + Term[] posTerms = terms[i]; + int posInc = positions[i] - previousPos; + previousPos = positions[i]; + if (posInc > 1) { + spanQuery.addGap(posInc - 1); + } + if (i == lastPos) { + if (posTerms.length == 1) { + FieldMaskingSpanQuery fieldMask = + new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixField, posTerms[0].bytes())), field); + spanQuery.addClause(fieldMask); + } else { + SpanQuery[] queries = Arrays.stream(posTerms) + .map(term -> new FieldMaskingSpanQuery( + new SpanTermQuery(new Term(prefixField, term.bytes())), field) + ) + .toArray(SpanQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } else { + if (posTerms.length == 1) { + spanQuery.addClause(new SpanTermQuery(posTerms[0])); + } else { + SpanTermQuery[] queries = Arrays.stream(posTerms) + .map(SpanTermQuery::new) + .toArray(SpanTermQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } + } + return spanQuery.build(); } } diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java new file mode 100644 index 0000000000000..7f0c89f9df499 --- /dev/null +++ 
b/server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java @@ -0,0 +1,393 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.support.QueryParsers; +import org.elasticsearch.index.search.MatchQuery; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.index.query.MatchQueryBuilder.FUZZY_REWRITE_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.FUZZY_TRANSPOSITIONS_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.MAX_EXPANSIONS_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.OPERATOR_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.PREFIX_LENGTH_FIELD; + +/** + * The boolean prefix query analyzes the input text and creates a boolean query containing a Term query for each term, except + * for the last term, which is used to create a prefix query + */ +public class MatchBoolPrefixQueryBuilder extends AbstractQueryBuilder { + + public static final String NAME = "match_bool_prefix"; + + private static final Operator DEFAULT_OPERATOR = Operator.OR; + + private final String fieldName; + + private final Object value; + + private String analyzer; + + private Operator operator = DEFAULT_OPERATOR; + + private String minimumShouldMatch; + + private Fuzziness fuzziness; + + private int prefixLength = FuzzyQuery.defaultPrefixLength; + + private int maxExpansions = FuzzyQuery.defaultMaxExpansions; + + private boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions; + + private String fuzzyRewrite; + + public MatchBoolPrefixQueryBuilder(String fieldName, Object value) { + if (Strings.isEmpty(fieldName)) { + throw new IllegalArgumentException("[" + NAME + "] requires fieldName"); + } + if (value == null) { + throw new IllegalArgumentException("[" + NAME + "] requires query value"); + } + this.fieldName = fieldName; + this.value = value; + } + + public MatchBoolPrefixQueryBuilder(StreamInput in) throws IOException { + super(in); + fieldName = in.readString(); + value = in.readGenericValue(); + analyzer = in.readOptionalString(); + operator = 
Operator.readFromStream(in); + minimumShouldMatch = in.readOptionalString(); + fuzziness = in.readOptionalWriteable(Fuzziness::new); + prefixLength = in.readVInt(); + maxExpansions = in.readVInt(); + fuzzyTranspositions = in.readBoolean(); + fuzzyRewrite = in.readOptionalString(); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeString(fieldName); + out.writeGenericValue(value); + out.writeOptionalString(analyzer); + operator.writeTo(out); + out.writeOptionalString(minimumShouldMatch); + out.writeOptionalWriteable(fuzziness); + out.writeVInt(prefixLength); + out.writeVInt(maxExpansions); + out.writeBoolean(fuzzyTranspositions); + out.writeOptionalString(fuzzyRewrite); + } + + /** Returns the field name used in this query. */ + public String fieldName() { + return this.fieldName; + } + + /** Returns the value used in this query. */ + public Object value() { + return this.value; + } + + /** Get the analyzer to use, if previously set, otherwise {@code null} */ + public String analyzer() { + return this.analyzer; + } + + /** + * Explicitly set the analyzer to use. Defaults to the explicit mapping + * config for the field, or, if not set, the default search analyzer. + */ + public MatchBoolPrefixQueryBuilder analyzer(String analyzer) { + this.analyzer = analyzer; + return this; + } + + /** Sets the operator to use when using a boolean query. Defaults to {@code OR}. */ + public MatchBoolPrefixQueryBuilder operator(Operator operator) { + if (operator == null) { + throw new IllegalArgumentException("[" + NAME + "] requires operator to be non-null"); + } + this.operator = operator; + return this; + } + + /** Returns the operator to use in a boolean query. */ + public Operator operator() { + return this.operator; + } + + /** Sets optional minimumShouldMatch value to apply to the query. */ + public MatchBoolPrefixQueryBuilder minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + return this; + } + + /** Gets the minimumShouldMatch value. */ + public String minimumShouldMatch() { + return this.minimumShouldMatch; + } + + /** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */ + public MatchBoolPrefixQueryBuilder fuzziness(Object fuzziness) { + this.fuzziness = Fuzziness.build(fuzziness); + return this; + } + + /** Gets the fuzziness used when evaluated to a fuzzy query type. */ + public Fuzziness fuzziness() { + return this.fuzziness; + } + + /** + * Sets the length of the common (non-fuzzy) prefix for fuzzy match queries + * @param prefixLength non-negative length of prefix + * @throws IllegalArgumentException in case the prefix is negative + */ + public MatchBoolPrefixQueryBuilder prefixLength(int prefixLength) { + if (prefixLength < 0) { + throw new IllegalArgumentException("[" + NAME + "] requires prefix length to be non-negative."); + } + this.prefixLength = prefixLength; + return this; + } + + /** + * Gets the length of the common (non-fuzzy) prefix for fuzzy match queries + */ + public int prefixLength() { + return this.prefixLength; + } + + /** + * When using a fuzzy or prefix type query, the number of term expansions to use. 
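+ * Defaults to {@code FuzzyQuery.defaultMaxExpansions} (50) when not set explicitly.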
+ */ + public MatchBoolPrefixQueryBuilder maxExpansions(int maxExpansions) { + if (maxExpansions <= 0 ) { + throw new IllegalArgumentException("[" + NAME + "] requires maxExpansions to be positive."); + } + this.maxExpansions = maxExpansions; + return this; + } + + /** + * Get the (optional) number of term expansions when using fuzzy or prefix type query. + */ + public int maxExpansions() { + return this.maxExpansions; + } + + /** + * Sets whether transpositions are supported in fuzzy queries.
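+ * Defaults to {@code FuzzyQuery.defaultTranspositions} ({@code true}).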

+ * The default metric used by fuzzy queries to determine a match is the Damerau-Levenshtein + * distance formula which supports transpositions. Setting transposition to false will + * switch to classic Levenshtein distance.
+ * If not set, Damerau-Levenshtein distance metric will be used. + */ + public MatchBoolPrefixQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + this.fuzzyTranspositions = fuzzyTranspositions; + return this; + } + + /** Gets the fuzzy query transposition setting. */ + public boolean fuzzyTranspositions() { + return this.fuzzyTranspositions; + } + + /** Sets the fuzzy_rewrite parameter controlling how the fuzzy query will get rewritten */ + public MatchBoolPrefixQueryBuilder fuzzyRewrite(String fuzzyRewrite) { + this.fuzzyRewrite = fuzzyRewrite; + return this; + } + + /** + * Get the fuzzy_rewrite parameter + * @see #fuzzyRewrite(String) + */ + public String fuzzyRewrite() { + return this.fuzzyRewrite; + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.startObject(fieldName); + builder.field(MatchQueryBuilder.QUERY_FIELD.getPreferredName(), value); + if (analyzer != null) { + builder.field(MatchQueryBuilder.ANALYZER_FIELD.getPreferredName(), analyzer); + } + builder.field(OPERATOR_FIELD.getPreferredName(), operator.toString()); + if (minimumShouldMatch != null) { + builder.field(MatchQueryBuilder.MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatch); + } + if (fuzziness != null) { + fuzziness.toXContent(builder, params); + } + builder.field(PREFIX_LENGTH_FIELD.getPreferredName(), prefixLength); + builder.field(MAX_EXPANSIONS_FIELD.getPreferredName(), maxExpansions); + builder.field(FUZZY_TRANSPOSITIONS_FIELD.getPreferredName(), fuzzyTranspositions); + if (fuzzyRewrite != null) { + builder.field(FUZZY_REWRITE_FIELD.getPreferredName(), fuzzyRewrite); + } + printBoostAndQueryName(builder); + builder.endObject(); + builder.endObject(); + } + + public static MatchBoolPrefixQueryBuilder fromXContent(XContentParser parser) throws IOException { + String fieldName = null; + Object value = null; + float boost = AbstractQueryBuilder.DEFAULT_BOOST; + String analyzer = null; + Operator operator = DEFAULT_OPERATOR; + String minimumShouldMatch = null; + Fuzziness fuzziness = null; + int prefixLength = FuzzyQuery.defaultPrefixLength; + int maxExpansion = FuzzyQuery.defaultMaxExpansions; + boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions; + String fuzzyRewrite = null; + String queryName = null; + XContentParser.Token token; + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + throwParsingExceptionOnMultipleFields(NAME, parser.getTokenLocation(), fieldName, currentFieldName); + fieldName = currentFieldName; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if (MatchQueryBuilder.QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + value = parser.objectText(); + } else if (MatchQueryBuilder.ANALYZER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + analyzer = parser.text(); + } else if (OPERATOR_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + operator = Operator.fromString(parser.text()); + } else if (MatchQueryBuilder.MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + minimumShouldMatch = parser.textOrNull(); + } else if 
(Fuzziness.FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + fuzziness = Fuzziness.parse(parser); + } else if (PREFIX_LENGTH_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + prefixLength = parser.intValue(); + } else if (MAX_EXPANSIONS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + maxExpansion = parser.intValue(); + } else if (FUZZY_TRANSPOSITIONS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + fuzzyTranspositions = parser.booleanValue(); + } else if (FUZZY_REWRITE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + fuzzyRewrite = parser.textOrNull(); + } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + boost = parser.floatValue(); + } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + queryName = parser.text(); + } else { + throw new ParsingException(parser.getTokenLocation(), + "[" + NAME + "] query does not support [" + currentFieldName + "]"); + } + } else { + throw new ParsingException(parser.getTokenLocation(), + "[" + NAME + "] unknown token [" + token + "] after [" + currentFieldName + "]"); + } + } + } else { + throwParsingExceptionOnMultipleFields(NAME, parser.getTokenLocation(), fieldName, parser.currentName()); + fieldName = parser.currentName(); + value = parser.objectText(); + } + } + + MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value); + queryBuilder.analyzer(analyzer); + queryBuilder.operator(operator); + queryBuilder.minimumShouldMatch(minimumShouldMatch); + queryBuilder.boost(boost); + queryBuilder.queryName(queryName); + if (fuzziness != null) { + queryBuilder.fuzziness(fuzziness); + } + queryBuilder.prefixLength(prefixLength); + queryBuilder.maxExpansions(maxExpansion); + queryBuilder.fuzzyTranspositions(fuzzyTranspositions); + queryBuilder.fuzzyRewrite(fuzzyRewrite); + return queryBuilder; + } + + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + if (analyzer != null && context.getIndexAnalyzers().get(analyzer) == null) { + throw new QueryShardException(context, "[" + NAME + "] analyzer [" + analyzer + "] not found"); + } + + final MatchQuery matchQuery = new MatchQuery(context); + if (analyzer != null) { + matchQuery.setAnalyzer(analyzer); + } + matchQuery.setOccur(operator.toBooleanClauseOccur()); + matchQuery.setFuzziness(fuzziness); + matchQuery.setFuzzyPrefixLength(prefixLength); + matchQuery.setMaxExpansions(maxExpansions); + matchQuery.setTranspositions(fuzzyTranspositions); + matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(fuzzyRewrite, null, LoggingDeprecationHandler.INSTANCE)); + + final Query query = matchQuery.parse(MatchQuery.Type.BOOLEAN_PREFIX, fieldName, value); + return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + } + + @Override + protected boolean doEquals(MatchBoolPrefixQueryBuilder other) { + return Objects.equals(fieldName, other.fieldName) && + Objects.equals(value, other.value) && + Objects.equals(analyzer, other.analyzer) && + Objects.equals(operator, other.operator) && + Objects.equals(minimumShouldMatch, other.minimumShouldMatch) && + Objects.equals(fuzziness, other.fuzziness) && + Objects.equals(prefixLength, other.prefixLength) && + Objects.equals(maxExpansions, other.maxExpansions) && + Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions) && + Objects.equals(fuzzyRewrite, other.fuzzyRewrite); + } + + @Override + 
protected int doHashCode() { + return Objects.hash(fieldName, value, analyzer, operator, minimumShouldMatch, fuzziness, prefixLength, maxExpansions, + fuzzyTranspositions, fuzzyRewrite); + } + + @Override + public String getWriteableName() { + return NAME; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 9f2c85106de08..ec8392c90c8de 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -129,7 +129,12 @@ public enum Type implements Writeable { * Uses the best matching phrase-prefix field as main score and uses * a tie-breaker to adjust the score based on remaining field matches */ - PHRASE_PREFIX(MatchQuery.Type.PHRASE_PREFIX, 0.0f, new ParseField("phrase_prefix")); + PHRASE_PREFIX(MatchQuery.Type.PHRASE_PREFIX, 0.0f, new ParseField("phrase_prefix")), + + /** + * Uses the sum of the matching boolean fields to score the query + */ + BOOL_PREFIX(MatchQuery.Type.BOOLEAN_PREFIX, 1.0f, new ParseField("bool_prefix")); private MatchQuery.Type matchQueryType; private final float tieBreaker; @@ -707,6 +712,16 @@ public static MultiMatchQueryBuilder fromXContent(XContentParser parser) throws "Fuzziness not allowed for type [" + type.parseField.getPreferredName() + "]"); } + if (slop != DEFAULT_PHRASE_SLOP && type == Type.BOOL_PREFIX) { + throw new ParsingException(parser.getTokenLocation(), + "[" + SLOP_FIELD.getPreferredName() + "] not allowed for type [" + type.parseField.getPreferredName() + "]"); + } + + if (cutoffFrequency != null && type == Type.BOOL_PREFIX) { + throw new ParsingException(parser.getTokenLocation(), + "[" + CUTOFF_FREQUENCY_FIELD.getPreferredName() + "] not allowed for type [" + type.parseField.getPreferredName() + "]"); + } + MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder(value) .fields(fieldsBoosts) .type(type) diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index ad4b267eef643..da7273aa66303 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; @@ -51,7 +52,9 @@ import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.support.QueryParsers; @@ -78,7 +81,11 @@ public enum Type implements Writeable { /** * The text is analyzed and used in a phrase query, with the last term acting as a prefix. 
*/ - PHRASE_PREFIX(2); + PHRASE_PREFIX(2), + /** + * The text is analyzed, terms are added to a boolean query with the last term acting as a prefix. + */ + BOOLEAN_PREFIX(3); private final int ordinal; @@ -244,11 +251,18 @@ public Query parse(Type type, String fieldName, Object value) throws IOException /* * If a keyword analyzer is used, we know that further analysis isn't - * needed and can immediately return a term query. + * needed and can immediately return a term query. If the query is a bool + * prefix query and the field type supports prefix queries, we return + * a prefix query instead */ - if (analyzer == Lucene.KEYWORD_ANALYZER - && type != Type.PHRASE_PREFIX) { - return builder.newTermQuery(new Term(fieldName, value.toString())); + if (analyzer == Lucene.KEYWORD_ANALYZER && type != Type.PHRASE_PREFIX) { + final Term term = new Term(fieldName, value.toString()); + if ((fieldType instanceof TextFieldMapper.TextFieldType || fieldType instanceof KeywordFieldMapper.KeywordFieldType) + && type == Type.BOOLEAN_PREFIX) { + return builder.newPrefixQuery(fieldName, term); + } else { + return builder.newTermQuery(term); + } } return parseInternal(type, fieldName, builder, value); @@ -265,6 +279,10 @@ protected final Query parseInternal(Type type, String fieldName, MatchQueryBuild } break; + case BOOLEAN_PREFIX: + query = builder.createBooleanPrefixQuery(fieldName, value.toString(), occur); + break; + case PHRASE: query = builder.createPhraseQuery(fieldName, value.toString(), phraseSlop); break; @@ -354,10 +372,28 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator return createQuery(field, queryText, type, operator, slop); } - public Query createPhrasePrefixQuery(String field, String queryText, int slop) { + /** + * Creates a phrase prefix query from the query text. + * + * @param field field name + * @param queryText text to be passed to the analyzer + * @return {@code PrefixQuery}, {@code MultiPhrasePrefixQuery}, based on the analysis of {@code queryText} + */ + protected Query createPhrasePrefixQuery(String field, String queryText, int slop) { return createQuery(field, queryText, Type.PHRASE_PREFIX, occur, slop); } + /** + * Creates a boolean prefix query from the query text. 
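+ * For example (mirroring the builder tests later in this patch), analyzing {@code "foo bar baz"} yields term queries for {@code foo} and {@code bar} plus a prefix query for {@code baz}, each added as a clause with the given {@code occur}; a single analyzed term is returned directly as a prefix query.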
+ * + * @param field field name + * @param queryText text to be passed to the analyzer + * @return {@code PrefixQuery}, {@code BooleanQuery}, based on the analysis of {@code queryText} + */ + protected Query createBooleanPrefixQuery(String field, String queryText, BooleanClause.Occur occur) { + return createQuery(field, queryText, Type.BOOLEAN_PREFIX, occur, 0); + } + private Query createFieldQuery(TokenStream source, Type type, BooleanClause.Occur operator, String field, int phraseSlop) { assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; @@ -405,14 +441,14 @@ private Query createFieldQuery(TokenStream source, Type type, BooleanClause.Occu if (type == Type.PHRASE_PREFIX) { return analyzePhrasePrefix(field, stream, phraseSlop, positionCount); } else { - return analyzeTerm(field, stream); + return analyzeTerm(field, stream, type == Type.BOOLEAN_PREFIX); } } else if (isGraph) { // graph if (type == Type.PHRASE || type == Type.PHRASE_PREFIX) { return analyzeGraphPhrase(stream, field, type, phraseSlop); } else { - return analyzeGraphBoolean(field, stream, operator); + return analyzeGraphBoolean(field, stream, operator, type == Type.BOOLEAN_PREFIX); } } else if (type == Type.PHRASE && positionCount > 1) { // phrase @@ -433,7 +469,7 @@ private Query createFieldQuery(TokenStream source, Type type, BooleanClause.Occu return analyzeBoolean(field, stream); } else { // complex case: multiple positions - return analyzeMultiBoolean(field, stream, operator); + return analyzeMultiBoolean(field, stream, operator, type == Type.BOOLEAN_PREFIX); } } } catch (IOException e) { @@ -462,13 +498,13 @@ private Query createQuery(String field, String queryText, Type type, BooleanClau } } - private SpanQuery newSpanQuery(Term[] terms, boolean prefix) { + private SpanQuery newSpanQuery(Term[] terms, boolean isPrefix) { if (terms.length == 1) { - return prefix ? fieldType.spanPrefixQuery(terms[0].text(), spanRewriteMethod, context) : new SpanTermQuery(terms[0]); + return isPrefix ? fieldType.spanPrefixQuery(terms[0].text(), spanRewriteMethod, context) : new SpanTermQuery(terms[0]); } SpanQuery[] spanQueries = new SpanQuery[terms.length]; for (int i = 0; i < terms.length; i++) { - spanQueries[i] = prefix ? new SpanTermQuery(terms[i]) : + spanQueries[i] = isPrefix ? new SpanTermQuery(terms[i]) : fieldType.spanPrefixQuery(terms[i].text(), spanRewriteMethod, context); } return new SpanOrQuery(spanQueries); @@ -479,7 +515,7 @@ protected SpanQuery createSpanQuery(TokenStream in, String field) throws IOExcep return createSpanQuery(in, field, false); } - private SpanQuery createSpanQuery(TokenStream in, String field, boolean prefix) throws IOException { + private SpanQuery createSpanQuery(TokenStream in, String field, boolean isPrefix) throws IOException { TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posIncAtt = in.getAttribute(PositionIncrementAttribute.class); if (termAtt == null) { @@ -498,7 +534,7 @@ private SpanQuery createSpanQuery(TokenStream in, String field, boolean prefix) lastTerm = new Term(field, termAtt.getBytesRef()); } if (lastTerm != null) { - SpanQuery spanQuery = prefix ? + SpanQuery spanQuery = isPrefix ? fieldType.spanPrefixQuery(lastTerm.text(), spanRewriteMethod, context) : new SpanTermQuery(lastTerm); builder.addClause(spanQuery); } @@ -537,6 +573,74 @@ protected Query newTermQuery(Term term) { } } + /** + * Builds a new prefix query instance. 
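+ * Note that {@code MultiMatchQuery} overrides this to build a disjunction of per-field prefix queries over the blended fields.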
+ */ + protected Query newPrefixQuery(String field, Term term) { + try { + return fieldType.prefixQuery(term.text(), null, context); + } catch (RuntimeException e) { + if (lenient) { + return newLenientFieldQuery(field, e); + } + throw e; + } + } + + private Query analyzeTerm(String field, TokenStream stream, boolean isPrefix) throws IOException { + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + + stream.reset(); + if (!stream.incrementToken()) { + throw new AssertionError(); + } + final Term term = new Term(field, termAtt.getBytesRef()); + int lastOffset = offsetAtt.endOffset(); + stream.end(); + return isPrefix && lastOffset == offsetAtt.endOffset() ? newPrefixQuery(field, term) : newTermQuery(term); + } + + private void add(BooleanQuery.Builder q, String field, List current, BooleanClause.Occur operator, boolean isPrefix) { + if (current.isEmpty()) { + return; + } + if (current.size() == 1) { + if (isPrefix) { + q.add(newPrefixQuery(field, current.get(0)), operator); + } else { + q.add(newTermQuery(current.get(0)), operator); + } + } else { + // We don't apply prefix on synonyms + q.add(newSynonymQuery(current.toArray(new Term[current.size()])), operator); + } + } + + private Query analyzeMultiBoolean(String field, TokenStream stream, + BooleanClause.Occur operator, boolean isPrefix) throws IOException { + BooleanQuery.Builder q = newBooleanQuery(); + List currentQuery = new ArrayList<>(); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + + stream.reset(); + int lastOffset = 0; + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() != 0) { + add(q, field, currentQuery, operator, false); + currentQuery.clear(); + } + currentQuery.add(new Term(field, termAtt.getBytesRef())); + lastOffset = offsetAtt.endOffset(); + } + stream.end(); + add(q, field, currentQuery, operator, isPrefix && lastOffset == offsetAtt.endOffset()); + return q.build(); + } + @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { @@ -577,6 +681,62 @@ private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, in } } + private Query analyzeGraphBoolean(String field, TokenStream source, + BooleanClause.Occur operator, boolean isPrefix) throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + final Query queryPos; + boolean usePrefix = isPrefix && end == -1; + if (graph.hasSidePath(start)) { + final Iterator it = graph.getFiniteStrings(start, end); + Iterator queries = new Iterator() { + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public Query next() { + TokenStream ts = it.next(); + final Type type; + if (getAutoGenerateMultiTermSynonymsPhraseQuery()) { + type = usePrefix + ? 
Type.PHRASE_PREFIX + : Type.PHRASE; + } else { + type = Type.BOOLEAN; + } + return createFieldQuery(ts, type, BooleanClause.Occur.MUST, field, 0); + } + }; + queryPos = newGraphSynonymQuery(queries); + } else { + Term[] terms = graph.getTerms(field, start); + assert terms.length > 0; + if (terms.length == 1) { + queryPos = usePrefix ? newPrefixQuery(field, terms[0]) : newTermQuery(terms[0]); + } else { + // We don't apply prefix on synonyms + queryPos = newSynonymQuery(terms); + } + } + if (queryPos != null) { + builder.add(queryPos, operator); + } + } + return builder.build(); + } + private Query analyzeGraphPhrase(TokenStream source, String field, Type type, int slop) throws IOException { assert type == Type.PHRASE_PREFIX || type == Type.PHRASE; @@ -615,13 +775,13 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in } lastState = end; final SpanQuery queryPos; - boolean endPrefix = end == -1 && type == Type.PHRASE_PREFIX; + boolean usePrefix = end == -1 && type == Type.PHRASE_PREFIX; if (graph.hasSidePath(start)) { List queries = new ArrayList<>(); Iterator it = graph.getFiniteStrings(start, end); while (it.hasNext()) { TokenStream ts = it.next(); - SpanQuery q = createSpanQuery(ts, field, endPrefix); + SpanQuery q = createSpanQuery(ts, field, usePrefix); if (q != null) { if (queries.size() >= maxClauseCount) { throw new BooleanQuery.TooManyClauses(); @@ -640,7 +800,7 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in if (terms.length >= maxClauseCount) { throw new BooleanQuery.TooManyClauses(); } - queryPos = newSpanQuery(terms, endPrefix); + queryPos = newSpanQuery(terms, usePrefix); } if (queryPos != null) { diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 88fd5293392b5..667d3a3823db8 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -66,6 +66,7 @@ public Query parse(MultiMatchQueryBuilder.Type type, Map fieldNam case PHRASE_PREFIX: case BEST_FIELDS: case MOST_FIELDS: + case BOOL_PREFIX: queries = buildFieldQueries(type, fieldNames, value, minimumShouldMatch); break; @@ -179,10 +180,23 @@ protected Query newSynonymQuery(Term[] terms) { } @Override - public Query newTermQuery(Term term) { + protected Query newTermQuery(Term term) { return blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, lenient, blendedFields); } + @Override + protected Query newPrefixQuery(String field, Term term) { + List disjunctions = new ArrayList<>(); + for (FieldAndBoost fieldType : blendedFields) { + Query query = fieldType.fieldType.prefixQuery(term.text(), null, context); + if (fieldType.boost != 1f) { + query = new BoostQuery(query, fieldType.boost); + } + disjunctions.add(query); + } + return new DisjunctionMaxQuery(disjunctions, tieBreaker); + } + @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index bd1bbb98281cc..8a4f4981605a5 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.xcontent.ParseFieldRegistry; import 
org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; import org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; @@ -826,6 +827,8 @@ private void registerQueryParsers(List plugins) { registerQuery(new QuerySpec<>(IntervalQueryBuilder.NAME, IntervalQueryBuilder::new, IntervalQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(DistanceFeatureQueryBuilder.NAME, DistanceFeatureQueryBuilder::new, DistanceFeatureQueryBuilder::fromXContent)); + registerQuery( + new QuerySpec<>(MatchBoolPrefixQueryBuilder.NAME, MatchBoolPrefixQueryBuilder::new, MatchBoolPrefixQueryBuilder::fromXContent)); if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) { registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent)); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java new file mode 100644 index 0000000000000..b3a3a2512a5ff --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java @@ -0,0 +1,284 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.MockSynonymAnalyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.index.search.MatchQuery; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.AbstractQueryTestCase; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.Arrays.asList; +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.everyItem; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.equalToIgnoringCase; +import static org.hamcrest.Matchers.hasProperty; +import static org.hamcrest.Matchers.hasSize; + +public class MatchBoolPrefixQueryBuilderTests extends AbstractQueryTestCase { + + @Override + protected MatchBoolPrefixQueryBuilder doCreateTestQueryBuilder() { + final String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME); + final Object value = IntStream.rangeClosed(0, randomIntBetween(0, 3)) + .mapToObj(i -> randomAlphaOfLengthBetween(1, 10) + " ") + .collect(Collectors.joining()) + .trim(); + + final MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value); + + if (randomBoolean() && isTextField(fieldName)) { + queryBuilder.analyzer(randomFrom("simple", "keyword", "whitespace")); + } + + if (randomBoolean()) { + queryBuilder.operator(randomFrom(Operator.values())); + } + + if (randomBoolean()) { + queryBuilder.minimumShouldMatch(randomMinimumShouldMatch()); + } + + if (randomBoolean()) { + queryBuilder.fuzziness(randomFuzziness(fieldName)); + } + + if (randomBoolean()) { + queryBuilder.prefixLength(randomIntBetween(0, 10)); + } + + if (randomBoolean()) { + queryBuilder.maxExpansions(randomIntBetween(1, 1000)); + } + + if (randomBoolean()) { + queryBuilder.fuzzyTranspositions(randomBoolean()); + } + + if (randomBoolean()) { + queryBuilder.fuzzyRewrite(getRandomRewriteMethod()); + } + + return queryBuilder; + } + + @Override + protected void doAssertLuceneQuery(MatchBoolPrefixQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { + assertThat(query, notNullValue()); + assertThat(query, anyOf(instanceOf(BooleanQuery.class), instanceOf(PrefixQuery.class))); + + if (query instanceof PrefixQuery) { + final PrefixQuery prefixQuery = (PrefixQuery) query; + assertThat(prefixQuery.getPrefix().text(), equalToIgnoringCase((String) queryBuilder.value())); + } else { + assertThat(query, instanceOf(BooleanQuery.class)); + final BooleanQuery booleanQuery = (BooleanQuery) query; + // all queries except the last should be TermQuery or SynonymQuery + final Set allQueriesExceptLast = IntStream.range(0, booleanQuery.clauses().size() - 1) + .mapToObj(booleanQuery.clauses()::get) + .map(BooleanClause::getQuery) + .collect(Collectors.toSet()); + 
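// if the randomized builder sets fuzziness, these clauses are FuzzyQuery rather than TermQuery, which the checks below account for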
+            assertThat(allQueriesExceptLast, anyOf(
+                everyItem(instanceOf(TermQuery.class)),
+                everyItem(instanceOf(SynonymQuery.class)),
+                everyItem(instanceOf(FuzzyQuery.class))
+            ));
+
+            if (allQueriesExceptLast.stream().anyMatch(subQuery -> subQuery instanceof FuzzyQuery)) {
+                assertThat(queryBuilder.fuzziness(), notNullValue());
+            }
+            allQueriesExceptLast.stream().filter(subQuery -> subQuery instanceof FuzzyQuery).forEach(subQuery -> {
+                final FuzzyQuery fuzzyQuery = (FuzzyQuery) subQuery;
+                assertThat(fuzzyQuery.getPrefixLength(), equalTo(queryBuilder.prefixLength()));
+                assertThat(fuzzyQuery.getTranspositions(), equalTo(queryBuilder.fuzzyTranspositions()));
+            });
+
+            // the last query should be PrefixQuery
+            final Query shouldBePrefixQuery = booleanQuery.clauses().get(booleanQuery.clauses().size() - 1).getQuery();
+            assertThat(shouldBePrefixQuery, instanceOf(PrefixQuery.class));
+
+            if (queryBuilder.minimumShouldMatch() != null) {
+                final int optionalClauses =
+                    (int) booleanQuery.clauses().stream().filter(clause -> clause.getOccur() == BooleanClause.Occur.SHOULD).count();
+                final int expected = Queries.calculateMinShouldMatch(optionalClauses, queryBuilder.minimumShouldMatch());
+                assertThat(booleanQuery.getMinimumNumberShouldMatch(), equalTo(expected));
+            }
+        }
+    }
+
+    public void testIllegalValues() {
+        {
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new MatchBoolPrefixQueryBuilder(null, "value"));
+            assertEquals("[match_bool_prefix] requires fieldName", e.getMessage());
+        }
+
+        {
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new MatchBoolPrefixQueryBuilder("name", null));
+            assertEquals("[match_bool_prefix] requires query value", e.getMessage());
+        }
+
+        {
+            final MatchBoolPrefixQueryBuilder builder = new MatchBoolPrefixQueryBuilder("name", "value");
+            builder.analyzer("bogusAnalyzer");
+            QueryShardException e = expectThrows(QueryShardException.class, () -> builder.toQuery(createShardContext()));
+            assertThat(e.getMessage(), containsString("analyzer [bogusAnalyzer] not found"));
+        }
+    }
+
+    public void testFromSimpleJson() throws IOException {
+        final String simple =
+            "{" +
+                "\"match_bool_prefix\": {" +
+                    "\"fieldName\": \"fieldValue\"" +
+                "}" +
+            "}";
+        final String expected =
+            "{" +
+                "\"match_bool_prefix\": {" +
+                    "\"fieldName\": {" +
+                        "\"query\": \"fieldValue\"," +
+                        "\"operator\": \"OR\"," +
+                        "\"prefix_length\": 0," +
+                        "\"max_expansions\": 50," +
+                        "\"fuzzy_transpositions\": true," +
+                        "\"boost\": 1.0" +
+                    "}" +
+                "}" +
+            "}";
+
+        final MatchBoolPrefixQueryBuilder builder = (MatchBoolPrefixQueryBuilder) parseQuery(simple);
+        checkGeneratedJson(expected, builder);
+    }
+
+    public void testFromJson() throws IOException {
+        final String expected =
+            "{" +
+                "\"match_bool_prefix\": {" +
+                    "\"fieldName\": {" +
+                        "\"query\": \"fieldValue\"," +
+                        "\"analyzer\": \"simple\"," +
+                        "\"operator\": \"AND\"," +
+                        "\"minimum_should_match\": \"2\"," +
+                        "\"fuzziness\": \"1\"," +
+                        "\"prefix_length\": 1," +
+                        "\"max_expansions\": 10," +
+                        "\"fuzzy_transpositions\": false," +
+                        "\"fuzzy_rewrite\": \"constant_score\"," +
+                        "\"boost\": 2.0" +
+                    "}" +
+                "}" +
+            "}";
+
+        final MatchBoolPrefixQueryBuilder builder = (MatchBoolPrefixQueryBuilder) parseQuery(expected);
+        checkGeneratedJson(expected, builder);
+    }
+
+    public void testParseFailsWithMultipleFields() {
+        {
+            final String json =
+                "{" +
+                    "\"match_bool_prefix\" : {" +
+                        "\"field_name_1\" : {" +
+                            "\"query\" : \"foo\"" +
+                        "}," +
+                        "\"field_name_2\" : {" +
+                            "\"query\" : \"foo\"\n" +
+                        "}" +
+                    "}" +
+                "}";
+            final ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(json));
+            assertEquals(
+                "[match_bool_prefix] query doesn't support multiple fields, found [field_name_1] and [field_name_2]", e.getMessage());
+        }
+
+        {
+            final String simpleJson =
+                "{" +
+                    "\"match_bool_prefix\" : {" +
+                        "\"field_name_1\" : \"foo\"," +
+                        "\"field_name_2\" : \"foo\"" +
+                    "}" +
+                "}";
+            final ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(simpleJson));
+            assertEquals(
+                "[match_bool_prefix] query doesn't support multiple fields, found [field_name_1] and [field_name_2]", e.getMessage());
+        }
+    }
+
+    public void testAnalysis() throws Exception {
+        final MatchBoolPrefixQueryBuilder builder = new MatchBoolPrefixQueryBuilder(STRING_FIELD_NAME, "foo bar baz");
+        final Query query = builder.toQuery(createShardContext());
+
+        assertBooleanQuery(query, asList(
+            new TermQuery(new Term(STRING_FIELD_NAME, "foo")),
+            new TermQuery(new Term(STRING_FIELD_NAME, "bar")),
+            new PrefixQuery(new Term(STRING_FIELD_NAME, "baz"))
+        ));
+    }
+
+    public void testAnalysisSynonym() throws Exception {
+        final MatchQuery matchQuery = new MatchQuery(createShardContext());
+        matchQuery.setAnalyzer(new MockSynonymAnalyzer());
+        final Query query = matchQuery.parse(MatchQuery.Type.BOOLEAN_PREFIX, STRING_FIELD_NAME, "fox dogs red");
+
+        assertBooleanQuery(query, asList(
+            new TermQuery(new Term(STRING_FIELD_NAME, "fox")),
+            new SynonymQuery(new Term(STRING_FIELD_NAME, "dogs"), new Term(STRING_FIELD_NAME, "dog")),
+            new PrefixQuery(new Term(STRING_FIELD_NAME, "red"))
+        ));
+    }
+
+    public void testAnalysisSingleTerm() throws Exception {
+        final MatchBoolPrefixQueryBuilder builder = new MatchBoolPrefixQueryBuilder(STRING_FIELD_NAME, "foo");
+        final Query query = builder.toQuery(createShardContext());
+        assertThat(query, equalTo(new PrefixQuery(new Term(STRING_FIELD_NAME, "foo"))));
+    }
+
+    private static void assertBooleanQuery(Query actual, List<Query> expectedClauseQueries) {
+        assertThat(actual, instanceOf(BooleanQuery.class));
+        final BooleanQuery actualBooleanQuery = (BooleanQuery) actual;
+        assertThat(actualBooleanQuery.clauses(), hasSize(expectedClauseQueries.size()));
+        assertThat(actualBooleanQuery.clauses(), everyItem(hasProperty("occur", equalTo(BooleanClause.Occur.SHOULD))));
+
+        for (int i = 0; i < actualBooleanQuery.clauses().size(); i++) {
+            final Query clauseQuery = actualBooleanQuery.clauses().get(i).getQuery();
+            assertThat(clauseQuery, equalTo(expectedClauseQueries.get(i)));
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
index c258cce6c7c50..e9f2b447da133
--- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedBinaryTokenStream;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.ExtendedCommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
@@ -28,6 +29,7 @@
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PointRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
@@ -394,6 +396,76 @@ public void testLenientPhraseQuery() throws Exception {
             containsString("field:[string_no_pos] was indexed without position data; cannot run PhraseQuery"));
     }
 
+    public void testAutoGenerateSynonymsPhraseQuery() throws Exception {
+        final MatchQuery matchQuery = new MatchQuery(createShardContext());
+        matchQuery.setAnalyzer(new MockSynonymAnalyzer());
+
+        {
+            matchQuery.setAutoGenerateSynonymsPhraseQuery(false);
+            final Query query = matchQuery.parse(Type.BOOLEAN, STRING_FIELD_NAME, "guinea pig");
+            final Query expectedQuery = new BooleanQuery.Builder()
+                .add(new BooleanQuery.Builder()
+                        .add(new BooleanQuery.Builder()
+                                .add(new TermQuery(new Term(STRING_FIELD_NAME, "guinea")), BooleanClause.Occur.MUST)
+                                .add(new TermQuery(new Term(STRING_FIELD_NAME, "pig")), BooleanClause.Occur.MUST)
+                                .build(),
+                            BooleanClause.Occur.SHOULD)
+                        .add(new TermQuery(new Term(STRING_FIELD_NAME, "cavy")), BooleanClause.Occur.SHOULD)
+                        .build(),
+                    BooleanClause.Occur.SHOULD).build();
+            assertThat(query, equalTo(expectedQuery));
+        }
+
+        {
+            matchQuery.setAutoGenerateSynonymsPhraseQuery(true);
+            final Query query = matchQuery.parse(Type.BOOLEAN, STRING_FIELD_NAME, "guinea pig");
+            final Query expectedQuery = new BooleanQuery.Builder()
+                .add(new BooleanQuery.Builder()
+                        .add(new PhraseQuery.Builder()
+                                .add(new Term(STRING_FIELD_NAME, "guinea"))
+                                .add(new Term(STRING_FIELD_NAME, "pig"))
+                                .build(),
+                            BooleanClause.Occur.SHOULD)
+                        .add(new TermQuery(new Term(STRING_FIELD_NAME, "cavy")), BooleanClause.Occur.SHOULD)
+                        .build(),
+                    BooleanClause.Occur.SHOULD).build();
+            assertThat(query, equalTo(expectedQuery));
+        }
+
+        {
+            matchQuery.setAutoGenerateSynonymsPhraseQuery(false);
+            final Query query = matchQuery.parse(Type.BOOLEAN_PREFIX, STRING_FIELD_NAME, "guinea pig");
+            final Query expectedQuery = new BooleanQuery.Builder()
+                .add(new BooleanQuery.Builder()
+                        .add(new BooleanQuery.Builder()
+                                .add(new TermQuery(new Term(STRING_FIELD_NAME, "guinea")), BooleanClause.Occur.MUST)
+                                .add(new TermQuery(new Term(STRING_FIELD_NAME, "pig")), BooleanClause.Occur.MUST)
+                                .build(),
+                            BooleanClause.Occur.SHOULD)
+                        .add(new TermQuery(new Term(STRING_FIELD_NAME, "cavy")), BooleanClause.Occur.SHOULD)
+                        .build(),
+                    BooleanClause.Occur.SHOULD).build();
+            assertThat(query, equalTo(expectedQuery));
+        }
+
+        {
+            matchQuery.setAutoGenerateSynonymsPhraseQuery(true);
+            final Query query = matchQuery.parse(Type.BOOLEAN_PREFIX, STRING_FIELD_NAME, "guinea pig");
+            final MultiPhrasePrefixQuery guineaPig = new MultiPhrasePrefixQuery(STRING_FIELD_NAME);
+            guineaPig.add(new Term(STRING_FIELD_NAME, "guinea"));
+            guineaPig.add(new Term(STRING_FIELD_NAME, "pig"));
+            final MultiPhrasePrefixQuery cavy = new MultiPhrasePrefixQuery(STRING_FIELD_NAME);
+            cavy.add(new Term(STRING_FIELD_NAME, "cavy"));
+            final Query expectedQuery = new BooleanQuery.Builder()
+                .add(new BooleanQuery.Builder()
+                        .add(guineaPig, BooleanClause.Occur.SHOULD)
+                        .add(cavy, BooleanClause.Occur.SHOULD)
+                        .build(),
+                    BooleanClause.Occur.SHOULD).build();
+            assertThat(query, equalTo(expectedQuery));
+        }
+    }
+
     public void testMaxBooleanClause() {
         MatchQuery query = new MatchQuery(createShardContext());
         query.setAnalyzer(new MockGraphAnalyzer(createGiantGraph(40)));
diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java
index 36ba370939b17..7ca722fc31139
--- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java
@@ -31,6 +31,7 @@
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PointRangeQuery;
+import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -52,10 +53,11 @@
 import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertBooleanSubQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertDisjunctionSubQuery;
+import static org.hamcrest.CoreMatchers.anyOf;
 import static org.hamcrest.CoreMatchers.containsString;
-import static org.hamcrest.CoreMatchers.either;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.hamcrest.collection.IsCollectionWithSize.hasSize;
 
 public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatchQueryBuilder> {
 
@@ -91,10 +93,11 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() {
         // sets other parameters of the multi match query
         if (randomBoolean()) {
-            if (fieldName.equals(STRING_FIELD_NAME)) {
+            if (fieldName.equals(STRING_FIELD_NAME) || fieldName.equals(STRING_ALIAS_FIELD_NAME) || fieldName.equals(STRING_FIELD_NAME_2)) {
                 query.type(randomFrom(MultiMatchQueryBuilder.Type.values()));
             } else {
-                query.type(randomValueOtherThan(MultiMatchQueryBuilder.Type.PHRASE_PREFIX,
+                query.type(randomValueOtherThanMany(
+                    (type) -> type == Type.PHRASE_PREFIX || type == Type.BOOL_PREFIX,
                     () -> randomFrom(MultiMatchQueryBuilder.Type.values())));
             }
         }
@@ -104,7 +107,7 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() {
         if (randomBoolean() && fieldName.equals(STRING_FIELD_NAME)) {
             query.analyzer(randomAnalyzer());
         }
-        if (randomBoolean()) {
+        if (randomBoolean() && query.type() != Type.BOOL_PREFIX) {
             query.slop(randomIntBetween(0, 5));
         }
         if (fieldName.equals(STRING_FIELD_NAME) && randomBoolean() &&
@@ -126,7 +129,7 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() {
         if (randomBoolean()) {
             query.tieBreaker(randomFloat());
         }
-        if (randomBoolean()) {
+        if (randomBoolean() && query.type() != Type.BOOL_PREFIX) {
             query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
         }
         if (randomBoolean()) {
@@ -158,12 +161,21 @@ protected Map<String, MultiMatchQueryBuilder> getAlternateVersions() {
     @Override
     protected void doAssertLuceneQuery(MultiMatchQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException {
         // we rely on integration tests for deeper checks here
-        assertThat(query, either(instanceOf(BoostQuery.class)).or(instanceOf(TermQuery.class))
-                .or(instanceOf(BooleanQuery.class)).or(instanceOf(DisjunctionMaxQuery.class))
-                .or(instanceOf(FuzzyQuery.class)).or(instanceOf(MultiPhrasePrefixQuery.class))
-                .or(instanceOf(MatchAllDocsQuery.class)).or(instanceOf(ExtendedCommonTermsQuery.class))
-                .or(instanceOf(MatchNoDocsQuery.class)).or(instanceOf(PhraseQuery.class))
-                .or(instanceOf(PointRangeQuery.class)).or(instanceOf(IndexOrDocValuesQuery.class)));
+        assertThat(query, anyOf(Arrays.asList(
+            instanceOf(BoostQuery.class),
+            instanceOf(TermQuery.class),
+            instanceOf(BooleanQuery.class),
+            instanceOf(DisjunctionMaxQuery.class),
+            instanceOf(FuzzyQuery.class),
+            instanceOf(MultiPhrasePrefixQuery.class),
+            instanceOf(MatchAllDocsQuery.class),
+            instanceOf(ExtendedCommonTermsQuery.class),
+            instanceOf(MatchNoDocsQuery.class),
+            instanceOf(PhraseQuery.class),
+            instanceOf(PointRangeQuery.class),
+            instanceOf(IndexOrDocValuesQuery.class),
+            instanceOf(PrefixQuery.class)
+        )));
     }
 
     public void testIllegaArguments() {
@@ -240,6 +252,51 @@ public void testToQueryFieldMissing() throws Exception {
             instanceOf(MatchNoDocsQuery.class));
     }
 
+    public void testToQueryBooleanPrefixSingleField() throws IOException {
+        final MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("foo bar", STRING_FIELD_NAME);
+        builder.type(Type.BOOL_PREFIX);
+        final Query query = builder.toQuery(createShardContext());
+        assertThat(query, instanceOf(BooleanQuery.class));
+        final BooleanQuery booleanQuery = (BooleanQuery) query;
+        assertThat(booleanQuery.clauses(), hasSize(2));
+        assertThat(assertBooleanSubQuery(booleanQuery, TermQuery.class, 0).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "foo")));
+        assertThat(assertBooleanSubQuery(booleanQuery, PrefixQuery.class, 1).getPrefix(), equalTo(new Term(STRING_FIELD_NAME, "bar")));
+    }
+
+    public void testToQueryBooleanPrefixMultipleFields() throws IOException {
+        {
+            final MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("foo bar", STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME);
+            builder.type(Type.BOOL_PREFIX);
+            final Query query = builder.toQuery(createShardContext());
+            assertThat(query, instanceOf(DisjunctionMaxQuery.class));
+            final DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) query;
+            assertThat(disMaxQuery.getDisjuncts(), hasSize(2));
+            for (Query disjunctQuery : disMaxQuery.getDisjuncts()) {
+                assertThat(disjunctQuery, instanceOf(BooleanQuery.class));
+                final BooleanQuery booleanQuery = (BooleanQuery) disjunctQuery;
+                assertThat(booleanQuery.clauses(), hasSize(2));
+                assertThat(assertBooleanSubQuery(booleanQuery, TermQuery.class, 0).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "foo")));
+                assertThat(assertBooleanSubQuery(booleanQuery, PrefixQuery.class, 1).getPrefix(),
+                    equalTo(new Term(STRING_FIELD_NAME, "bar")));
+            }
+        }
+
+        {
+            // STRING_FIELD_NAME_2 is a keyword field
+            final MultiMatchQueryBuilder queryBuilder = new MultiMatchQueryBuilder("foo bar", STRING_FIELD_NAME, STRING_FIELD_NAME_2);
+            queryBuilder.type(Type.BOOL_PREFIX);
+            final Query query = queryBuilder.toQuery(createShardContext());
+            assertThat(query, instanceOf(DisjunctionMaxQuery.class));
+            final DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) query;
+            assertThat(disMaxQuery.getDisjuncts(), hasSize(2));
+            final BooleanQuery firstDisjunct = assertDisjunctionSubQuery(disMaxQuery, BooleanQuery.class, 0);
+            assertThat(firstDisjunct.clauses(), hasSize(2));
+            assertThat(assertBooleanSubQuery(firstDisjunct, TermQuery.class, 0).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "foo")));
+            final PrefixQuery secondDisjunct = assertDisjunctionSubQuery(disMaxQuery, PrefixQuery.class, 1);
+            assertThat(secondDisjunct.getPrefix(), equalTo(new Term(STRING_FIELD_NAME_2, "foo bar")));
+        }
+    }
+
     public void testFromJson() throws IOException {
         String json =
             "{\n" +
diff --git a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java
index ccdfe28b44fdc..2a54cda752a9d
--- a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java
+++ b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java
@@ -331,6 +331,7 @@ public List<RescorerSpec<?>> getRescorers() {
         "intervals",
         "match",
         "match_all",
+        "match_bool_prefix",
         "match_none",
         "match_phrase",
         "match_phrase_prefix",