From f28412a5122013d1c263d9b65a973c72468f7375 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 22 Apr 2021 08:41:47 +0200 Subject: [PATCH 1/4] Add `match_only_text`, a space-efficient variant of `text`. (#66172) This adds a new `match_only_text` field, which indexes the same data as a `text` field that has `index_options: docs` and `norms: false` and uses the `_source` for positional queries like `match_phrase`. Unlike `text`, this field doesn't support scoring. --- docs/reference/mapping/types.asciidoc | 3 +- .../mapping/types/match-only-text.asciidoc | 59 +++ docs/reference/mapping/types/text.asciidoc | 18 +- modules/mapper-extras/build.gradle | 2 +- .../mapper/MatchOnlyTextFieldMapperTests.java | 156 +++++++ .../index/mapper/MapperExtrasPlugin.java | 1 + .../mapper/MatchOnlyTextFieldMapper.java | 347 +++++++++++++++ .../mapper/SearchAsYouTypeFieldMapper.java | 35 +- .../index/query/SourceConfirmedTextQuery.java | 379 ++++++++++++++++ .../index/query/SourceIntervalsSource.java | 195 ++++++++ .../mapper/MatchOnlyTextFieldTypeTests.java | 180 ++++++++ .../query/SourceConfirmedTextQueryTests.java | 420 ++++++++++++++++++ .../query/SourceIntervalsSourceTests.java | 136 ++++++ .../test/match_only_text/10_basic.yml | 254 +++++++++++ .../common/CheckedIntFunction.java | 14 + .../index/mapper/AbstractScriptFieldType.java | 6 +- .../index/mapper/MappedFieldType.java | 8 +- .../index/mapper/TextFieldMapper.java | 28 +- .../query/CombinedFieldsQueryBuilder.java | 9 +- .../index/search/MatchQueryParser.java | 17 +- .../index/search/MultiMatchQueryParser.java | 4 +- .../AbstractScriptFieldTypeTestCase.java | 6 +- .../MatchPhrasePrefixQueryBuilderTests.java | 2 +- .../index/mapper/FieldTypeTestCase.java | 6 + 24 files changed, 2241 insertions(+), 44 deletions(-) create mode 100644 docs/reference/mapping/types/match-only-text.asciidoc create mode 100644 modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java 
create mode 100644 modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java create mode 100644 modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java create mode 100644 modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java create mode 100644 modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java create mode 100644 modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java create mode 100644 modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java create mode 100644 modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml create mode 100644 server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index c64c885d90886..14cd6a6379af1 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -69,7 +69,8 @@ values. [[text-search-types]] ==== Text search types -<<text,`text`>>:: Analyzed, unstructured text. +<<text,`text` fields>>:: The text family, including `text` and `match_only_text`. + Analyzed, unstructured text. {plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special markup. Used for identifying named entities. <<completion-suggester,`completion`>>:: Used for auto-complete suggestions. diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc new file mode 100644 index 0000000000000..3f359faeaaa14 --- /dev/null +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -0,0 +1,59 @@ +[discrete] +[[match-only-text-field-type]] +=== Match-only text field type + +A variant of <<text-field-type,`text`>> that trades scoring and efficiency of +positional queries for space efficiency. 
+ This field effectively stores data the +same way as a `text` field that only indexes documents (`index_options: docs`) +and disables norms (`norms: false`). Term queries perform as fast as, if not +faster than, on `text` fields; however, queries that need positions such as the +<<query-dsl-match-query-phrase,`match_phrase` query>> perform slower as they +need to look at the `_source` document to verify whether a phrase matches. All +queries return constant scores that are equal to 1.0. + +Analysis is not configurable: text is always analyzed with the +<<specify-index-time-default-analyzer,default analyzer>> +(<<analysis-standard-analyzer,`standard`>> by default). + +<<span-queries,span queries>> are not supported with this field, use +<<query-dsl-intervals-query,interval queries>> instead, or the +<<text-field-type,`text`>> field type if you absolutely need span queries. + +Other than that, `match_only_text` supports the same queries as `text`. And +like `text`, it doesn't support sorting or aggregating. + +[source,console] +-------------------------------- +PUT logs +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "message": { + "type": "match_only_text" + } + } + } +} +-------------------------------- + +[discrete] +[[match-only-text-params]] +==== Parameters for match-only text fields + +The following mapping parameters are accepted: + +[horizontal] + +<<multi-fields,`fields`>>:: + + Multi-fields allow the same string value to be indexed in multiple ways for + different purposes, such as one field for search and a multi-field for + sorting and aggregations, or the same string value analyzed by different + analyzers. + +<<mapping-field-meta,`meta`>>:: + + Metadata about the field. diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 429b6d6d56f08..f22a0a97d6e5a 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -1,9 +1,23 @@ +[testenv="basic"] [[text]] -=== Text field type +=== Text type family ++++ <titleabbrev>Text</titleabbrev> ++++ +The text family includes the following field types: + +* <<text-field-type,`text`>>, the traditional field type for full-text content +such as the body of an email or the description of a product. 
+* <<match-only-text-field-type,`match_only_text`>>, a space-optimized variant +of `text` that disables scoring and performs slower on queries that need +positions. It is best suited for indexing log messages. + + +[discrete] +[[text-field-type]] +=== Text field type + A field to index full-text values, such as the body of an email or the description of a product. These fields are `analyzed`, that is they are passed through an <<analysis,analyzer>> to convert the string into a list of individual terms @@ -258,3 +272,5 @@ PUT my-index-000001 } } -------------------------------------------------- + +include::match-only-text.asciidoc[] diff --git a/modules/mapper-extras/build.gradle b/modules/mapper-extras/build.gradle index 5938770c73eac..19c0c9031b99b 100644 --- a/modules/mapper-extras/build.gradle +++ b/modules/mapper-extras/build.gradle @@ -16,6 +16,6 @@ esplugin { restResources { restApi { - include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get' + include '_common', 'cluster', 'field_caps', 'nodes', 'indices', 'index', 'search', 'get' } } diff --git a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java new file mode 100644 index 0000000000000..dfb76b663695a --- /dev/null +++ b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.plugins.Plugin; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class MatchOnlyTextFieldMapperTests extends MapperTestCase { + + @Override + protected Collection getPlugins() { + return List.of(new MapperExtrasPlugin()); + } + + @Override + protected Object getSampleValueForDocument() { + return "value"; + } + + public final void testExists() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); })); + assertExistsQuery(mapperService); + assertParseMinimalWarnings(); + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerUpdateCheck(b -> { + b.field("meta", Collections.singletonMap("format", "mysql.access")); + }, m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta())); + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "match_only_text"); + } + + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); 
+ assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals("1234", fields[0].stringValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + } + + public void testNullConfigValuesFail() throws MapperParsingException { + Exception e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("meta", (String) null))) + ); + assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [match_only_text] must not have a [null] value")); + } + + public void testSimpleMerge() throws IOException { + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text")); + MapperService mapperService = createMapperService(startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); + + merge(mapperService, startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); + + XContentBuilder newField = mapping(b -> { + b.startObject("field") + .field("type", "match_only_text") + .startObject("meta") + .field("key", "value") + .endObject() + .endObject(); + 
b.startObject("other_field").field("type", "keyword").endObject(); + }); + merge(mapperService, newField); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); + assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class)); + } + + public void testDisabledSource() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + { + mapping.startObject("properties"); + { + mapping.startObject("foo"); + { + mapping.field("type", "match_only_text"); + } + mapping.endObject(); + } + mapping.endObject(); + + mapping.startObject("_source"); + { + mapping.field("enabled", false); + } + mapping.endObject(); + } + mapping.endObject().endObject(); + + MapperService mapperService = createMapperService(mapping); + MappedFieldType ft = mapperService.fieldType("foo"); + SearchExecutionContext context = createSearchExecutionContext(mapperService); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.phraseQuery(ts, 0, true, context)); + assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled")); + + // Term queries are ok + ft.termQuery("a", context); // no exception + } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + assumeFalse("We don't have a way to assert things here", true); + return null; + } + + @Override + protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { + assumeFalse("We don't have a way to assert things here", true); + } +} diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java index a3630fa2dde9a..2ea69007be8f2 100644 --- 
a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java @@ -29,6 +29,7 @@ public Map getMappers() { mappers.put(RankFeatureFieldMapper.CONTENT_TYPE, RankFeatureFieldMapper.PARSER); mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, RankFeaturesFieldMapper.PARSER); mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, SearchAsYouTypeFieldMapper.PARSER); + mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER); return Collections.unmodifiableMap(mappers); } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java new file mode 100644 index 0000000000000..cb081ca05ee13 --- /dev/null +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -0,0 +1,347 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.index.query.SourceConfirmedTextQuery; +import org.elasticsearch.index.query.SourceIntervalsSource; +import org.elasticsearch.search.lookup.SearchLookup; +import org.elasticsearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; + +/** + * A {@link FieldMapper} for full-text fields that only indexes + * {@link IndexOptions#DOCS} and runs positional queries by looking at the + * 
_source. + */ +public class MatchOnlyTextFieldMapper extends FieldMapper { + + public static final String CONTENT_TYPE = "match_only_text"; + + public static class Defaults { + public static final FieldType FIELD_TYPE = new FieldType(); + + static { + FIELD_TYPE.setTokenized(true); + FIELD_TYPE.setStored(false); + FIELD_TYPE.setStoreTermVectors(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.freeze(); + } + + } + + private static Builder builder(FieldMapper in) { + return ((MatchOnlyTextFieldMapper) in).builder; + } + + public static class Builder extends FieldMapper.Builder { + + private final Version indexCreatedVersion; + + private final Parameter> meta = Parameter.metaParam(); + + private final TextParams.Analyzers analyzers; + + public Builder(String name, IndexAnalyzers indexAnalyzers) { + this(name, Version.CURRENT, indexAnalyzers); + } + + public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + super(name); + this.indexCreatedVersion = indexCreatedVersion; + this.analyzers = new TextParams.Analyzers(indexAnalyzers, m -> builder(m).analyzers); + } + + public Builder addMultiField(FieldMapper.Builder builder) { + this.multiFieldsBuilder.add(builder); + return this; + } + + @Override + protected List> getParameters() { + return Arrays.asList(meta); + } + + private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath contentPath) { + NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); + NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); + NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); + TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer); + MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(buildFullName(contentPath), tsi, indexAnalyzer, meta.getValue()); + return ft; + } + + @Override + public MatchOnlyTextFieldMapper build(ContentPath contentPath) { + 
MatchOnlyTextFieldType tft = buildFieldType(Defaults.FIELD_TYPE, contentPath); + MultiFields multiFields = multiFieldsBuilder.build(this, contentPath); + return new MatchOnlyTextFieldMapper( + name, + Defaults.FIELD_TYPE, + tft, + analyzers.getIndexAnalyzer(), + multiFields, + copyTo.build(), + this + ); + } + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + + public static class MatchOnlyTextFieldType extends StringFieldType { + + private final Analyzer indexAnalyzer; + private final TextFieldType textFieldType; + + public MatchOnlyTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, Map meta) { + super(name, true, false, false, tsi, meta); + this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); + this.textFieldType = new TextFieldType(name); + } + + public MatchOnlyTextFieldType(String name, boolean stored, Map meta) { + super( + name, + true, + stored, + false, + new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + meta + ); + this.indexAnalyzer = Lucene.STANDARD_ANALYZER; + this.textFieldType = new TextFieldType(name); + } + + public MatchOnlyTextFieldType(String name) { + this( + name, + new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + Lucene.STANDARD_ANALYZER, + Collections.emptyMap() + ); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public String familyTypeName() { + return TextFieldMapper.CONTENT_TYPE; + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + return SourceValueFetcher.toString(name(), context, format); + } + + private Function, IOException>> getValueFetcherProvider( + SearchExecutionContext searchExecutionContext) { + if (searchExecutionContext.isSourceEnabled() == false) { + throw new IllegalArgumentException( + "Field [" + name() + "] of 
type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." + ); + } + SourceLookup sourceLookup = searchExecutionContext.lookup().source(); + ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); + return context -> { + valueFetcher.setNextReader(context); + return docID -> { + try { + sourceLookup.setSegmentAndDocument(context, docID); + return valueFetcher.fetchValues(sourceLookup); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + }; + } + + private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) { + return new ConstantScoreQuery( + new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)); + } + + private IntervalsSource toIntervalsSource( + IntervalsSource source, + Query approximation, + SearchExecutionContext searchExecutionContext) { + return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); + } + + @Override + public Query termQuery(Object value, SearchExecutionContext context) { + // Disable scoring + return new ConstantScoreQuery(super.termQuery(value, context)); + } + + @Override + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + SearchExecutionContext context + ) { + // Disable scoring + return new ConstantScoreQuery(super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context)); + } + + @Override + public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) { + return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context); + } + + @Override + public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) { + return toIntervalsSource(Intervals.prefix(term), new PrefixQuery(new Term(name(), term)), context); + } + + @Override + public IntervalsSource 
fuzzyIntervals(String term, int maxDistance, int prefixLength, + boolean transpositions, SearchExecutionContext context) { + FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(name(), term), + maxDistance, prefixLength, 128, transpositions); + fuzzyQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE); + IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), term); + return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context); + } + + @Override + public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.wildcard(pattern), + new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be? + context); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext) + throws IOException { + final Query query = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext); + return toQuery(query, queryShardContext); + } + + @Override + public Query multiPhraseQuery( + TokenStream stream, + int slop, + boolean enablePositionIncrements, + SearchExecutionContext queryShardContext + ) throws IOException { + final Query query = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext); + return toQuery(query, queryShardContext); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext) + throws IOException { + final Query query = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext); + return toQuery(query, queryShardContext); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { + throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations"); + } + + } + + private final Builder builder; + 
private final FieldType fieldType; + + private MatchOnlyTextFieldMapper( + String simpleName, + FieldType fieldType, + MatchOnlyTextFieldType mappedFieldType, + NamedAnalyzer indexAnalyzer, + MultiFields multiFields, + CopyTo copyTo, + Builder builder + ) { + super(simpleName, mappedFieldType, indexAnalyzer, multiFields, copyTo); + assert mappedFieldType.getTextSearchInfo().isTokenized(); + assert mappedFieldType.hasDocValues() == false; + this.fieldType = fieldType; + this.builder = builder; + } + + @Override + public FieldMapper.Builder getMergeBuilder() { + return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this); + } + + @Override + protected void parseCreateField(ParseContext context) throws IOException { + final String value; + if (context.externalValueSet()) { + value = context.externalValue().toString(); + } else { + value = context.parser().textOrNull(); + } + + if (value == null) { + return; + } + + Field field = new Field(fieldType().name(), value, fieldType); + context.doc().add(field); + createFieldNamesField(context); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + public MatchOnlyTextFieldType fieldType() { + return (MatchOnlyTextFieldType) super.fieldType(); + } + +} diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java index e26073fd81b16..bf54422db618c 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java @@ -274,38 +274,52 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool } } + private void checkForPositions() { + if (getTextSearchInfo().hasPositions() == false) { + throw new 
IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery"); + } + } + @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { + checkForPositions(); int numPos = countPosition(stream); if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } final ShingleFieldType shingleField = shingleFieldForPositions(numPos); stream = new FixedShingleFilter(stream, shingleField.shingleSize); - return shingleField.phraseQuery(stream, 0, true); + return shingleField.phraseQuery(stream, 0, true, context); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { + checkForPositions(); int numPos = countPosition(stream); if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } final ShingleFieldType shingleField = shingleFieldForPositions(numPos); stream = new FixedShingleFilter(stream, shingleField.shingleSize); - return shingleField.multiPhraseQuery(stream, 0, true); + return shingleField.multiPhraseQuery(stream, 0, true, context); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, + SearchExecutionContext context) throws IOException { int numPos = countPosition(stream); + if (numPos > 1) { + checkForPositions(); + } if (shingleFields.length 
== 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, null, null); } final ShingleFieldType shingleField = shingleFieldForPositions(numPos); stream = new FixedShingleFilter(stream, shingleField.shingleSize); - return shingleField.phrasePrefixQuery(stream, 0, maxExpansions); + return shingleField.phrasePrefixQuery(stream, 0, maxExpansions, context); } @Override @@ -502,17 +516,20 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool } @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, + SearchExecutionContext context) throws IOException { final String prefixFieldName = slop > 0 ? 
null : prefixFieldType.name();
diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java
new file mode 100644
index 0000000000000..baba8b0345f1f
--- /dev/null
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java
@@ -0,0 +1,379 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeafSimScorer;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.elasticsearch.common.CheckedIntFunction;
+import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * A variant of {@link TermQuery}, {@link PhraseQuery}, {@link MultiPhraseQuery}
+ * and span queries that uses postings for its approximation, but falls back to
+ * stored fields or _source whenever term frequencies or positions are needed.
+ * This query matches and scores the same way as the wrapped query.
+ */
+public final class SourceConfirmedTextQuery extends Query {
+
+    /**
+     * Create an approximation for the given query. The returned approximation
+     * should match a superset of the matches of the provided query.
+     *
+     * @param query the query to approximate
+     * @return the query itself for a {@link TermQuery}, a boolean combination of term or prefix queries
+     *         for the supported phrase queries, and {@link MatchAllDocsQuery} for any other query type
+     */
+    public static Query approximate(Query query) {
+        if (query instanceof TermQuery) {
+            return query;
+        } else if (query instanceof PhraseQuery) {
+            return approximate((PhraseQuery) query);
+        } else if (query instanceof MultiPhraseQuery) {
+            return approximate((MultiPhraseQuery) query);
+        } else if (query instanceof MultiPhrasePrefixQuery) {
+            return approximate((MultiPhrasePrefixQuery) query);
+        } else {
+            return new MatchAllDocsQuery();
+        }
+    }
+
+    /** Approximates a phrase with the conjunction of its terms. */
+    private static Query approximate(PhraseQuery query) {
+        BooleanQuery.Builder approximation = new BooleanQuery.Builder();
+        for (Term term : query.getTerms()) {
+            approximation.add(new TermQuery(term), Occur.FILTER);
+        }
+        return approximation.build();
+    }
+
+    /** Approximates a multi-phrase with a conjunction, over positions, of the disjunction of the terms at each position. */
+    private static Query approximate(MultiPhraseQuery query) {
+        BooleanQuery.Builder approximation = new BooleanQuery.Builder();
+        for (Term[] termArray : query.getTermArrays()) {
+            approximation.add(anyTermOf(termArray), Occur.FILTER);
+        }
+        return approximation.build();
+    }
+
+    /** Approximates a phrase-prefix query, using only the non-prefix positions when there is more than one position. */
+    private static Query approximate(MultiPhrasePrefixQuery query) {
+        Term[][] terms = query.getTerms();
+        if (terms.length == 0) {
+            return new MatchNoDocsQuery();
+        } else if (terms.length == 1) {
+            // Only a prefix, approximate with prefix queries. Terms that share a position are
+            // alternatives, so they are combined as a disjunction: requiring all of them would
+            // make the approximation miss documents that match only one of the prefixes and
+            // break the "superset" contract of the approximation.
+            BooleanQuery.Builder approximation = new BooleanQuery.Builder();
+            for (Term term : terms[0]) {
+                approximation.add(new PrefixQuery(term), Occur.SHOULD);
+            }
+            return approximation.build();
+        }
+        // A combination of a phrase and a prefix query, only use terms of the phrase for the approximation
+        BooleanQuery.Builder approximation = new BooleanQuery.Builder();
+        for (int i = 0; i < terms.length - 1; ++i) { // ignore the last set of terms, which are prefixes
+            approximation.add(anyTermOf(terms[i]), Occur.FILTER);
+        }
+        return approximation.build();
+    }
+
+    /** Builds a disjunction of term queries, one optional clause per given term. */
+    private static Query anyTermOf(Term[] terms) {
+        BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
+        for (Term term : terms) {
+            disjunction.add(new TermQuery(term), Occur.SHOULD);
+        }
+        return disjunction.build();
+    }
+
+    /**
+     * Similarity that produces the frequency as a score.
+     */
+    private static final Similarity FREQ_SIMILARITY = new Similarity() {
+
+        @Override
+        public long computeNorm(FieldInvertState state) {
+            return 1L;
+        }
+
+        @Override
+        public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+            return new SimScorer() {
+                @Override
+                public float score(float freq, long norm) {
+                    return freq;
+                }
+            };
+        }
+    };
+
+    private final Query in;
+    private final Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider;
+    private final Analyzer indexAnalyzer;
+
+    /**
+     * @param in                   the query to run against the re-analyzed field values
+     * @param valueFetcherProvider given a segment, returns a function from doc ID to the values of the field for that doc
+     * @param indexAnalyzer        the analyzer used to re-index fetched values into a {@link MemoryIndex}
+     */
+    public SourceConfirmedTextQuery(
+        Query in,
+        Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider,
+        Analyzer indexAnalyzer
+    ) {
+        this.in = in;
+        this.valueFetcherProvider = valueFetcherProvider;
+        this.indexAnalyzer = indexAnalyzer;
+    }
+
+    /** Returns the wrapped query. */
+    public Query getQuery() {
+        return in;
+    }
+
+    @Override
+    public String toString(String field) {
+        return in.toString(field);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || obj.getClass() != getClass()) {
+            return false;
+        }
+        SourceConfirmedTextQuery that = (SourceConfirmedTextQuery) obj;
+        return Objects.equals(in, that.in)
+            && Objects.equals(valueFetcherProvider, that.valueFetcherProvider)
+            && Objects.equals(indexAnalyzer, that.indexAnalyzer);
+    }
+
+    @Override
+    public int hashCode() {
+        return 31 * Objects.hash(in, valueFetcherProvider, indexAnalyzer) + classHash();
+    }
+
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+        Query inRewritten = in.rewrite(reader);
+        if (inRewritten != in) {
+            return new SourceConfirmedTextQuery(inRewritten, valueFetcherProvider, indexAnalyzer);
+        } else if (in instanceof ConstantScoreQuery) {
+            // Pull the constant-score wrapper outside of this query
+            Query sub = ((ConstantScoreQuery) in).getQuery();
+            return new ConstantScoreQuery(new SourceConfirmedTextQuery(sub, valueFetcherProvider, indexAnalyzer));
+        } else if (in instanceof BoostQuery) {
+            // Pull the boost outside of this query
+            Query sub = ((BoostQuery) in).getQuery();
+            float boost = ((BoostQuery) in).getBoost();
+            return new BoostQuery(new SourceConfirmedTextQuery(sub, valueFetcherProvider, indexAnalyzer), boost);
+        } else if (in instanceof MatchNoDocsQuery) {
+            return in; // e.g. empty phrase query
+        }
+        return super.rewrite(reader);
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+        if (scoreMode.needsScores() == false && in instanceof TermQuery) {
+            // No need to ever look at the _source for non-scoring term queries
+            return in.createWeight(searcher, scoreMode, boost);
+        }
+
+        final Set<Term> terms = new HashSet<>();
+        in.visit(QueryVisitor.termCollector(terms));
+        if (terms.isEmpty()) {
+            throw new IllegalStateException("Query " + in + " doesn't have any term");
+        }
+        final String field = terms.iterator().next().field();
+        final List<TermStatistics> termStats = new ArrayList<>();
+        for (Term term : terms) {
+            if (scoreMode.needsScores()) {
+                // Terms come from a set, so each one is looked up exactly once; any IOException propagates as-is.
+                TermStates ts = TermStates.build(searcher.getTopReaderContext(), term, true);
+                if (ts.docFreq() > 0) {
+                    termStats.add(searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
+                }
+            } else {
+                // Placeholder statistics: no index lookup is needed when scores are not requested
+                termStats.add(new TermStatistics(term.bytes(), 1, 1L));
+            }
+        }
+        final SimScorer simScorer = searcher.getSimilarity()
+            .scorer(boost, searcher.collectionStatistics(field), termStats.toArray(TermStatistics[]::new));
+        final Weight approximationWeight = searcher.createWeight(approximate(in), ScoreMode.COMPLETE_NO_SCORES, 1f);
+
+        return new Weight(this) {
+
+            @Override
+            public boolean isCacheable(LeafReaderContext ctx) {
+                // Don't cache queries that may perform linear scans
+                return false;
+            }
+
+            @Override
+            public void extractTerms(Set<Term> termSet) {
+                termSet.addAll(terms);
+            }
+
+            @Override
+            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+                RuntimePhraseScorer scorer = scorer(context);
+                if (scorer == null) {
+                    return Explanation.noMatch("No matching phrase");
+                }
+                final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+                if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
+                    return Explanation.noMatch("No matching phrase");
+                }
+                float phraseFreq = scorer.freq();
+                Explanation freqExplanation = Explanation.match(phraseFreq, "phraseFreq=" + phraseFreq);
+                final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, context.reader(), field, scoreMode.needsScores());
+                Explanation scoreExplanation = leafSimScorer.explain(doc, freqExplanation);
+                return Explanation.match(
+                    scoreExplanation.getValue(),
+                    "weight(" + getQuery() + " in " + doc + ") [" + searcher.getSimilarity().getClass().getSimpleName() + "], result of:",
+                    scoreExplanation
+                );
+            }
+
+            @Override
+            public RuntimePhraseScorer scorer(LeafReaderContext context) throws IOException {
+                final Scorer approximationScorer = approximationWeight.scorer(context);
+                if (approximationScorer == null) {
+                    return null;
+                }
+                final DocIdSetIterator approximation = approximationScorer.iterator();
+                final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, context.reader(), field, scoreMode.needsScores());
+                final CheckedIntFunction<List<Object>, IOException> valueFetcher = valueFetcherProvider.apply(context);
+                return new RuntimePhraseScorer(this, approximation, leafSimScorer, valueFetcher, field, in);
+            }
+
+        };
+    }
+
+    /**
+     * Scorer that confirms candidates of the approximation by re-analyzing the values of the
+     * field into a {@link MemoryIndex} and running the wrapped query against it.
+     */
+    private class RuntimePhraseScorer extends Scorer {
+
+        private final LeafSimScorer scorer;
+        private final CheckedIntFunction<List<Object>, IOException> valueFetcher;
+        private final String field;
+        private final Query query;
+        private final TwoPhaseIterator twoPhase;
+
+        // Doc ID for which `freq` was last computed, so that it is computed at most once per doc
+        private int doc = -1;
+        private float freq;
+
+        private RuntimePhraseScorer(
+            Weight weight,
+            DocIdSetIterator approximation,
+            LeafSimScorer scorer,
+            CheckedIntFunction<List<Object>, IOException> valueFetcher,
+            String field,
+            Query query
+        ) {
+            super(weight);
+            this.scorer = scorer;
+            this.valueFetcher = valueFetcher;
+            this.field = field;
+            this.query = query;
+            twoPhase = new TwoPhaseIterator(approximation) {
+
+                @Override
+                public boolean matches() throws IOException {
+                    return freq() > 0;
+                }
+
+                @Override
+                public float matchCost() {
+                    // TODO what is a right value?
+                    // Defaults to a high-ish value so that it likely runs last.
+                    return 10_000f;
+                }
+
+            };
+        }
+
+        @Override
+        public DocIdSetIterator iterator() {
+            return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
+        }
+
+        @Override
+        public TwoPhaseIterator twoPhaseIterator() {
+            return twoPhase;
+        }
+
+        @Override
+        public float getMaxScore(int upTo) throws IOException {
+            return scorer.getSimScorer().score(Float.MAX_VALUE, 1L);
+        }
+
+        @Override
+        public float score() throws IOException {
+            return scorer.score(docID(), freq());
+        }
+
+        @Override
+        public int docID() {
+            return twoPhase.approximation().docID();
+        }
+
+        /** Returns the frequency of the wrapped query in the current doc, computing it on first access for that doc. */
+        private float freq() throws IOException {
+            if (doc != docID()) {
+                doc = docID();
+                freq = computeFreq();
+            }
+            return freq;
+        }
+
+        /** Sums, over the non-null values of the current doc, the score of the query under {@link #FREQ_SIMILARITY}. */
+        private float computeFreq() throws IOException {
+            MemoryIndex index = new MemoryIndex();
+            index.setSimilarity(FREQ_SIMILARITY);
+            List<Object> values = valueFetcher.apply(docID());
+            float freq = 0;
+            for (Object value : values) {
+                if (value == null) {
+                    continue;
+                }
+                index.addField(field, value.toString(), indexAnalyzer);
+                freq += index.search(query);
+                // Values are searched one at a time, the index is cleared before the next one is added
+                index.reset();
+            }
+            return freq;
+        }
+    }
+
+}
diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java
new file mode 100644
index 0000000000000..aa98574abe240
--- /dev/null
+++
b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.intervals.IntervalIterator;
+import org.apache.lucene.queries.intervals.IntervalMatchesIterator;
+import org.apache.lucene.queries.intervals.IntervalsSource;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.elasticsearch.common.CheckedIntFunction;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Function;
+
+/**
+ * A wrapper of {@link IntervalsSource} for the case when positions are not indexed.
+ * Candidate documents come from an approximation query; for each candidate the values of the
+ * field are re-analyzed into a single-document {@link MemoryIndex} on which the wrapped source is run.
+ */
+public final class SourceIntervalsSource extends IntervalsSource {
+
+    private final IntervalsSource in;
+    private final Query approximation;
+    private final Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider;
+    private final Analyzer indexAnalyzer;
+
+    /**
+     * @param in                   the intervals source to evaluate against re-analyzed values
+     * @param approximation        query that selects the candidate documents
+     * @param valueFetcherProvider given a segment, returns a function from doc ID to the values of the field for that doc
+     * @param indexAnalyzer        the analyzer used to re-index fetched values
+     * @throws NullPointerException if any argument is {@code null}
+     */
+    public SourceIntervalsSource(IntervalsSource in,
+            Query approximation,
+            Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider,
+            Analyzer indexAnalyzer) {
+        this.in = Objects.requireNonNull(in);
+        this.approximation = Objects.requireNonNull(approximation);
+        this.valueFetcherProvider = Objects.requireNonNull(valueFetcherProvider);
+        this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
+    }
+
+    /** Returns the wrapped intervals source. */
+    public IntervalsSource getIntervalsSource() {
+        return in;
+    }
+
+    /** Indexes the non-null values under {@code field} in a fresh {@link MemoryIndex} and returns its first leaf. */
+    private LeafReaderContext createSingleDocLeafReaderContext(String field, List<Object> values) {
+        MemoryIndex index = new MemoryIndex();
+        for (Object value : values) {
+            if (value == null) {
+                continue;
+            }
+            index.addField(field, value.toString(), indexAnalyzer);
+        }
+        index.freeze();
+        return index.createSearcher().getIndexReader().leaves().get(0);
+    }
+
+    @Override
+    public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
+        final IndexSearcher searcher = new IndexSearcher(ctx.reader());
+        final Weight weight = searcher.createWeight(searcher.rewrite(approximation), ScoreMode.COMPLETE_NO_SCORES, 1f);
+        final Scorer scorer = weight.scorer(ctx.reader().getContext());
+        if (scorer == null) {
+            return null;
+        }
+        // Iterator over the candidate docs; named so that it does not shadow the `approximation` query field
+        final DocIdSetIterator candidates = scorer.iterator();
+
+        final CheckedIntFunction<List<Object>, IOException> valueFetcher = valueFetcherProvider.apply(ctx);
+        return new IntervalIterator() {
+
+            // Intervals of the wrapped source over the single-document index of the current candidate
+            private IntervalIterator singleDocIntervals;
+
+            @Override
+            public int docID() {
+                return candidates.docID();
+            }
+
+            @Override
+            public long cost() {
+                return candidates.cost();
+            }
+
+            @Override
+            public int nextDoc() throws IOException {
+                return doNext(candidates.nextDoc());
+            }
+
+            @Override
+            public int advance(int target) throws IOException {
+                return doNext(candidates.advance(target));
+            }
+
+            /** Moves forward from {@code doc} to the first candidate for which the wrapped source has intervals. */
+            private int doNext(int doc) throws IOException {
+                while (doc != NO_MORE_DOCS && setIterator(doc) == false) {
+                    doc = candidates.nextDoc();
+                }
+                return doc;
+            }
+
+            /** Re-indexes the values of {@code doc} and returns whether the wrapped source has intervals on it. */
+            private boolean setIterator(int doc) throws IOException {
+                final List<Object> values = valueFetcher.apply(doc);
+                final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values);
+                singleDocIntervals = in.intervals(field, singleDocContext);
+                final boolean isSet = singleDocIntervals != null && singleDocIntervals.nextDoc() != NO_MORE_DOCS;
+                assert isSet == false || singleDocIntervals.docID() == 0;
+                return isSet;
+            }
+
+            @Override
+            public int start() {
+                return singleDocIntervals.start();
+            }
+
+            @Override
+            public int end() {
+                return singleDocIntervals.end();
+            }
+
+            @Override
+            public int gaps() {
+                return singleDocIntervals.gaps();
+            }
+
+            @Override
+            public int nextInterval() throws IOException {
+                return singleDocIntervals.nextInterval();
+            }
+
+            @Override
+            public float matchCost() {
+                // a high number since we need to parse the _source
+                return 10_000;
+            }
+
+        };
+    }
+
+    @Override
+    public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
+        final CheckedIntFunction<List<Object>, IOException> valueFetcher = valueFetcherProvider.apply(ctx);
+        final List<Object> values = valueFetcher.apply(doc);
+        final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values);
+        // The re-indexed document is the only one in the memory index, hence doc ID 0
+        return in.matches(field, singleDocContext, 0);
+    }
+
+    @Override
+    public void visit(String field, QueryVisitor visitor) {
+        in.visit(field, visitor);
+    }
+
+    @Override
+    public int minExtent() {
+        return in.minExtent();
+    }
+
+    @Override
+    public Collection<IntervalsSource> pullUpDisjunctions() {
+        return Collections.singleton(this);
+    }
+
+    @Override
+    public int hashCode() {
+        // Not using approximation and valueFetcherProvider, which don't identify this source but are only used to avoid scanning
+        // linearly through all documents
+        return Objects.hash(in, indexAnalyzer);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+        SourceIntervalsSource that = (SourceIntervalsSource) other;
+        // Not using approximation and valueFetcherProvider, which don't identify this source but are only used to avoid scanning
+        // linearly through all documents
+        return in.equals(that.in) && indexAnalyzer.equals(that.indexAnalyzer);
+    }
+
+    @Override
+    public String toString() {
+        return in.toString();
+    }
+
+}
diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java
new file mode 100644
index 0000000000000..8c3cd70c8af27
--- /dev/null
+++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java
@@ -0,0 +1,180 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.intervals.Intervals;
+import org.apache.lucene.queries.intervals.IntervalsSource;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.common.lucene.search.AutomatonQueries;
+import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.index.mapper.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType;
+import org.elasticsearch.index.query.SourceConfirmedTextQuery;
+import org.elasticsearch.index.query.SourceIntervalsSource;
+import org.hamcrest.Matchers;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Checks the queries and intervals sources that {@link MatchOnlyTextFieldType} produces.
+ */
+public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase {
+
+    public void testTermQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
+        assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null));
+    }
+
+    public void testTermsQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        List<BytesRef> terms = new ArrayList<>();
+        terms.add(new BytesRef("foo"));
+        terms.add(new BytesRef("bar"));
+        assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null));
+    }
+
+    public void testRangeQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(
+            new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false),
+            ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT)
+        );
+
+        // The same query is rejected when expensive queries are disallowed
+        ElasticsearchException ee = expectThrows(
+            ElasticsearchException.class,
+            () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
+        );
+        assertEquals(
+            "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.",
+            ee.getMessage()
+        );
+    }
+
+    public void testRegexpQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));
+
+        // The same kind of query is rejected when expensive queries are disallowed
+        ElasticsearchException ee = expectThrows(
+            ElasticsearchException.class,
+            () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
+        );
+        assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
+    }
+
+    public void testFuzzyQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(
+            new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)),
+            ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
+        );
+
+        // The same kind of query is rejected when expensive queries are disallowed
+        ElasticsearchException ee = expectThrows(
+            ElasticsearchException.class,
+            () -> ft.fuzzyQuery(
+                "foo",
+                Fuzziness.AUTO,
+                randomInt(10) + 1,
+                randomInt(10) + 1,
+                randomBoolean(),
+                MOCK_CONTEXT_DISALLOW_EXPENSIVE
+            )
+        );
+        assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
+    }
+
+    public void testFetchSourceValue() throws IOException {
+        MatchOnlyTextFieldType fieldType = new MatchOnlyTextFieldType("field");
+
+        // Non-string source values are expected back as their string form
+        assertEquals(List.of("value"), fetchSourceValue(fieldType, "value"));
+        assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L));
+        assertEquals(List.of("true"), fetchSourceValue(fieldType, true));
+    }
+
+    /** Strips the {@link ConstantScoreQuery} and {@link SourceConfirmedTextQuery} wrappers off a positional query. */
+    private Query unwrapPositionalQuery(Query query) {
+        query = ((ConstantScoreQuery) query).getQuery();
+        query = ((SourceConfirmedTextQuery) query).getQuery();
+        return query;
+    }
+
+    public void testPhraseQuery() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7));
+        Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT);
+        Query delegate = unwrapPositionalQuery(query);
+        assertEquals(new PhraseQuery("field", "a", "b"), delegate);
+        // The approximation must not fall back to matching all documents
+        assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate));
+    }
+
+    public void testMultiPhraseQuery() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7));
+        Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT);
+        Query delegate = unwrapPositionalQuery(query);
+        MultiPhraseQuery expected = new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "b") })
+            .add(new Term("field", "c"))
+            .build();
+        assertEquals(expected, delegate);
+        // The approximation must not fall back to matching all documents
+        assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate));
+    }
+
+    public void testPhrasePrefixQuery() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7));
+        Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT);
+        Query delegate = unwrapPositionalQuery(query);
+        MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field");
+        expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") });
+        expected.add(new Term("field", "c"));
+        assertEquals(expected, delegate);
+        // The approximation must not fall back to matching all documents
+        assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate));
+    }
+
+    public void testTermIntervals() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
+        assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
+        assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
+    }
+
+    public void testPrefixIntervals() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
+        assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
+        assertEquals(Intervals.prefix(new BytesRef("foo")), ((SourceIntervalsSource) prefixIntervals).getIntervalsSource());
+    }
+
+    public void testWildcardIntervals() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
+        assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
+        assertEquals(Intervals.wildcard(new BytesRef("foo")), ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource());
+    }
+
+    public void testFuzzyIntervals() throws IOException {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
+        assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
+    }
+}
diff --git
a/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java new file mode 100644 index 0000000000000..249b02eed1614 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java @@ -0,0 +1,420 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import 
org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +public class SourceConfirmedTextQueryTests extends ESTestCase { + + private static final Function, IOException>> SOURCE_FETCHER_PROVIDER = context -> { + return docID -> Collections.singletonList(context.reader().document(docID).get("body")); + }; + + public void testTerm() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + TermQuery query = new TermQuery(new Term("body", "c")); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + ScoreDoc[] phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Term query with missing term + query = new TermQuery(new Term("body", "e")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + 
assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testPhrase() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + PhraseQuery query = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + ScoreDoc[] phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Sloppy phrase query + query = new PhraseQuery(1, "body", "b", "d"); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Phrase query with no matches + query = new PhraseQuery("body", "d", "c"); + 
sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Phrase query with one missing term + query = new PhraseQuery("body", "b", "e"); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testMultiPhrase() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + MultiPhraseQuery query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "a"), new Term("body", "b") }, 0) + .add(new Term[] { new Term("body", "c") }, 1) + .build(); + + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + ScoreDoc[] phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + 
CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Sloppy multi phrase query + query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "a"), new Term("body", "b") }, 0) + .add(new Term[] { new Term("body", "d") }, 1) + .setSlop(1) + .build(); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Multi phrase query with no matches + query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "d"), new Term("body", "c") }, 0) + .add(new Term[] { new Term("body", "a") }, 1) + .build(); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Multi phrase query with one missing term + query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "d"), new Term("body", "c") }, 0) + .add(new Term[] { new Term("body", "e") }, 1) + .build(); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testMultiPhrasePrefix() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, 
newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b cd b a b cd", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b cd e", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("body"); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + ScoreDoc[] phrasePrefixHits = searcher.search(query, 10).scoreDocs; + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "c")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + phrasePrefixHits = searcher.search(query, 10).scoreDocs; + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "b")); + query.add(new Term("body", "c")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + phrasePrefixHits = searcher.search(query, 10).scoreDocs; + sourceConfirmedHits = 
searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + // Sloppy multi phrase prefix query + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "a")); + query.add(new Term("body", "c")); + query.setSlop(2); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + phrasePrefixHits = searcher.search(query, 10).scoreDocs; + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + // Multi phrase prefix query with no matches + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "d")); + query.add(new Term("body", "b")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Multi phrase query with one missing term + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "d")); + query.add(new Term("body", "f")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(0, searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testSpanNear() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new 
IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + SpanNearQuery query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "b")), new SpanTermQuery(new Term("body", "c")) }, + 0, + false + ); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + ScoreDoc[] spanHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, spanHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, spanHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Sloppy span near query + query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "b")), new SpanTermQuery(new Term("body", "c")) }, + 1, + false + ); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + spanHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, spanHits.length); + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, spanHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Span near query with no matches + query = new SpanNearQuery( + new SpanQuery[] { new 
SpanTermQuery(new Term("body", "a")), new SpanTermQuery(new Term("body", "d")) }, + 0, + false + ); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Span near query with one missing term + query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "b")), new SpanTermQuery(new Term("body", "e")) }, + 0, + false + ); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testToString() { + PhraseQuery query = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(query.toString(), sourceConfirmedPhraseQuery.toString()); + } + + public void testEqualsHashCode() { + PhraseQuery query1 = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery1 = new SourceConfirmedTextQuery(query1, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery1); + assertEquals(sourceConfirmedPhraseQuery1.hashCode(), sourceConfirmedPhraseQuery1.hashCode()); + + PhraseQuery query2 = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery2 = new SourceConfirmedTextQuery(query2, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery2); + + PhraseQuery query3 = new PhraseQuery("body", "b", "d"); + Query sourceConfirmedPhraseQuery3 = new SourceConfirmedTextQuery(query3, 
SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertNotEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery3); + + Query sourceConfirmedPhraseQuery4 = new SourceConfirmedTextQuery(query1, context -> null, Lucene.STANDARD_ANALYZER); + assertNotEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery4); + + Query sourceConfirmedPhraseQuery5 = new SourceConfirmedTextQuery(query1, SOURCE_FETCHER_PROVIDER, Lucene.KEYWORD_ANALYZER); + assertNotEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery5); + } + + public void testApproximation() { + assertEquals( + new TermQuery(new Term("body", "text")), + SourceConfirmedTextQuery.approximate(new TermQuery(new Term("body", "text"))) + ); + + assertEquals( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "a")), Occur.FILTER) + .add(new TermQuery(new Term("body", "b")), Occur.FILTER) + .build(), + SourceConfirmedTextQuery.approximate(new PhraseQuery("body", "a", "b")) + ); + + MultiPhraseQuery query = new MultiPhraseQuery.Builder().add(new Term("body", "a")) + .add(new Term[] { new Term("body", "b"), new Term("body", "c") }) + .build(); + Query approximation = new BooleanQuery.Builder().add( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "a")), Occur.SHOULD).build(), + Occur.FILTER + ) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "b")), Occur.SHOULD) + .add(new TermQuery(new Term("body", "c")), Occur.SHOULD) + .build(), + Occur.FILTER + ) + .build(); + assertEquals(approximation, SourceConfirmedTextQuery.approximate(query)); + + MultiPhrasePrefixQuery phrasePrefixQuery = new MultiPhrasePrefixQuery("body"); + assertEquals(new MatchNoDocsQuery(), SourceConfirmedTextQuery.approximate(phrasePrefixQuery)); + + phrasePrefixQuery.add(new Term("body", "apache")); + approximation = new BooleanQuery.Builder().add(new PrefixQuery(new Term("body", "apache")), Occur.FILTER).build(); + assertEquals(approximation, 
SourceConfirmedTextQuery.approximate(phrasePrefixQuery)); + + phrasePrefixQuery.add(new Term("body", "luc")); + approximation = new BooleanQuery.Builder().add( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "apache")), Occur.SHOULD).build(), + Occur.FILTER + ).build(); + assertEquals(approximation, SourceConfirmedTextQuery.approximate(phrasePrefixQuery)); + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java new file mode 100644 index 0000000000000..f8da46d98809f --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java @@ -0,0 +1,136 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.IntervalIterator; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +public class SourceIntervalsSourceTests extends ESTestCase { + + private static final Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> SOURCE_FETCHER_PROVIDER = context -> { + return docID -> Collections.singletonList(context.reader().document(docID).get("body")); + }; + + public void testIntervals() throws IOException { + final FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setIndexOptions(IndexOptions.DOCS); + ft.freeze(); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new Field("body", "a b", ft)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new Field("body", "b d a d", ft)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new Field("body", "b c d", ft)); +
w.addDocument(doc); + + DirectoryReader.open(w).close(); + + doc = new Document(); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals(2, reader.leaves().size()); + + IntervalsSource source = new SourceIntervalsSource( + Intervals.term(new BytesRef("d")), + new TermQuery(new Term("body", "d")), + SOURCE_FETCHER_PROVIDER, + Lucene.STANDARD_ANALYZER); + + IntervalIterator intervals = source.intervals("body", reader.leaves().get(0)); + + assertEquals(1, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(1, intervals.nextInterval()); + assertEquals(1, intervals.start()); + assertEquals(1, intervals.end()); + assertEquals(3, intervals.nextInterval()); + assertEquals(3, intervals.start()); + assertEquals(3, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(2, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(2, intervals.nextInterval()); + assertEquals(2, intervals.start()); + assertEquals(2, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(DocIdSetIterator.NO_MORE_DOCS, intervals.nextDoc()); + + assertEquals(null, source.intervals("body", reader.leaves().get(1))); + + // Same test, but with a bad approximation now + source = new SourceIntervalsSource( + Intervals.term(new BytesRef("d")), + new MatchAllDocsQuery(), + SOURCE_FETCHER_PROVIDER, + Lucene.STANDARD_ANALYZER); + + intervals = source.intervals("body", reader.leaves().get(0)); + + assertEquals(1, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(1, intervals.nextInterval()); + assertEquals(1, intervals.start()); + assertEquals(1, intervals.end()); + assertEquals(3, intervals.nextInterval()); + assertEquals(3, intervals.start()); + assertEquals(3, 
intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(2, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(2, intervals.nextInterval()); + assertEquals(2, intervals.start()); + assertEquals(2, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(DocIdSetIterator.NO_MORE_DOCS, intervals.nextDoc()); + + intervals = source.intervals("body", reader.leaves().get(1)); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, intervals.nextDoc()); + } + } + } +} diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml new file mode 100644 index 0000000000000..d58ef74ea6316 --- /dev/null +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -0,0 +1,254 @@ +setup: + + - skip: + version: " - 7.13.99" + reason: "match_only_text was added in 7.14" + + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: {} + + - do: + index: + index: test + id: 2 + body: { "foo": "Apache Lucene powers Elasticsearch" } + + - do: + index: + index: test + id: 3 + body: { "foo": "Elasticsearch is based on Apache Lucene" } + + - do: + index: + index: test + id: 4 + body: { "foo": "The Apache Software Foundation manages many projects including Lucene" } + + - do: + indices.refresh: {} + +--- +"Field caps": + + - do: + field_caps: + index: test + fields: [ foo ] + + - match: { fields.foo.text.searchable: true } + - match: { fields.foo.text.aggregatable: false } + +--- +"Exist query": + + - do: + search: + index: test + body: + query: + exists: + field: foo + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + 
+--- +"Match query": + + - do: + search: + index: test + body: + query: + match: + foo: powers + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Match Phrase query": + + - do: + search: + index: test + body: + query: + match_phrase: + foo: "lucene powers" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Match Phrase Prefix query": + + - do: + search: + index: test + body: + query: + match_phrase_prefix: + foo: "lucene pow" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Query String query with phrase": + + - do: + search: + index: test + body: + query: + query_string: + query: '"lucene powers"' + default_field: "foo" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + + + +--- +"Regexp query": + + - do: + search: + index: test + body: + query: + regexp: + foo: "lu.*ne" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Wildcard query": + + - do: + search: + index: test + body: + query: + wildcard: + foo: "lu*ne" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Prefix query": + + - do: + search: + index: test + body: + query: + prefix: + foo: "luc" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Fuzzy query": + + - do: + search: + index: test + body: + query: + fuzzy: + foo: "lucane" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Span query": + + - do: + catch: bad_request + search: + index: test + body: + query: + span_term: + foo: lucene + +--- +"Term intervals query": + + - do: + search: + index: test + body: + query: + intervals: + foo: + match: + query: "apache lucene" + max_gaps: 1 + + - match: { "hits.total.value": 2 } + +--- +"Prefix intervals query": + + - do: + search: + index: test + body: + query: + intervals: + foo: + prefix: + prefix: "luc" + + - 
match: { "hits.total.value": 3 } + +--- +"Wildcard intervals query": + + - do: + search: + index: test + body: + query: + intervals: + foo: + wildcard: + pattern: "*ase*" + + - match: { "hits.total.value": 1 } + +--- +"Fuzzy intervals query": + + - do: + search: + index: test + body: + query: + intervals: + foo: + fuzzy: + term: "lucane" + + - match: { "hits.total.value": 3 } diff --git a/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java new file mode 100644 index 0000000000000..e07d92c8d984a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java @@ -0,0 +1,14 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.common; + +@FunctionalInterface +public interface CheckedIntFunction<T, E extends Exception> { + T apply(int input) throws E; +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java index ca43c13d1135c..0f2038b6e1e0b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java @@ -140,17 +140,17 @@ public Query regexpQuery( } @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext context) { throw new IllegalArgumentException(unsupported("phrase", "text")); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext context) { throw new IllegalArgumentException(unsupported("phrase", "text")); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext context) { throw new IllegalArgumentException(unsupported("phrase prefix", "text")); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 86771d92e2308..055b31c901a49 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -263,17 +263,19 @@ public Query existsQuery(SearchExecutionContext context) { } } - public Query phraseQuery(TokenStream stream, int slop, boolean
enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 3ac87ef133065..08e6faf0cfafe 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -721,9 +721,17 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex return Intervals.wildcard(pattern); } + private void checkForPositions() { + if (getTextSearchInfo().hasPositions() == false) { + throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery"); + } + } + @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { + public Query 
phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, + SearchExecutionContext context) throws IOException { String field = name(); + checkForPositions(); // we can't use the index_phrases shortcut with slop, if there are gaps in the stream, // or if the incoming token stream is the output of a token graph due to // https://issues.apache.org/jira/browse/LUCENE-8916 @@ -756,7 +764,8 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncremen } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { String field = name(); if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); @@ -765,8 +774,21 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi return createPhraseQuery(stream, field, slop, enablePositionIncrements); } + private int countTokens(TokenStream ts) throws IOException { + ts.reset(); + int count = 0; + while (ts.incrementToken()) { + count++; + } + ts.end(); + return count; + } + @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext context) throws IOException { + if (countTokens(stream) > 1) { + checkForPositions(); + } return analyzePhrasePrefix(stream, slop, maxExpansions); } diff --git a/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java index 5bfd24761a595..b12828a1093e9 100644 --- a/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java +++ 
b/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java @@ -315,7 +315,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { } CombinedFieldsBuilder builder = new CombinedFieldsBuilder(fieldsAndBoosts, - sharedAnalyzer, canGenerateSynonymsPhraseQuery); + sharedAnalyzer, canGenerateSynonymsPhraseQuery, context); Query query = builder.createBooleanQuery(placeholderFieldName, value.toString(), operator.toBooleanClauseOccur()); query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); @@ -353,13 +353,16 @@ private static final class FieldAndBoost { private static class CombinedFieldsBuilder extends QueryBuilder { private final List<FieldAndBoost> fields; + private final SearchExecutionContext context; CombinedFieldsBuilder(List<FieldAndBoost> fields, Analyzer analyzer, - boolean autoGenerateSynonymsPhraseQuery) { + boolean autoGenerateSynonymsPhraseQuery, + SearchExecutionContext context) { super(analyzer); this.fields = fields; setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); + this.context = context; } @Override @@ -409,7 +412,7 @@ protected Query newTermQuery(Term term, float boost) { protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (FieldAndBoost fieldAndBoost : fields) { - Query query = fieldAndBoost.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldAndBoost.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); if (fieldAndBoost.boost != 1f) { query = new BoostQuery(query, fieldAndBoost.boost); } diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java index 9512062fae8aa..17ea7e2fd3472 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java +++
b/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java @@ -605,8 +605,7 @@ private Query analyzeMultiBoolean(String field, TokenStream stream, @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { - checkForPositions(field); - return fieldType.phraseQuery(stream, slop, enablePositionIncrements); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -618,8 +617,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws @Override protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { try { - checkForPositions(field); - return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -630,10 +628,7 @@ protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) t private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, int positionCount) throws IOException { try { - if (positionCount > 1) { - checkForPositions(field); - } - return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -783,11 +778,5 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true); } } - - private void checkForPositions(String field) { - if (fieldType.getTextSearchInfo().hasPositions() == false) { - throw new IllegalStateException("field:[" + field + "] was 
indexed without position data; cannot run PhraseQuery"); - } - } } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java index 9715ae02c9777..d29500b21b79c 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java @@ -212,7 +212,7 @@ protected Query newPrefixQuery(Term term) { protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } @@ -225,7 +225,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java index a76aa7b606c98..99815fc85d283 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java @@ -256,17 +256,17 @@ public void 
testTermsQueryInLoop() { public void testPhraseQueryIsError() { assumeTrue("Impl does not support term queries", supportsTermQueries()); - assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().phraseQuery(null, 1, false)); + assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().phraseQuery(null, 1, false, null)); } public void testPhrasePrefixQueryIsError() { assumeTrue("Impl does not support term queries", supportsTermQueries()); - assertQueryOnlyOnText("phrase prefix", () -> simpleMappedFieldType().phrasePrefixQuery(null, 1, 1)); + assertQueryOnlyOnText("phrase prefix", () -> simpleMappedFieldType().phrasePrefixQuery(null, 1, 1, null)); } public void testMultiPhraseQueryIsError() { assumeTrue("Impl does not support term queries", supportsTermQueries()); - assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().multiPhraseQuery(null, 1, false)); + assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().multiPhraseQuery(null, 1, false, null)); } public void testSpanPrefixQueryIsError() { diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java index 7f3a328b98503..ae451def9b3f0 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java @@ -112,7 +112,7 @@ public void testBadAnalyzer() throws IOException { public void testPhraseOnFieldWithNoTerms() { MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder(DATE_FIELD_NAME, "three term phrase"); matchQuery.analyzer("whitespace"); - expectThrows(IllegalStateException.class, () -> matchQuery.doToQuery(createSearchExecutionContext())); + expectThrows(IllegalArgumentException.class, () -> matchQuery.doToQuery(createSearchExecutionContext())); } public void testPhrasePrefixZeroTermsQuery() throws 
IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index 16104bdcdfa56..465af088357b0 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.index.mapper; +import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.lookup.SourceLookup; import org.elasticsearch.test.ESTestCase; @@ -31,6 +32,11 @@ protected SearchExecutionContext randomMockContext() { private static SearchExecutionContext createMockSearchExecutionContext(boolean allowExpensiveQueries) { SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); when(searchExecutionContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries); + when(searchExecutionContext.isSourceEnabled()).thenReturn(true); + SourceLookup sourceLookup = mock(SourceLookup.class); + SearchLookup searchLookup = mock(SearchLookup.class); + when(searchLookup.source()).thenReturn(sourceLookup); + when(searchExecutionContext.lookup()).thenReturn(searchLookup); return searchExecutionContext; } From 226d4bfdb735036dc2e11f19675935a03edae8b7 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 22 Apr 2021 09:05:44 +0200 Subject: [PATCH 2/4] Fix compilation. 
--- .../org/elasticsearch/index/query/SourceConfirmedTextQuery.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java index baba8b0345f1f..3cb13fb285763 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java @@ -233,7 +233,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo } } final SimScorer simScorer = searcher.getSimilarity() - .scorer(boost, searcher.collectionStatistics(field), termStats.toArray(TermStatistics[]::new)); + .scorer(boost, searcher.collectionStatistics(field), termStats.toArray(new TermStatistics[0])); final Weight approximationWeight = searcher.createWeight(approximate(in), ScoreMode.COMPLETE_NO_SCORES, 1f); return new Weight(this) { From 60c05809e8e9bd1bf260c34341b763571d5b8d1e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 22 Apr 2021 09:53:41 +0200 Subject: [PATCH 3/4] Fix compilation. 
--- .../index/mapper/MatchOnlyTextFieldTypeTests.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java index 8c3cd70c8af27..f187c8c2cf45e 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase { @@ -107,9 +108,9 @@ public void testFuzzyQuery() { public void testFetchSourceValue() throws IOException { MatchOnlyTextFieldType fieldType = new MatchOnlyTextFieldType("field"); - assertEquals(List.of("value"), fetchSourceValue(fieldType, "value")); - assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L)); - assertEquals(List.of("true"), fetchSourceValue(fieldType, true)); + assertEquals(Collections.singletonList("value"), fetchSourceValue(fieldType, "value")); + assertEquals(Collections.singletonList("42"), fetchSourceValue(fieldType, 42L)); + assertEquals(Collections.singletonList("true"), fetchSourceValue(fieldType, true)); } private Query unwrapPositionalQuery(Query query) { From 3fc608d07bcccfe02c524bf46572ef1a98e64a55 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 22 Apr 2021 10:05:59 +0200 Subject: [PATCH 4/4] Fix compilation. 
--- .../index/mapper/MatchOnlyTextFieldMapperTests.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java index dfb76b663695a..ad9835c42be60 100644 --- a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; -import java.util.List; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -35,7 +34,7 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase { @Override protected Collection getPlugins() { - return List.of(new MapperExtrasPlugin()); + return Collections.singleton(new MapperExtrasPlugin()); } @Override