From 0bfd3877c9e887c108c3286f24ba21a1a9a0d443 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 7 Dec 2020 10:12:39 +0100 Subject: [PATCH 01/22] Add `match_only_text`, a space-efficient variant of `text`. This adds a new `match_only_text` field, which indexes the same data as a `text` field that has `index_options: docs` and `norms: false` and uses the `_source` for positional queries like `match_phrase`. Unlike `text`, this field doesn't support scoring. An alternative to this new field could have been to make the `text` field still able to run positional queries when positions are not indexed, but I like this new field better because it avoids questions around how scoring should perform. --- docs/reference/mapping/types.asciidoc | 3 + .../mapping/types/match-only-text.asciidoc | 79 ++++ .../mapper/SearchAsYouTypeFieldMapper.java | 35 +- .../common/CheckedIntFunction.java | 25 ++ .../index/mapper/FieldMapper.java | 2 +- .../index/mapper/MappedFieldType.java | 8 +- .../index/mapper/TextFieldMapper.java | 28 +- .../index/search/MatchQuery.java | 17 +- .../index/search/MultiMatchQuery.java | 4 +- .../MatchPhrasePrefixQueryBuilderTests.java | 2 +- .../mapper-match-only-text/build.gradle | 16 + .../mapper/MatchOnlyTextFieldMapperTests.java | 235 ++++++++++++ .../MatchOnlyTextMapperPlugin.java | 24 ++ .../mapper/MatchOnlyTextFieldMapper.java | 302 +++++++++++++++ .../query/SourceConfirmedTextQuery.java | 345 ++++++++++++++++++ .../mapper/MatchOnlyTextFieldTypeTests.java | 123 +++++++ .../query/SourceConfirmedTextQueryTests.java | 325 +++++++++++++++++ .../mapper/AbstractScriptFieldType.java | 6 +- .../test/match_only_text/10_basic.yml | 161 ++++++++ 19 files changed, 1703 insertions(+), 37 deletions(-) create mode 100644 docs/reference/mapping/types/match-only-text.asciidoc create mode 100644 server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java create mode 100644 x-pack/plugin/mapper-match-only-text/build.gradle create mode 100644 
x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java create mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java create mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java create mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java create mode 100644 x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java create mode 100644 x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index 97f6454cb716f..1549eacdcac1b 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -69,6 +69,7 @@ values. ==== Text search types <>:: Analyzed, unstructured text. +<>:: A more space-efficient variant of `text`. {plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special markup. Used for identifying named entities. <>:: Used for auto-complete suggestions. 
@@ -154,6 +155,8 @@ include::types/parent-join.asciidoc[] include::types/keyword.asciidoc[] +include::types/match-only-text.asciidoc[] + include::types/nested.asciidoc[] include::types/numeric.asciidoc[] diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc new file mode 100644 index 0000000000000..ccc13fb69fac0 --- /dev/null +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -0,0 +1,79 @@ +[role="xpack"] +[testenv="basic"] + +[discrete] +[[match-only-text]] +=== Match-only text field type + +A variant of <<text,`text`>> that trades scoring and efficiency of positional +queries for space efficiency. This field effectively stores data the same way as +a `text` field that only indexes documents (`index_options: docs`) and disables +norms (`norms: false`). Term queries perform as fast as, if not faster than, on `text` +fields; however, queries that need positions, such as the +<<query-dsl-match-query-phrase,`match_phrase` query>>, perform slower as they +need to look at the `_source` document to verify whether a phrase matches. All +queries return constant scores that are equal to 1.0. + +[source,console] +-------------------------------- +PUT logs +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "message": { + "type": "match_only_text" + } + } + } +} +-------------------------------- + +`match_only_text` supports the same queries as `text`. And like `text`, it +doesn't support sorting or aggregating. + +[discrete] +[[match-only-text-params]] +==== Parameters for match-only text fields + +The following mapping parameters are accepted: + +[horizontal] + +<<analyzer,`analyzer`>>:: + + The <<analysis,analyzer>> which should be used for + the `match_only_text` field, both at index-time and at + search-time (unless overridden by the <<search-analyzer,`search_analyzer`>>). + Defaults to the default index analyzer, or the + <<analysis-standard-analyzer,`standard` analyzer>>.
+ +<<multi-fields,`fields`>>:: + + Multi-fields allow the same string value to be indexed in multiple ways for + different purposes, such as one field for search and a multi-field for + sorting and aggregations, or the same string value analyzed by different + analyzers. + +<<mapping-field-meta,`meta`>>:: + + Metadata about the field. + +<<search-analyzer,`search_analyzer`>>:: + + The <<analyzer,`analyzer`>> that should be used at search time on + the `match_only_text` field. Defaults to the `analyzer` setting. + +<<search-quote-analyzer,`search_quote_analyzer`>>:: + + The <<analyzer,`analyzer`>> that should be used at search time when a + phrase is encountered. Defaults to the `search_analyzer` setting. + +<<mapping-store,`store`>>:: + + Whether the field value should be stored and retrievable separately from + the <<mapping-source-field,`_source`>> field. Accepts `true` or `false` + (default). + diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java index d4e236003696c..48b083a568cfb 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java @@ -287,38 +287,52 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool } } + private void checkForPositions() { + if (getTextSearchInfo().hasPositions() == false) { + throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery"); + } + } + @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + QueryShardContext context) throws IOException { + checkForPositions(); int numPos = countPosition(stream); if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } final ShingleFieldType shingleField =
shingleFieldForPositions(numPos); stream = new FixedShingleFilter(stream, shingleField.shingleSize); - return shingleField.phraseQuery(stream, 0, true); + return shingleField.phraseQuery(stream, 0, true, context); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + QueryShardContext context) throws IOException { + checkForPositions(); int numPos = countPosition(stream); if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } final ShingleFieldType shingleField = shingleFieldForPositions(numPos); stream = new FixedShingleFilter(stream, shingleField.shingleSize); - return shingleField.multiPhraseQuery(stream, 0, true); + return shingleField.multiPhraseQuery(stream, 0, true, context); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, + QueryShardContext context) throws IOException { int numPos = countPosition(stream); + if (numPos > 1) { + checkForPositions(); + } if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, null, null); } final ShingleFieldType shingleField = shingleFieldForPositions(numPos); stream = new FixedShingleFilter(stream, shingleField.shingleSize); - return shingleField.phrasePrefixQuery(stream, 0, maxExpansions); + return shingleField.phrasePrefixQuery(stream, 0, maxExpansions, context); } @Override @@ -513,17 +527,20 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool } @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws 
IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + QueryShardContext context) throws IOException { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + QueryShardContext context) throws IOException { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, + QueryShardContext context) throws IOException { final String prefixFieldName = slop > 0 ? null : prefixFieldType.name(); diff --git a/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java new file mode 100644 index 0000000000000..aac8ea7e960da --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java @@ -0,0 +1,25 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common; + +@FunctionalInterface +public interface CheckedIntFunction { + T apply(int input) throws E; +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index b90fb8ff8fe81..e28da5389aab6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -670,7 +670,7 @@ private void merge(FieldMapper toMerge, Conflicts conflicts) { } } - protected void toXContent(XContentBuilder builder, boolean includeDefaults) throws IOException { + public void toXContent(XContentBuilder builder, boolean includeDefaults) throws IOException { if (serializerCheck.check(includeDefaults, isConfigured(), get())) { serializer.serialize(builder, name, getValue()); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 02550b18c23f1..d42af7dcfd4b6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -72,7 +72,6 @@ public abstract class MappedFieldType { private final boolean isStored; private final TextSearchInfo textSearchInfo; private final Map meta; - private NamedAnalyzer indexAnalyzer; private boolean eagerGlobalOrdinals; public MappedFieldType(String name, boolean isIndexed, boolean isStored, @@ -266,17 +265,18 @@ public Query existsQuery(QueryShardContext context) { } } - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { throw 
new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + QueryShardContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index e0b8e9a9cf77d..599e0cf6f01e6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -697,9 +697,17 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered, return builder.analyzeText(text, maxGaps, ordered); } + private void checkForPositions() { + if (getTextSearchInfo().hasPositions() == false) { + throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery"); + } + } + @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, + QueryShardContext queryShardContext) throws IOException { String field = name(); + checkForPositions(); // we can't use the 
index_phrases shortcut with slop, if there are gaps in the stream, // or if the incoming token stream is the output of a token graph due to // https://issues.apache.org/jira/browse/LUCENE-8916 @@ -732,7 +740,8 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncremen } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + QueryShardContext context) throws IOException { String field = name(); if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); @@ -741,8 +750,21 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi return createPhraseQuery(stream, field, slop, enablePositionIncrements); } + private int countTokens(TokenStream ts) throws IOException { + ts.reset(); + int count = 0; + while (ts.incrementToken()) { + count++; + } + ts.end(); + return count; + } + @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + if (countTokens(stream) > 1) { + checkForPositions(); + } return analyzePhrasePrefix(stream, slop, maxExpansions); } diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 8df437c1b5752..b91ccc1ac9c68 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -632,8 +632,7 @@ private Query analyzeMultiBoolean(String field, TokenStream stream, @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { - checkForPositions(field); - return 
fieldType.phraseQuery(stream, slop, enablePositionIncrements); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -645,8 +644,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws @Override protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { try { - checkForPositions(field); - return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -657,10 +655,7 @@ protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) t private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, int positionCount) throws IOException { try { - if (positionCount > 1) { - checkForPositions(field); - } - return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -810,11 +805,5 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true); } } - - private void checkForPositions(String field) { - if (fieldType.getTextSearchInfo().hasPositions() == false) { - throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); - } - } } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 963c24dffa10b..f29672015fd83 100644 --- 
a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -210,7 +210,7 @@ protected Query newPrefixQuery(Term term) { protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } @@ -223,7 +223,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java index 4ad364707af92..b80ed053a66fe 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java @@ -124,7 +124,7 @@ public void testBadAnalyzer() throws IOException { public void testPhraseOnFieldWithNoTerms() { MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder(DATE_FIELD_NAME, "three term phrase"); matchQuery.analyzer("whitespace"); - expectThrows(IllegalStateException.class, () -> 
matchQuery.doToQuery(createShardContext())); + expectThrows(IllegalArgumentException.class, () -> matchQuery.doToQuery(createShardContext())); } public void testPhrasePrefixZeroTermsQuery() throws IOException { diff --git a/x-pack/plugin/mapper-match-only-text/build.gradle b/x-pack/plugin/mapper-match-only-text/build.gradle new file mode 100644 index 0000000000000..630ae2d18c4d9 --- /dev/null +++ b/x-pack/plugin/mapper-match-only-text/build.gradle @@ -0,0 +1,16 @@ +apply plugin: 'elasticsearch.esplugin' +apply plugin: 'elasticsearch.internal-cluster-test' + +esplugin { + name 'match-only-text' + description 'Module for the match-only-text field type, which is a specialization of text field for the case when scoring is not needed and space efficiency is important.' + classname 'org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin' + extendedPlugins = ['x-pack-core'] +} +archivesBaseName = 'x-pack-match-only-text' + +dependencies { + compileOnly project(path: xpackModule('core'), configuration: 'default') + internalClusterTestImplementation project(path: xpackModule('core'), configuration: 'testArtifacts') +} + diff --git a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java new file mode 100644 index 0000000000000..d29d95f182fed --- /dev/null +++ b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java @@ -0,0 +1,235 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +package org.elasticsearch.xpack.matchonlytext.mapper; + +import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.CustomAnalyzer; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.StandardTokenizerFactory; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; +import static 
org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class MatchOnlyTextFieldMapperTests extends MapperTestCase { + + @Override + protected Collection getPlugins() { + return List.of(new MatchOnlyTextMapperPlugin()); + } + + @Override + protected Object getSampleValueForDocument() { + return "value"; + } + + public final void testExists() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); })); + assertExistsQuery(mapperService); + assertParseMinimalWarnings(); + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerUpdateCheck(b -> { + b.field("analyzer", "default"); + b.field("search_analyzer", "keyword"); + }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name())); + checker.registerUpdateCheck(b -> { + b.field("analyzer", "default"); + b.field("search_analyzer", "keyword"); + b.field("search_quote_analyzer", "keyword"); + }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name())); + + checker.registerConflictCheck("store", b -> b.field("store", true)); + checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword")); + } + + @Override + protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { + NamedAnalyzer dflt = new NamedAnalyzer( + "default", + AnalyzerScope.INDEX, + new StandardAnalyzer(), + TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + ); + NamedAnalyzer standard = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer()); + NamedAnalyzer keyword = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()); + NamedAnalyzer whitespace = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()); + NamedAnalyzer stop = new NamedAnalyzer( + "my_stop_analyzer", + AnalyzerScope.INDEX, + new CustomAnalyzer( + new 
StandardTokenizerFactory(indexSettings, null, "standard", indexSettings.getSettings()), + new CharFilterFactory[0], + new TokenFilterFactory[] { new TokenFilterFactory() { + @Override + public String name() { + return "stop"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); + } + } } + ) + ); + return new IndexAnalyzers( + Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop), + Map.of(), + Map.of() + ); + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "match_only_text"); + } + + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals("1234", fields[0].stringValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + } + + public void testEnableStore() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("store", true))); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", 
"1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0].fieldType().stored()); + } + + public void testSearchAnalyzerSerialization() throws IOException { + XContentBuilder mapping = fieldMapping( + b -> b.field("type", "match_only_text").field("analyzer", "standard").field("search_analyzer", "keyword") + ); + assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); + + // special case: default index analyzer + mapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "default").field("search_analyzer", "keyword")); + assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); + + // special case: default search analyzer + mapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "keyword").field("search_analyzer", "default")); + assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + createDocumentMapper(fieldMapping(this::minimalMapping)).toXContent( + builder, + new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")) + ); + builder.endObject(); + String mappingString = Strings.toString(builder); + assertTrue(mappingString.contains("analyzer")); + assertTrue(mappingString.contains("search_analyzer")); + assertTrue(mappingString.contains("search_quote_analyzer")); + } + + public void testSearchQuoteAnalyzerSerialization() throws IOException { + XContentBuilder mapping = fieldMapping( + b -> b.field("type", "match_only_text") + .field("analyzer", "standard") + .field("search_analyzer", "standard") + .field("search_quote_analyzer", "keyword") + ); + assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); + + // special case: default index/search analyzer + mapping = 
fieldMapping( + b -> b.field("type", "match_only_text") + .field("analyzer", "default") + .field("search_analyzer", "default") + .field("search_quote_analyzer", "keyword") + ); + assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); + } + + public void testNullConfigValuesFail() throws MapperParsingException { + Exception e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("store", (String) null))) + ); + assertThat(e.getMessage(), containsString("[store] on mapper [field] of type [match_only_text] must not have a [null] value")); + } + + public void testSimpleMerge() throws IOException { + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text").field("store", true)); + MapperService mapperService = createMapperService(startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); + + merge(mapperService, startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); + + XContentBuilder differentStore = fieldMapping(b -> b.field("type", "match_only_text").field("store", false)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentStore)); + assertThat(e.getMessage(), containsString("Cannot update parameter [store]")); + + XContentBuilder newField = mapping(b -> { + b.startObject("field") + .field("type", "match_only_text") + .field("store", true) + .startObject("meta") + .field("key", "value") + .endObject() + .endObject(); + b.startObject("other_field").field("type", "keyword").endObject(); + }); + merge(mapperService, newField); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); + 
assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class));
+    }
+}
diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java
new file mode 100644
index 0000000000000..148141593a7aa
--- /dev/null
+++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.matchonlytext;
+
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.plugins.MapperPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.xpack.matchonlytext.mapper.MatchOnlyTextFieldMapper;
+
+import java.util.Map;
+
+import static java.util.Collections.singletonMap;
+
+/**
+ * Plugin that registers the {@code match_only_text} field type.
+ */
+public class MatchOnlyTextMapperPlugin extends Plugin implements MapperPlugin {
+
+    /**
+     * Maps the {@code match_only_text} content type name to its type parser.
+     */
+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        return singletonMap(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER);
+    }
+
+}
diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java
new file mode 100644
index 0000000000000..8cef8f058548d
--- /dev/null
+++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java
@@ -0,0 +1,302 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V.
under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.matchonlytext.mapper;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Query;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.CheckedIntFunction;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.mapper.ContentPath;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.SourceValueFetcher;
+import org.elasticsearch.index.mapper.StringFieldType;
+import org.elasticsearch.index.mapper.TextFieldMapper;
+import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
+import org.elasticsearch.index.mapper.TextParams;
+import org.elasticsearch.index.mapper.TextSearchInfo;
+import org.elasticsearch.index.mapper.ValueFetcher;
+import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.lookup.SearchLookup;
+import org.elasticsearch.search.lookup.SourceLookup;
+import org.elasticsearch.xpack.matchonlytext.query.SourceConfirmedTextQuery;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+/**
+ * A {@link FieldMapper} for full-text fields that only indexes
+ * {@link IndexOptions#DOCS} and runs positional queries by looking at the
+ * _source.
+ */
+public class MatchOnlyTextFieldMapper extends FieldMapper {
+
+    public static final String CONTENT_TYPE = "match_only_text";
+
+    /**
+     * Lucene field configuration used as the template for every field of this type.
+     */
+    public static class Defaults {
+        public static final FieldType FIELD_TYPE = new FieldType();
+
+        static {
+            FIELD_TYPE.setTokenized(true);
+            FIELD_TYPE.setStored(false);
+            FIELD_TYPE.setStoreTermVectors(false);
+            // Norms are omitted and postings only record documents.
+            FIELD_TYPE.setOmitNorms(true);
+            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
+            FIELD_TYPE.freeze();
+        }
+
+    }
+
+    /**
+     * Returns the builder that the given mapper, which must be a
+     * {@link MatchOnlyTextFieldMapper}, was created from.
+     */
+    private static Builder builder(FieldMapper in) {
+        return ((MatchOnlyTextFieldMapper) in).builder;
+    }
+
+    /**
+     * Builder holding the configurable parameters of the field: {@code store},
+     * the three analyzers and {@code meta}.
+     */
+    public static class Builder extends FieldMapper.Builder {
+
+        private final Version indexCreatedVersion;
+
+        private final Parameter<Boolean> store = Parameter.storeParam(m -> builder(m).store.getValue(), false);
+
+        private final Parameter<Map<String, String>> meta = Parameter.metaParam();
+
+        private final TextParams.Analyzers analyzers;
+
+        public Builder(String name, IndexAnalyzers indexAnalyzers) {
+            this(name, Version.CURRENT, indexAnalyzers);
+        }
+
+        public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
+            super(name);
+            this.indexCreatedVersion = indexCreatedVersion;
+            this.analyzers = new TextParams.Analyzers(indexAnalyzers, m -> builder(m).analyzers);
+        }
+
+        public Builder store(boolean store) {
+            this.store.setValue(store);
+            return this;
+        }
+
+        public Builder addMultiField(FieldMapper.Builder builder) {
+            this.multiFieldsBuilder.add(builder);
+            return this;
+        }
+
+        @Override
+        protected List<Parameter<?>> getParameters() {
+            return Arrays.asList(store, analyzers.indexAnalyzer, analyzers.searchAnalyzer, analyzers.searchQuoteAnalyzer, meta);
+        }
+
+        /**
+         * Creates the search-time field type from the configured analyzers,
+         * {@code store} flag and {@code meta} map.
+         */
+        private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath contentPath) {
+            NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
+            NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
+            NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
+            TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer);
+            return new MatchOnlyTextFieldType(buildFullName(contentPath), store.getValue(), tsi, indexAnalyzer, meta.getValue());
+        }
+
+        @Override
+        public MatchOnlyTextFieldMapper build(ContentPath contentPath) {
+            // Start from the frozen defaults; only the stored flag is configurable.
+            FieldType fieldType = new FieldType(Defaults.FIELD_TYPE);
+            fieldType.setStored(store.get());
+            MatchOnlyTextFieldType tft = buildFieldType(fieldType, contentPath);
+            MultiFields multiFields = multiFieldsBuilder.build(this, contentPath);
+            return new MatchOnlyTextFieldMapper(name, fieldType, tft, analyzers.getIndexAnalyzer(), multiFields, copyTo.build(), this);
+        }
+    }
+
+    public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
+
+    /**
+     * Field type of {@code match_only_text} fields. Term queries are wrapped in a
+     * {@link ConstantScoreQuery}; phrase queries are built by an internal
+     * {@link TextFieldType} and wrapped in a {@link SourceConfirmedTextQuery}.
+     */
+    public static class MatchOnlyTextFieldType extends StringFieldType {
+
+        private final Analyzer indexAnalyzer;
+        // Only used to build the phrase queries that then get wrapped by toQuery.
+        private final TextFieldType textFieldType;
+
+        public MatchOnlyTextFieldType(String name, boolean stored, TextSearchInfo tsi, Analyzer indexAnalyzer, Map<String, String> meta) {
+            super(name, true, stored, false, tsi, meta);
+            this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
+            this.textFieldType = new TextFieldType(name);
+        }
+
+        /**
+         * Creates a field type that uses the standard analyzer for indexing and searching.
+         */
+        public MatchOnlyTextFieldType(String name, boolean stored, Map<String, String> meta) {
+            this(
+                name,
+                stored,
+                new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
+                Lucene.STANDARD_ANALYZER,
+                meta
+            );
+        }
+
+        /**
+         * Creates a non-stored field type with the standard analyzer and no metadata.
+         */
+        public MatchOnlyTextFieldType(String name) {
+            this(name, false, Collections.emptyMap());
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public String familyTypeName() {
+            return TextFieldMapper.CONTENT_TYPE;
+        }
+
+        @Override
+        public ValueFetcher valueFetcher(QueryShardContext context, String format) {
+            return SourceValueFetcher.toString(name(), context, format);
+        }
+
+        /**
+         * Wraps {@code query} in a constant-score {@link SourceConfirmedTextQuery}
+         * that reads this field's values through {@link #valueFetcher} and
+         * analyzes them with the index analyzer.
+         */
+        private Query toQuery(Query query, QueryShardContext queryShardContext) {
+            Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider = context -> {
+                // One source lookup and one value fetcher per segment.
+                SourceLookup sourceLookup = new SourceLookup();
+                ValueFetcher valueFetcher = valueFetcher(queryShardContext, null);
+                valueFetcher.setNextReader(context);
+                return docID -> {
+                    try {
+                        sourceLookup.setSegmentAndDocument(context, docID);
+                        return valueFetcher.fetchValues(sourceLookup);
+                    } catch (IOException e) {
+                        throw new UncheckedIOException(e);
+                    }
+                };
+            };
+            return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, valueFetcherProvider, indexAnalyzer));
+        }
+
+        @Override
+        public Query termQuery(Object value, QueryShardContext context) {
+            // Disable scoring
+            return new ConstantScoreQuery(super.termQuery(value, context));
+        }
+
+        @Override
+        public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext queryShardContext)
+            throws IOException {
+            final Query query = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext);
+            return toQuery(query, queryShardContext);
+        }
+
+        @Override
+        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext queryShardContext)
+            throws IOException {
+            final Query query = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext);
+            return toQuery(query, queryShardContext);
+        }
+
+        @Override
+        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext queryShardContext)
+            throws IOException {
+            final Query query = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext);
+            return toQuery(query, queryShardContext);
+        }
+
+        @Override
+        public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
+            throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
+        }
+
+    }
+
+    private final Builder builder;
+    private final FieldType fieldType;
+
+    private MatchOnlyTextFieldMapper(
+        String simpleName,
+        FieldType fieldType,
+        MatchOnlyTextFieldType mappedFieldType,
+        NamedAnalyzer indexAnalyzer,
+        MultiFields multiFields,
+        CopyTo copyTo,
+        Builder builder
+    ) {
+        super(simpleName, mappedFieldType, indexAnalyzer, multiFields, copyTo);
+        assert mappedFieldType.getTextSearchInfo().isTokenized();
+        assert mappedFieldType.hasDocValues() == false;
+        this.fieldType = fieldType;
+        this.builder = builder;
+    }
+
+    @Override
+    public FieldMapper.Builder getMergeBuilder() {
+        return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
+    }
+
+    /**
+     * Adds one Lucene field holding the text value and records the field name.
+     * A {@code null} value adds nothing.
+     */
+    @Override
+    protected void parseCreateField(ParseContext context) throws IOException {
+        final String value;
+        if (context.externalValueSet()) {
+            value = context.externalValue().toString();
+        } else {
+            value = context.parser().textOrNull();
+        }
+
+        if (value == null) {
+            return;
+        }
+
+        Field field = new Field(fieldType().name(), value, fieldType);
+        context.doc().add(field);
+        createFieldNamesField(context);
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+    @Override
+    public MatchOnlyTextFieldType fieldType() {
+        return (MatchOnlyTextFieldType) super.fieldType();
+    }
+
+    @Override
+    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
+        // this is a pain, but we have to do this to maintain BWC
+        builder.field("type", contentType());
+        this.builder.store.toXContent(builder, includeDefaults);
+        this.multiFields.toXContent(builder, params);
+        this.copyTo.toXContent(builder, params);
+        this.builder.meta.toXContent(builder, includeDefaults);
+        this.builder.analyzers.indexAnalyzer.toXContent(builder, includeDefaults);
+        this.builder.analyzers.searchAnalyzer.toXContent(builder, includeDefaults);
+        this.builder.analyzers.searchQuoteAnalyzer.toXContent(builder, includeDefaults);
+    }
+}
diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java
new file mode 100644
index 0000000000000..73a39a373d092
--- /dev/null
+++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java
@@ -0,0 +1,345 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.matchonlytext.query;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeafSimScorer;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.elasticsearch.common.CheckedIntFunction;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * A variant of {@link TermQuery}, {@link PhraseQuery}, {@link MultiPhraseQuery}
+ * and span queries that uses postings for its approximation, but falls back to
+ * stored fields or _source whenever term frequencies or positions are needed.
+ * This query matches and scores the same way as the wrapped query.
+ */
+public final class SourceConfirmedTextQuery extends Query {
+
+    /**
+     * Create an approximation for the given query. The returned approximation
+     * should match a superset of the matches of the provided query.
+     */
+    public static Query approximate(Query query) {
+        if (query instanceof TermQuery) {
+            return query;
+        } else if (query instanceof PhraseQuery) {
+            return approximate((PhraseQuery) query);
+        } else if (query instanceof MultiPhraseQuery) {
+            return approximate((MultiPhraseQuery) query);
+        } else {
+            // TODO: spans and intervals
+            return new MatchAllDocsQuery();
+        }
+    }
+
+    /**
+     * Approximates a phrase by the conjunction of its terms.
+     */
+    private static Query approximate(PhraseQuery query) {
+        BooleanQuery.Builder approximation = new BooleanQuery.Builder();
+        for (Term term : query.getTerms()) {
+            approximation.add(new TermQuery(term), Occur.FILTER);
+        }
+        return approximation.build();
+    }
+
+    /**
+     * Approximates a multi-phrase by a conjunction over positions of the
+     * disjunction of the terms accepted at each position.
+     */
+    private static Query approximate(MultiPhraseQuery query) {
+        BooleanQuery.Builder approximation = new BooleanQuery.Builder();
+        for (Term[] termArray : query.getTermArrays()) {
+            BooleanQuery.Builder approximationClause = new BooleanQuery.Builder();
+            for (Term term : termArray) {
+                approximationClause.add(new TermQuery(term), Occur.SHOULD);
+            }
+            approximation.add(approximationClause.build(), Occur.FILTER);
+        }
+        return approximation.build();
+    }
+
+    /**
+     * Similarity that produces the frequency as a score.
+     */
+    private static final Similarity FREQ_SIMILARITY = new Similarity() {
+
+        @Override
+        public long computeNorm(FieldInvertState state) {
+            return 1L;
+        }
+
+        @Override
+        public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+            return new SimScorer() {
+                @Override
+                public float score(float freq, long norm) {
+                    return freq;
+                }
+            };
+        }
+    };
+
+    private final Query in;
+    private final Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider;
+    private final Analyzer indexAnalyzer;
+
+    /**
+     * @param in                   the query to confirm against the original values
+     * @param valueFetcherProvider given a segment, returns a function that loads
+     *                             the field values of a document of that segment
+     * @param indexAnalyzer        the analyzer used to re-analyze loaded values
+     */
+    public SourceConfirmedTextQuery(
+        Query in,
+        Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider,
+        Analyzer indexAnalyzer
+    ) {
+        this.in = in;
+        this.valueFetcherProvider = valueFetcherProvider;
+        this.indexAnalyzer = indexAnalyzer;
+    }
+
+    @Override
+    public String toString(String field) {
+        return in.toString(field);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || obj.getClass() != getClass()) {
+            return false;
+        }
+        SourceConfirmedTextQuery that = (SourceConfirmedTextQuery) obj;
+        return Objects.equals(in, that.in)
+            && Objects.equals(valueFetcherProvider, that.valueFetcherProvider)
+            && Objects.equals(indexAnalyzer, that.indexAnalyzer);
+    }
+
+    @Override
+    public int hashCode() {
+        return 31 * Objects.hash(in, valueFetcherProvider, indexAnalyzer) + classHash();
+    }
+
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+        Query inRewritten = in.rewrite(reader);
+        if (inRewritten != in) {
+            return new SourceConfirmedTextQuery(inRewritten, valueFetcherProvider, indexAnalyzer);
+        } else if (in instanceof ConstantScoreQuery) {
+            // Pull constant-score and boost wrappers out so that this query wraps the inner query directly.
+            Query sub = ((ConstantScoreQuery) in).getQuery();
+            return new ConstantScoreQuery(new SourceConfirmedTextQuery(sub, valueFetcherProvider, indexAnalyzer));
+        } else if (in instanceof BoostQuery) {
+            Query sub = ((BoostQuery) in).getQuery();
+            float boost = ((BoostQuery) in).getBoost();
+            return new BoostQuery(new SourceConfirmedTextQuery(sub, valueFetcherProvider, indexAnalyzer), boost);
+        } else if (in instanceof MatchNoDocsQuery) {
+            return in; // e.g. empty phrase query
+        }
+        return super.rewrite(reader);
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+        if (scoreMode.needsScores() == false && in instanceof TermQuery) {
+            // No need to ever look at the _source for non-scoring term queries
+            return in.createWeight(searcher, scoreMode, boost);
+        }
+
+        final Set<Term> terms = new HashSet<>();
+        in.visit(QueryVisitor.termCollector(terms));
+        if (terms.isEmpty()) {
+            throw new IllegalStateException("Query " + in + " doesn't have any term");
+        }
+        final String field = terms.iterator().next().field();
+        final Map<Term, TermStates> termStates = new HashMap<>();
+        final List<TermStatistics> termStats = new ArrayList<>();
+        for (Term term : terms) {
+            TermStates ts = termStates.computeIfAbsent(term, t -> {
+                try {
+                    return TermStates.build(searcher.getTopReaderContext(), t, scoreMode.needsScores());
+                } catch (IOException e) {
+                    throw new UncheckedIOException(e);
+                }
+            });
+            if (scoreMode.needsScores()) {
+                if (ts.docFreq() > 0) {
+                    termStats.add(searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
+                }
+            } else {
+                // Scores are not needed, so placeholder statistics are good enough.
+                termStats.add(new TermStatistics(term.bytes(), 1, 1L));
+            }
+        }
+
+        final CollectionStatistics collectionStats = searcher.collectionStatistics(field);
+        final SimScorer simScorer;
+        final Weight approximationWeight;
+        if (collectionStats == null) {
+            // The field has no indexed terms, so nothing can match. Don't hand
+            // null statistics to the similarity.
+            simScorer = null;
+            approximationWeight = null;
+        } else {
+            simScorer = searcher.getSimilarity().scorer(boost, collectionStats, termStats.toArray(TermStatistics[]::new));
+            approximationWeight = searcher.createWeight(approximate(in), ScoreMode.COMPLETE_NO_SCORES, 1f);
+        }
+
+        return new Weight(this) {
+
+            @Override
+            public boolean isCacheable(LeafReaderContext ctx) {
+                // Don't cache queries that may perform linear scans
+                return false;
+            }
+
+            @Override
+            public void extractTerms(Set<Term> termSet) {
+                termSet.addAll(terms);
+            }
+
+            @Override
+            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+                RuntimePhraseScorer scorer = scorer(context);
+                if (scorer == null) {
+                    return Explanation.noMatch("No matching phrase");
+                }
+                final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+                if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
+                    return Explanation.noMatch("No matching phrase");
+                }
+                float phraseFreq = scorer.freq();
+                Explanation freqExplanation = Explanation.match(phraseFreq, "phraseFreq=" + phraseFreq);
+                final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, context.reader(), field, scoreMode.needsScores());
+                Explanation scoreExplanation = leafSimScorer.explain(doc, freqExplanation);
+                return Explanation.match(
+                    scoreExplanation.getValue(),
+                    "weight(" + getQuery() + " in " + doc + ") [" + searcher.getSimilarity().getClass().getSimpleName() + "], result of:",
+                    scoreExplanation
+                );
+            }
+
+            @Override
+            public RuntimePhraseScorer scorer(LeafReaderContext context) throws IOException {
+                if (approximationWeight == null) {
+                    // The field has no indexed terms anywhere in the index.
+                    return null;
+                }
+                final Scorer approximationScorer = approximationWeight.scorer(context);
+                if (approximationScorer == null) {
+                    return null;
+                }
+                final DocIdSetIterator approximation = approximationScorer.iterator();
+                final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, context.reader(), field, scoreMode.needsScores());
+                final CheckedIntFunction<List<Object>, IOException> valueFetcher = valueFetcherProvider.apply(context);
+                return new RuntimePhraseScorer(this, approximation, leafSimScorer, valueFetcher, field, in);
+            }
+
+        };
+    }
+
+    /**
+     * Scorer that confirms the candidates of the approximation by loading the
+     * field values of each candidate, indexing them into a {@link MemoryIndex}
+     * and running the wrapped query against it.
+     */
+    private class RuntimePhraseScorer extends Scorer {
+
+        private final LeafSimScorer scorer;
+        private final CheckedIntFunction<List<Object>, IOException> valueFetcher;
+        private final String field;
+        private final Query query;
+        private final TwoPhaseIterator twoPhase;
+
+        // Document that the cached frequency below was computed for.
+        private int doc = -1;
+        private float freq;
+
+        private RuntimePhraseScorer(
+            Weight weight,
+            DocIdSetIterator approximation,
+            LeafSimScorer scorer,
+            CheckedIntFunction<List<Object>, IOException> valueFetcher,
+            String field,
+            Query query
+        ) {
+            super(weight);
+            this.scorer = scorer;
+            this.valueFetcher = valueFetcher;
+            this.field = field;
+            this.query = query;
+            twoPhase = new TwoPhaseIterator(approximation) {
+
+                @Override
+                public boolean matches() throws IOException {
+                    return freq() > 0;
+                }
+
+                @Override
+                public float matchCost() {
+                    // TODO what is a right value?
+                    // Defaults to a high-ish value so that it likely runs last.
+                    return 10_000f;
+                }
+
+            };
+        }
+
+        @Override
+        public DocIdSetIterator iterator() {
+            return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
+        }
+
+        @Override
+        public TwoPhaseIterator twoPhaseIterator() {
+            return twoPhase;
+        }
+
+        @Override
+        public float getMaxScore(int upTo) throws IOException {
+            return scorer.getSimScorer().score(Float.MAX_VALUE, 1L);
+        }
+
+        @Override
+        public float score() throws IOException {
+            return scorer.score(docID(), freq());
+        }
+
+        @Override
+        public int docID() {
+            return twoPhase.approximation().docID();
+        }
+
+        /**
+         * Returns the frequency of the current document, computing it at most
+         * once per document.
+         */
+        private float freq() throws IOException {
+            if (doc != docID()) {
+                doc = docID();
+                freq = computeFreq();
+            }
+            return freq;
+        }
+
+        /**
+         * Sums, over the non-null values of the current document, the score of
+         * the query against a single-value {@link MemoryIndex}. The index uses
+         * {@link #FREQ_SIMILARITY}, whose score is the frequency.
+         */
+        private float computeFreq() throws IOException {
+            MemoryIndex index = new MemoryIndex();
+            index.setSimilarity(FREQ_SIMILARITY);
+            List<Object> values = valueFetcher.apply(docID());
+            float freq = 0;
+            for (Object value : values) {
+                if (value == null) {
+                    continue;
+                }
+                index.addField(field, value.toString(), indexAnalyzer);
+                freq += index.search(query);
+                index.reset();
+            }
+            return freq;
+        }
+    }
+
+}
diff --git a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java
new file mode 100644
index 0000000000000..759e1e81e031d
--- /dev/null
+++ b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements.
Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.matchonlytext.mapper;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.common.lucene.search.AutomatonQueries;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.index.mapper.FieldTypeTestCase;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
+import org.elasticsearch.xpack.matchonlytext.mapper.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Checks the queries that {@link MatchOnlyTextFieldType} builds for term-level
+ * query types, and how it fetches values from the source.
+ */
+public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase {
+
+    private static final String NOT_INDEXED_MESSAGE = "Cannot search on field [field] since it is not indexed.";
+
+    /**
+     * Returns a {@link TextFieldType} named {@code field}, built with both
+     * boolean constructor flags set to {@code false}; every query on it is
+     * expected to fail with {@link #NOT_INDEXED_MESSAGE}.
+     */
+    private static MappedFieldType unsearchableTextField() {
+        return new TextFieldType("field", false, false, Collections.emptyMap());
+    }
+
+    public void testTermQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        // Term queries are wrapped so that they produce constant scores.
+        assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
+        assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null));
+
+        MappedFieldType unsearchable = unsearchableTextField();
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null));
+        assertEquals(NOT_INDEXED_MESSAGE, e.getMessage());
+    }
+
+    public void testTermsQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        List<BytesRef> terms = new ArrayList<>();
+        terms.add(new BytesRef("foo"));
+        terms.add(new BytesRef("bar"));
+        assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null));
+
+        MappedFieldType unsearchable = unsearchableTextField();
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null)
+        );
+        assertEquals(NOT_INDEXED_MESSAGE, e.getMessage());
+    }
+
+    public void testRangeQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(
+            new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false),
+            ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC)
+        );
+
+        ElasticsearchException ee = expectThrows(
+            ElasticsearchException.class,
+            () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC_DISALLOW_EXPENSIVE)
+        );
+        assertEquals(
+            "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.",
+            ee.getMessage()
+        );
+    }
+
+    public void testRegexpQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
+
+        MappedFieldType unsearchable = unsearchableTextField();
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC)
+        );
+        assertEquals(NOT_INDEXED_MESSAGE, e.getMessage());
+
+        ElasticsearchException ee = expectThrows(
+            ElasticsearchException.class,
+            () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE)
+        );
+        assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
+    }
+
+    public void testFuzzyQuery() {
+        MappedFieldType ft = new MatchOnlyTextFieldType("field");
+        assertEquals(
+            new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true),
+            ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC)
+        );
+
+        MappedFieldType unsearchable = unsearchableTextField();
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC)
+        );
+        assertEquals(NOT_INDEXED_MESSAGE, e.getMessage());
+
+        ElasticsearchException ee = expectThrows(
+            ElasticsearchException.class,
+            () -> ft.fuzzyQuery("foo", Fuzziness.AUTO, randomInt(10) + 1, randomInt(10) + 1, randomBoolean(), MOCK_QSC_DISALLOW_EXPENSIVE)
+        );
+        assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
+    }
+
+    public void testFetchSourceValue() throws IOException {
+        MatchOnlyTextFieldType fieldType = new MatchOnlyTextFieldType("field");
+
+        // Non-string source values come back as their string form.
+        assertEquals(List.of("value"), fetchSourceValue(fieldType, "value"));
+        assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L));
+        assertEquals(List.of("true"), fetchSourceValue(fieldType, true));
+    }
+
+}
diff --git a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java
new file mode 100644
index 0000000000000..6e559f033bae3
--- /dev/null
+++ b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java
@@ -0,0 +1,325 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V.
under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.matchonlytext.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +public class SourceConfirmedTextQueryTests extends ESTestCase { + + private static final Function, IOException>> SOURCE_FETCHER_PROVIDER = context -> { + return docID -> Collections.singletonList(context.reader().document(docID).get("body")); + }; + + public void testTerm() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", 
Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + TermQuery query = new TermQuery(new Term("body", "c")); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + ScoreDoc[] phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Term query with missing term + query = new TermQuery(new Term("body", "e")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testPhrase() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + PhraseQuery query = new 
PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + ScoreDoc[] phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Sloppy phrase query + query = new PhraseQuery(1, "body", "b", "d"); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Phrase query with no matches + query = new PhraseQuery("body", "d", "c"); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Phrase query with one missing term + query = new PhraseQuery("body", "b", "e"); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testMultiPhrase() throws Exception 
{ + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + MultiPhraseQuery query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "a"), new Term("body", "b") }, 0) + .add(new Term[] { new Term("body", "c") }, 1) + .build(); + + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + ScoreDoc[] phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Sloppy multi phrase query + query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "a"), new Term("body", "b") }, 0) + .add(new Term[] { new Term("body", "d") }, 1) + .setSlop(1) + .build(); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + phraseHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, phraseHits.length); + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phraseHits, sourceConfirmedHits); + 
CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Multi phrase query with no matches + query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "d"), new Term("body", "c") }, 0) + .add(new Term[] { new Term("body", "a") }, 1) + .build(); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Multi phrase query with one missing term + query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "d"), new Term("body", "c") }, 0) + .add(new Term[] { new Term("body", "e") }, 1) + .build(); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testSpanNear() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b c b a b c", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b c d", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + SpanNearQuery query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "b")), new SpanTermQuery(new Term("body", "c")) }, + 0, + false + ); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, 
Lucene.STANDARD_ANALYZER); + + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + ScoreDoc[] spanHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, spanHits.length); + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, spanHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Sloppy span near query + query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "b")), new SpanTermQuery(new Term("body", "c")) }, + 1, + false + ); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + spanHits = searcher.search(query, 10).scoreDocs; + assertEquals(2, spanHits.length); + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, spanHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + + // Span near query with no matches + query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "a")), new SpanTermQuery(new Term("body", "d")) }, + 0, + false + ); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Span near query with one missing term + query = new SpanNearQuery( + new SpanQuery[] { new SpanTermQuery(new Term("body", "b")), new SpanTermQuery(new Term("body", "e")) }, + 0, + false + ); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), 
searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + + public void testToString() { + PhraseQuery query = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(query.toString(), sourceConfirmedPhraseQuery.toString()); + } + + public void testEqualsHashCode() { + PhraseQuery query1 = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery1 = new SourceConfirmedTextQuery(query1, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + + assertEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery1); + assertEquals(sourceConfirmedPhraseQuery1.hashCode(), sourceConfirmedPhraseQuery1.hashCode()); + + PhraseQuery query2 = new PhraseQuery("body", "b", "c"); + Query sourceConfirmedPhraseQuery2 = new SourceConfirmedTextQuery(query2, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery2); + + PhraseQuery query3 = new PhraseQuery("body", "b", "d"); + Query sourceConfirmedPhraseQuery3 = new SourceConfirmedTextQuery(query3, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertNotEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery3); + + Query sourceConfirmedPhraseQuery4 = new SourceConfirmedTextQuery(query1, context -> null, Lucene.STANDARD_ANALYZER); + assertNotEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery4); + + Query sourceConfirmedPhraseQuery5 = new SourceConfirmedTextQuery(query1, SOURCE_FETCHER_PROVIDER, Lucene.KEYWORD_ANALYZER); + assertNotEquals(sourceConfirmedPhraseQuery1, sourceConfirmedPhraseQuery5); + } + + public void testApproximation() { + assertEquals( + new TermQuery(new Term("body", "text")), + SourceConfirmedTextQuery.approximate(new TermQuery(new Term("body", "text"))) + ); + + assertEquals( + new 
BooleanQuery.Builder().add(new TermQuery(new Term("body", "a")), Occur.FILTER) + .add(new TermQuery(new Term("body", "b")), Occur.FILTER) + .build(), + SourceConfirmedTextQuery.approximate(new PhraseQuery("body", "a", "b")) + ); + + MultiPhraseQuery query = new MultiPhraseQuery.Builder().add(new Term("body", "a")) + .add(new Term[] { new Term("body", "b"), new Term("body", "c") }) + .build(); + Query approximation = new BooleanQuery.Builder().add( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "a")), Occur.SHOULD).build(), + Occur.FILTER + ) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "b")), Occur.SHOULD) + .add(new TermQuery(new Term("body", "c")), Occur.SHOULD) + .build(), + Occur.FILTER + ) + .build(); + assertEquals(approximation, SourceConfirmedTextQuery.approximate(query)); + } +} diff --git a/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java b/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java index 67f880e05287f..a40de6238661d 100644 --- a/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java +++ b/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java @@ -158,17 +158,17 @@ public Query regexpQuery( } @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) { throw new IllegalArgumentException(unsupported("phrase", "text")); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) { throw new 
IllegalArgumentException(unsupported("phrase", "text")); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) { throw new IllegalArgumentException(unsupported("phrase prefix", "text")); } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml new file mode 100644 index 0000000000000..8a33dc22af768 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -0,0 +1,161 @@ +setup: + + - skip: + version: " - 7.99.99" + reason: "match_only_text was added in 7.11" + + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: {} + + - do: + index: + index: test + id: 2 + body: { "foo": "Apache Lucene powers Elasticsearch" } + + - do: + index: + index: test + id: 3 + body: { "foo": "Elasticsearch is based on Apache Lucene" } + + - do: + indices.refresh: {} + +--- +"Field caps": + + - do: + field_caps: + index: test + fields: [ foo ] + + - match: { fields.foo.text.searchable: true } + - match: { fields.foo.text.aggregatable: false } + +--- +"Exist query": + + - do: + search: + index: test + body: + query: + exists: + field: foo + + - match: { "hits.total.value": 2 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Match query": + + - do: + search: + index: test + body: + query: + match: + foo: powers + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Match Phrase query": + + - do: + search: + index: test + body: + query: + match_phrase: + foo: "lucene powers" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Match Phrase Prefix query": + + - do: + search: + index: test + body: + query: + 
match_phrase_prefix: + foo: "lucene pow" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Query String query with phrase": + + - do: + search: + index: test + body: + query: + query_string: + query: '"lucene powers"' + default_field: "foo" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + + + +--- +"Regexp query": + + - do: + search: + index: test + body: + query: + regexp: + foo: "lu.*ne" + + - match: { "hits.total.value": 2 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Wildcard query": + + - do: + search: + index: test + body: + query: + wildcard: + foo: "lu*ne" + + - match: { "hits.total.value": 2 } + - match: { "hits.hits.0._score": 1.0 } + +--- +"Prefix query": + + - do: + search: + index: test + body: + query: + prefix: + foo: "luc" + + - match: { "hits.total.value": 2 } + - match: { "hits.hits.0._score": 1.0 } From 6b0cb2107d2518b7af18ee5993240253657dc844 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 10 Dec 2020 16:17:29 +0100 Subject: [PATCH 02/22] iter --- .../org/elasticsearch/index/mapper/FieldMapper.java | 2 +- .../mapper/MatchOnlyTextFieldMapper.java | 13 ------------- .../mapper/AbstractScriptFieldTypeTestCase.java | 6 +++--- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index e28da5389aab6..b90fb8ff8fe81 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -670,7 +670,7 @@ private void merge(FieldMapper toMerge, Conflicts conflicts) { } } - public void toXContent(XContentBuilder builder, boolean includeDefaults) throws IOException { + protected void toXContent(XContentBuilder builder, boolean includeDefaults) throws IOException { if (serializerCheck.check(includeDefaults, isConfigured(), get())) { 
serializer.serialize(builder, name, getValue()); } diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index 8cef8f058548d..dfa196667bc3d 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -17,7 +17,6 @@ import org.elasticsearch.Version; import org.elasticsearch.common.CheckedIntFunction; import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -287,16 +286,4 @@ public MatchOnlyTextFieldType fieldType() { return (MatchOnlyTextFieldType) super.fieldType(); } - @Override - protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { - // this is a pain, but we have to do this to maintain BWC - builder.field("type", contentType()); - this.builder.store.toXContent(builder, includeDefaults); - this.multiFields.toXContent(builder, params); - this.copyTo.toXContent(builder, params); - this.builder.meta.toXContent(builder, includeDefaults); - this.builder.analyzers.indexAnalyzer.toXContent(builder, includeDefaults); - this.builder.analyzers.searchAnalyzer.toXContent(builder, includeDefaults); - this.builder.analyzers.searchQuoteAnalyzer.toXContent(builder, includeDefaults); - } } diff --git a/x-pack/plugin/runtime-fields/src/test/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldTypeTestCase.java 
b/x-pack/plugin/runtime-fields/src/test/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldTypeTestCase.java index 8ea41db677fd9..85f58743d6986 100644 --- a/x-pack/plugin/runtime-fields/src/test/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldTypeTestCase.java +++ b/x-pack/plugin/runtime-fields/src/test/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldTypeTestCase.java @@ -257,17 +257,17 @@ public void testTermsQueryInLoop() { public void testPhraseQueryIsError() { assumeTrue("Impl does not support term queries", supportsTermQueries()); - assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().phraseQuery(null, 1, false)); + assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().phraseQuery(null, 1, false, null)); } public void testPhrasePrefixQueryIsError() { assumeTrue("Impl does not support term queries", supportsTermQueries()); - assertQueryOnlyOnText("phrase prefix", () -> simpleMappedFieldType().phrasePrefixQuery(null, 1, 1)); + assertQueryOnlyOnText("phrase prefix", () -> simpleMappedFieldType().phrasePrefixQuery(null, 1, 1, null)); } public void testMultiPhraseQueryIsError() { assumeTrue("Impl does not support term queries", supportsTermQueries()); - assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().multiPhraseQuery(null, 1, false)); + assertQueryOnlyOnText("phrase", () -> simpleMappedFieldType().multiPhraseQuery(null, 1, false, null)); } public void testSpanPrefixQueryIsError() { From e57699ef719d5797c87590e2887604dbb59f15da Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 16 Dec 2020 10:36:30 +0100 Subject: [PATCH 03/22] Use source lookup from the shard context. 
--- .../xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index dfa196667bc3d..19f16d9600650 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -185,7 +185,7 @@ public ValueFetcher valueFetcher(QueryShardContext context, String format) { private Query toQuery(Query query, QueryShardContext queryShardContext) { Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> valueFetcherProvider = context -> { - SourceLookup sourceLookup = new SourceLookup(); + SourceLookup sourceLookup = queryShardContext.lookup().source(); ValueFetcher valueFetcher = valueFetcher(queryShardContext, null); valueFetcher.setNextReader(context); return docID -> { From 9ec31c686df6bfd50284fe166f9e9e6397d4d950 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 16 Dec 2020 10:37:12 +0100 Subject: [PATCH 04/22] Update release version. 
--- .../resources/rest-api-spec/test/match_only_text/10_basic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml index 8a33dc22af768..b1bd116e93266 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -2,7 +2,7 @@ setup: - skip: version: " - 7.99.99" - reason: "match_only_text was added in 7.11" + reason: "match_only_text was added in 7.12" - do: indices.create: From 7a03a0f974470b7c37038156dd10ef1c59c12f8e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 16 Dec 2020 12:54:14 +0100 Subject: [PATCH 05/22] Consolidate docs with `text`. --- docs/reference/mapping/types.asciidoc | 6 ++---- .../mapping/types/match-only-text.asciidoc | 2 +- docs/reference/mapping/types/text.asciidoc | 18 +++++++++++++++++- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index a63219bb1b105..6282c94ba340b 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -69,8 +69,8 @@ values. [[text-search-types]] ==== Text search types -<>:: Analyzed, unstructured text. -<>:: A more space-efficient variant of `text`. +<>:: The text family, including `text` and `match_only_text`. + Analyzed, unstructured text. {plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special markup. Used for identifying named entities. <>:: Used for auto-complete suggestions. 
@@ -158,8 +158,6 @@ include::types/parent-join.asciidoc[] include::types/keyword.asciidoc[] -include::types/match-only-text.asciidoc[] - include::types/nested.asciidoc[] include::types/numeric.asciidoc[] diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc index ccc13fb69fac0..80c51b89bec2d 100644 --- a/docs/reference/mapping/types/match-only-text.asciidoc +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -2,7 +2,7 @@ [testenv="basic"] [discrete] -[[match-only-text]] +[[match-only-text-field-type]] === Match-only text field type A variant of <> that trades scoring and efficiency of positional diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 72970e582913d..c12d325f6f474 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -1,9 +1,23 @@ +[testenv="basic"] [[text]] -=== Text field type +=== Text type family ++++ Text ++++ +The text family includes the following field types: + +* <<text-field-type,`text`>>, the traditional field type for full-text content +such as the body of an email or the description of a product. +* <<match-only-text-field-type,`match_only_text`>>, a space-optimized variant +of `text` that disables scoring and performs slower on queries that need +positions. It is best suited for indexing log messages. + + +[discrete] +[[text-field-type]] +=== Text field type + A field to index full-text values, such as the body of an email or the description of a product. These fields are `analyzed`, that is they are passed through an <> to convert the string into a list of individual terms @@ -250,3 +264,5 @@ PUT my-index-000001 } } -------------------------------------------------- + +include::match-only-text.asciidoc[] From 5774bc9fb375feb6a019bbe0ff7d5101a43fc7c3 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 17 Dec 2020 13:48:01 +0100 Subject: [PATCH 06/22] Fail phrase queries when _source is disabled. 
--- .../mapper/MatchOnlyTextFieldMapperTests.java | 39 +++++++++++++++++++ .../mapper/MatchOnlyTextFieldMapper.java | 8 +++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java index d29d95f182fed..43b3e62da72f0 100644 --- a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java +++ b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java @@ -6,7 +6,9 @@ package org.elasticsearch.xpack.matchonlytext.mapper; +import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; @@ -29,14 +31,18 @@ import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.plugins.Plugin; import 
org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin; +import org.hamcrest.Matchers; import java.io.IOException; import java.util.Collection; @@ -232,4 +238,37 @@ public void testSimpleMerge() throws IOException { assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class)); } + + public void testDisabledSource() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + { + mapping.startObject("properties"); + { + mapping.startObject("foo"); + { + mapping.field("type", "match_only_text"); + } + mapping.endObject(); + } + mapping.endObject(); + + mapping.startObject("_source"); + { + mapping.field("enabled", false); + } + mapping.endObject(); + } + mapping.endObject().endObject(); + + MapperService mapperService = createMapperService(mapping); + MappedFieldType ft = ((FieldMapper) mapperService.documentMapper().mapping().root().getMapper("foo")).fieldType(); + QueryShardContext context = createQueryShardContext(mapperService); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> ft.phraseQuery(ts, 0, true, context)); + assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled")); + + // Term queries are ok + ft.termQuery("a", context); // no exception + } } diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index 19f16d9600650..c06008dbadeab 100644 --- 
a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -184,9 +184,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, String format) { } private Query toQuery(Query query, QueryShardContext queryShardContext) { + if (queryShardContext.isSourceEnabled() == false) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + CONTENT_TYPE + + "] cannot run positional queries since [_source] is disabled."); + } + SourceLookup sourceLookup = queryShardContext.lookup().source(); + ValueFetcher valueFetcher = valueFetcher(queryShardContext, null); Function, IOException>> valueFetcherProvider = context -> { - SourceLookup sourceLookup = queryShardContext.lookup().source(); - ValueFetcher valueFetcher = valueFetcher(queryShardContext, null); valueFetcher.setNextReader(context); return docID -> { try { From c0be502abefa800b63194d4f81c4bd060d816e32 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 17 Dec 2020 13:54:09 +0100 Subject: [PATCH 07/22] Remove support for `store`. --- .../mapping/types/match-only-text.asciidoc | 7 ------ .../mapper/MatchOnlyTextFieldMapperTests.java | 22 ++++++------------- .../mapper/MatchOnlyTextFieldMapper.java | 21 +++++------------- 3 files changed, 12 insertions(+), 38 deletions(-) diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc index 80c51b89bec2d..9722d7380bc55 100644 --- a/docs/reference/mapping/types/match-only-text.asciidoc +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -70,10 +70,3 @@ The following mapping parameters are accepted: The <> that should be used at search time when a phrase is encountered. Defaults to the `search_analyzer` setting. 
- -<>:: - - Whether the field value should be stored and retrievable separately from - the <> field. Accepts `true` or `false` - (default). - diff --git a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java index 43b3e62da72f0..681b497d1bde9 100644 --- a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java +++ b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java @@ -84,7 +84,6 @@ protected void registerParameters(ParameterChecker checker) throws IOException { b.field("search_quote_analyzer", "keyword"); }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name())); - checker.registerConflictCheck("store", b -> b.field("store", true)); checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword")); } @@ -150,13 +149,6 @@ public void testDefaults() throws IOException { assertEquals(DocValuesType.NONE, fieldType.docValuesType()); } - public void testEnableStore() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("store", true))); - ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); - IndexableField[] fields = doc.rootDoc().getFields("field"); - assertEquals(1, fields.length); - assertTrue(fields[0].fieldType().stored()); - } public void testSearchAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( @@ -207,27 +199,27 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { public void testNullConfigValuesFail() throws MapperParsingException { 
Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("store", (String) null))) + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", (String) null))) ); - assertThat(e.getMessage(), containsString("[store] on mapper [field] of type [match_only_text] must not have a [null] value")); + assertThat(e.getMessage(), containsString("[analyzer] on mapper [field] of type [match_only_text] must not have a [null] value")); } public void testSimpleMerge() throws IOException { - XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text").field("store", true)); + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "whitespace")); MapperService mapperService = createMapperService(startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); merge(mapperService, startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); - XContentBuilder differentStore = fieldMapping(b -> b.field("type", "match_only_text").field("store", false)); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentStore)); - assertThat(e.getMessage(), containsString("Cannot update parameter [store]")); + XContentBuilder differentAnalyzer = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "keyword")); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentAnalyzer)); + assertThat(e.getMessage(), containsString("Cannot update parameter [analyzer]")); XContentBuilder newField = mapping(b -> { b.startObject("field") .field("type", "match_only_text") - .field("store", true) + .field("analyzer", "whitespace") 
.startObject("meta") .field("key", "value") .endObject() diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index c06008dbadeab..20200c089b85c 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -76,8 +76,6 @@ public static class Builder extends FieldMapper.Builder { private final Version indexCreatedVersion; - private final Parameter store = Parameter.storeParam(m -> builder(m).store.getValue(), false); - private final Parameter> meta = Parameter.metaParam(); private final TextParams.Analyzers analyzers; @@ -92,11 +90,6 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna this.analyzers = new TextParams.Analyzers(indexAnalyzers, m -> builder(m).analyzers); } - public Builder store(boolean store) { - this.store.setValue(store); - return this; - } - public Builder addMultiField(FieldMapper.Builder builder) { this.multiFieldsBuilder.add(builder); return this; @@ -104,7 +97,7 @@ public Builder addMultiField(FieldMapper.Builder builder) { @Override protected List> getParameters() { - return Arrays.asList(store, analyzers.indexAnalyzer, analyzers.searchAnalyzer, analyzers.searchQuoteAnalyzer, meta); + return Arrays.asList(analyzers.indexAnalyzer, analyzers.searchAnalyzer, analyzers.searchQuoteAnalyzer, meta); } private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath contentPath) { @@ -114,7 +107,6 @@ private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath c TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer); 
MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType( buildFullName(contentPath), - store.getValue(), tsi, indexAnalyzer, meta.getValue() @@ -124,11 +116,9 @@ private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath c @Override public MatchOnlyTextFieldMapper build(ContentPath contentPath) { - FieldType fieldType = new FieldType(Defaults.FIELD_TYPE); - fieldType.setStored(store.get()); - MatchOnlyTextFieldType tft = buildFieldType(fieldType, contentPath); + MatchOnlyTextFieldType tft = buildFieldType(Defaults.FIELD_TYPE, contentPath); MultiFields multiFields = multiFieldsBuilder.build(this, contentPath); - return new MatchOnlyTextFieldMapper(name, fieldType, tft, analyzers.getIndexAnalyzer(), multiFields, copyTo.build(), this); + return new MatchOnlyTextFieldMapper(name, Defaults.FIELD_TYPE, tft, analyzers.getIndexAnalyzer(), multiFields, copyTo.build(), this); } } @@ -139,8 +129,8 @@ public static class MatchOnlyTextFieldType extends StringFieldType { private final Analyzer indexAnalyzer; private final TextFieldType textFieldType; - public MatchOnlyTextFieldType(String name, boolean stored, TextSearchInfo tsi, Analyzer indexAnalyzer, Map meta) { - super(name, true, stored, false, tsi, meta); + public MatchOnlyTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, Map meta) { + super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); this.textFieldType = new TextFieldType(name); } @@ -161,7 +151,6 @@ public MatchOnlyTextFieldType(String name, boolean stored, Map m public MatchOnlyTextFieldType(String name) { this( name, - false, new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), Lucene.STANDARD_ANALYZER, Collections.emptyMap() From feaf2f8d8780dbed491e2ef507e17932afeb5192 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 17 Dec 2020 14:08:32 +0100 Subject: [PATCH 08/22] Add tests for span and intervals queries. 
--- .../mapping/types/match-only-text.asciidoc | 14 ++++++---- .../test/match_only_text/10_basic.yml | 27 +++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc index 9722d7380bc55..ee0d50ae1955f 100644 --- a/docs/reference/mapping/types/match-only-text.asciidoc +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -5,15 +5,19 @@ [[match-only-text-field-type]] === Match-only text field type -A variant of <> that trades scoring and efficiency of positional -queries for space efficiency. This field effectively stores data the same way as -a `text` field that only indexes documents (`index_options: docs`) and disables -norms (`norms: false`). Term queries perform as fast if not faster as on `text` -fields, however queries that need positions such as the +A variant of <> that trades scoring and efficiency of +positional queries for space efficiency. This field effectively stores data the +same way as a `text` field that only indexes documents (`index_options: docs`) +and disables norms (`norms: false`). Term queries perform as fast as, if not faster +than, on `text` fields; however, queries that need positions such as the <> perform slower as they need to look at the `_source` document to verify whether a phrase matches. All queries return constant scores that are equal to 1.0. +<> and <> +are not supported by this field. Use the <> field type +if you need them. 
+ [source,console] -------------------------------- PUT logs diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml index b1bd116e93266..c2f2328335dad 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -159,3 +159,30 @@ setup: - match: { "hits.total.value": 2 } - match: { "hits.hits.0._score": 1.0 } + +--- +"Span query": + + - do: + catch: bad_request + search: + index: test + body: + query: + span_term: + foo: lucene + +--- +"Intervals query": + + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + foo: + match: + query: "apache lucene" + From d51db6c08454c3b13f4ba622fc6e56f47c56c166 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 17 Dec 2020 14:19:32 +0100 Subject: [PATCH 09/22] Test for fuzzy query. 
--- .../mapper/MatchOnlyTextFieldMapper.java | 8 ++++++++ .../test/match_only_text/10_basic.yml | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index 20200c089b85c..0701ae52ded82 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -17,6 +17,7 @@ import org.elasticsearch.Version; import org.elasticsearch.common.CheckedIntFunction; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -199,6 +200,13 @@ public Query termQuery(Object value, QueryShardContext context) { return new ConstantScoreQuery(super.termQuery(value, context)); } + @Override + public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions, + boolean transpositions, QueryShardContext context) { + // Disable scoring + return new ConstantScoreQuery(super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context)); + } + @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext queryShardContext) throws IOException { diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml index c2f2328335dad..212ae9f54c76f 100644 --- 
a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -160,6 +160,20 @@ setup: - match: { "hits.total.value": 2 } - match: { "hits.hits.0._score": 1.0 } +--- +"Fuzzy query": + + - do: + search: + index: test + body: + query: + fuzzy: + foo: "lucane" + + - match: { "hits.total.value": 2 } + - match: { "hits.hits.0._score": 1.0 } + --- "Span query": From 71adb75c68d8711941363d321b2badcce5e4ea38 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 17 Dec 2020 16:02:57 +0100 Subject: [PATCH 10/22] More tests. --- .../index/mapper/FieldTypeTestCase.java | 6 ++ .../mapper/MatchOnlyTextFieldMapperTests.java | 4 +- .../mapper/MatchOnlyTextFieldMapper.java | 32 ++++--- .../query/SourceConfirmedTextQuery.java | 34 ++++++- .../mapper/MatchOnlyTextFieldTypeTests.java | 76 +++++++++------ .../query/SourceConfirmedTextQueryTests.java | 93 +++++++++++++++++++ 6 files changed, 202 insertions(+), 43 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index c17d4741e96be..dd1d2a1147d4d 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.SourceLookup; import org.elasticsearch.test.ESTestCase; @@ -43,6 +44,11 @@ protected QueryShardContext randomMockShardContext() { static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries) { QueryShardContext queryShardContext = mock(QueryShardContext.class); 
when(queryShardContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries); + when(queryShardContext.isSourceEnabled()).thenReturn(true); + SourceLookup sourceLookup = mock(SourceLookup.class); + SearchLookup searchLookup = mock(SearchLookup.class); + when(searchLookup.source()).thenReturn(sourceLookup); + when(queryShardContext.lookup()).thenReturn(searchLookup); return queryShardContext; } diff --git a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java index 681b497d1bde9..0013f1e73b5f8 100644 --- a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java +++ b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java @@ -149,7 +149,6 @@ public void testDefaults() throws IOException { assertEquals(DocValuesType.NONE, fieldType.docValuesType()); } - public void testSearchAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( b -> b.field("type", "match_only_text").field("analyzer", "standard").field("search_analyzer", "keyword") @@ -256,8 +255,7 @@ public void testDisabledSource() throws IOException { MappedFieldType ft = ((FieldMapper) mapperService.documentMapper().mapping().root().getMapper("foo")).fieldType(); QueryShardContext context = createQueryShardContext(mapperService); TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> ft.phraseQuery(ts, 0, true, context)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.phraseQuery(ts, 0, true, context)); 
assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled")); // Term queries are ok diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index 0701ae52ded82..0772ad66c06d8 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -106,12 +106,7 @@ private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath c NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer); - MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType( - buildFullName(contentPath), - tsi, - indexAnalyzer, - meta.getValue() - ); + MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(buildFullName(contentPath), tsi, indexAnalyzer, meta.getValue()); return ft; } @@ -119,7 +114,15 @@ private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath c public MatchOnlyTextFieldMapper build(ContentPath contentPath) { MatchOnlyTextFieldType tft = buildFieldType(Defaults.FIELD_TYPE, contentPath); MultiFields multiFields = multiFieldsBuilder.build(this, contentPath); - return new MatchOnlyTextFieldMapper(name, Defaults.FIELD_TYPE, tft, analyzers.getIndexAnalyzer(), multiFields, copyTo.build(), this); + return new MatchOnlyTextFieldMapper( + name, + Defaults.FIELD_TYPE, + tft, + analyzers.getIndexAnalyzer(), + multiFields, + copyTo.build(), + this + ); } } @@ -175,8 +178,9 @@ public ValueFetcher valueFetcher(QueryShardContext 
context, String format) { private Query toQuery(Query query, QueryShardContext queryShardContext) { if (queryShardContext.isSourceEnabled() == false) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + CONTENT_TYPE + - "] cannot run positional queries since [_source] is disabled."); + throw new IllegalArgumentException( + "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." + ); } SourceLookup sourceLookup = queryShardContext.lookup().source(); ValueFetcher valueFetcher = valueFetcher(queryShardContext, null); @@ -201,8 +205,14 @@ public Query termQuery(Object value, QueryShardContext context) { } @Override - public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions, - boolean transpositions, QueryShardContext context) { + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + QueryShardContext context + ) { // Disable scoring return new ConstantScoreQuery(super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context)); } diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java index 73a39a373d092..ecfaf8dbc0918 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java @@ -26,6 +26,7 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import 
org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; @@ -37,6 +38,7 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import java.io.IOException; import java.io.UncheckedIOException; @@ -68,8 +70,9 @@ public static Query approximate(Query query) { return approximate((PhraseQuery) query); } else if (query instanceof MultiPhraseQuery) { return approximate((MultiPhraseQuery) query); + } else if (query instanceof MultiPhrasePrefixQuery) { + return approximate((MultiPhrasePrefixQuery) query); } else { - // TODO: spans and intervals return new MatchAllDocsQuery(); } } @@ -94,6 +97,31 @@ private static Query approximate(MultiPhraseQuery query) { return approximation.build(); } + private static Query approximate(MultiPhrasePrefixQuery query) { + Term[][] terms = query.getTerms(); + if (terms.length == 0) { + return new MatchNoDocsQuery(); + } else if (terms.length == 1) { + // Only a prefix, approximate with a prefix query + BooleanQuery.Builder approximation = new BooleanQuery.Builder(); + for (Term term : terms[0]) { + approximation.add(new PrefixQuery(term), Occur.FILTER); + } + return approximation.build(); + } + // A combination of a phrase and a prefix query, only use terms of the phrase for the approximation + BooleanQuery.Builder approximation = new BooleanQuery.Builder(); + for (int i = 0; i < terms.length - 1; ++i) { // ignore the last set of terms, which are prefixes + Term[] termArray = terms[i]; + BooleanQuery.Builder approximationClause = new BooleanQuery.Builder(); + for (Term term : termArray) { + approximationClause.add(new TermQuery(term), Occur.SHOULD); + } + approximation.add(approximationClause.build(), Occur.FILTER); + } + return approximation.build(); + } + /** * Similarity that produces the frequency as a score. 
*/ @@ -128,6 +156,10 @@ public SourceConfirmedTextQuery( this.indexAnalyzer = indexAnalyzer; } + public Query getQuery() { + return in; + } + @Override public String toString(String field) { return in.toString(field); diff --git a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java index 759e1e81e031d..74e34e2cccdf8 100644 --- a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java +++ b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java @@ -6,9 +6,16 @@ package org.elasticsearch.xpack.matchonlytext.mapper; +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; @@ -17,16 +24,16 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.search.AutomatonQueries; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import 
org.elasticsearch.xpack.matchonlytext.mapper.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType; +import org.elasticsearch.xpack.matchonlytext.query.SourceConfirmedTextQuery; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase { @@ -35,10 +42,6 @@ public void testTermQuery() { MappedFieldType ft = new MatchOnlyTextFieldType("field"); assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null)); assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); - - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); } public void testTermsQuery() { @@ -47,13 +50,6 @@ public void testTermsQuery() { terms.add(new BytesRef("foo")); terms.add(new BytesRef("bar")); assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); - - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) - ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); } public void testRangeQuery() { @@ -77,13 +73,6 @@ public void testRegexpQuery() { MappedFieldType ft = new MatchOnlyTextFieldType("field"); assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); - IllegalArgumentException e = 
expectThrows( - IllegalArgumentException.class, - () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC) - ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); - ElasticsearchException ee = expectThrows( ElasticsearchException.class, () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE) @@ -94,17 +83,10 @@ public void testRegexpQuery() { public void testFuzzyQuery() { MappedFieldType ft = new MatchOnlyTextFieldType("field"); assertEquals( - new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), + new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)), ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) - ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); - ElasticsearchException ee = expectThrows( ElasticsearchException.class, () -> ft.fuzzyQuery("foo", Fuzziness.AUTO, randomInt(10) + 1, randomInt(10) + 1, randomBoolean(), MOCK_QSC_DISALLOW_EXPENSIVE) @@ -120,4 +102,42 @@ public void testFetchSourceValue() throws IOException { assertEquals(List.of("true"), fetchSourceValue(fieldType, true)); } + private Query unwrapPositionalQuery(Query query) { + query = ((ConstantScoreQuery) query).getQuery(); + query = ((SourceConfirmedTextQuery) query).getQuery(); + return query; + } + + public void testPhraseQuery() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); + Query query = ft.phraseQuery(ts, 0, true, MOCK_QSC); + Query delegate = unwrapPositionalQuery(query); + assertEquals(new PhraseQuery("field", "a", 
"b"), delegate); + assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate)); + } + + public void testMultiPhraseQuery() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); + Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_QSC); + Query delegate = unwrapPositionalQuery(query); + MultiPhraseQuery expected = new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "b") }) + .add(new Term("field", "c")) + .build(); + assertEquals(expected, delegate); + assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate)); + } + + public void testPhrasePrefixQuery() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); + Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_QSC); + Query delegate = unwrapPositionalQuery(query); + MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field"); + expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") }); + expected.add(new Term("field", "c")); + assertEquals(expected, delegate); + assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate)); + } } diff --git a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java index 6e559f033bae3..fb2356c04e345 100644 --- a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java +++ 
b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java @@ -18,8 +18,10 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; @@ -29,6 +31,7 @@ import org.apache.lucene.store.Directory; import org.elasticsearch.common.CheckedIntFunction; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -196,6 +199,82 @@ public void testMultiPhrase() throws Exception { } } + public void testMultiPhrasePrefix() throws Exception { + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new TextField("body", "a b cd b a b cd", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b d", Store.YES)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("body", "b cd e", Store.YES)); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("body"); + Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + ScoreDoc[] phrasePrefixHits = searcher.search(query, 10).scoreDocs; + ScoreDoc[] sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, 
phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "c")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + phrasePrefixHits = searcher.search(query, 10).scoreDocs; + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "b")); + query.add(new Term("body", "c")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + phrasePrefixHits = searcher.search(query, 10).scoreDocs; + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + // Sloppy multi phrase prefix query + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "a")); + query.add(new Term("body", "c")); + query.setSlop(2); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + phrasePrefixHits = searcher.search(query, 10).scoreDocs; + sourceConfirmedHits = searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs; + CheckHits.checkEqual(query, phrasePrefixHits, sourceConfirmedHits); + CheckHits.checkExplanations(sourceConfirmedPhraseQuery, "body", searcher); + 
assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + + // Multi phrase prefix query with no matches + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "d")); + query.add(new Term("body", "b")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(searcher.count(query), searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + + // Multi phrase query with one missing term + query = new MultiPhrasePrefixQuery("body"); + query.add(new Term("body", "d")); + query.add(new Term("body", "f")); + sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER); + assertEquals(0, searcher.count(sourceConfirmedPhraseQuery)); + assertArrayEquals(new ScoreDoc[0], searcher.search(sourceConfirmedPhraseQuery, 10).scoreDocs); + } + } + } + public void testSpanNear() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { @@ -321,5 +400,19 @@ public void testApproximation() { ) .build(); assertEquals(approximation, SourceConfirmedTextQuery.approximate(query)); + + MultiPhrasePrefixQuery phrasePrefixQuery = new MultiPhrasePrefixQuery("body"); + assertEquals(new MatchNoDocsQuery(), SourceConfirmedTextQuery.approximate(phrasePrefixQuery)); + + phrasePrefixQuery.add(new Term("body", "apache")); + approximation = new BooleanQuery.Builder().add(new PrefixQuery(new Term("body", "apache")), Occur.FILTER).build(); + assertEquals(approximation, SourceConfirmedTextQuery.approximate(phrasePrefixQuery)); + + phrasePrefixQuery.add(new Term("body", "luc")); + approximation = new BooleanQuery.Builder().add( + new BooleanQuery.Builder().add(new TermQuery(new Term("body", "apache")), Occur.SHOULD).build(), + Occur.FILTER + ).build(); + 
assertEquals(approximation, SourceConfirmedTextQuery.approximate(phrasePrefixQuery)); } } From 24b345e149ffed4d706ef207bb3dd488873fa505 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 1 Feb 2021 12:14:08 +0100 Subject: [PATCH 11/22] Fix compilation. --- .../mapper/SearchAsYouTypeFieldMapper.java | 12 ++++----- .../index/mapper/MappedFieldType.java | 7 +++--- .../index/mapper/TextFieldMapper.java | 6 ++--- .../mapper/MatchOnlyTextFieldMapperTests.java | 7 +++--- .../mapper/MatchOnlyTextFieldMapper.java | 22 +++++++++------- .../mapper/MatchOnlyTextFieldTypeTests.java | 25 ++++++++++++------- .../mapper/AbstractScriptFieldType.java | 6 ++--- 7 files changed, 48 insertions(+), 37 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java index 28bdee85abff3..dff2f26e4d8d8 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java @@ -293,7 +293,7 @@ private void checkForPositions() { @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { checkForPositions(); int numPos = countPosition(stream); if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { @@ -306,7 +306,7 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionInc @Override public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { checkForPositions(); int numPos = countPosition(stream); if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || 
numPos <= 1) { @@ -319,7 +319,7 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi @Override public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { int numPos = countPosition(stream); if (numPos > 1) { checkForPositions(); @@ -528,19 +528,19 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { final String prefixFieldName = slop > 0 ? 
null : prefixFieldType.name(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 8dbe64667c662..515b2ade8c434 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -265,18 +265,19 @@ public Query existsQuery(SearchExecutionContext context) { } } - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, + SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index de2c0d36e9541..685ce33a013ac 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ 
b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -707,7 +707,7 @@ private void checkForPositions() { @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, - QueryShardContext queryShardContext) throws IOException { + SearchExecutionContext context) throws IOException { String field = name(); checkForPositions(); // we can't use the index_phrases shortcut with slop, if there are gaps in the stream, @@ -743,7 +743,7 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncremen @Override public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, - QueryShardContext context) throws IOException { + SearchExecutionContext context) throws IOException { String field = name(); if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); @@ -763,7 +763,7 @@ private int countTokens(TokenStream ts) throws IOException { } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext context) throws IOException { if (countTokens(stream) > 1) { checkForPositions(); } diff --git a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java index 0013f1e73b5f8..994662695a348 100644 --- a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java +++ b/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java @@ -31,7 +31,6 @@ import 
org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperParsingException; @@ -39,7 +38,7 @@ import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.TextFieldMapper; -import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin; import org.hamcrest.Matchers; @@ -252,8 +251,8 @@ public void testDisabledSource() throws IOException { mapping.endObject().endObject(); MapperService mapperService = createMapperService(mapping); - MappedFieldType ft = ((FieldMapper) mapperService.documentMapper().mapping().root().getMapper("foo")).fieldType(); - QueryShardContext context = createQueryShardContext(mapperService); + MappedFieldType ft = mapperService.fieldType("foo"); + SearchExecutionContext context = createSearchExecutionContext(mapperService); TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.phraseQuery(ts, 0, true, context)); assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled")); diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index 0772ad66c06d8..722b3f586769a 100644 --- 
a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -31,7 +31,7 @@ import org.elasticsearch.index.mapper.TextParams; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; -import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.SourceLookup; import org.elasticsearch.xpack.matchonlytext.query.SourceConfirmedTextQuery; @@ -172,11 +172,11 @@ public String familyTypeName() { } @Override - public ValueFetcher valueFetcher(QueryShardContext context, String format) { + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { return SourceValueFetcher.toString(name(), context, format); } - private Query toQuery(Query query, QueryShardContext queryShardContext) { + private Query toQuery(Query query, SearchExecutionContext queryShardContext) { if (queryShardContext.isSourceEnabled() == false) { throw new IllegalArgumentException( "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." 
@@ -199,7 +199,7 @@ private Query toQuery(Query query, QueryShardContext queryShardContext) { } @Override - public Query termQuery(Object value, QueryShardContext context) { + public Query termQuery(Object value, SearchExecutionContext context) { // Disable scoring return new ConstantScoreQuery(super.termQuery(value, context)); } @@ -211,28 +211,32 @@ public Query fuzzyQuery( int prefixLength, int maxExpansions, boolean transpositions, - QueryShardContext context + SearchExecutionContext context ) { // Disable scoring return new ConstantScoreQuery(super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context)); } @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext queryShardContext) + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext) throws IOException { final Query query = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext); return toQuery(query, queryShardContext); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext queryShardContext) - throws IOException { + public Query multiPhraseQuery( + TokenStream stream, + int slop, + boolean enablePositionIncrements, + SearchExecutionContext queryShardContext + ) throws IOException { final Query query = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext); return toQuery(query, queryShardContext); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext queryShardContext) + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext) throws IOException { final Query query = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext); return toQuery(query, queryShardContext); diff --git 
a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java index 74e34e2cccdf8..09ee9329e5d3d 100644 --- a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java +++ b/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java @@ -56,12 +56,12 @@ public void testRangeQuery() { MappedFieldType ft = new MatchOnlyTextFieldType("field"); assertEquals( new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), - ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) + ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT) ); ElasticsearchException ee = expectThrows( ElasticsearchException.class, - () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC_DISALLOW_EXPENSIVE) + () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) ); assertEquals( "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.", @@ -71,11 +71,11 @@ public void testRangeQuery() { public void testRegexpQuery() { MappedFieldType ft = new MatchOnlyTextFieldType("field"); - assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC)); + assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT)); ElasticsearchException ee = expectThrows( ElasticsearchException.class, - () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE) + () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, 
MOCK_CONTEXT_DISALLOW_EXPENSIVE) ); assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); } @@ -84,12 +84,19 @@ public void testFuzzyQuery() { MappedFieldType ft = new MatchOnlyTextFieldType("field"); assertEquals( new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)), - ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT) ); ElasticsearchException ee = expectThrows( ElasticsearchException.class, - () -> ft.fuzzyQuery("foo", Fuzziness.AUTO, randomInt(10) + 1, randomInt(10) + 1, randomBoolean(), MOCK_QSC_DISALLOW_EXPENSIVE) + () -> ft.fuzzyQuery( + "foo", + Fuzziness.AUTO, + randomInt(10) + 1, + randomInt(10) + 1, + randomBoolean(), + MOCK_CONTEXT_DISALLOW_EXPENSIVE + ) ); assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); } @@ -111,7 +118,7 @@ private Query unwrapPositionalQuery(Query query) { public void testPhraseQuery() throws IOException { MappedFieldType ft = new MatchOnlyTextFieldType("field"); TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); - Query query = ft.phraseQuery(ts, 0, true, MOCK_QSC); + Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT); Query delegate = unwrapPositionalQuery(query); assertEquals(new PhraseQuery("field", "a", "b"), delegate); assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate)); @@ -120,7 +127,7 @@ public void testPhraseQuery() throws IOException { public void testMultiPhraseQuery() throws IOException { MappedFieldType ft = new MatchOnlyTextFieldType("field"); TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); - Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_QSC); + Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT); Query 
delegate = unwrapPositionalQuery(query); MultiPhraseQuery expected = new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "b") }) .add(new Term("field", "c")) @@ -132,7 +139,7 @@ public void testMultiPhraseQuery() throws IOException { public void testPhrasePrefixQuery() throws IOException { MappedFieldType ft = new MatchOnlyTextFieldType("field"); TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); - Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_QSC); + Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT); Query delegate = unwrapPositionalQuery(query); MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field"); expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") }); diff --git a/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java b/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java index 167c11047879f..23e172d22ca5d 100644 --- a/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java +++ b/x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/AbstractScriptFieldType.java @@ -158,17 +158,17 @@ public Query regexpQuery( } @Override - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext context) { throw new IllegalArgumentException(unsupported("phrase", "text")); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext context) { throw new 
IllegalArgumentException(unsupported("phrase", "text")); } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext context) { throw new IllegalArgumentException(unsupported("phrase prefix", "text")); } From 34743ef00d8f53c875f974e3156ffe32e0aee199 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 9 Feb 2021 16:08:12 +0100 Subject: [PATCH 12/22] iter --- .../index/mapper/MappedFieldType.java | 2 +- .../index/mapper/TextFieldMapper.java | 2 +- .../index/PositionsLeafReaderWrapper.java | 124 +++++++++++++ .../mapper/MatchOnlyTextFieldMapper.java | 48 ++++- .../query/SourceIntervalsSource.java | 172 ++++++++++++++++++ 5 files changed, 340 insertions(+), 8 deletions(-) create mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java create mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 515b2ade8c434..7c5467958dc73 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -296,7 +296,7 @@ public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionCo * Create an {@link IntervalsSource} to be used for proximity queries */ public IntervalsSource intervals(String query, int max_gaps, boolean ordered, - NamedAnalyzer analyzer, boolean prefix) throws IOException { + NamedAnalyzer analyzer, boolean prefix, SearchExecutionContext context) throws IOException { throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + 
name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 685ce33a013ac..9d3ff1708c6a7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -681,7 +681,7 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew @Override public IntervalsSource intervals(String text, int maxGaps, boolean ordered, - NamedAnalyzer analyzer, boolean prefix) throws IOException { + NamedAnalyzer analyzer, boolean prefix, SearchExecutionContext context) throws IOException { if (getTextSearchInfo().hasPositions() == false) { throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); } diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java new file mode 100644 index 0000000000000..c8f93ef7be66b --- /dev/null +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +package org.elasticsearch.xpack.matchonlytext.index; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.elasticsearch.common.CheckedIntFunction; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +public final class PositionsLeafReaderWrapper extends FilterLeafReader { + + private final String field; + private final Function, IOException>> valueFetcherProvider; + private final Analyzer indexAnalyzer; + + public PositionsLeafReaderWrapper(LeafReader in, String field, Function, IOException>> valueFetcherProvider, Analyzer indexAnalyzer) { + super(in); + this.field = field; + this.valueFetcherProvider = valueFetcherProvider; + this.indexAnalyzer = indexAnalyzer; + } + + @Override + public CacheHelper getCoreCacheHelper() { + return null; + } + + @Override + public CacheHelper getReaderCacheHelper() { + return null; + } + + @Override + public FieldInfos getFieldInfos() { + List infos = new ArrayList<>(); + for (FieldInfo info : super.getFieldInfos()) { + if (info.name.equals(field) == false || info.getIndexOptions() == IndexOptions.NONE) { + infos.add(info); + continue; + } + FieldInfo newInfo = new FieldInfo(info.name, info.number, info.hasVectors(), info.omitsNorms(), info.hasPayloads(), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, info.getDocValuesType(), info.getDocValuesGen(), info.attributes(), info.getPointDimensionCount(), info.getPointIndexDimensionCount(), info.getPointNumBytes(), info.isSoftDeletesField()); + infos.add(newInfo); + } + return new FieldInfos(infos.toArray(FieldInfo[]::new)); + } + + @Override + 
public Terms terms(String field) throws IOException { + if (this.field.equals(field) == false) { + return in.terms(field); + } + final Terms in = super.terms(field); + return new Terms() { + + @Override + public TermsEnum iterator() throws IOException { + + } + + @Override + public long size() throws IOException { + return in.size(); + } + + @Override + public long getSumTotalTermFreq() throws IOException { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getSumDocFreq() throws IOException { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getDocCount() throws IOException { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean hasFreqs() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasOffsets() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasPositions() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasPayloads() { + // TODO Auto-generated method stub + return false; + } + + }; + } +} diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java index 722b3f586769a..b300a5a9c90e5 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java @@ -8,12 +8,20 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import 
org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.intervals.IntervalIterator; +import org.apache.lucene.queries.intervals.IntervalMatchesIterator; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.CheckedIntFunction; import org.elasticsearch.common.lucene.Lucene; @@ -31,6 +39,7 @@ import org.elasticsearch.index.mapper.TextParams; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.query.IntervalBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.SourceLookup; @@ -39,6 +48,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -176,15 +186,15 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format) return SourceValueFetcher.toString(name(), context, format); } - private Query toQuery(Query query, SearchExecutionContext queryShardContext) { - if (queryShardContext.isSourceEnabled() == false) { + private Function, IOException>> getValueFetcherProvider(SearchExecutionContext searchExecutionContext) { + if (searchExecutionContext.isSourceEnabled() == false) { throw new IllegalArgumentException( "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." 
); } - SourceLookup sourceLookup = queryShardContext.lookup().source(); - ValueFetcher valueFetcher = valueFetcher(queryShardContext, null); - Function, IOException>> valueFetcherProvider = context -> { + SourceLookup sourceLookup = searchExecutionContext.lookup().source(); + ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); + return context -> { valueFetcher.setNextReader(context); return docID -> { try { @@ -195,7 +205,10 @@ private Query toQuery(Query query, SearchExecutionContext queryShardContext) { } }; }; - return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, valueFetcherProvider, indexAnalyzer)); + } + + private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) { + return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)); } @Override @@ -217,6 +230,29 @@ public Query fuzzyQuery( return new ConstantScoreQuery(super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context)); } + @Override + public IntervalsSource intervals(String text, int maxGaps, boolean ordered, + NamedAnalyzer analyzer, boolean prefix, SearchExecutionContext context) throws IOException { + final IntervalsSource intervalsSource = textFieldType.intervals(text, maxGaps, ordered, analyzer, prefix, context); + + if (analyzer == null) { + analyzer = getTextSearchInfo().getSearchAnalyzer(); + } + Query approximation; + if (prefix) { + approximation = new PrefixQuery(name(), analyzer.normalize(name(), text)); + } else { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + try (TokenStream ts = analyzer.tokenStream(name(), text)) { + TermToBytesRefAttribute term = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + + } + } + } + } + @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext) throws IOException { 
diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java new file mode 100644 index 0000000000000..1d9ebf42fa93c --- /dev/null +++ b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java @@ -0,0 +1,172 @@ +package org.elasticsearch.xpack.matchonlytext.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.queries.intervals.IntervalIterator; +import org.apache.lucene.queries.intervals.IntervalMatchesIterator; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.QueryVisitor; +import org.elasticsearch.common.CheckedFunction; +import org.elasticsearch.common.CheckedIntFunction; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +/** + * A wrapper of {@link IntervalsSource} for the case when positions are not indexed. 
+ */ +public final class SourceIntervalsSource extends IntervalsSource { + + private final IntervalsSource in; + private final Function, IOException>> valueFetcherProvider; + private final Analyzer indexAnalyzer; + + public SourceIntervalsSource(IntervalsSource in, + Function, IOException>> valueFetcherProvider, + Analyzer indexAnalyzer) { + this.in = Objects.requireNonNull(in); + this.valueFetcherProvider = Objects.requireNonNull(valueFetcherProvider); + this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); + } + + private LeafReaderContext createSingleDocLeafReaderContext(String field, List values) { + MemoryIndex index = new MemoryIndex(); + for (Object value : values) { + if (value == null) { + continue; + } + index.addField(field, value.toString(), indexAnalyzer); + } + index.freeze(); + return index.createSearcher().getIndexReader().leaves().get(0); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + // TODO: How can we extract a better approximation from this IntervalsSource? 
+ final DocIdSetIterator approximation = DocIdSetIterator.all(ctx.reader().maxDoc()); + final CheckedIntFunction, IOException> valueFetcher = valueFetcherProvider.apply(ctx); + return new IntervalIterator() { + + private IntervalIterator in; + + @Override + public int docID() { + return approximation.docID(); + } + + @Override + public long cost() { + return approximation.cost(); + } + + @Override + public int nextDoc() throws IOException { + return doNext(approximation.nextDoc()); + } + + @Override + public int advance(int target) throws IOException { + return doNext(approximation.advance(target)); + } + + private int doNext(int doc) throws IOException { + while (doc != NO_MORE_DOCS && setIterator(doc) == false) { + doc = approximation.nextDoc(); + } + return doc; + } + + private boolean setIterator(int doc) { + try { + final List values = valueFetcher.apply(doc); + final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values); + in = SourceIntervalsSource.this.in.intervals(field, singleDocContext); + return in.nextDoc() != NO_MORE_DOCS; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public int start() { + return in.start(); + } + + @Override + public int end() { + return in.end(); + } + + @Override + public int gaps() { + return in.gaps(); + } + + @Override + public int nextInterval() throws IOException { + return in.nextInterval(); + } + + @Override + public float matchCost() { + // a high number since we need to parse the _source + return 10_000; + } + + }; + } + + @Override + public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException { + final CheckedIntFunction, IOException> valueFetcher = valueFetcherProvider.apply(ctx); + final List values = valueFetcher.apply(doc); + final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values); + return in.matches(field, singleDocContext, 0); + } + + @Override + public void 
visit(String field, QueryVisitor visitor) { + in.visit(field, visitor); + } + + @Override + public int minExtent() { + return in.minExtent(); + } + + @Override + public Collection pullUpDisjunctions() { + return Collections.singleton(this); + } + + @Override + public int hashCode() { + // Not using valueFetcherProvider, which doesn't identify this source; it only supplies the field values that are re-analyzed to compute intervals + return Objects.hash(in, indexAnalyzer); + } + + @Override + public boolean equals(Object other) { + if (other == null || getClass() != other.getClass()) { + return false; + } + SourceIntervalsSource that = (SourceIntervalsSource) other; + // Not using valueFetcherProvider, which doesn't identify this source; it only supplies the field values that are re-analyzed to compute intervals + return in.equals(that.in) && indexAnalyzer.equals(that.indexAnalyzer); + } + + @Override + public String toString() { + return in.toString(); + } + +} From 7114fdc89e69d1bbc7a7b4e335ce1e58aafd1857 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 30 Mar 2021 18:54:37 +0200 Subject: [PATCH 13/22] iter --- .../mapper/MatchOnlyTextFieldMapperIT.java | 0 .../index/mapper/MapperExtrasPlugin.java | 1 + .../mapper/MatchOnlyTextFieldMapper.java | 52 +++----- .../query/SourceConfirmedTextQuery.java | 8 +- .../index}/query/SourceIntervalsSource.java | 13 +- .../mapper/MatchOnlyTextFieldTypeTests.java | 15 +-- .../query/SourceConfirmedTextQueryTests.java | 8 +- .../test/match_only_text/10_basic.yml | 0 .../index/mapper/TextFieldMapper.java | 2 +- .../index/query/IntervalBuilder.java | 11 +- .../mapper-match-only-text/build.gradle | 16 --- .../MatchOnlyTextMapperPlugin.java | 24 ---- .../index/PositionsLeafReaderWrapper.java | 124 ------------------ 13 files changed, 57 insertions(+), 217 deletions(-) rename 
x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java => modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperIT.java (100%) rename {x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext => modules/mapper-extras/src/main/java/org/elasticsearch/index}/mapper/MatchOnlyTextFieldMapper.java (87%) rename {x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext => modules/mapper-extras/src/main/java/org/elasticsearch/index}/query/SourceConfirmedTextQuery.java (98%) rename {x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext => modules/mapper-extras/src/main/java/org/elasticsearch/index}/query/SourceIntervalsSource.java (93%) rename {x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext => modules/mapper-extras/src/test/java/org/elasticsearch/index}/mapper/MatchOnlyTextFieldTypeTests.java (93%) rename {x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext => modules/mapper-extras/src/test/java/org/elasticsearch/index}/query/SourceConfirmedTextQueryTests.java (98%) rename {x-pack/plugin/src/test => modules/mapper-extras/src/yamlRestTest}/resources/rest-api-spec/test/match_only_text/10_basic.yml (100%) delete mode 100644 x-pack/plugin/mapper-match-only-text/build.gradle delete mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java delete mode 100644 x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java diff --git a/x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java 
b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperIT.java similarity index 100% rename from x-pack/plugin/mapper-match-only-text/src/internalClusterTest/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapperTests.java rename to modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperIT.java diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java index a3630fa2dde9a..2ea69007be8f2 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java @@ -29,6 +29,7 @@ public Map getMappers() { mappers.put(RankFeatureFieldMapper.CONTENT_TYPE, RankFeatureFieldMapper.PARSER); mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, RankFeaturesFieldMapper.PARSER); mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, SearchAsYouTypeFieldMapper.PARSER); + mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER); return Collections.unmodifiableMap(mappers); } diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java similarity index 87% rename from x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java rename to modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java index b300a5a9c90e5..1e73b8d95f893 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldMapper.java +++ 
b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -1,26 +1,23 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. */ -package org.elasticsearch.xpack.matchonlytext.mapper; +package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.queries.intervals.IntervalIterator; -import org.apache.lucene.queries.intervals.IntervalMatchesIterator; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.CheckedIntFunction; @@ -29,26 +26,17 @@ import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; -import org.elasticsearch.index.mapper.ContentPath; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.ParseContext; -import 
org.elasticsearch.index.mapper.SourceValueFetcher; -import org.elasticsearch.index.mapper.StringFieldType; -import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; -import org.elasticsearch.index.mapper.TextParams; -import org.elasticsearch.index.mapper.TextSearchInfo; -import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.query.IntervalBuilder; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.index.query.SourceConfirmedTextQuery; +import org.elasticsearch.index.query.SourceIntervalsSource; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.SourceLookup; -import org.elasticsearch.xpack.matchonlytext.query.SourceConfirmedTextQuery; import java.io.IOException; import java.io.UncheckedIOException; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -211,6 +199,10 @@ private Query toQuery(Query query, SearchExecutionContext searchExecutionContext return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)); } + private IntervalsSource toIntervalsSource(IntervalsSource source, SearchExecutionContext searchExecutionContext) { + return new SourceIntervalsSource(source, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); + } + @Override public Query termQuery(Object value, SearchExecutionContext context) { // Disable scoring @@ -233,24 +225,20 @@ public Query fuzzyQuery( @Override public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer, boolean prefix, SearchExecutionContext context) throws IOException { - final IntervalsSource intervalsSource = textFieldType.intervals(text, maxGaps, ordered, analyzer, prefix, context); - if (analyzer == null) { analyzer = getTextSearchInfo().getSearchAnalyzer(); 
} - Query approximation; if (prefix) { - approximation = new PrefixQuery(name(), analyzer.normalize(name(), text)); - } else { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - try (TokenStream ts = analyzer.tokenStream(name(), text)) { - TermToBytesRefAttribute term = ts.addAttribute(TermToBytesRefAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - - } - } + BytesRef normalizedTerm = analyzer.normalize(name(), text); + return toIntervalsSource(Intervals.prefix(normalizedTerm), context); } + IntervalBuilder builder = new IntervalBuilder(name(), analyzer) { + @Override + protected IntervalsSource termIntervals(BytesRef term) { + return toIntervalsSource(super.termIntervals(term), context); + } + }; + return builder.analyzeText(text, maxGaps, ordered); } @Override diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java similarity index 98% rename from x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java rename to modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java index ecfaf8dbc0918..baba8b0345f1f 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQuery.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceConfirmedTextQuery.java @@ -1,10 +1,12 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. */ -package org.elasticsearch.xpack.matchonlytext.query; +package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.FieldInvertState; diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java similarity index 93% rename from x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java rename to modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java index 1d9ebf42fa93c..9484dbaed1755 100644 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/query/SourceIntervalsSource.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java @@ -1,15 +1,22 @@ -package org.elasticsearch.xpack.matchonlytext.query; +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; + import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.queries.intervals.IntervalIterator; import org.apache.lucene.queries.intervals.IntervalMatchesIterator; import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.QueryVisitor; -import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.CheckedIntFunction; import java.io.IOException; diff --git a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java similarity index 93% rename from x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java rename to modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java index 09ee9329e5d3d..8eb9c04536712 100644 --- a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/mapper/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -1,10 +1,11 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
*/ - -package org.elasticsearch.xpack.matchonlytext.mapper; +package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.Token; @@ -26,10 +27,8 @@ import org.elasticsearch.common.lucene.search.AutomatonQueries; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.index.mapper.FieldTypeTestCase; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.xpack.matchonlytext.mapper.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType; -import org.elasticsearch.xpack.matchonlytext.query.SourceConfirmedTextQuery; +import org.elasticsearch.index.mapper.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType; +import org.elasticsearch.index.query.SourceConfirmedTextQuery; import java.io.IOException; import java.util.ArrayList; diff --git a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java similarity index 98% rename from x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java rename to modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java index fb2356c04e345..249b02eed1614 100644 --- a/x-pack/plugin/mapper-match-only-text/src/test/java/org/elasticsearch/xpack/matchonlytext/query/SourceConfirmedTextQueryTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceConfirmedTextQueryTests.java @@ -1,10 +1,12 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
+ * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. */ -package org.elasticsearch.xpack.matchonlytext.query; +package org.elasticsearch.index.query; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml similarity index 100% rename from x-pack/plugin/src/test/resources/rest-api-spec/test/match_only_text/10_basic.yml rename to modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 1ce344a85d9db..f2ff5b6adcde9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -691,7 +691,7 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered, } return Intervals.prefix(normalizedTerm); } - IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? 
getTextSearchInfo().getSearchAnalyzer() : analyzer); + IntervalBuilder builder = new IntervalBuilder(name(), analyzer); return builder.analyzeText(text, maxGaps, ordered); } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 0f0bf4ffe39dd..c26ce12dc6794 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -51,6 +51,11 @@ public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) t } } + /** Create term intervals for the provided term. Subclasses may override this to wrap or replace the default {@link Intervals#term(BytesRef)} source. */ + protected IntervalsSource termIntervals(BytesRef term) { + return Intervals.term(term); + } + protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, boolean ordered) throws IOException { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); @@ -109,7 +114,7 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException { TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); ts.reset(); ts.incrementToken(); - return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); + return termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); } protected static IntervalsSource combineSources(List sources, int maxGaps, boolean ordered) { @@ -138,7 +143,7 @@ protected List analyzeTerms(TokenStream ts) throws IOException while (ts.incrementToken()) { BytesRef term = bytesAtt.getBytesRef(); int precedingSpaces = posAtt.getPositionIncrement() - 1; - terms.add(extend(Intervals.term(BytesRef.deepCopyOf(term)), precedingSpaces)); + terms.add(extend(termIntervals(BytesRef.deepCopyOf(term)), precedingSpaces)); } ts.end(); return terms; @@ -170,7 +175,7 @@ else if (synonyms.size() > 1) { synonyms.clear(); spaces = posInc - 1; } - synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); 
+ synonyms.add(termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); } if (synonyms.size() == 1) { terms.add(extend(synonyms.get(0), spaces)); diff --git a/x-pack/plugin/mapper-match-only-text/build.gradle b/x-pack/plugin/mapper-match-only-text/build.gradle deleted file mode 100644 index 630ae2d18c4d9..0000000000000 --- a/x-pack/plugin/mapper-match-only-text/build.gradle +++ /dev/null @@ -1,16 +0,0 @@ -apply plugin: 'elasticsearch.esplugin' -apply plugin: 'elasticsearch.internal-cluster-test' - -esplugin { - name 'match-only-text' - description 'Module for the match-only-text field type, which is a specialization of text field for the case when scoring is not needed and space efficiency is important.' - classname 'org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin' - extendedPlugins = ['x-pack-core'] -} -archivesBaseName = 'x-pack-match-only-text' - -dependencies { - compileOnly project(path: xpackModule('core'), configuration: 'default') - internalClusterTestImplementation project(path: xpackModule('core'), configuration: 'testArtifacts') -} - diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java deleted file mode 100644 index 148141593a7aa..0000000000000 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/MatchOnlyTextMapperPlugin.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ - -package org.elasticsearch.xpack.matchonlytext; - -import org.elasticsearch.index.mapper.Mapper; -import org.elasticsearch.plugins.MapperPlugin; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.xpack.matchonlytext.mapper.MatchOnlyTextFieldMapper; - -import java.util.Map; - -import static java.util.Collections.singletonMap; - -public class MatchOnlyTextMapperPlugin extends Plugin implements MapperPlugin { - @Override - public Map getMappers() { - return singletonMap(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER); - } - -} diff --git a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java b/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java deleted file mode 100644 index c8f93ef7be66b..0000000000000 --- a/x-pack/plugin/mapper-match-only-text/src/main/java/org/elasticsearch/xpack/matchonlytext/index/PositionsLeafReaderWrapper.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ - -package org.elasticsearch.xpack.matchonlytext.index; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.FilterLeafReader; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.elasticsearch.common.CheckedIntFunction; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Function; - -public final class PositionsLeafReaderWrapper extends FilterLeafReader { - - private final String field; - private final Function, IOException>> valueFetcherProvider; - private final Analyzer indexAnalyzer; - - public PositionsLeafReaderWrapper(LeafReader in, String field, Function, IOException>> valueFetcherProvider, Analyzer indexAnalyzer) { - super(in); - this.field = field; - this.valueFetcherProvider = valueFetcherProvider; - this.indexAnalyzer = indexAnalyzer; - } - - @Override - public CacheHelper getCoreCacheHelper() { - return null; - } - - @Override - public CacheHelper getReaderCacheHelper() { - return null; - } - - @Override - public FieldInfos getFieldInfos() { - List infos = new ArrayList<>(); - for (FieldInfo info : super.getFieldInfos()) { - if (info.name.equals(field) == false || info.getIndexOptions() == IndexOptions.NONE) { - infos.add(info); - continue; - } - FieldInfo newInfo = new FieldInfo(info.name, info.number, info.hasVectors(), info.omitsNorms(), info.hasPayloads(), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, info.getDocValuesType(), info.getDocValuesGen(), info.attributes(), info.getPointDimensionCount(), info.getPointIndexDimensionCount(), info.getPointNumBytes(), info.isSoftDeletesField()); - infos.add(newInfo); - } - return new FieldInfos(infos.toArray(FieldInfo[]::new)); - } - - @Override - 
public Terms terms(String field) throws IOException { - if (this.field.equals(field) == false) { - return in.terms(field); - } - final Terms in = super.terms(field); - return new Terms() { - - @Override - public TermsEnum iterator() throws IOException { - - } - - @Override - public long size() throws IOException { - return in.size(); - } - - @Override - public long getSumTotalTermFreq() throws IOException { - // TODO Auto-generated method stub - return 0; - } - - @Override - public long getSumDocFreq() throws IOException { - // TODO Auto-generated method stub - return 0; - } - - @Override - public int getDocCount() throws IOException { - // TODO Auto-generated method stub - return 0; - } - - @Override - public boolean hasFreqs() { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean hasOffsets() { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean hasPositions() { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean hasPayloads() { - // TODO Auto-generated method stub - return false; - } - - }; - } -} From 2030545187f5c8562cc95a6387be8deb4fc7fe0f Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 1 Apr 2021 10:35:23 +0200 Subject: [PATCH 14/22] iter --- .../mapping/types/match-only-text.asciidoc | 19 ++- modules/mapper-extras/build.gradle | 2 +- ...ava => MatchOnlyTextFieldMapperTests.java} | 32 +++-- .../mapper/MatchOnlyTextFieldMapper.java | 23 +++- .../index/query/SourceIntervalsSource.java | 29 +++- .../query/SourceIntervalsSourceTests.java | 128 ++++++++++++++++++ .../test/match_only_text/10_basic.yml | 51 ++++++- .../index/query/IntervalsSourceProvider.java | 8 +- .../test/rest/yaml/ClientYamlTestClient.java | 3 +- 9 files changed, 245 insertions(+), 50 deletions(-) rename modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/{MatchOnlyTextFieldMapperIT.java => MatchOnlyTextFieldMapperTests.java} (94%) create mode 100644 
modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc index ee0d50ae1955f..3466e392e153e 100644 --- a/docs/reference/mapping/types/match-only-text.asciidoc +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -1,6 +1,3 @@ -[role="xpack"] -[testenv="basic"] - [discrete] [[match-only-text-field-type]] === Match-only text field type @@ -14,9 +11,12 @@ as on `text` fields, however queries that need positions such as the need to look at the `_source` document to verify whether a phrase matches. All queries return constant scores that are equal to 1.0. -<> and <> -are not supported by this field. Use the <> field type -if you need them. +<>, as well as `wildcard` and `fuzzy` rules of +<> are not supported by this field. +Use the <> field type if you need them. + +Other than that, `match_only_text` supports the same queries as `text`. And +like `text`, it doesn't support sorting or aggregating. [source,console] -------------------------------- @@ -35,9 +35,6 @@ PUT logs } -------------------------------- -`match_only_text` supports the same queries as `text`. And like `text`, it -doesn't support sorting or aggregating. - [discrete] [[match-only-text-params]] ==== Parameters for match-only text fields @@ -49,7 +46,7 @@ The following mapping parameters are accepted: <>:: The <> which should be used for - the `text` field, both at index-time and at + the `match_only_text` field, both at index-time and at search-time (unless overridden by the <>). Defaults to the default index analyzer, or the <>. @@ -68,7 +65,7 @@ The following mapping parameters are accepted: <>:: The <> that should be used at search time on - the `text` field. Defaults to the `analyzer` setting. + the `match_only_text` field. Defaults to the `analyzer` setting. 
<>:: diff --git a/modules/mapper-extras/build.gradle b/modules/mapper-extras/build.gradle index 4677205226975..a220dade10d06 100644 --- a/modules/mapper-extras/build.gradle +++ b/modules/mapper-extras/build.gradle @@ -15,6 +15,6 @@ esplugin { restResources { restApi { - include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get' + include '_common', 'cluster', 'field_caps', 'nodes', 'indices', 'index', 'search', 'get' } } diff --git a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperIT.java b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java similarity index 94% rename from modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperIT.java rename to modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 994662695a348..f30f251141e43 100644 --- a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperIT.java +++ b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -1,10 +1,12 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
*/ -package org.elasticsearch.xpack.matchonlytext.mapper; +package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.StopFilter; @@ -30,17 +32,8 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.KeywordFieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.mapper.MapperTestCase; -import org.elasticsearch.index.mapper.ParsedDocument; -import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin; import org.hamcrest.Matchers; import java.io.IOException; @@ -57,7 +50,7 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase { @Override protected Collection getPlugins() { - return List.of(new MatchOnlyTextMapperPlugin()); + return List.of(new MapperExtrasPlugin()); } @Override @@ -164,7 +157,7 @@ public void testSearchAnalyzerSerialization() throws IOException { XContentBuilder builder = XContentFactory.jsonBuilder(); builder.startObject(); - createDocumentMapper(fieldMapping(this::minimalMapping)).toXContent( + createDocumentMapper(fieldMapping(this::minimalMapping)).mapping().toXContent( builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")) ); @@ -260,4 +253,15 @@ public void testDisabledSource() throws IOException { // Term queries are ok ft.termQuery("a", context); // no exception } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + assumeFalse("We don't have a way to 
assert things here", true); + return null; + } + + @Override + protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { + assumeFalse("We don't have a way to assert things here", true); + } } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java index 1e73b8d95f893..dc09f9498c2ba 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -14,10 +14,13 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.CheckedIntFunction; @@ -174,7 +177,8 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format) return SourceValueFetcher.toString(name(), context, format); } - private Function, IOException>> getValueFetcherProvider(SearchExecutionContext searchExecutionContext) { + private Function, IOException>> getValueFetcherProvider( + SearchExecutionContext searchExecutionContext) { if (searchExecutionContext.isSourceEnabled() == false) { throw new IllegalArgumentException( "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." 
@@ -196,11 +200,15 @@ private Function, IOException } private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) { - return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)); + return new ConstantScoreQuery( + new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)); } - private IntervalsSource toIntervalsSource(IntervalsSource source, SearchExecutionContext searchExecutionContext) { - return new SourceIntervalsSource(source, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); + private IntervalsSource toIntervalsSource( + IntervalsSource source, + Query approximation, + SearchExecutionContext searchExecutionContext) { + return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); } @Override @@ -230,12 +238,15 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered, } if (prefix) { BytesRef normalizedTerm = analyzer.normalize(name(), text); - return toIntervalsSource(Intervals.prefix(normalizedTerm), context); + // Using a MatchAllDocsQuery as an approximation means that prefix intervals will be slow. + return toIntervalsSource(Intervals.prefix(normalizedTerm), new MatchAllDocsQuery(), context); } IntervalBuilder builder = new IntervalBuilder(name(), analyzer) { @Override protected IntervalsSource termIntervals(BytesRef term) { - return toIntervalsSource(super.termIntervals(term), context); + // Approximate the intervals with a TermQuery so that we can avoid parsing the _source + // on documents that don't contain the expected term. 
+ return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context); } }; return builder.analyzeText(text, maxGaps, ordered); diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java index 9484dbaed1755..e2e29efbd5e0d 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java @@ -9,14 +9,18 @@ package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.queries.intervals.IntervalIterator; import org.apache.lucene.queries.intervals.IntervalMatchesIterator; import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.elasticsearch.common.CheckedIntFunction; import java.io.IOException; @@ -33,13 +37,16 @@ public final class SourceIntervalsSource extends IntervalsSource { private final IntervalsSource in; + private final Query approximation; private final Function, IOException>> valueFetcherProvider; private final Analyzer indexAnalyzer; public SourceIntervalsSource(IntervalsSource in, + Query approximation, Function, IOException>> valueFetcherProvider, Analyzer indexAnalyzer) { this.in = Objects.requireNonNull(in); + this.approximation = Objects.requireNonNull(approximation); this.valueFetcherProvider = Objects.requireNonNull(valueFetcherProvider); this.indexAnalyzer = 
Objects.requireNonNull(indexAnalyzer); } @@ -58,8 +65,14 @@ private LeafReaderContext createSingleDocLeafReaderContext(String field, List, IOException> valueFetcher = valueFetcherProvider.apply(ctx); return new IntervalIterator() { @@ -97,7 +110,9 @@ private boolean setIterator(int doc) { final List values = valueFetcher.apply(doc); final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values); in = SourceIntervalsSource.this.in.intervals(field, singleDocContext); - return in.nextDoc() != NO_MORE_DOCS; + final boolean isSet = in != null && in.nextDoc() != NO_MORE_DOCS; + assert isSet == false || in.docID() == 0; + return isSet; } catch (IOException e) { throw new UncheckedIOException(e); } @@ -157,7 +172,8 @@ public Collection pullUpDisjunctions() { @Override public int hashCode() { - // Not using matchesProvider and valueFetcherProvider, which don't identify this source but are only used to avoid scanning linearly through all documents + // Not using matchesProvider and valueFetcherProvider, which don't identify this source but are only used to avoid scanning linearly + // through all documents return Objects.hash(in, indexAnalyzer); } @@ -167,7 +183,8 @@ public boolean equals(Object other) { return false; } SourceIntervalsSource that = (SourceIntervalsSource) other; - // Not using matchesProvider and valueFetcherProvider, which don't identify this source but are only used to avoid scanning linearly through all documents + // Not using matchesProvider and valueFetcherProvider, which don't identify this source but are only used to avoid scanning linearly + // through all documents return in.equals(that.in) && indexAnalyzer.equals(that.indexAnalyzer); } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java new file mode 100644 index 0000000000000..e4bc7e93381bd --- /dev/null +++ 
b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java @@ -0,0 +1,128 @@ +package org.elasticsearch.index.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.IntervalIterator; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +public class SourceIntervalsSourceTests extends ESTestCase { + + private static final Function, IOException>> SOURCE_FETCHER_PROVIDER = context -> { + return docID -> Collections.singletonList(context.reader().document(docID).get("body")); + }; + + public void testIntervals() throws IOException { + final FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setIndexOptions(IndexOptions.DOCS); + ft.freeze(); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + + Document doc = new Document(); + doc.add(new Field("body", "a b", ft)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new Field("body", "b d a d", 
ft)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new Field("body", "b c d", ft)); + w.addDocument(doc); + + DirectoryReader.open(w).close(); + + doc = new Document(); + w.addDocument(doc); + + try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals(2, reader.leaves().size()); + + IntervalsSource source = new SourceIntervalsSource( + Intervals.term(new BytesRef("d")), + new TermQuery(new Term("body", "d")), + SOURCE_FETCHER_PROVIDER, + Lucene.STANDARD_ANALYZER); + + IntervalIterator intervals = source.intervals("body", reader.leaves().get(0)); + + assertEquals(1, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(1, intervals.nextInterval()); + assertEquals(1, intervals.start()); + assertEquals(1, intervals.end()); + assertEquals(3, intervals.nextInterval()); + assertEquals(3, intervals.start()); + assertEquals(3, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(2, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(2, intervals.nextInterval()); + assertEquals(2, intervals.start()); + assertEquals(2, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(DocIdSetIterator.NO_MORE_DOCS, intervals.nextDoc()); + + assertEquals(null, source.intervals("body", reader.leaves().get(1))); + + // Same test, but with a bad approximation now + source = new SourceIntervalsSource( + Intervals.term(new BytesRef("d")), + new MatchAllDocsQuery(), + SOURCE_FETCHER_PROVIDER, + Lucene.STANDARD_ANALYZER); + + intervals = source.intervals("body", reader.leaves().get(0)); + + assertEquals(1, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(1, intervals.nextInterval()); + assertEquals(1, intervals.start()); + assertEquals(1, intervals.end()); + assertEquals(3, 
intervals.nextInterval()); + assertEquals(3, intervals.start()); + assertEquals(3, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(2, intervals.nextDoc()); + assertEquals(-1, intervals.start()); + assertEquals(-1, intervals.end()); + assertEquals(2, intervals.nextInterval()); + assertEquals(2, intervals.start()); + assertEquals(2, intervals.end()); + assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.nextInterval()); + + assertEquals(DocIdSetIterator.NO_MORE_DOCS, intervals.nextDoc()); + + intervals = source.intervals("body", reader.leaves().get(1)); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, intervals.nextDoc()); + } + } + } +} diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml index 212ae9f54c76f..424e211862900 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -31,6 +31,12 @@ setup: id: 3 body: { "foo": "Elasticsearch is based on Apache Lucene" } + - do: + index: + index: test + id: 4 + body: { "foo": "The Apache Software Foundation manages many projects including Lucene" } + - do: indices.refresh: {} @@ -56,7 +62,7 @@ setup: exists: field: foo - - match: { "hits.total.value": 2 } + - match: { "hits.total.value": 3 } - match: { "hits.hits.0._score": 1.0 } --- @@ -129,7 +135,7 @@ setup: regexp: foo: "lu.*ne" - - match: { "hits.total.value": 2 } + - match: { "hits.total.value": 3 } - match: { "hits.hits.0._score": 1.0 } --- @@ -143,7 +149,7 @@ setup: wildcard: foo: "lu*ne" - - match: { "hits.total.value": 2 } + - match: { "hits.total.value": 3 } - match: { "hits.hits.0._score": 1.0 } --- @@ -157,7 +163,7 @@ setup: prefix: foo: "luc" - - match: { "hits.total.value": 2 } + - 
match: { "hits.total.value": 3 } - match: { "hits.hits.0._score": 1.0 } --- @@ -171,7 +177,7 @@ setup: fuzzy: foo: "lucane" - - match: { "hits.total.value": 2 } + - match: { "hits.total.value": 3 } - match: { "hits.hits.0._score": 1.0 } --- @@ -187,10 +193,9 @@ setup: foo: lucene --- -"Intervals query": +"Term intervals query": - do: - catch: bad_request search: index: test body: @@ -199,4 +204,36 @@ setup: foo: match: query: "apache lucene" + max_gaps: 1 + + - match: { "hits.total.value": 2 } + +--- +"Prefix intervals query": + + - do: + search: + index: test + body: + query: + intervals: + foo: + prefix: + prefix: "luc" + + - match: { "hits.total.value": 3 } + +--- +"Wildcard intervals query": + + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + foo: + wildcard: + pattern: "luc" diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 858fbab45221c..9db0bbe82e38c 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -138,10 +138,10 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType if (useField != null) { fieldType = context.getFieldType(useField); assert fieldType != null; - source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false)); + source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false, context)); } else { - source = fieldType.intervals(query, maxGaps, ordered, analyzer, false); + source = fieldType.intervals(query, maxGaps, ordered, analyzer, false, context); } if (filter != null) { return filter.filter(source, context, fieldType); @@ -521,10 +521,10 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType if (useField != null) { 
fieldType = context.getFieldType(useField); assert fieldType != null; - source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true)); + source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true, context)); } else { - source = fieldType.intervals(prefix, 0, false, analyzer, true); + source = fieldType.intervals(prefix, 0, false, analyzer, true, context); } return source; } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java index ca516a9e6c094..447d70ad47ca4 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java @@ -259,7 +259,8 @@ private static boolean sendBodyAsSourceParam(List supportedMethods, Stri private ClientYamlSuiteRestApi restApi(String apiName) { ClientYamlSuiteRestApi restApi = restSpec.getApi(apiName); if (restApi == null) { - throw new IllegalArgumentException("rest api [" + apiName + "] doesn't exist in the rest spec"); + throw new IllegalArgumentException("rest api [" + apiName + "] doesn't exist in the rest spec, expected one of: " + + restSpec.getApis().stream().map(ClientYamlSuiteRestApi::getName).collect(Collectors.toList())); } return restApi; } From 3a85af4e67de873a3954f330c5a010da6b557b5b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 1 Apr 2021 10:58:48 +0200 Subject: [PATCH 15/22] iter --- .../mapper/MatchOnlyTextFieldMapper.java | 2 +- .../query/SourceIntervalsSourceTests.java | 8 +++++++ .../common/CheckedIntFunction.java | 21 +++++-------------- .../index/query/IntervalBuilder.java | 2 +- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java 
b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java index dc09f9498c2ba..897dfdb509df5 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -246,7 +246,7 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered, protected IntervalsSource termIntervals(BytesRef term) { // Approximate the intervals with a TermQuery so that we can avoid parsing the _source // on documents that don't contain the expected term. - return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context); + return toIntervalsSource(super.termIntervals(term), new TermQuery(new Term(name(), term)), context); } }; return builder.analyzeText(text, maxGaps, ordered); diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java index e4bc7e93381bd..f8da46d98809f 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/SourceIntervalsSourceTests.java @@ -1,3 +1,11 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + package org.elasticsearch.index.query; import org.apache.lucene.document.Document; diff --git a/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java index aac8ea7e960da..e07d92c8d984a 100644 --- a/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java +++ b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java @@ -1,20 +1,9 @@ /* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
*/ package org.elasticsearch.common; diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index c26ce12dc6794..a6e9043fa04ef 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -53,7 +53,7 @@ public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) t /** Create term intervals for the provided term. */ protected IntervalsSource termIntervals(BytesRef term) { - return termIntervals(term); + return Intervals.term(term); } protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, boolean ordered) throws IOException { From 448eb28bcb2fa6e840a1aaee9f5d50f8fbda1ad7 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 1 Apr 2021 11:10:12 +0200 Subject: [PATCH 16/22] iter --- .../resources/rest-api-spec/test/match_only_text/10_basic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml index 424e211862900..6aa974031b0d4 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -2,7 +2,7 @@ setup: - skip: version: " - 7.99.99" - reason: "match_only_text was added in 7.12" + reason: "match_only_text was added in 7.13" - do: indices.create: From f3e77f8a61076edc55ba75aad86235ff5728c380 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 1 Apr 2021 16:02:01 +0200 Subject: [PATCH 17/22] Fix compilation. 
--- .../index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java index 7de8a93ac38bc..d17f73f15f89e 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java @@ -26,7 +26,7 @@ public class AnnotatedTextFieldTypeTests extends FieldTypeTestCase { public void testIntervals() throws IOException { MappedFieldType ft = new AnnotatedTextFieldMapper.AnnotatedTextFieldType("field", Collections.emptyMap()); NamedAnalyzer a = new NamedAnalyzer("name", AnalyzerScope.INDEX, new StandardAnalyzer()); - IntervalsSource source = ft.intervals("Donald Trump", 0, true, a, false); + IntervalsSource source = ft.intervals("Donald Trump", 0, true, a, false, null); assertEquals(Intervals.phrase(Intervals.term("donald"), Intervals.term("trump")), source); } From c5f4f041b85751968767b51a8c04a50e0ff18347 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 2 Apr 2021 14:29:53 +0200 Subject: [PATCH 18/22] Analysis is no longer configurable. 
--- .../mapping/types/match-only-text.asciidoc | 22 +--- .../mapper/MatchOnlyTextFieldMapperTests.java | 121 +----------------- .../mapper/MatchOnlyTextFieldMapper.java | 2 +- 3 files changed, 10 insertions(+), 135 deletions(-) diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc index 3466e392e153e..69870b7e6a9e1 100644 --- a/docs/reference/mapping/types/match-only-text.asciidoc +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -11,6 +11,10 @@ as on `text` fields, however queries that need positions such as the need to look at the `_source` document to verify whether a phrase matches. All queries return constant scores that are equal to 1.0. +Analysis is not configurable: text is always analyzed with the +<> +(<> by default). + <>, as well as `wildcard` and `fuzzy` rules of <> are not supported by this field. Use the <> field type if you need them. @@ -43,14 +47,6 @@ The following mapping parameters are accepted: [horizontal] -<>:: - - The <> which should be used for - the `match_only_text` field, both at index-time and at - search-time (unless overridden by the <>). - Defaults to the default index analyzer, or the - <>. - <>:: Multi-fields allow the same string value to be indexed in multiple ways for @@ -61,13 +57,3 @@ The following mapping parameters are accepted: <>:: Metadata about the field. - -<>:: - - The <> that should be used at search time on - the `match_only_text` field. Defaults to the `analyzer` setting. - -<>:: - - The <> that should be used at search time when a - phrase is encountered. Defaults to the `search_analyzer` setting. 
diff --git a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java index f30f251141e43..dfb76b663695a 100644 --- a/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/internalClusterTest/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -9,29 +9,15 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.CannedTokenStream; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.KeywordAnalyzer; -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.analysis.en.EnglishAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AnalyzerScope; -import org.elasticsearch.index.analysis.CharFilterFactory; -import org.elasticsearch.index.analysis.CustomAnalyzer; -import org.elasticsearch.index.analysis.IndexAnalyzers; -import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.analysis.StandardTokenizerFactory; -import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.plugins.Plugin; import 
org.hamcrest.Matchers; @@ -40,7 +26,6 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -67,53 +52,8 @@ public final void testExists() throws IOException { @Override protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerUpdateCheck(b -> { - b.field("analyzer", "default"); - b.field("search_analyzer", "keyword"); - }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name())); - checker.registerUpdateCheck(b -> { - b.field("analyzer", "default"); - b.field("search_analyzer", "keyword"); - b.field("search_quote_analyzer", "keyword"); - }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name())); - - checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword")); - } - - @Override - protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { - NamedAnalyzer dflt = new NamedAnalyzer( - "default", - AnalyzerScope.INDEX, - new StandardAnalyzer(), - TextFieldMapper.Defaults.POSITION_INCREMENT_GAP - ); - NamedAnalyzer standard = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer()); - NamedAnalyzer keyword = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()); - NamedAnalyzer whitespace = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()); - NamedAnalyzer stop = new NamedAnalyzer( - "my_stop_analyzer", - AnalyzerScope.INDEX, - new CustomAnalyzer( - new StandardTokenizerFactory(indexSettings, null, "standard", indexSettings.getSettings()), - new CharFilterFactory[0], - new TokenFilterFactory[] { new TokenFilterFactory() { - @Override - public String name() { - return "stop"; - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new StopFilter(tokenStream, 
EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); - } - } } - ) - ); - return new IndexAnalyzers( - Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop), - Map.of(), - Map.of() - ); + b.field("meta", Collections.singletonMap("format", "mysql.access")); + }, m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta())); } @Override @@ -141,76 +81,25 @@ public void testDefaults() throws IOException { assertEquals(DocValuesType.NONE, fieldType.docValuesType()); } - public void testSearchAnalyzerSerialization() throws IOException { - XContentBuilder mapping = fieldMapping( - b -> b.field("type", "match_only_text").field("analyzer", "standard").field("search_analyzer", "keyword") - ); - assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); - - // special case: default index analyzer - mapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "default").field("search_analyzer", "keyword")); - assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); - - // special case: default search analyzer - mapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "keyword").field("search_analyzer", "default")); - assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); - - XContentBuilder builder = XContentFactory.jsonBuilder(); - builder.startObject(); - createDocumentMapper(fieldMapping(this::minimalMapping)).mapping().toXContent( - builder, - new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")) - ); - builder.endObject(); - String mappingString = Strings.toString(builder); - assertTrue(mappingString.contains("analyzer")); - assertTrue(mappingString.contains("search_analyzer")); - assertTrue(mappingString.contains("search_quote_analyzer")); - } - - public void testSearchQuoteAnalyzerSerialization() 
throws IOException { - XContentBuilder mapping = fieldMapping( - b -> b.field("type", "match_only_text") - .field("analyzer", "standard") - .field("search_analyzer", "standard") - .field("search_quote_analyzer", "keyword") - ); - assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); - - // special case: default index/search analyzer - mapping = fieldMapping( - b -> b.field("type", "match_only_text") - .field("analyzer", "default") - .field("search_analyzer", "default") - .field("search_quote_analyzer", "keyword") - ); - assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString()); - } - public void testNullConfigValuesFail() throws MapperParsingException { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", (String) null))) + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("meta", (String) null))) ); - assertThat(e.getMessage(), containsString("[analyzer] on mapper [field] of type [match_only_text] must not have a [null] value")); + assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [match_only_text] must not have a [null] value")); } public void testSimpleMerge() throws IOException { - XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "whitespace")); + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text")); MapperService mapperService = createMapperService(startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); merge(mapperService, startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class)); - XContentBuilder differentAnalyzer = fieldMapping(b -> b.field("type", 
"match_only_text").field("analyzer", "keyword")); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentAnalyzer)); - assertThat(e.getMessage(), containsString("Cannot update parameter [analyzer]")); - XContentBuilder newField = mapping(b -> { b.startObject("field") .field("type", "match_only_text") - .field("analyzer", "whitespace") .startObject("meta") .field("key", "value") .endObject() diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java index 897dfdb509df5..26d65f9d28d6a 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -99,7 +99,7 @@ public Builder addMultiField(FieldMapper.Builder builder) { @Override protected List> getParameters() { - return Arrays.asList(analyzers.indexAnalyzer, analyzers.searchAnalyzer, analyzers.searchQuoteAnalyzer, meta); + return Arrays.asList(meta); } private MatchOnlyTextFieldType buildFieldType(FieldType fieldType, ContentPath contentPath) { From 4818edc46dd97b80fc20e4487c969fefde811cac Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 7 Apr 2021 15:21:27 +0200 Subject: [PATCH 19/22] iter --- .../index/query/SourceIntervalsSource.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java index e2e29efbd5e0d..e23c44761bde1 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java @@ -24,7 +24,6 
@@ import org.elasticsearch.common.CheckedIntFunction; import java.io.IOException; -import java.io.UncheckedIOException; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -105,17 +104,13 @@ private int doNext(int doc) throws IOException { return doc; } - private boolean setIterator(int doc) { - try { - final List values = valueFetcher.apply(doc); - final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values); - in = SourceIntervalsSource.this.in.intervals(field, singleDocContext); - final boolean isSet = in != null && in.nextDoc() != NO_MORE_DOCS; - assert isSet == false || in.docID() == 0; - return isSet; - } catch (IOException e) { - throw new UncheckedIOException(e); - } + private boolean setIterator(int doc) throws IOException { + final List values = valueFetcher.apply(doc); + final LeafReaderContext singleDocContext = createSingleDocLeafReaderContext(field, values); + in = SourceIntervalsSource.this.in.intervals(field, singleDocContext); + final boolean isSet = in != null && in.nextDoc() != NO_MORE_DOCS; + assert isSet == false || in.docID() == 0; + return isSet; } @Override From e652aa40995aee83e6b21074291ac67bdf653644 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 21 Apr 2021 13:33:47 +0200 Subject: [PATCH 20/22] Intervals unit tests. 
--- .../mapper/MatchOnlyTextFieldMapper.java | 2 +- .../index/query/SourceIntervalsSource.java | 4 +++ .../mapper/MatchOnlyTextFieldTypeTests.java | 31 +++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java index 3ae492e320a36..cb081ca05ee13 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -256,7 +256,7 @@ public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLe public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) { return toIntervalsSource( Intervals.wildcard(pattern), - new MatchAllDocsQuery(), // wildcard queries can be expensive, what should be the approximation? + new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be? 
context); } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java index e23c44761bde1..aa98574abe240 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/SourceIntervalsSource.java @@ -50,6 +50,10 @@ public SourceIntervalsSource(IntervalsSource in, this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); } + public IntervalsSource getIntervalsSource() { + return in; + } + private LeafReaderContext createSingleDocLeafReaderContext(String field, List values) { MemoryIndex index = new MemoryIndex(); for (Object value : values) { diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java index 8eb9c04536712..8c3cd70c8af27 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -11,6 +11,8 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -29,6 +31,8 @@ import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType; import org.elasticsearch.index.query.SourceConfirmedTextQuery; +import org.elasticsearch.index.query.SourceIntervalsSource; +import org.hamcrest.Matchers; import 
java.io.IOException; import java.util.ArrayList; @@ -146,4 +150,31 @@ public void testPhrasePrefixQuery() throws IOException { assertEquals(expected, delegate); assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate)); } + + public void testTermIntervals() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource()); + } + + public void testPrefixIntervals() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals(Intervals.prefix(new BytesRef("foo")), ((SourceIntervalsSource) prefixIntervals).getIntervalsSource()); + } + + public void testWildcardIntervals() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals(Intervals.wildcard(new BytesRef("foo")), ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource()); + } + + public void testFuzzyIntervals() throws IOException { + MappedFieldType ft = new MatchOnlyTextFieldType("field"); + IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT); + assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + } } From 31a5bbad4b40a3f137a302e180e65dcf02c71b1c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 21 Apr 2021 13:36:19 +0200 Subject: [PATCH 21/22] Fix docs now that `match_only_text` supports all interval 
queries. --- docs/reference/mapping/types/match-only-text.asciidoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc index 69870b7e6a9e1..3f359faeaaa14 100644 --- a/docs/reference/mapping/types/match-only-text.asciidoc +++ b/docs/reference/mapping/types/match-only-text.asciidoc @@ -15,9 +15,9 @@ Analysis is not configurable: text is always analyzed with the <<specify-index-time-default-analyzer,default analyzer>> (<<analysis-standard-analyzer,`standard`>> by default). -<<span-queries,Span queries>>, as well as `wildcard` and `fuzzy` rules of -<<query-dsl-intervals-query,interval queries>> are not supported by this field. -Use the <<text,`text`>> field type if you need them. +<<span-queries,Span queries>> are not supported with this field, use +<<query-dsl-intervals-query,interval queries>> instead, or the +<<text,`text`>> field type if you absolutely need span queries. Other than that, `match_only_text` supports the same queries as `text`. And like `text`, it doesn't support sorting or aggregating. From 3783f18defe8e1f614242674fea7368a183ea846 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 21 Apr 2021 13:53:19 +0200 Subject: [PATCH 22/22] Undo testing hack. 
--- .../org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java index 447d70ad47ca4..ca516a9e6c094 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java @@ -259,8 +259,7 @@ private static boolean sendBodyAsSourceParam(List supportedMethods, Stri private ClientYamlSuiteRestApi restApi(String apiName) { ClientYamlSuiteRestApi restApi = restSpec.getApi(apiName); if (restApi == null) { - throw new IllegalArgumentException("rest api [" + apiName + "] doesn't exist in the rest spec, expected one of: " - + restSpec.getApis().stream().map(ClientYamlSuiteRestApi::getName).collect(Collectors.toList())); + throw new IllegalArgumentException("rest api [" + apiName + "] doesn't exist in the rest spec"); } return restApi; }