elastic · jpountz · Apr 22, 2021 · Dec 7, 2020 · Dec 10, 2020 · Dec 16, 2020
diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc
@@ -69,6 +69,7 @@ values.
 ==== Text search types
 
 <<text,`text`>>:: Analyzed, unstructured text.
+<<match-only-text,`match_only_text`>>:: A more space-efficient variant of `text`.
 {plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special
 markup. Used for identifying named entities.
 <<completion-suggester,`completion`>>:: Used for auto-complete suggestions.
@@ -154,6 +155,8 @@ include::types/parent-join.asciidoc[]
 
 include::types/keyword.asciidoc[]
 
+include::types/match-only-text.asciidoc[]
+
 include::types/nested.asciidoc[]
 
 include::types/numeric.asciidoc[]

diff --git a/docs/reference/mapping/types/match-only-text.asciidoc b/docs/reference/mapping/types/match-only-text.asciidoc
@@ -0,0 +1,79 @@
+[role="xpack"]
+[testenv="basic"]
+
+[discrete]
+[[match-only-text]]
+=== Match-only text field type
+
+A variant of <<text,`text`>> that trades scoring and efficiency of positional
+queries for space efficiency. This field effectively stores data the same way as
+a `text` field that only indexes documents (`index_options: docs`) and disables
+norms (`norms: false`). Term queries perform as fast as, if not faster than, on
+`text` fields. However, queries that need positions, such as the
+<<query-dsl-match-query-phrase,`match_phrase` query>> perform slower as they
+need to look at the `_source` document to verify whether a phrase matches. All
+queries return constant scores that are equal to 1.0.
+
+[source,console]
+--------------------------------
+PUT logs
+{
+  "mappings": {
+    "properties": {
+      "@timestamp": {
+        "type": "date"
+      },
+      "message": {
+        "type": "match_only_text"
+      }
+    }
+  }
+}
+--------------------------------
+
+`match_only_text` supports the same queries as `text`. And like `text`, it
+doesn't support sorting or aggregating.
+
+[discrete]
+[[match-only-text-params]]
+==== Parameters for match-only text fields
+
+The following mapping parameters are accepted:
+
+[horizontal]
+
+<<analyzer,`analyzer`>>::
+
+    The <<analysis,analyzer>> which should be used for
+    the `match_only_text` field, both at index-time and at
+    search-time (unless overridden by the <<search-analyzer,`search_analyzer`>>).
+    Defaults to the default index analyzer, or the
+    <<analysis-standard-analyzer,`standard` analyzer>>.
+
+<<multi-fields,`fields`>>::
+
+    Multi-fields allow the same string value to be indexed in multiple ways for
+    different purposes, such as one field for search and a multi-field for
+    sorting and aggregations, or the same string value analyzed by different
+    analyzers.
+
+<<mapping-field-meta,`meta`>>::
+
+    Metadata about the field.
+
+<<search-analyzer,`search_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time on
+    the `match_only_text` field. Defaults to the `analyzer` setting.
+
+<<search-quote-analyzer,`search_quote_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time when a
+    phrase is encountered. Defaults to the `search_analyzer` setting.
+
+<<mapping-store,`store`>>::
+
+    Whether the field value should be stored and retrievable separately from
+    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
+    (default).
+
diff --git a/...apper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java b/...apper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java
@@ -287,38 +287,52 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
             }
         }
 
+        private void checkForPositions() { // guard called by the phrase, multi-phrase and phrase-prefix builders below
+            if (getTextSearchInfo().hasPositions() == false) { // the field's text search info reports no positions
+                throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery");
+            }
+        }
+
         @Override
-        public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+        public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements,
+                QueryShardContext context) throws IOException {
+            checkForPositions();
             int numPos = countPosition(stream);
             if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) {
                 return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements);
             }
             final ShingleFieldType shingleField = shingleFieldForPositions(numPos);
             stream = new FixedShingleFilter(stream, shingleField.shingleSize);
-            return shingleField.phraseQuery(stream, 0, true);
+            return shingleField.phraseQuery(stream, 0, true, context);
         }
 
         @Override
-        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements,
+                QueryShardContext context) throws IOException {
+            checkForPositions();
             int numPos = countPosition(stream);
             if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) {
                 return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements);
             }
             final ShingleFieldType shingleField = shingleFieldForPositions(numPos);
             stream = new FixedShingleFilter(stream, shingleField.shingleSize);
-            return shingleField.multiPhraseQuery(stream, 0, true);
+            return shingleField.multiPhraseQuery(stream, 0, true, context);
         }
 
         @Override
-        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
+        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
+                QueryShardContext context) throws IOException {
             int numPos = countPosition(stream);
+            if (numPos > 1) {
+                checkForPositions();
+            }
             if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) {
                 return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions,
                     null, null);
             }
             final ShingleFieldType shingleField = shingleFieldForPositions(numPos);
             stream = new FixedShingleFilter(stream, shingleField.shingleSize);
-            return shingleField.phrasePrefixQuery(stream, 0, maxExpansions);
+            return shingleField.phrasePrefixQuery(stream, 0, maxExpansions, context);
         }
 
         @Override
@@ -513,17 +527,20 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
         }
 
         @Override
-        public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+        public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements,
+                QueryShardContext context) throws IOException {
             return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements);
         }
 
         @Override
-        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements,
+                QueryShardContext context) throws IOException {
             return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements);
         }
 
         @Override
-        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
+        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
+                QueryShardContext context) throws IOException {
             final String prefixFieldName = slop > 0
                 ? null
                 : prefixFieldType.name();

diff --git a/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java b/server/src/main/java/org/elasticsearch/common/CheckedIntFunction.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common;
+
+@FunctionalInterface // function of a primitive int whose single method may throw a checked exception of type E
+public interface CheckedIntFunction<T, E extends Exception> {
+    T apply(int input) throws E; // maps the int input to a T, or throws E
+}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -72,7 +72,6 @@ public abstract class MappedFieldType {
     private final boolean isStored;
     private final TextSearchInfo textSearchInfo;
     private final Map<String, String> meta;
-    private NamedAnalyzer indexAnalyzer;
     private boolean eagerGlobalOrdinals;
 
     public MappedFieldType(String name, boolean isIndexed, boolean isStored,
@@ -266,17 +265,18 @@ public Query existsQuery(QueryShardContext context) {
         }
     }
 
-    public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+    public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
         throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name
             + "] which is of type [" + typeName() + "]");
     }
 
-    public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+    public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements,
+            QueryShardContext context) throws IOException {
         throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name
             + "] which is of type [" + typeName() + "]");
     }
 
-    public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
+    public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
         throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name
             + "] which is of type [" + typeName() + "]");
     }

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -697,9 +697,17 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
             return builder.analyzeText(text, maxGaps, ordered);
         }
 
+        private void checkForPositions() { // guard for queries that need term positions
+            if (getTextSearchInfo().hasPositions() == false) { // the field's text search info reports no positions
+                throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery");
+            }
+        }
+
         @Override
-        public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
+        public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements,
+                QueryShardContext queryShardContext) throws IOException {
             String field = name();
+            checkForPositions();
             // we can't use the index_phrases shortcut with slop, if there are gaps in the stream,
             // or if the incoming token stream is the output of a token graph due to
             // https://issues.apache.org/jira/browse/LUCENE-8916
@@ -732,7 +740,10 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncremen
         }
 
         @Override
-        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements,
+                QueryShardContext context) throws IOException {
             String field = name();
+            // Fail at query-build time, like phraseQuery does, when the field has no positions
+            checkForPositions();
             if (indexPhrases && slop == 0 && hasGaps(stream) == false) {
                 stream = new FixedShingleFilter(stream, 2);
@@ -741,8 +752,21 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi
             return createPhraseQuery(stream, field, slop, enablePositionIncrements);
         }
 
+        private int countTokens(TokenStream ts) throws IOException { // consumes ts and returns how many tokens it emitted
+            ts.reset();
+            int count = 0;
+            while (ts.incrementToken()) { // one iteration per emitted token
+                count++;
+            }
+            ts.end(); // NOTE(review): ts is not closed here and the caller reuses it — presumably it is replayable; confirm
+            return count;
+        }
+
         @Override
-        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
+        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
+            if (countTokens(stream) > 1) {
+                checkForPositions();
+            }
             return analyzePhrasePrefix(stream, slop, maxExpansions);
         }
 

diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java
@@ -632,8 +632,7 @@ private Query analyzeMultiBoolean(String field, TokenStream stream,
         @Override
         protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
             try {
-                checkForPositions(field);
-                return fieldType.phraseQuery(stream, slop, enablePositionIncrements);
+                return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context);
             } catch (IllegalArgumentException | IllegalStateException e) {
                 if (lenient) {
                     return newLenientFieldQuery(field, e);
@@ -645,8 +644,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws
         @Override
         protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
             try {
-                checkForPositions(field);
-                return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements);
+                return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context);
             } catch (IllegalArgumentException | IllegalStateException e) {
                 if (lenient) {
                     return newLenientFieldQuery(field, e);
@@ -657,10 +655,7 @@ protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) t
 
         private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, int positionCount) throws IOException {
             try {
-                if (positionCount > 1) {
-                    checkForPositions(field);
-                }
-                return fieldType.phrasePrefixQuery(stream, slop, maxExpansions);
+                return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context);
             } catch (IllegalArgumentException | IllegalStateException e) {
                 if (lenient) {
                     return newLenientFieldQuery(field, e);
@@ -810,11 +805,5 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in
                 return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true);
             }
         }
-
-        private void checkForPositions(String field) {
-            if (fieldType.getTextSearchInfo().hasPositions() == false) {
-                throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery");
-            }
-        }
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java
@@ -210,7 +210,7 @@ protected Query newPrefixQuery(Term term) {
         protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
             List<Query> disjunctions = new ArrayList<>();
             for (FieldAndBoost fieldType : blendedFields) {
-                Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements);
+                Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context);
                 if (fieldType.boost != 1f) {
                     query = new BoostQuery(query, fieldType.boost);
                 }
@@ -223,7 +223,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws
         protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
             List<Query> disjunctions = new ArrayList<>();
             for (FieldAndBoost fieldType : blendedFields) {
-                Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements);
+                Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context);
                 if (fieldType.boost != 1f) {
                     query = new BoostQuery(query, fieldType.boost);
                 }

diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java
@@ -124,7 +124,7 @@ public void testBadAnalyzer() throws IOException {
     public void testPhraseOnFieldWithNoTerms() {
         MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder(DATE_FIELD_NAME, "three term phrase");
         matchQuery.analyzer("whitespace");
-        expectThrows(IllegalStateException.class, () -> matchQuery.doToQuery(createShardContext()));
+        expectThrows(IllegalArgumentException.class, () -> matchQuery.doToQuery(createShardContext()));
     }
 
     public void testPhrasePrefixZeroTermsQuery() throws IOException {

diff --git a/x-pack/plugin/mapper-match-only-text/build.gradle b/x-pack/plugin/mapper-match-only-text/build.gradle
@@ -0,0 +1,16 @@
+apply plugin: 'elasticsearch.esplugin'
+apply plugin: 'elasticsearch.internal-cluster-test'
+
+esplugin {
+  name 'match-only-text'
+  description 'Module for the match-only-text field type, which is a specialization of text field for the case when scoring is not needed and space efficiency is important.'
+  classname 'org.elasticsearch.xpack.matchonlytext.MatchOnlyTextMapperPlugin'
+  extendedPlugins = ['x-pack-core']
+}
+archivesBaseName = 'x-pack-match-only-text'
+
+dependencies {
+  compileOnly project(path: xpackModule('core'), configuration: 'default')
+  internalClusterTestImplementation project(path: xpackModule('core'), configuration: 'testArtifacts')
+}
+