Work in progress - fields enum api,

This implementation has the following issues: * probably many `terms_enum` references that need renaming to `fields_enum` (a lot of copy and paste was used) * the implementation offers infix matching although I expect we’ll revert to prefix matching only once we’ve got the Kibana requirements, Closes elastic#74816
markharwood · Jan 12, 2022 · 1065f97 · 1065f97
1 parent 3f9b322
commit 1065f97
Show file tree

Hide file tree

Showing 27 changed files with 2,574 additions and 14 deletions.
diff --git a/docs/reference/search/fields-enum.asciidoc b/docs/reference/search/fields-enum.asciidoc
@@ -0,0 +1,105 @@
+[[search-fields-enum]]
+=== Fields enum API
+++++
+<titleabbrev>Fields enum</titleabbrev>
+++++
+
+The Fields enum API can be used to discover field names in the index that match
+a partial string. This is used for auto-complete:
+
+[source,console]
+--------------------------------------------------
+POST stackoverflow/_fields_enum
+{
+    "field" : "tags",
+    "string" : "kiba"
+}
+--------------------------------------------------
+// TEST[setup:stackoverflow]
+
+
+The API returns the following response:
+
+[source,console-result]
+--------------------------------------------------
+{
+  "_shards": {
+    "total": 1,
+    "successful": 1,
+    "failed": 0
+  },
+  "terms": [
+    "kibana"
+  ],
+  "complete" : true
+}
+--------------------------------------------------
+
+The "complete" flag is false if time or space constraints were met and the
+set of terms examined was not the full set of available values.
+
+[[search-fields-enum-api-request]]
+==== {api-request-title}
+
+`GET /<target>/_fields_enum`
+
+
+[[search-fields-enum-api-desc]]
+==== {api-description-title}
+
+The fields_enum API can be used to discover field names in the index that begin with the provided
+string. It is designed for low-latency look-ups used in auto-complete scenarios.
+
+
+[[search-fields-enum-api-path-params]]
+==== {api-path-parms-title}
+
+`<target>`::
+(Required, string) Comma-separated list of data streams, indices, and aliases
+to search. Supports wildcards (`*`). To search all data streams or indices, omit
+this parameter or use `*` or `_all`.
+
+[[search-fields-enum-api-request-body]]
+==== {api-request-body-title}
+
+[[fields-enum-field-param]]
+`field`::
+(Required, string)
+Which field to match.
+
+[[fields-enum-string-param]]
+`string`::
+(Optional, string)
+The string to match at the start of indexed terms. If not provided, all terms in the field
+are considered.
+
+[[fields-enum-size-param]]
+`size`::
+(Optional, integer)
+How many matching terms to return. Defaults to 10.
+
+[[fields-enum-timeout-param]]
+`timeout`::
+(Optional, <<time-units,time value>>)
+The maximum length of time to spend collecting results. Defaults to "1s" (one second).
+If the timeout is exceeded the `complete` flag is set to false in the response and the results may
+be partial or empty.
+
+[[fields-enum-case_insensitive-param]]
+`case_insensitive`::
+(Optional, boolean)
+When true the provided search string is matched against index terms without case sensitivity.
+Defaults to false.
+
+[[fields-enum-index_filter-param]]
+`index_filter`::
+(Optional, <<query-dsl,query object>>) Filters out an index shard if the provided
+query rewrites to `match_none`.
+
+[[fields-enum-search_after-param]]
+`search_after`::
+(Optional, string)
+The string after which terms in the index should be returned. Allows for a form of
+pagination if the last result from one request is passed as the search_after
+parameter for a subsequent request.
+
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/fields_enum.json b/rest-api-spec/src/main/resources/rest-api-spec/api/fields_enum.json
@@ -0,0 +1,35 @@
+{
+  "fields_enum":{
+    "documentation":{
+      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/search-fields-enum.html",
+      "description": "The fields enum API can be used to discover field names in the index that begin with the provided string. It is designed for low-latency look-ups used in auto-complete scenarios."
+    },
+    "stability":"beta",
+    "visibility":"public",
+    "headers":{
+      "accept": [ "application/json"],
+      "content_type": ["application/json"]
+    },
+    "url":{
+      "paths":[
+        {
+          "path": "/{index}/_fields_enum",
+          "methods": [
+            "GET",
+            "POST"
+          ],
+          "parts": {
+            "index": {
+              "type": "list",
+              "description": "A comma-separated list of index names to search; use `_all` or empty string to perform the operation on all indices"
+            }
+          }
+        }
+      ]
+    },
+    "params":{},
+    "body":{
+      "description":"field name, string which is the prefix expected in matching terms, timeout and size for max number of results"
+    }
+  }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -562,4 +562,32 @@ public void validateMatchedRoutingPath() {
                 + "]."
         );
     }
+
+    /**
+     * Supports auto-complete services by reporting whether this field's name matches
+     * the text the user has typed so far. The match is an infix match: the name only
+     * has to contain {@code string}, it does not have to start with it.
+     * Implementations are expected to find matching names very quickly; fields that
+     * cannot look up matching names quickly should return null.
+     * The returned {@link TermsEnum} should implement next(), term() and docFreq()
+     * but postings etc are not required.
+     * @param caseInsensitive if matches should be case insensitive
+     * @param string the partially complete word the user has typed (can be empty)
+     * @param queryShardContext the shard context (not consulted by this default implementation)
+     * @return null if the field name does not match, otherwise an enumeration holding
+     *         the field name with a count of 1
+     * @throws IOException Errors accessing data
+     */
+    public TermsEnum getMatchingFieldNames(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext)
+        throws IOException {
+        final String fieldName = name();
+        final boolean matches;
+        if (caseInsensitive) {
+            // Fold both sides with a fixed locale so the outcome does not depend on the JVM default locale.
+            matches = fieldName.toLowerCase(java.util.Locale.ROOT).contains(string.toLowerCase(java.util.Locale.ROOT));
+        } else {
+            // Case-sensitive request: compare the raw strings.
+            matches = fieldName.contains(string);
+        }
+        return matches ? new SimpleTermCountEnum(new TermCount(fieldName, 1)) : null;
+    }
+
 }
diff --git a/...termsenum/action/SimpleTermCountEnum.java → ...rch/index/mapper/SimpleTermCountEnum.java b/...termsenum/action/SimpleTermCountEnum.java → ...rch/index/mapper/SimpleTermCountEnum.java
@@ -4,15 +4,14 @@
  * 2.0; you may not use this file except in compliance with the Elastic License
  * 2.0.
  */
-package org.elasticsearch.xpack.core.termsenum.action;
+package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.index.mapper.MappedFieldType;
 
 import java.io.IOException;
 import java.util.Arrays;

diff --git a/...pack/core/termsenum/action/TermCount.java → ...elasticsearch/index/mapper/TermCount.java b/...pack/core/termsenum/action/TermCount.java → ...elasticsearch/index/mapper/TermCount.java
@@ -4,7 +4,7 @@
  * 2.0; you may not use this file except in compliance with the Elastic License
  * 2.0.
  */
-package org.elasticsearch.xpack.core.termsenum.action;
+package org.elasticsearch.index.mapper;
 
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -87,7 +87,7 @@ public int hashCode() {
         return Objects.hash(getTerm(), getDocCount());
     }
 
-    void addToDocCount(long extra) {
+    public void addToDocCount(long extra) {
         docCount += extra;
     }