From 2e847e8817c47197a89ba421d89fdc289976b1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 16 Apr 2024 09:39:36 +0200 Subject: [PATCH] [DOCS] Documents the rerank task type of the Inference API (#107404) * [DOCS] Documents the rerank task type of the Inference API. --- .../inference/post-inference.asciidoc | 137 ++++++++++++++---- .../inference/put-inference.asciidoc | 97 +++++++++---- 2 files changed, 180 insertions(+), 54 deletions(-) diff --git a/docs/reference/inference/post-inference.asciidoc b/docs/reference/inference/post-inference.asciidoc index 023566d3e40ee..5a9ae283e895c 100644 --- a/docs/reference/inference/post-inference.asciidoc +++ b/docs/reference/inference/post-inference.asciidoc @@ -69,27 +69,40 @@ seconds. ==== {api-request-body-title} `input`:: -(Required, array of strings) +(Required, string or array of strings) The text on which you want to perform the {infer} task. `input` can be a single string or an array. ++ +-- [NOTE] ==== -Inference endpoints for the `completion` task type currently only support a single string as input. +Inference endpoints for the `completion` task type currently only support a +single string as input. ==== +-- + +`query`:: +(Required, string) +Only for `rerank` {infer} endpoints. The search query text. [discrete] [[post-inference-api-example]] ==== {api-examples-title} -The following example performs sparse embedding on the example sentence. + +[discrete] +[[inference-example-completion]] +===== Completion example + +The following example performs a completion on the example question. [source,console] ------------------------------------------------------------ -POST _inference/sparse_embedding/my-elser-model +POST _inference/completion/openai_chat_completions { - "input": "The sky above the port was the color of television tuned to a dead channel." + "input": "What is Elastic?" } ------------------------------------------------------------ // TEST[skip:TBD] @@ -101,39 +114,90 @@ The API returns the following response: [source,console-result] ------------------------------------------------------------ { - "sparse_embedding": [ + "completion": [ { - "port": 2.1259406, - "sky": 1.7073475, - "color": 1.6922266, - "dead": 1.6247464, - "television": 1.3525393, - "above": 1.2425821, - "tuned": 1.1440028, - "colors": 1.1218185, - "tv": 1.0111054, - "ports": 1.0067928, - "poem": 1.0042328, - "channel": 0.99471164, - "tune": 0.96235967, - "scene": 0.9020516, - (...) - }, - (...) + "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management." + } ] } ------------------------------------------------------------ // NOTCONSOLE +[discrete] +[[inference-example-rerank]] +===== Rerank example -The next example performs a completion on the example question. +The following example performs reranking on the example input. 
+ +[source,console] +------------------------------------------------------------ +POST _inference/rerank/cohere_rerank +{ + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character" +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The API returns the following response: + + +[source,console-result] +------------------------------------------------------------ +{ + "rerank": [ + { + "index": "2", + "relevance_score": "0.011597361", + "text": "leia" + }, + { + "index": "0", + "relevance_score": "0.006338922", + "text": "luke" + }, + { + "index": "5", + "relevance_score": "0.0016166499", + "text": "star" + }, + { + "index": "4", + "relevance_score": "0.0011695103", + "text": "r2d2" + }, + { + "index": "1", + "relevance_score": "5.614787E-4", + "text": "like" + }, + { + "index": "6", + "relevance_score": "3.7850367E-4", + "text": "wars" + }, + { + "index": "3", + "relevance_score": "1.2508839E-5", + "text": "chewy" + } + ] +} +------------------------------------------------------------ + + +[discrete] +[[inference-example-sparse]] +===== Sparse embedding example + +The following example performs sparse embedding on the example sentence. [source,console] ------------------------------------------------------------ -POST _inference/completion/openai_chat_completions +POST _inference/sparse_embedding/my-elser-model { - "input": "What is Elastic?" + "input": "The sky above the port was the color of television tuned to a dead channel." } ------------------------------------------------------------ // TEST[skip:TBD] @@ -145,10 +209,25 @@ The API returns the following response: [source,console-result] ------------------------------------------------------------ { - "completion": [ + "sparse_embedding": [ { - "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management." - } + "port": 2.1259406, + "sky": 1.7073475, + "color": 1.6922266, + "dead": 1.6247464, + "television": 1.3525393, + "above": 1.2425821, + "tuned": 1.1440028, + "colors": 1.1218185, + "tv": 1.0111054, + "ports": 1.0067928, + "poem": 1.0042328, + "channel": 0.99471164, + "tune": 0.96235967, + "scene": 0.9020516, + (...) + }, + (...) ] } ------------------------------------------------------------ diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 9825e71adea0d..332752e52f068 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -57,24 +57,27 @@ The unique identifier of the {infer} endpoint. ``:: (Required, string) The type of the {infer} task that the model will perform. Available task types: +* `completion`, +* `rerank`, * `sparse_embedding`, -* `text_embedding`, -* `completion` +* `text_embedding`. [discrete] [[put-inference-api-request-body]] -== {api-request-body-title} +==== {api-request-body-title} `service`:: (Required, string) The type of service supported for the specified task type. Available services: -* `cohere`: specify the `text_embedding` task type to use the Cohere service. 
+* `cohere`: specify the `text_embedding` or `rerank` task type to use the
+Cohere service.
 * `elser`: specify the `sparse_embedding` task type to use the ELSER service.
 * `hugging_face`: specify the `text_embedding` task type to use the Hugging
 Face service.
-* `openai`: specify the `text_embedding` task type to use the OpenAI service.
+* `openai`: specify the `completion` or `text_embedding` task type to use the
+OpenAI service.
 * `elasticsearch`: specify the `text_embedding` task type to use the E5
 built-in model or text embedding models uploaded by Eland.

@@ -100,7 +103,8 @@ the same name and the updated API key.

 `embedding_type`::
 (Optional, string)
-Specifies the types of embeddings you want to get back. Defaults to `float`.
+Only for `text_embedding`. Specifies the types of embeddings you want to get
+back. Defaults to `float`.
 Valid values are:
 * `byte`: use it for signed int8 embeddings (this is a synonym of `int8`).
 * `float`: use it for the default float embeddings.
@@ -108,10 +112,13 @@ Valid values are:

 `model_id`::
 (Optional, string)
-The name of the model to use for the {infer} task. To review the available
-models, refer to the
-https://docs.cohere.com/reference/embed[Cohere docs]. Defaults to
-`embed-english-v2.0`.
+The name of the model to use for the {infer} task.
+To review the available `rerank` models, refer to the
+https://docs.cohere.com/reference/rerank-1[Cohere docs].
+
+To review the available `text_embedding` models, refer to the
+https://docs.cohere.com/reference/embed[Cohere docs]. The default value for
+`text_embedding` is `embed-english-v2.0`.
 =====
 +
 .`service_settings` for the `elser` service
@@ -210,11 +217,34 @@ allocations. Must be a power of 2. Max allowed value is 32.
 Settings to configure the {infer} task. These settings are specific to the
 `<task_type>` you specified.
 +
+.`task_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+For `openai` service only. Specifies the user issuing the request, which can be
+used for abuse detection.
+=====
++
+.`task_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`return_documents`:::
+(Optional, boolean)
+For `cohere` service only. Specifies whether to return the document text in
+the results.
+
+`top_n`:::
+(Optional, integer)
+The number of most relevant documents to return. Defaults to the number of
+the input documents.
+=====
++
 .`task_settings` for the `text_embedding` task type
 [%collapsible%closed]
 =====
 `input_type`:::
-(optional, string)
+(Optional, string)
 For `cohere` service only. Specifies the type of input passed to the model.
 Valid values are:
 * `classification`: use it for embeddings passed through a text classifier.
@@ -236,15 +266,8 @@ maximum token length. Defaults to `END`. Valid values are:

 `user`:::
 (optional, string)
-For `openai` service only. Specifies the user issuing the request, which can be used for abuse detection.
-=====
-+
-.`task_settings` for the `completion` task type
-[%collapsible%closed]
-=====
-`user`:::
-(optional, string)
-For `openai` service only. Specifies the user issuing the request, which can be used for abuse detection.
+For `openai` service only. Specifies the user issuing the request, which can be
+used for abuse detection.
 =====


@@ -260,7 +283,7 @@ This section contains example API calls for every service type.
 ===== Cohere service

 The following example shows how to create an {infer} endpoint called
-`cohere_embeddings` to perform a `text_embedding` task type.
+`cohere-embeddings` to perform a `text_embedding` task type. [source,console] ------------------------------------------------------------ @@ -277,6 +300,30 @@ PUT _inference/text_embedding/cohere-embeddings // TEST[skip:TBD] +The following example shows how to create an {infer} endpoint called +`cohere-rerank` to perform a `rerank` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/cohere-rerank +{ + "service": "cohere", + "service_settings": { + "api_key": "", + "model_id": "rerank-english-v3.0" + }, + "task_settings": { + "top_n": 10, + "return_documents": true + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +For more examples, also review the +https://docs.cohere.com/docs/elasticsearch-and-cohere#rerank-search-results-with-cohere-and-elasticsearch[Cohere documentation]. + + [discrete] [[inference-example-e5]] ===== E5 via the elasticsearch service @@ -414,11 +461,11 @@ been ===== OpenAI service The following example shows how to create an {infer} endpoint called -`openai_embeddings` to perform a `text_embedding` task type. +`openai-embeddings` to perform a `text_embedding` task type. [source,console] ------------------------------------------------------------ -PUT _inference/text_embedding/openai_embeddings +PUT _inference/text_embedding/openai-embeddings { "service": "openai", "service_settings": { @@ -430,11 +477,11 @@ PUT _inference/text_embedding/openai_embeddings // TEST[skip:TBD] The next example shows how to create an {infer} endpoint called -`openai_completion` to perform a `completion` task type. +`openai-completion` to perform a `completion` task type. [source,console] ------------------------------------------------------------ -PUT _inference/completion/openai_completion +PUT _inference/completion/openai-completion { "service": "openai", "service_settings": {
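
As a usage illustration of the `rerank` task type documented above, and not part of the patch itself, the following sketch shows how such an endpoint can act as a second-stage scorer over ordinary search hits. It assumes the Python `requests` library, a local cluster with placeholder credentials, a hypothetical index `my-index` with a `content` field, and a `cohere-rerank` endpoint created as in the `PUT _inference/rerank/cohere-rerank` example.

[source,python]
------------------------------------------------------------
import requests

# Assumptions: local cluster, placeholder credentials, an index "my-index"
# with a "content" field, and a "cohere-rerank" inference endpoint created
# as in the PUT example in this patch.
ES_URL = "http://localhost:9200"
AUTH = ("elastic", "changeme")

query = "star wars main character"

# First pass: fetch candidate documents with a regular match query.
search_resp = requests.post(
    f"{ES_URL}/my-index/_search",
    auth=AUTH,
    json={"size": 100, "query": {"match": {"content": query}}},
)
candidates = [hit["_source"]["content"]
              for hit in search_resp.json()["hits"]["hits"]]

# Second pass: send the user query and the candidates to the rerank endpoint.
# The request body (`query` plus an `input` array) and the response shape
# (`rerank` entries with `index`, `relevance_score` and `text`) follow the
# rerank examples documented in this change.
rerank_resp = requests.post(
    f"{ES_URL}/_inference/rerank/cohere-rerank",
    auth=AUTH,
    json={"query": query, "input": candidates},
)
for entry in rerank_resp.json()["rerank"]:
    print(entry["relevance_score"], candidates[int(entry["index"])])
------------------------------------------------------------

Because the endpoint in the `PUT` example sets `"top_n": 10`, at most ten entries are expected back even when more candidates are sent, and `"return_documents": true` is what makes the `text` field appear in each entry.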