From 2e847e8817c47197a89ba421d89fdc289976b1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 16 Apr 2024 09:39:36 +0200 Subject: [PATCH] [DOCS] Documents the rerank task type of the Inference API (#107404) * [DOCS] Documents the rerank task type of the Inference API. --- .../inference/post-inference.asciidoc | 137 ++++++++++++++---- .../inference/put-inference.asciidoc | 97 +++++++++---- 2 files changed, 180 insertions(+), 54 deletions(-) diff --git a/docs/reference/inference/post-inference.asciidoc b/docs/reference/inference/post-inference.asciidoc index 023566d3e40ee..5a9ae283e895c 100644 --- a/docs/reference/inference/post-inference.asciidoc +++ b/docs/reference/inference/post-inference.asciidoc @@ -69,27 +69,40 @@ seconds. ==== {api-request-body-title} `input`:: -(Required, array of strings) +(Required, string or array of strings) The text on which you want to perform the {infer} task. `input` can be a single string or an array. ++ +-- [NOTE] ==== -Inference endpoints for the `completion` task type currently only support a single string as input. +Inference endpoints for the `completion` task type currently only support a +single string as input. ==== +-- + +`query`:: +(Required, string) +Only for `rerank` {infer} endpoints. The search query text. [discrete] [[post-inference-api-example]] ==== {api-examples-title} -The following example performs sparse embedding on the example sentence. + +[discrete] +[[inference-example-completion]] +===== Completion example + +The following example performs a completion on the example question. [source,console] ------------------------------------------------------------ -POST _inference/sparse_embedding/my-elser-model +POST _inference/completion/openai_chat_completions { - "input": "The sky above the port was the color of television tuned to a dead channel." + "input": "What is Elastic?" } ------------------------------------------------------------ // TEST[skip:TBD] @@ -101,39 +114,90 @@ The API returns the following response: [source,console-result] ------------------------------------------------------------ { - "sparse_embedding": [ + "completion": [ { - "port": 2.1259406, - "sky": 1.7073475, - "color": 1.6922266, - "dead": 1.6247464, - "television": 1.3525393, - "above": 1.2425821, - "tuned": 1.1440028, - "colors": 1.1218185, - "tv": 1.0111054, - "ports": 1.0067928, - "poem": 1.0042328, - "channel": 0.99471164, - "tune": 0.96235967, - "scene": 0.9020516, - (...) - }, - (...) + "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management." + } ] } ------------------------------------------------------------ // NOTCONSOLE +[discrete] +[[inference-example-rerank]] +===== Rerank example -The next example performs a completion on the example question. +The following example performs reranking on the example input. 
+ +[source,console] +------------------------------------------------------------ +POST _inference/rerank/cohere_rerank +{ + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character" +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The API returns the following response: + + +[source,console-result] +------------------------------------------------------------ +{ + "rerank": [ + { + "index": "2", + "relevance_score": "0.011597361", + "text": "leia" + }, + { + "index": "0", + "relevance_score": "0.006338922", + "text": "luke" + }, + { + "index": "5", + "relevance_score": "0.0016166499", + "text": "star" + }, + { + "index": "4", + "relevance_score": "0.0011695103", + "text": "r2d2" + }, + { + "index": "1", + "relevance_score": "5.614787E-4", + "text": "like" + }, + { + "index": "6", + "relevance_score": "3.7850367E-4", + "text": "wars" + }, + { + "index": "3", + "relevance_score": "1.2508839E-5", + "text": "chewy" + } + ] +} +------------------------------------------------------------ + + +[discrete] +[[inference-example-sparse]] +===== Sparse embedding example + +The following example performs sparse embedding on the example sentence. [source,console] ------------------------------------------------------------ -POST _inference/completion/openai_chat_completions +POST _inference/sparse_embedding/my-elser-model { - "input": "What is Elastic?" + "input": "The sky above the port was the color of television tuned to a dead channel." } ------------------------------------------------------------ // TEST[skip:TBD] @@ -145,10 +209,25 @@ The API returns the following response: [source,console-result] ------------------------------------------------------------ { - "completion": [ + "sparse_embedding": [ { - "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management." - } + "port": 2.1259406, + "sky": 1.7073475, + "color": 1.6922266, + "dead": 1.6247464, + "television": 1.3525393, + "above": 1.2425821, + "tuned": 1.1440028, + "colors": 1.1218185, + "tv": 1.0111054, + "ports": 1.0067928, + "poem": 1.0042328, + "channel": 0.99471164, + "tune": 0.96235967, + "scene": 0.9020516, + (...) + }, + (...) ] } ------------------------------------------------------------ diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 9825e71adea0d..332752e52f068 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -57,24 +57,27 @@ The unique identifier of the {infer} endpoint. ``:: (Required, string) The type of the {infer} task that the model will perform. Available task types: +* `completion`, +* `rerank`, * `sparse_embedding`, -* `text_embedding`, -* `completion` +* `text_embedding`. [discrete] [[put-inference-api-request-body]] -== {api-request-body-title} +==== {api-request-body-title} `service`:: (Required, string) The type of service supported for the specified task type. Available services: -* `cohere`: specify the `text_embedding` task type to use the Cohere service. 
+* `cohere`: specify the `text_embedding` or `rerank` task type to use the
+Cohere service.
 * `elser`: specify the `sparse_embedding` task type to use the ELSER service.
 * `hugging_face`: specify the `text_embedding` task type to use the Hugging
 Face service.
-* `openai`: specify the `text_embedding` task type to use the OpenAI service.
+* `openai`: specify the `completion` or `text_embedding` task type to use the
+OpenAI service.
 * `elasticsearch`: specify the `text_embedding` task type to use the E5
 built-in model or text embedding models uploaded by Eland.

@@ -100,7 +103,8 @@ the same name and the updated API key.

 `embedding_type`::
 (Optional, string)
-Specifies the types of embeddings you want to get back. Defaults to `float`.
+Only for `text_embedding`. Specifies the types of embeddings you want to get
+back. Defaults to `float`.
 Valid values are:
 * `byte`: use it for signed int8 embeddings (this is a synonym of `int8`).
 * `float`: use it for the default float embeddings.
@@ -108,10 +112,13 @@ Valid values are:

 `model_id`::
 (Optional, string)
-The name of the model to use for the {infer} task. To review the available
-models, refer to the
-https://docs.cohere.com/reference/embed[Cohere docs]. Defaults to
-`embed-english-v2.0`.
+The name of the model to use for the {infer} task.
+To review the available `rerank` models, refer to the
+https://docs.cohere.com/reference/rerank-1[Cohere docs].
+
+To review the available `text_embedding` models, refer to the
+https://docs.cohere.com/reference/embed[Cohere docs]. The default value for
+`text_embedding` is `embed-english-v2.0`.
 =====
 +
 .`service_settings` for the `elser` service
@@ -210,11 +217,34 @@ allocations. Must be a power of 2. Max allowed value is 32.
 Settings to configure the {infer} task. These settings are specific to the
 `<task_type>` you specified.
 +
+.`task_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+For `openai` service only. Specifies the user issuing the request, which can be
+used for abuse detection.
+=====
++
+.`task_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`return_documents`:::
+(Optional, boolean)
+For `cohere` service only. Specifies whether to return the document text in
+the results.
+
+`top_n`:::
+(Optional, integer)
+The number of most relevant documents to return. Defaults to the number of
+the input documents.
+=====
++
 .`task_settings` for the `text_embedding` task type
 [%collapsible%closed]
 =====
 `input_type`:::
-(optional, string)
+(Optional, string)
 For `cohere` service only. Specifies the type of input passed to the model.
 Valid values are:
 * `classification`: use it for embeddings passed through a text classifier.
@@ -236,15 +266,8 @@ maximum token length. Defaults to `END`. Valid values are:

 `user`:::
 (optional, string)
-For `openai` service only. Specifies the user issuing the request, which can be used for abuse detection.
-=====
-+
-.`task_settings` for the `completion` task type
-[%collapsible%closed]
-=====
-`user`:::
-(optional, string)
-For `openai` service only. Specifies the user issuing the request, which can be used for abuse detection.
+For `openai` service only. Specifies the user issuing the request, which can be
+used for abuse detection.
 =====


@@ -260,7 +283,7 @@ This section contains example API calls for every service type.
 ===== Cohere service

 The following example shows how to create an {infer} endpoint called
-`cohere_embeddings` to perform a `text_embedding` task type.
+`cohere-embeddings` to perform a `text_embedding` task type. [source,console] ------------------------------------------------------------ @@ -277,6 +300,30 @@ PUT _inference/text_embedding/cohere-embeddings // TEST[skip:TBD] +The following example shows how to create an {infer} endpoint called +`cohere-rerank` to perform a `rerank` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/cohere-rerank +{ + "service": "cohere", + "service_settings": { + "api_key": "", + "model_id": "rerank-english-v3.0" + }, + "task_settings": { + "top_n": 10, + "return_documents": true + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +For more examples, also review the +https://docs.cohere.com/docs/elasticsearch-and-cohere#rerank-search-results-with-cohere-and-elasticsearch[Cohere documentation]. + + [discrete] [[inference-example-e5]] ===== E5 via the elasticsearch service @@ -414,11 +461,11 @@ been ===== OpenAI service The following example shows how to create an {infer} endpoint called -`openai_embeddings` to perform a `text_embedding` task type. +`openai-embeddings` to perform a `text_embedding` task type. [source,console] ------------------------------------------------------------ -PUT _inference/text_embedding/openai_embeddings +PUT _inference/text_embedding/openai-embeddings { "service": "openai", "service_settings": { @@ -430,11 +477,11 @@ PUT _inference/text_embedding/openai_embeddings // TEST[skip:TBD] The next example shows how to create an {infer} endpoint called -`openai_completion` to perform a `completion` task type. +`openai-completion` to perform a `completion` task type. [source,console] ------------------------------------------------------------ -PUT _inference/completion/openai_completion +PUT _inference/completion/openai-completion { "service": "openai", "service_settings": {
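
As a usage illustration of the `rerank` task type documented above, and not part of the patch itself, the following sketch shows how such an endpoint can act as a second-stage scorer over ordinary search hits. It assumes the Python `requests` library, a local cluster with placeholder credentials, a hypothetical index `my-index` with a `content` field, and a `cohere-rerank` endpoint created as in the `PUT _inference/rerank/cohere-rerank` example.

[source,python]
------------------------------------------------------------
import requests

# Assumptions: local cluster, placeholder credentials, an index "my-index"
# with a "content" field, and a "cohere-rerank" inference endpoint created
# as in the PUT example in this patch.
ES_URL = "http://localhost:9200"
AUTH = ("elastic", "changeme")

query = "star wars main character"

# First pass: fetch candidate documents with a regular match query.
search_resp = requests.post(
    f"{ES_URL}/my-index/_search",
    auth=AUTH,
    json={"size": 100, "query": {"match": {"content": query}}},
)
candidates = [hit["_source"]["content"]
              for hit in search_resp.json()["hits"]["hits"]]

# Second pass: send the user query and the candidates to the rerank endpoint.
# The request body (`query` plus an `input` array) and the response shape
# (`rerank` entries with `index`, `relevance_score` and `text`) follow the
# rerank examples documented in this change.
rerank_resp = requests.post(
    f"{ES_URL}/_inference/rerank/cohere-rerank",
    auth=AUTH,
    json={"query": query, "input": candidates},
)
for entry in rerank_resp.json()["rerank"]:
    print(entry["relevance_score"], candidates[int(entry["index"])])
------------------------------------------------------------

Because the endpoint in the `PUT` example sets `"top_n": 10`, at most ten entries are expected back even when more candidates are sent, and `"return_documents": true` is what makes the `text` field appear in each entry.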