diff --git a/docs/changelog/116026.yaml b/docs/changelog/116026.yaml new file mode 100644 index 0000000000000..9beb6a1891bb8 --- /dev/null +++ b/docs/changelog/116026.yaml @@ -0,0 +1,13 @@ +pr: 116026 +summary: Change Elasticsearch timeouts to 429 response instead of 5xx +area: Infra/Core +type: breaking +issues: [] +breaking: + title: Change most Elasticsearch timeouts to 429 response instead of 5xx + area: REST API + details: When a timeout occurs in most REST requests, whether via a per-request timeout, or a system default, the + request would return a 5xx response code. The response code from those APIs when a timeout occurs is now 429. + impact: Adjust any code relying on retrying on 5xx responses for timeouts to look for a 429 response code and + inspect the response to determine whether a timeout occured. + notable: false diff --git a/docs/changelog/120168.yaml b/docs/changelog/120168.yaml new file mode 100644 index 0000000000000..d4bb321895160 --- /dev/null +++ b/docs/changelog/120168.yaml @@ -0,0 +1,5 @@ +pr: 120168 +summary: Reduce Data Loss in System Indices Migration +area: Infra/Core +type: bug +issues: [] diff --git a/docs/changelog/120222.yaml b/docs/changelog/120222.yaml new file mode 100644 index 0000000000000..c9ded878ac031 --- /dev/null +++ b/docs/changelog/120222.yaml @@ -0,0 +1,5 @@ +pr: 120222 +summary: Adding linear retriever to support weighted sums of sub-retrievers +area: "Search" +type: enhancement +issues: [] diff --git a/docs/changelog/120551.yaml b/docs/changelog/120551.yaml new file mode 100644 index 0000000000000..171d639be3e89 --- /dev/null +++ b/docs/changelog/120551.yaml @@ -0,0 +1,5 @@ +pr: 120551 +summary: Set default reranker for text similarity reranker to Elastic reranker +area: Ranking +type: enhancement +issues: [] diff --git a/docs/changelog/120842.yaml b/docs/changelog/120842.yaml new file mode 100644 index 0000000000000..98227cf399b56 --- /dev/null +++ b/docs/changelog/120842.yaml @@ -0,0 +1,5 @@ +pr: 120842 +summary: Remove Elastic Inference Service feature flag and deprecated setting +area: Inference +type: enhancement +issues: [] diff --git a/docs/changelog/120913.yaml b/docs/changelog/120913.yaml new file mode 100644 index 0000000000000..69db6027caa69 --- /dev/null +++ b/docs/changelog/120913.yaml @@ -0,0 +1,5 @@ +pr: 120913 +summary: Automatically rollover legacy .ml-anomalies indices +area: Machine Learning +type: upgrade +issues: [] diff --git a/docs/changelog/120974.yaml b/docs/changelog/120974.yaml new file mode 100644 index 0000000000000..ed52eefd9f5f8 --- /dev/null +++ b/docs/changelog/120974.yaml @@ -0,0 +1,6 @@ +pr: 120974 +summary: Tweak `copy_to` handling in synthetic `_source` to account for nested objects +area: Mapping +type: bug +issues: + - 120831 diff --git a/docs/changelog/121048.yaml b/docs/changelog/121048.yaml new file mode 100644 index 0000000000000..e1a9d665315ff --- /dev/null +++ b/docs/changelog/121048.yaml @@ -0,0 +1,5 @@ +pr: 121048 +summary: Updating Inference Update API documentation to have the correct PUT method +area: Machine Learning +type: bug +issues: [] diff --git a/docs/reference/alias.asciidoc b/docs/reference/alias.asciidoc index f676644c4ec48..3f8553c3b96d9 100644 --- a/docs/reference/alias.asciidoc +++ b/docs/reference/alias.asciidoc @@ -2,12 +2,14 @@ [[aliases]] = Aliases -An alias is a secondary name for a group of data streams or indices. Most {es} +An alias points to one or more indices or data streams. Most {es} APIs accept an alias in place of a data stream or index name. 
-You can change the data streams or indices of an alias at any time. If you use -aliases in your application's {es} requests, you can reindex data with no -downtime or changes to your app's code. +Aliases enable you to: + +* Query multiple indices/data streams together with a single name +* Change which indices/data streams your application uses in real time +* <> data without downtime [discrete] [[alias-types]] diff --git a/docs/reference/migration/migrate_9_0.asciidoc b/docs/reference/migration/migrate_9_0.asciidoc index 8f0b16e31b56e..71516fdd540d6 100644 --- a/docs/reference/migration/migrate_9_0.asciidoc +++ b/docs/reference/migration/migrate_9_0.asciidoc @@ -73,6 +73,7 @@ Lucene 10 ships with an updated Korean dictionary (mecab-ko-dic-2.1.1). For det The change is small and should generally provide better analysis results. Existing indices for full-text use cases should be reindexed though. ==== + [discrete] [[breaking_90_cluster_and_node_setting_changes]] ==== Cluster and node setting changes @@ -318,3 +319,320 @@ The `elser` service of the inference API will be removed in an upcoming release. In the current version there is no impact. In a future version, users of the `elser` service will no longer be able to use it, and will be required to use the `elasticsearch` service to access elser through the inference API. ==== +[discrete] +[[breaking_90_anomaly_detection_results]] +=== Anomaly detection results migration + +The {anomaly-detect} result indices `.ml-anomalies-*` created in {es} 7.x must be either reindexed, marked read-only, or deleted before upgrading to 9.x. + +**Reindexing**: While anomaly detection results are being reindexed, jobs continue to run and process new data. +However, you cannot completely delete an {anomaly-job} that stores results in this index until the reindexing is complete. + +**Marking indices as read-only**: This is useful for large indexes that contain the results of only one or a few {anomaly-jobs}. +If you delete these jobs later, you will not be able to create a new job with the same name. + +**Deleting**: Delete jobs that are no longer needed in the {ml-app} in {kib}. +The result index is deleted when all jobs that store results in it have been deleted. + +[[which_indices_require_attention]] +.Which indices require attention? +[%collapsible] +==== + +To identify indices that require action, use the <>: + +[source,console] +------------------------------------------------------------ +GET /.ml-anomalies-*/_migration/deprecations +------------------------------------------------------------ +// TEST[skip:TBD] + +The response contains the list of critical deprecation warnings in the `index_settings` section: + +[source,console-result] +------------------------------------------------------------ +"index_settings": { + ".ml-anomalies-shared": [ + { + "level": "critical", + "message": "Index created before 8.0", + "url": "https://ela.st/es-deprecation-8-reindex", + "details": "This index was created with version 7.8.23 and is not compatible with 9.0. Reindex or remove the index before upgrading.", + "resolve_during_rolling_upgrade": false + } + ] + } +------------------------------------------------------------ +// NOTCONSOLE + + +==== + +[[reindex_anomaly_result_index]] +.Reindexing anomaly result indices +[%collapsible] +==== +For an index with less than 10GB that contains results from multiple jobs that are still required, we recommend reindexing into a new format using UI. 
+You can use the <> to obtain the size of an index: + +[source,console] +------------------------------------------------------------ +GET _cat/indices/.ml-anomalies-custom-example?v&h=index,store.size +------------------------------------------------------------ +// TEST[skip:TBD] + +The reindexing can be initiated in the Kibana Upgrade Assistant. + +If an index size is greater than 10 GB it is recommended to use the Reindex API. +Reindexing consists of the following steps: + +. Set the original index to read-only. ++ +-- +[source,console] +------------------------------------------------------------ +PUT .ml-anomalies-custom-example/_block/read_only +------------------------------------------------------------ +// TEST[skip:TBD] +-- + +. Create a new index from the legacy index. ++ +-- +[source,console] +------------------------------------------------------------ +POST _create_from/.ml-anomalies-custom-example/.reindexed-v9-ml-anomalies-custom-example +------------------------------------------------------------ +// TEST[skip:TBD] +-- + +. Reindex documents. +To accelerate the reindexing process, it is recommended that the number of replicas be set to `0` before the reindexing and then set back to the original number once it is completed. +.. Get the number of replicas. ++ +-- +[source,console] +------------------------------------------------------------ +GET /.reindexed-v9-ml-anomalies-custom-example/_settings +------------------------------------------------------------ +// TEST[skip:TBD] +Note the number of replicas in the response. For example: +[source,console-result] +------------------------------------------------------------ +{ + ".reindexed-v9-ml-anomalies-custom-example": { + "settings": { + "index": { + "number_of_replicas": "1", + "number_of_shards": "1" + } + } + } +} +------------------------------------------------------------ +// NOTCONSOLE +-- +.. Set the number of replicas to `0`. ++ +-- +[source,console] +------------------------------------------------------------ +PUT /.reindexed-v9-ml-anomalies-custom-example/_settings +{ + "index": { + "number_of_replicas": 0 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +-- +.. Start the reindexing process in asynchronous mode. ++ +-- +[source,console] +------------------------------------------------------------ +POST _reindex?wait_for_completion=false +{ + "source": { + "index": ".ml-anomalies-custom-example" + }, + "dest": { + "index": ".reindexed-v9-ml-anomalies-custom-example" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +The response will contain a task_id. You can check when the task is completed using the following command: +[source,console] +------------------------------------------------------------ +GET _tasks/ +------------------------------------------------------------ +// TEST[skip:TBD] +-- +.. Set the number of replicas to the original number when the reindexing is finished. ++ +-- +[source,console] +------------------------------------------------------------ +PUT /.reindexed-v9-ml-anomalies-custom-example/_settings +{ + "index": { + "number_of_replicas": "" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +-- + +. Get the aliases the original index is pointing to. 
++ +-- +[source,console] +------------------------------------------------------------ +GET .ml-anomalies-custom-example/_alias +------------------------------------------------------------ +// TEST[skip:TBD] + +The response may contain multiple aliases if the results of multiple jobs are stored in the same index. + +[source,console-result] +------------------------------------------------------------ +{ + ".ml-anomalies-custom-example": { + "aliases": { + ".ml-anomalies-example1": { + "filter": { + "term": { + "job_id": { + "value": "example1" + } + } + }, + "is_hidden": true + }, + ".ml-anomalies-example2": { + "filter": { + "term": { + "job_id": { + "value": "example2" + } + } + }, + "is_hidden": true + } + } + } +} +------------------------------------------------------------ +// NOTCONSOLE +-- + +. Now you can reassign the aliases to the new index and delete the original index in one step. +Note that when adding the new index to the aliases, you must use the same filter and is_hidden parameters as for the original index. ++ +-- +[source,console] +------------------------------------------------------------ +POST _aliases +{ + "actions": [ + { + "add": { + "index": ".reindexed-v9-ml-anomalies-custom-example", + "alias": ".ml-anomalies-example1", + "filter": { + "term": { + "job_id": { + "value": "example1" + } + } + }, + "is_hidden": true + } + }, + { + "add": { + "index": ".reindexed-v9-ml-anomalies-custom-example", + "alias": ".ml-anomalies-example2", + "filter": { + "term": { + "job_id": { + "value": "example2" + } + } + }, + "is_hidden": true + } + }, + { + "remove": { + "index": ".ml-anomalies-custom-example", + "aliases": ".ml-anomalies-*" + } + }, + { + "remove_index": { + "index": ".ml-anomalies-custom-example" + } + }, + { + "add": { + "index": ".reindexed-v9-ml-anomalies-custom-example", + "alias": ".ml-anomalies-custom-example", + "is_hidden": true + } + } + ] +} +------------------------------------------------------------ +// TEST[skip:TBD] +-- +==== + +[[mark_anomaly_result_index_read_only]] +.Marking anomaly result indices as read-only +[%collapsible] +==== +Legacy indexes created in {es} 7.x can be made read-only and supported in {es} 9.x. +Making an index with a large amount of historical results read-only allows for a quick migration to the next major release, since you don't have to wait for the data to be reindexed into the new format. +However, it has the limitation that even after deleting an {anomaly-job}, the historical results associated with this job are not completely deleted. +Therefore, the system will prevent you from creating a new job with the same name. + +To set the index as read-only, add the `write` block to the index: + +[source,console] +------------------------------------------------------------ +PUT .ml-anomalies-custom-example/_block/write +------------------------------------------------------------ +// TEST[skip:TBD] + +Indices created in {es} 7.x that have a `write` block will not raise a critical deprecation warning. +==== + +[[delete_anomaly_result_index]] +.Deleting anomaly result indices +[%collapsible] +==== +If an index contains results of the jobs that are no longer required. 
+To list all jobs that stored results in an index, use the terms aggregation: + +[source,console] +------------------------------------------------------------ +GET .ml-anomalies-custom-example/_search +{ + "size": 0, + "aggs": { + "job_ids": { + "terms": { + "field": "job_id", + "size": 100 + } + } + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The jobs can be deleted in the UI. +After the last job is deleted, the index will be deleted as well. +==== diff --git a/docs/reference/rest-api/common-parms.asciidoc b/docs/reference/rest-api/common-parms.asciidoc index 5db1ae10ae902..37c5528812900 100644 --- a/docs/reference/rest-api/common-parms.asciidoc +++ b/docs/reference/rest-api/common-parms.asciidoc @@ -1338,7 +1338,7 @@ that lower ranked documents have more influence. This value must be greater than equal to `1`. Defaults to `60`. end::rrf-rank-constant[] -tag::rrf-rank-window-size[] +tag::compound-retriever-rank-window-size[] `rank_window_size`:: (Optional, integer) + @@ -1347,15 +1347,54 @@ query. A higher value will improve result relevance at the cost of performance. ranked result set is pruned down to the search request's <>. `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter. -end::rrf-rank-window-size[] +end::compound-retriever-rank-window-size[] -tag::rrf-filter[] +tag::compound-retriever-filter[] `filter`:: (Optional, <>) + Applies the specified <> to all of the specified sub-retrievers, according to each retriever's specifications. -end::rrf-filter[] +end::compound-retriever-filter[] + +tag::linear-retriever-components[] +`retrievers`:: +(Required, array of objects) ++ +A list of the sub-retrievers' configuration, that we will take into account and whose result sets +we will merge through a weighted sum. Each configuration can have a different weight and normalization depending +on the specified retriever. + +Each entry specifies the following parameters: + +* `retriever`:: +(Required, a <> object) ++ +Specifies the retriever for which we will compute the top documents for. The retriever will produce `rank_window_size` +results, which will later be merged based on the specified `weight` and `normalizer`. + +* `weight`:: +(Optional, float) ++ +The weight that each score of this retriever's top docs will be multiplied with. Must be greater or equal to 0. Defaults to 1.0. + +* `normalizer`:: +(Optional, String) ++ +Specifies how we will normalize the retriever's scores, before applying the specified `weight`. +Available values are: `minmax`, and `none`. Defaults to `none`. + +** `none` +** `minmax` : +A `MinMaxScoreNormalizer` that normalizes scores based on the following formula ++ +``` +score = (score - min) / (max - min) +``` + +See also <> using a linear retriever on how to +independently configure and apply normalizers to retrievers. +end::linear-retriever-components[] tag::knn-rescore-vector[] diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 4cccf4d204d99..fe959c4e8cbee 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -28,6 +28,9 @@ A <> that replaces the functionality of a traditi `knn`:: A <> that replaces the functionality of a <>. +`linear`:: +A <> that linearly combines the scores of other retrievers for the top documents. + `rescorer`:: A <> that replaces the functionality of the <>. 
@@ -45,6 +48,8 @@ A <> that applies contextual <> to pin o A standard retriever returns top documents from a traditional <>. +[discrete] +[[standard-retriever-parameters]] ===== Parameters: `query`:: @@ -195,6 +200,8 @@ Documents matching these conditions will have increased relevancy scores. A kNN retriever returns top documents from a <>. +[discrete] +[[knn-retriever-parameters]] ===== Parameters `field`:: @@ -265,21 +272,37 @@ GET /restaurants/_search This value must be fewer than or equal to `num_candidates`. <5> The size of the initial candidate set from which the final `k` nearest neighbors are selected. +[[linear-retriever]] +==== Linear Retriever +A retriever that normalizes and linearly combines the scores of other retrievers. + +[discrete] +[[linear-retriever-parameters]] +===== Parameters + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=linear-retriever-components] + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=compound-retriever-rank-window-size] + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=compound-retriever-filter] + [[rrf-retriever]] ==== RRF Retriever An <> retriever returns top documents based on the RRF formula, equally weighting two or more child retrievers. Reciprocal rank fusion (RRF) is a method for combining multiple result sets with different relevance indicators into a single result set. +[discrete] +[[rrf-retriever-parameters]] ===== Parameters include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-retrievers] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-constant] -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-window-size] +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=compound-retriever-rank-window-size] -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-filter] +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=compound-retriever-filter] [discrete] [[rrf-retriever-example-hybrid]] @@ -540,6 +563,8 @@ score = ln(score), if score < 0 ---- ==== +[discrete] +[[text-similarity-reranker-retriever-parameters]] ===== Parameters `retriever`:: diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index 842bd7049e3bf..59976cec9c0aa 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -45,7 +45,7 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-retrievers] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-constant] -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-window-size] +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=compound-retriever-rank-window-size] An example request using RRF: @@ -791,11 +791,11 @@ A more specific example of highlighting in RRF can also be found in the <> functionality, allowing you to retrieve -related nested or parent/child documents alongside your main search results. Inner hits can be -specified as part of any nested sub-retriever and will be propagated to the top-level parent -retriever. Note that the inner hit computation will take place only at end of `rrf` retriever's -evaluation on the top matching documents, and not as part of the query execution of the nested +The `rrf` retriever supports <> functionality, allowing you to retrieve +related nested or parent/child documents alongside your main search results. Inner hits can be +specified as part of any nested sub-retriever and will be propagated to the top-level parent +retriever. 
Note that the inner hit computation will take place only at end of `rrf` retriever's +evaluation on the top matching documents, and not as part of the query execution of the nested sub-retrievers. [IMPORTANT] diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index c0be7432aa179..bc5f891a759b6 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -36,6 +36,9 @@ PUT retrievers_example }, "topic": { "type": "keyword" + }, + "timestamp": { + "type": "date" } } } @@ -46,7 +49,8 @@ POST /retrievers_example/_doc/1 "vector": [0.23, 0.67, 0.89], "text": "Large language models are revolutionizing information retrieval by boosting search precision, deepening contextual understanding, and reshaping user experiences in data-rich environments.", "year": 2024, - "topic": ["llm", "ai", "information_retrieval"] + "topic": ["llm", "ai", "information_retrieval"], + "timestamp": "2021-01-01T12:10:30" } POST /retrievers_example/_doc/2 @@ -54,7 +58,8 @@ POST /retrievers_example/_doc/2 "vector": [0.12, 0.56, 0.78], "text": "Artificial intelligence is transforming medicine, from advancing diagnostics and tailoring treatment plans to empowering predictive patient care for improved health outcomes.", "year": 2023, - "topic": ["ai", "medicine"] + "topic": ["ai", "medicine"], + "timestamp": "2022-01-01T12:10:30" } POST /retrievers_example/_doc/3 @@ -62,7 +67,8 @@ POST /retrievers_example/_doc/3 "vector": [0.45, 0.32, 0.91], "text": "AI is redefining security by enabling advanced threat detection, proactive risk analysis, and dynamic defenses against increasingly sophisticated cyber threats.", "year": 2024, - "topic": ["ai", "security"] + "topic": ["ai", "security"], + "timestamp": "2023-01-01T12:10:30" } POST /retrievers_example/_doc/4 @@ -70,7 +76,8 @@ POST /retrievers_example/_doc/4 "vector": [0.34, 0.21, 0.98], "text": "Elastic introduces Elastic AI Assistant, the open, generative AI sidekick powered by ESRE to democratize cybersecurity and enable users of every skill level.", "year": 2023, - "topic": ["ai", "elastic", "assistant"] + "topic": ["ai", "elastic", "assistant"], + "timestamp": "2024-01-01T12:10:30" } POST /retrievers_example/_doc/5 @@ -78,7 +85,8 @@ POST /retrievers_example/_doc/5 "vector": [0.11, 0.65, 0.47], "text": "Learn how to spin up a deployment of our hosted Elasticsearch Service and use Elastic Observability to gain deeper insight into the behavior of your applications and systems.", "year": 2024, - "topic": ["documentation", "observability", "elastic"] + "topic": ["documentation", "observability", "elastic"], + "timestamp": "2025-01-01T12:10:30" } POST /retrievers_example/_refresh @@ -185,6 +193,248 @@ This returns the following response based on the final rrf score for each result // TESTRESPONSE[s/"took": 42/"took": $body.took/] ============== +[discrete] +[[retrievers-examples-linear-retriever]] +==== Example: Hybrid search with linear retriever + +A different, and more intuitive, way to provide hybrid search, is to linearly combine the top documents of different +retrievers using a weighted sum of the original scores. Since, as above, the scores could lie in different ranges, +we can also specify a `normalizer` that would ensure that all scores for the top ranked documents of a retriever +lie in a specific range. 
+ +To implement this, we define a `linear` retriever, and along with a set of retrievers that will generate the heterogeneous +results sets that we will combine. We will solve a problem similar to the above, by merging the results of a `standard` and a `knn` +retriever. As the `standard` retriever's scores are based on BM25 and are not strictly bounded, we will also define a +`minmax` normalizer to ensure that the scores lie in the [0, 1] range. We will apply the same normalizer to `knn` as well +to ensure that we capture the importance of each document within the result set. + +So, let's now specify the `linear` retriever whose final score is computed as follows: + +[source, text] +---- +score = weight(standard) * score(standard) + weight(knn) * score(knn) +score = 2 * score(standard) + 1.5 * score(knn) +---- +// NOTCONSOLE + +[source,console] +---- +GET /retrievers_example/_search +{ + "retriever": { + "linear": { + "retrievers": [ + { + "retriever": { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + "weight": 2, + "normalizer": "minmax" + }, + { + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "weight": 1.5, + "normalizer": "minmax" + } + ], + "rank_window_size": 10 + } + }, + "_source": false +} +---- +// TEST[continued] + +This returns the following response based on the normalized weighted score for each result. + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": -1, + "hits": [ + { + "_index": "retrievers_example", + "_id": "2", + "_score": -1 + }, + { + "_index": "retrievers_example", + "_id": "1", + "_score": -2 + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": -3 + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +// TESTRESPONSE[s/"max_score": -1/"max_score": $body.hits.max_score/] +// TESTRESPONSE[s/"_score": -1/"_score": $body.hits.hits.0._score/] +// TESTRESPONSE[s/"_score": -2/"_score": $body.hits.hits.1._score/] +// TESTRESPONSE[s/"_score": -3/"_score": $body.hits.hits.2._score/] +============== + +By normalizing scores and leveraging `function_score` queries, we can also implement more complex ranking strategies, +such as sorting results based on their timestamps, assign the timestamp as a score, and then normalizing this score to +[0, 1]. 
+Then, we can easily combine the above with a `knn` retriever as follows: + +[source,console] +---- +GET /retrievers_example/_search +{ + "retriever": { + "linear": { + "retrievers": [ + { + "retriever": { + "standard": { + "query": { + "function_score": { + "query": { + "term": { + "topic": "ai" + } + }, + "functions": [ + { + "script_score": { + "script": { + "source": "doc['timestamp'].value.millis" + } + } + } + ], + "boost_mode": "replace" + } + }, + "sort": { + "timestamp": { + "order": "asc" + } + } + } + }, + "weight": 2, + "normalizer": "minmax" + }, + { + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "weight": 1.5 + } + ], + "rank_window_size": 10 + } + }, + "_source": false +} +---- +// TEST[continued] + +Which would return the following results: + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": -1, + "hits": [ + { + "_index": "retrievers_example", + "_id": "3", + "_score": -1 + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": -2 + }, + { + "_index": "retrievers_example", + "_id": "4", + "_score": -3 + }, + { + "_index": "retrievers_example", + "_id": "1", + "_score": -4 + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +// TESTRESPONSE[s/"max_score": -1/"max_score": $body.hits.max_score/] +// TESTRESPONSE[s/"_score": -1/"_score": $body.hits.hits.0._score/] +// TESTRESPONSE[s/"_score": -2/"_score": $body.hits.hits.1._score/] +// TESTRESPONSE[s/"_score": -3/"_score": $body.hits.hits.2._score/] +// TESTRESPONSE[s/"_score": -4/"_score": $body.hits.hits.3._score/] +============== + [discrete] [[retrievers-examples-collapsing-retriever-results]] ==== Example: Grouping results by year with `collapse` diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index 1771b5bb0d849..1a94ae18a5c20 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -23,6 +23,9 @@ This ensures backward compatibility as existing `_search` requests remain suppor That way you can transition to the new abstraction at your own pace without mixing syntaxes. * <>. Returns top documents from a <>, in the context of a retriever framework. +* <>. +Combines the top results from multiple sub-retrievers using a weighted sum of their scores. Allows to specify different +weights for each retriever, as well as independently normalize the scores from each result set. * <>. Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. Allows you to combine multiple result sets with different relevance indicators into a single result set. diff --git a/libs/entitlement/bridge/build.gradle b/libs/entitlement/bridge/build.gradle index a9f8f6e3a3b0a..5dec95b4b9bb4 100644 --- a/libs/entitlement/bridge/build.gradle +++ b/libs/entitlement/bridge/build.gradle @@ -19,6 +19,9 @@ tasks.named('jar').configure { } } +// The bridge only uses things within the jdk, but the checker +// needs to have many forbidden apis in its signatures. Suppressing +// each use of forbidden apis would be tedious and not useful. 
tasks.withType(CheckForbiddenApisTask).configureEach { - replaceSignatureFiles 'jdk-signatures' + enabled = false } diff --git a/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java b/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java index d509763b3541d..de47e88aa8e95 100644 --- a/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java +++ b/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java @@ -9,6 +9,7 @@ package org.elasticsearch.entitlement.bridge; +import java.io.File; import java.io.InputStream; import java.io.PrintStream; import java.io.PrintWriter; @@ -47,7 +48,9 @@ import java.nio.channels.DatagramChannel; import java.nio.channels.ServerSocketChannel; import java.nio.channels.SocketChannel; +import java.nio.charset.Charset; import java.nio.file.Path; +import java.nio.file.attribute.UserPrincipal; import java.security.cert.CertStoreParameters; import java.util.List; import java.util.Locale; @@ -63,7 +66,7 @@ @SuppressWarnings("unused") // Called from instrumentation code inserted by the Entitlements agent public interface EntitlementChecker { - //////////////////// + /// ///////////////// // // Exit the JVM process // @@ -74,7 +77,7 @@ public interface EntitlementChecker { void check$java_lang_System$$exit(Class callerClass, int status); - //////////////////// + /// ///////////////// // // ClassLoader ctor // @@ -85,7 +88,7 @@ public interface EntitlementChecker { void check$java_lang_ClassLoader$(Class callerClass, String name, ClassLoader parent); - //////////////////// + /// ///////////////// // // SecureClassLoader ctor // @@ -96,7 +99,7 @@ public interface EntitlementChecker { void check$java_security_SecureClassLoader$(Class callerClass, String name, ClassLoader parent); - //////////////////// + /// ///////////////// // // URLClassLoader constructors // @@ -111,7 +114,7 @@ public interface EntitlementChecker { void check$java_net_URLClassLoader$(Class callerClass, String name, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory); - //////////////////// + /// ///////////////// // // "setFactory" methods // @@ -124,7 +127,7 @@ public interface EntitlementChecker { void check$javax_net_ssl_SSLContext$$setDefault(Class callerClass, SSLContext context); - //////////////////// + /// ///////////////// // // Process creation // @@ -133,7 +136,7 @@ public interface EntitlementChecker { void check$java_lang_ProcessBuilder$$startPipeline(Class callerClass, List builders); - //////////////////// + /// ///////////////// // // System Properties and similar // @@ -142,7 +145,7 @@ public interface EntitlementChecker { void check$java_lang_System$$clearProperty(Class callerClass, String key); - //////////////////// + /// ///////////////// // // JVM-wide state changes // @@ -219,7 +222,7 @@ public interface EntitlementChecker { void check$java_net_URLConnection$$setContentHandlerFactory(Class callerClass, ContentHandlerFactory fac); - //////////////////// + /// ///////////////// // // Network access // @@ -416,7 +419,7 @@ public interface EntitlementChecker { void check$sun_nio_ch_DatagramChannelImpl$receive(Class callerClass, DatagramChannel that, ByteBuffer dst); - //////////////////// + /// ///////////////// // // Load native libraries // @@ -484,4 +487,27 @@ public interface EntitlementChecker { void check$java_lang_foreign_SymbolLookup$$libraryLookup(Class callerClass, Path path, Arena arena); 
void check$java_lang_ModuleLayer$Controller$enableNativeAccess(Class callerClass, ModuleLayer.Controller that, Module target); + + /// ///////////////// + // + // File access + // + + void check$java_util_Scanner$(Class callerClass, File source); + + void check$java_util_Scanner$(Class callerClass, File source, String charsetName); + + void check$java_util_Scanner$(Class callerClass, File source, Charset charset); + + void check$java_io_FileOutputStream$(Class callerClass, String name); + + void check$java_io_FileOutputStream$(Class callerClass, String name, boolean append); + + void check$java_io_FileOutputStream$(Class callerClass, File file); + + void check$java_io_FileOutputStream$(Class callerClass, File file, boolean append); + + void check$java_nio_file_Files$$probeContentType(Class callerClass, Path path); + + void check$java_nio_file_Files$$setOwner(Class callerClass, Path path, UserPrincipal principal); } diff --git a/libs/entitlement/qa/entitled-plugin/src/main/java/org/elasticsearch/entitlement/qa/entitled/EntitledActions.java b/libs/entitlement/qa/entitled-plugin/src/main/java/org/elasticsearch/entitlement/qa/entitled/EntitledActions.java index 282860e1cdf60..24d7472e07c65 100644 --- a/libs/entitlement/qa/entitled-plugin/src/main/java/org/elasticsearch/entitlement/qa/entitled/EntitledActions.java +++ b/libs/entitlement/qa/entitled-plugin/src/main/java/org/elasticsearch/entitlement/qa/entitled/EntitledActions.java @@ -11,6 +11,11 @@ import org.elasticsearch.core.SuppressForbidden; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.UserPrincipal; + public final class EntitledActions { private EntitledActions() {} @@ -19,4 +24,7 @@ static void System_clearProperty(String key) { System.clearProperty(key); } + public static UserPrincipal getFileOwner(Path path) throws IOException { + return Files.getOwner(path); + } } diff --git a/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/FileCheckActions.java b/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/FileCheckActions.java new file mode 100644 index 0000000000000..6e15ff4d0cdd1 --- /dev/null +++ b/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/FileCheckActions.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.entitlement.qa.test; + +import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.entitlement.qa.entitled.EntitledActions; + +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.UserPrincipal; +import java.util.Scanner; + +@SuppressForbidden(reason = "Explicitly checking APIs that are forbidden") +class FileCheckActions { + + private static Path testRootDir = Paths.get(System.getProperty("es.entitlements.testdir")); + + private static Path readDir() { + return testRootDir.resolve("read_dir"); + } + + private static Path readWriteDir() { + return testRootDir.resolve("read_write_dir"); + } + + private static Path readFile() { + return testRootDir.resolve("read_file"); + } + + private static Path readWriteFile() { + return testRootDir.resolve("read_write_file"); + } + + static void createScannerFile() throws FileNotFoundException { + new Scanner(readFile().toFile()); + } + + static void createScannerFileWithCharset() throws IOException { + new Scanner(readFile().toFile(), StandardCharsets.UTF_8); + } + + static void createScannerFileWithCharsetName() throws FileNotFoundException { + new Scanner(readFile().toFile(), "UTF-8"); + } + + static void createFileOutputStreamString() throws IOException { + new FileOutputStream(readWriteFile().toString()).close(); + } + + static void createFileOutputStreamStringWithAppend() throws IOException { + new FileOutputStream(readWriteFile().toString(), false).close(); + } + + static void createFileOutputStreamFile() throws IOException { + new FileOutputStream(readWriteFile().toFile()).close(); + } + + static void createFileOutputStreamFileWithAppend() throws IOException { + new FileOutputStream(readWriteFile().toFile(), false).close(); + } + + static void filesProbeContentType() throws IOException { + Files.probeContentType(readFile()); + } + + static void filesSetOwner() throws IOException { + UserPrincipal owner = EntitledActions.getFileOwner(readWriteFile()); + Files.setOwner(readWriteFile(), owner); // set to existing owner, just trying to execute the method + } + + private FileCheckActions() {} +} diff --git a/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/RestEntitlementsCheckAction.java b/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/RestEntitlementsCheckAction.java index c2b6478e561a8..9b8cae1b72d29 100644 --- a/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/RestEntitlementsCheckAction.java +++ b/libs/entitlement/qa/entitlement-test-plugin/src/main/java/org/elasticsearch/entitlement/qa/test/RestEntitlementsCheckAction.java @@ -200,7 +200,6 @@ static CheckAction alwaysDenied(CheckedRunnable action) { entry("runtime_load_library", forPlugins(LoadNativeLibrariesCheckActions::runtimeLoadLibrary)), entry("system_load", forPlugins(LoadNativeLibrariesCheckActions::systemLoad)), entry("system_load_library", forPlugins(LoadNativeLibrariesCheckActions::systemLoadLibrary)), - entry("enable_native_access", new CheckAction(VersionSpecificNativeChecks::enableNativeAccess, false, 22)), entry("address_target_layout", new CheckAction(VersionSpecificNativeChecks::addressLayoutWithTargetLayout, false, 22)), entry("donwncall_handle", new 
CheckAction(VersionSpecificNativeChecks::linkerDowncallHandle, false, 22)), @@ -213,7 +212,16 @@ static CheckAction alwaysDenied(CheckedRunnable action) { new CheckAction(VersionSpecificNativeChecks::memorySegmentReinterpretWithSizeAndCleanup, false, 22) ), entry("symbol_lookup_name", new CheckAction(VersionSpecificNativeChecks::symbolLookupWithName, false, 22)), - entry("symbol_lookup_path", new CheckAction(VersionSpecificNativeChecks::symbolLookupWithPath, false, 22)) + entry("symbol_lookup_path", new CheckAction(VersionSpecificNativeChecks::symbolLookupWithPath, false, 22)), + entry("create_scanner", forPlugins(FileCheckActions::createScannerFile)), + entry("create_scanner_with_charset", forPlugins(FileCheckActions::createScannerFileWithCharset)), + entry("create_scanner_with_charset_name", forPlugins(FileCheckActions::createScannerFileWithCharsetName)), + entry("create_file_output_stream_string", forPlugins(FileCheckActions::createFileOutputStreamString)), + entry("create_file_output_stream_string_with_append", forPlugins(FileCheckActions::createFileOutputStreamStringWithAppend)), + entry("create_file_output_stream_file", forPlugins(FileCheckActions::createFileOutputStreamFile)), + entry("create_file_output_stream_file_with_append", forPlugins(FileCheckActions::createFileOutputStreamFileWithAppend)), + entry("files_probe_content_type", forPlugins(FileCheckActions::filesProbeContentType)), + entry("files_set_owner", forPlugins(FileCheckActions::filesSetOwner)) ) .filter(entry -> entry.getValue().fromJavaVersion() == null || Runtime.version().feature() >= entry.getValue().fromJavaVersion()) .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); diff --git a/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/AbstractEntitlementsIT.java b/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/AbstractEntitlementsIT.java index b770b4915a317..487f692ef4488 100644 --- a/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/AbstractEntitlementsIT.java +++ b/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/AbstractEntitlementsIT.java @@ -34,6 +34,11 @@ public abstract class AbstractEntitlementsIT extends ESRestTestCase { Map.of("properties", List.of("es.entitlements.checkSetSystemProperty", "es.entitlements.checkClearSystemProperty")) ) ); + + builder.value(Map.of("file", Map.of("path", tempDir.resolve("read_dir"), "mode", "read"))); + builder.value(Map.of("file", Map.of("path", tempDir.resolve("read_write_dir"), "mode", "read_write"))); + builder.value(Map.of("file", Map.of("path", tempDir.resolve("read_file"), "mode", "read"))); + builder.value(Map.of("file", Map.of("path", tempDir.resolve("read_write_file"), "mode", "read_write"))); }; private final String actionName; diff --git a/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/EntitlementsTestRule.java b/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/EntitlementsTestRule.java index 8c9dcb6dd0efe..33d5eeca595ab 100644 --- a/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/EntitlementsTestRule.java +++ b/libs/entitlement/qa/src/javaRestTest/java/org/elasticsearch/entitlement/qa/EntitlementsTestRule.java @@ -15,6 +15,7 @@ import org.elasticsearch.test.cluster.util.resource.Resource; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.yaml.YamlXContent; +import org.junit.rules.ExternalResource; import 
org.junit.rules.RuleChain; import org.junit.rules.TemporaryFolder; import org.junit.rules.TestRule; @@ -23,6 +24,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.file.Files; import java.nio.file.Path; class EntitlementsTestRule implements TestRule { @@ -38,6 +40,16 @@ interface PolicyBuilder { @SuppressWarnings("this-escape") EntitlementsTestRule(boolean modular, PolicyBuilder policyBuilder) { testDir = new TemporaryFolder(); + var tempDirSetup = new ExternalResource() { + @Override + protected void before() throws Throwable { + Path testPath = testDir.getRoot().toPath(); + Files.createDirectory(testPath.resolve("read_dir")); + Files.createDirectory(testPath.resolve("read_write_dir")); + Files.writeString(testPath.resolve("read_file"), ""); + Files.writeString(testPath.resolve("read_write_file"), ""); + } + }; cluster = ElasticsearchCluster.local() .module("entitled") .module("entitlement-test-plugin", spec -> setupEntitlements(spec, modular, policyBuilder)) @@ -45,7 +57,7 @@ interface PolicyBuilder { .systemProperty("es.entitlements.testdir", () -> testDir.getRoot().getAbsolutePath()) .setting("xpack.security.enabled", "false") .build(); - ruleChain = RuleChain.outerRule(testDir).around(cluster); + ruleChain = RuleChain.outerRule(testDir).around(tempDirSetup).around(cluster); } @Override @@ -62,6 +74,7 @@ private void setupEntitlements(PluginInstallSpec spec, boolean modular, PolicyBu builder.startObject(); builder.field(moduleName); builder.startArray(); + policyBuilder.build(builder, testDir.getRoot().toPath()); builder.endArray(); builder.endObject(); diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java index 8600dd357c384..48a7400a1db7b 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java @@ -9,9 +9,11 @@ package org.elasticsearch.entitlement.runtime.api; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.entitlement.bridge.EntitlementChecker; import org.elasticsearch.entitlement.runtime.policy.PolicyManager; +import java.io.File; import java.io.InputStream; import java.io.PrintStream; import java.io.PrintWriter; @@ -51,7 +53,9 @@ import java.nio.channels.DatagramChannel; import java.nio.channels.ServerSocketChannel; import java.nio.channels.SocketChannel; +import java.nio.charset.Charset; import java.nio.file.Path; +import java.nio.file.attribute.UserPrincipal; import java.security.cert.CertStoreParameters; import java.util.List; import java.util.Locale; @@ -69,6 +73,7 @@ * API methods for managing the checks. * The trampoline module loads this object via SPI. 
*/ +@SuppressForbidden(reason = "Explicitly checking APIs that are forbidden") public class ElasticsearchEntitlementChecker implements EntitlementChecker { private final PolicyManager policyManager; @@ -868,4 +873,49 @@ public ElasticsearchEntitlementChecker(PolicyManager policyManager) { ) { policyManager.checkLoadingNativeLibraries(callerClass); } + + @Override + public void check$java_util_Scanner$(Class callerClass, File source) { + policyManager.checkFileRead(callerClass, source); + } + + @Override + public void check$java_util_Scanner$(Class callerClass, File source, String charsetName) { + policyManager.checkFileRead(callerClass, source); + } + + @Override + public void check$java_util_Scanner$(Class callerClass, File source, Charset charset) { + policyManager.checkFileRead(callerClass, source); + } + + @Override + public void check$java_io_FileOutputStream$(Class callerClass, String name) { + policyManager.checkFileWrite(callerClass, new File(name)); + } + + @Override + public void check$java_io_FileOutputStream$(Class callerClass, String name, boolean append) { + policyManager.checkFileWrite(callerClass, new File(name)); + } + + @Override + public void check$java_io_FileOutputStream$(Class callerClass, File file) { + policyManager.checkFileWrite(callerClass, file); + } + + @Override + public void check$java_io_FileOutputStream$(Class callerClass, File file, boolean append) { + policyManager.checkFileWrite(callerClass, file); + } + + @Override + public void check$java_nio_file_Files$$probeContentType(Class callerClass, Path path) { + policyManager.checkFileRead(callerClass, path); + } + + @Override + public void check$java_nio_file_Files$$setOwner(Class callerClass, Path path, UserPrincipal principal) { + policyManager.checkFileWrite(callerClass, path); + } } diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileAccessTree.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileAccessTree.java new file mode 100644 index 0000000000000..55813df28b6f8 --- /dev/null +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileAccessTree.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.entitlement.runtime.policy; + +import org.elasticsearch.core.SuppressForbidden; + +import java.io.File; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +final class FileAccessTree { + static final FileAccessTree EMPTY = new FileAccessTree(List.of()); + + private final String[] readPaths; + private final String[] writePaths; + + FileAccessTree(List fileEntitlements) { + List readPaths = new ArrayList<>(); + List writePaths = new ArrayList<>(); + for (FileEntitlement fileEntitlement : fileEntitlements) { + var mode = fileEntitlement.mode(); + if (mode == FileEntitlement.Mode.READ_WRITE) { + writePaths.add(fileEntitlement.path()); + } + readPaths.add(fileEntitlement.path()); + } + + readPaths.sort(String::compareTo); + writePaths.sort(String::compareTo); + + this.readPaths = readPaths.toArray(new String[0]); + this.writePaths = writePaths.toArray(new String[0]); + } + + boolean canRead(Path path) { + return checkPath(normalize(path), readPaths); + } + + @SuppressForbidden(reason = "Explicitly checking File apis") + boolean canRead(File file) { + return checkPath(normalize(file.toPath()), readPaths); + } + + boolean canWrite(Path path) { + return checkPath(normalize(path), writePaths); + } + + @SuppressForbidden(reason = "Explicitly checking File apis") + boolean canWrite(File file) { + return checkPath(normalize(file.toPath()), writePaths); + } + + private static String normalize(Path path) { + return path.toAbsolutePath().normalize().toString(); + } + + private static boolean checkPath(String path, String[] paths) { + if (paths.length == 0) { + return false; + } + int ndx = Arrays.binarySearch(paths, path); + if (ndx < -1) { + String maybeParent = paths[-ndx - 2]; + return path.startsWith(maybeParent); + } + return ndx >= 0; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + FileAccessTree that = (FileAccessTree) o; + return Objects.deepEquals(readPaths, that.readPaths) && Objects.deepEquals(writePaths, that.writePaths); + } + + @Override + public int hashCode() { + return Objects.hash(Arrays.hashCode(readPaths), Arrays.hashCode(writePaths)); + } +} diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileEntitlement.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileEntitlement.java index 4fdbcc93ea6e0..4bd1dc10c85bb 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileEntitlement.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FileEntitlement.java @@ -9,62 +9,38 @@ package org.elasticsearch.entitlement.runtime.policy; -import java.util.List; -import java.util.Objects; +import java.nio.file.Paths; /** - * Describes a file entitlement with a path and actions. + * Describes a file entitlement with a path and mode. 
*/ -public class FileEntitlement implements Entitlement { +public record FileEntitlement(String path, Mode mode) implements Entitlement { - public static final int READ_ACTION = 0x1; - public static final int WRITE_ACTION = 0x2; - - public static final String READ = "read"; - public static final String WRITE = "write"; - - private final String path; - private final int actions; - - @ExternalEntitlement(parameterNames = { "path", "actions" }, esModulesOnly = false) - public FileEntitlement(String path, List actionsList) { - this.path = path; - int actionsInt = 0; - - for (String actionString : actionsList) { - if (READ.equals(actionString)) { - if ((actionsInt & READ_ACTION) == READ_ACTION) { - throw new IllegalArgumentException("file action [read] specified multiple times"); - } - actionsInt |= READ_ACTION; - } else if (WRITE.equals(actionString)) { - if ((actionsInt & WRITE_ACTION) == WRITE_ACTION) { - throw new IllegalArgumentException("file action [write] specified multiple times"); - } - actionsInt |= WRITE_ACTION; - } else { - throw new IllegalArgumentException("unknown file action [" + actionString + "]"); - } - } + public enum Mode { + READ, + READ_WRITE + } - this.actions = actionsInt; + public FileEntitlement { + path = normalizePath(path); } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - FileEntitlement that = (FileEntitlement) o; - return actions == that.actions && Objects.equals(path, that.path); + private static String normalizePath(String path) { + return Paths.get(path).toAbsolutePath().normalize().toString(); } - @Override - public int hashCode() { - return Objects.hash(path, actions); + private static Mode parseMode(String mode) { + if (mode.equals("read")) { + return Mode.READ; + } else if (mode.equals("read_write")) { + return Mode.READ_WRITE; + } else { + throw new PolicyValidationException("invalid mode: " + mode + ", valid values: [read, read_write]"); + } } - @Override - public String toString() { - return "FileEntitlement{" + "path='" + path + '\'' + ", actions=" + actions + '}'; + @ExternalEntitlement(parameterNames = { "path", "mode" }, esModulesOnly = false) + public FileEntitlement(String path, String mode) { + this(path, parseMode(mode)); } } diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java index dcdc7d1a47f9f..04942e15d10a4 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java @@ -10,13 +10,16 @@ package org.elasticsearch.entitlement.runtime.policy; import org.elasticsearch.core.Strings; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.entitlement.runtime.api.NotEntitledException; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; +import java.io.File; import java.lang.StackWalker.StackFrame; import java.lang.module.ModuleFinder; import java.lang.module.ModuleReference; +import java.nio.file.Path; import java.util.List; import java.util.Map; import java.util.Optional; @@ -36,15 +39,22 @@ public class PolicyManager { private static final Logger logger = LogManager.getLogger(PolicyManager.class); - record ModuleEntitlements(Map, List> entitlementsByType) { - public static final ModuleEntitlements NONE = 
new ModuleEntitlements(Map.of()); + record ModuleEntitlements(Map, List> entitlementsByType, FileAccessTree fileAccess) { + public static final ModuleEntitlements NONE = new ModuleEntitlements(Map.of(), FileAccessTree.EMPTY); ModuleEntitlements { entitlementsByType = Map.copyOf(entitlementsByType); } public static ModuleEntitlements from(List entitlements) { - return new ModuleEntitlements(entitlements.stream().collect(groupingBy(Entitlement::getClass))); + var fileEntitlements = entitlements.stream() + .filter(e -> e.getClass().equals(FileEntitlement.class)) + .map(e -> (FileEntitlement) e) + .toList(); + return new ModuleEntitlements( + entitlements.stream().collect(groupingBy(Entitlement::getClass)), + new FileAccessTree(fileEntitlements) + ); } public boolean hasEntitlement(Class entitlementClass) { @@ -189,6 +199,91 @@ public void checkChangeNetworkHandling(Class callerClass) { checkChangeJVMGlobalState(callerClass); } + /** + * Check for operations that can access sensitive network information, e.g. secrets, tokens or SSL sessions + */ + public void checkReadSensitiveNetworkInformation(Class callerClass) { + neverEntitled(callerClass, "access sensitive network information"); + } + + @SuppressForbidden(reason = "Explicitly checking File apis") + public void checkFileRead(Class callerClass, File file) { + var requestingClass = requestingClass(callerClass); + if (isTriviallyAllowed(requestingClass)) { + return; + } + + ModuleEntitlements entitlements = getEntitlements(requestingClass); + if (entitlements.fileAccess().canRead(file) == false) { + throw new NotEntitledException( + Strings.format( + "Not entitled: caller [%s], module [%s], entitlement [file], operation [read], path [%s]", + callerClass, + requestingClass.getModule(), + file + ) + ); + } + } + + public void checkFileRead(Class callerClass, Path path) { + var requestingClass = requestingClass(callerClass); + if (isTriviallyAllowed(requestingClass)) { + return; + } + + ModuleEntitlements entitlements = getEntitlements(requestingClass); + if (entitlements.fileAccess().canRead(path) == false) { + throw new NotEntitledException( + Strings.format( + "Not entitled: caller [%s], module [%s], entitlement [file], operation [read], path [%s]", + callerClass, + requestingClass.getModule(), + path + ) + ); + } + } + + @SuppressForbidden(reason = "Explicitly checking File apis") + public void checkFileWrite(Class callerClass, File file) { + var requestingClass = requestingClass(callerClass); + if (isTriviallyAllowed(requestingClass)) { + return; + } + + ModuleEntitlements entitlements = getEntitlements(requestingClass); + if (entitlements.fileAccess().canWrite(file) == false) { + throw new NotEntitledException( + Strings.format( + "Not entitled: caller [%s], module [%s], entitlement [file], operation [write], path [%s]", + callerClass, + requestingClass.getModule(), + file + ) + ); + } + } + + public void checkFileWrite(Class callerClass, Path path) { + var requestingClass = requestingClass(callerClass); + if (isTriviallyAllowed(requestingClass)) { + return; + } + + ModuleEntitlements entitlements = getEntitlements(requestingClass); + if (entitlements.fileAccess().canWrite(path) == false) { + throw new NotEntitledException( + Strings.format( + "Not entitled: caller [%s], module [%s], entitlement [file], operation [write], path [%s]", + callerClass, + requestingClass.getModule(), + path + ) + ); + } + } + /** * Check for operations that can access sensitive network information, e.g. 
secrets, tokens or SSL sessions */ diff --git a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/FileAccessTreeTests.java b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/FileAccessTreeTests.java new file mode 100644 index 0000000000000..1521c80341b9d --- /dev/null +++ b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/FileAccessTreeTests.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.entitlement.runtime.policy; + +import org.elasticsearch.test.ESTestCase; +import org.junit.BeforeClass; + +import java.nio.file.Path; +import java.util.List; + +import static org.hamcrest.Matchers.is; + +public class FileAccessTreeTests extends ESTestCase { + + static Path root; + + @BeforeClass + public static void setupRoot() { + root = createTempDir(); + } + + private static Path path(String s) { + return root.resolve(s); + } + + public void testEmpty() { + var tree = new FileAccessTree(List.of()); + assertThat(tree.canRead(path("path")), is(false)); + assertThat(tree.canWrite(path("path")), is(false)); + } + + public void testRead() { + var tree = new FileAccessTree(List.of(entitlement("foo", "read"))); + assertThat(tree.canRead(path("foo")), is(true)); + assertThat(tree.canRead(path("foo/subdir")), is(true)); + assertThat(tree.canWrite(path("foo")), is(false)); + + assertThat(tree.canRead(path("before")), is(false)); + assertThat(tree.canRead(path("later")), is(false)); + } + + public void testWrite() { + var tree = new FileAccessTree(List.of(entitlement("foo", "read_write"))); + assertThat(tree.canWrite(path("foo")), is(true)); + assertThat(tree.canWrite(path("foo/subdir")), is(true)); + assertThat(tree.canRead(path("foo")), is(true)); + + assertThat(tree.canWrite(path("before")), is(false)); + assertThat(tree.canWrite(path("later")), is(false)); + } + + public void testTwoPaths() { + var tree = new FileAccessTree(List.of(entitlement("foo", "read"), entitlement("bar", "read"))); + assertThat(tree.canRead(path("a")), is(false)); + assertThat(tree.canRead(path("bar")), is(true)); + assertThat(tree.canRead(path("bar/subdir")), is(true)); + assertThat(tree.canRead(path("c")), is(false)); + assertThat(tree.canRead(path("foo")), is(true)); + assertThat(tree.canRead(path("foo/subdir")), is(true)); + assertThat(tree.canRead(path("z")), is(false)); + } + + public void testReadWriteUnderRead() { + var tree = new FileAccessTree(List.of(entitlement("foo", "read"), entitlement("foo/bar", "read_write"))); + assertThat(tree.canRead(path("foo")), is(true)); + assertThat(tree.canWrite(path("foo")), is(false)); + assertThat(tree.canRead(path("foo/bar")), is(true)); + assertThat(tree.canWrite(path("foo/bar")), is(true)); + } + + public void testNormalizePath() { + var tree = new FileAccessTree(List.of(entitlement("foo/../bar", "read"))); + assertThat(tree.canRead(path("foo/../bar")), is(true)); + assertThat(tree.canRead(path("foo")), is(false)); + assertThat(tree.canRead(path("")), is(false)); + } + + FileEntitlement entitlement(String path, String mode) { + Path p = 
path(path); + return new FileEntitlement(p.toString(), mode); + } +} diff --git a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyManagerTests.java b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyManagerTests.java index f50cd217696de..20035d0bb258b 100644 --- a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyManagerTests.java +++ b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyManagerTests.java @@ -31,10 +31,7 @@ import static java.util.Map.entry; import static org.elasticsearch.entitlement.runtime.policy.PolicyManager.ALL_UNNAMED; -import static org.elasticsearch.test.LambdaMatchers.transformedMatch; import static org.hamcrest.Matchers.aMapWithSize; -import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.sameInstance; @@ -204,10 +201,8 @@ public void testGetEntitlementsReturnsEntitlementsForPluginModule() throws IOExc var entitlements = policyManager.getEntitlements(mockPluginClass); assertThat(entitlements.hasEntitlement(CreateClassLoaderEntitlement.class), is(true)); - assertThat( - entitlements.getEntitlements(FileEntitlement.class).toList(), - contains(transformedMatch(FileEntitlement::toString, containsString("/test/path"))) - ); + // TODO: this can't work on Windows, we need to have the root be unknown + // assertThat(entitlements.fileAccess().canRead("/test/path"), is(true)); } public void testGetEntitlementsResultIsCached() { @@ -324,7 +319,7 @@ private static Policy createPluginPolicy(String... pluginModules) { .map( name -> new Scope( name, - List.of(new FileEntitlement("/test/path", List.of(FileEntitlement.READ)), new CreateClassLoaderEntitlement()) + List.of(new FileEntitlement("/test/path", FileEntitlement.Mode.READ), new CreateClassLoaderEntitlement()) ) ) .toList() diff --git a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java index dfcc5d8916f2c..cc8043990930d 100644 --- a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java +++ b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java @@ -55,7 +55,7 @@ public void testEntitlementMissingParameter() { """.getBytes(StandardCharsets.UTF_8)), "test-failure-policy.yaml", false).parsePolicy()); assertEquals( "[4:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " - + "for entitlement type [file]: missing entitlement parameter [actions]", + + "for entitlement type [file]: missing entitlement parameter [mode]", ppe.getMessage() ); } @@ -65,12 +65,11 @@ public void testEntitlementExtraneousParameter() { entitlement-module-name: - file: path: test-path - actions: - - read + mode: read extra: test """.getBytes(StandardCharsets.UTF_8)), "test-failure-policy.yaml", false).parsePolicy()); assertEquals( - "[7:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "[6:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "for entitlement type [file]: extraneous entitlement parameter(s) {extra=test}", ppe.getMessage() ); diff --git 
a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserTests.java b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserTests.java index 08185c3f82b31..191b3afcdc674 100644 --- a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserTests.java +++ b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserTests.java @@ -47,7 +47,7 @@ public void testPolicyBuilder() throws IOException { .parsePolicy(); Policy expected = new Policy( "test-policy.yaml", - List.of(new Scope("entitlement-module-name", List.of(new FileEntitlement("test/path/to/file", List.of("read", "write"))))) + List.of(new Scope("entitlement-module-name", List.of(new FileEntitlement("test/path/to/file", "read_write")))) ); assertEquals(expected, parsedPolicy); } @@ -57,7 +57,7 @@ public void testPolicyBuilderOnExternalPlugin() throws IOException { .parsePolicy(); Policy expected = new Policy( "test-policy.yaml", - List.of(new Scope("entitlement-module-name", List.of(new FileEntitlement("test/path/to/file", List.of("read", "write"))))) + List.of(new Scope("entitlement-module-name", List.of(new FileEntitlement("test/path/to/file", "read_write")))) ); assertEquals(expected, parsedPolicy); } diff --git a/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml b/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml index f13f574535bec..bbb926ccdd37d 100644 --- a/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml +++ b/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml @@ -1,6 +1,4 @@ entitlement-module-name: - file: path: "test/path/to/file" - actions: - - "read" - - "write" + mode: "read_write" diff --git a/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/AbstractFeatureMigrationIntegTest.java b/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/AbstractFeatureMigrationIntegTest.java index 860d63000f124..84e45024b69ff 100644 --- a/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/AbstractFeatureMigrationIntegTest.java +++ b/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/AbstractFeatureMigrationIntegTest.java @@ -9,14 +9,17 @@ package org.elasticsearch.migration; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.admin.cluster.migration.TransportGetFeatureUpgradeStatusAction; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.admin.indices.stats.IndexStats; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.support.ActionFilter; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.ClusterState; @@ -28,6 +31,7 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.indices.AssociatedIndexDescriptor; import org.elasticsearch.indices.SystemIndexDescriptor; +import org.elasticsearch.plugins.ActionPlugin; import 
org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.plugins.SystemIndexPlugin; @@ -50,6 +54,10 @@ import java.util.function.BiConsumer; import java.util.function.Function; +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonList; +import static java.util.Collections.unmodifiableSet; +import static org.elasticsearch.common.util.set.Sets.newHashSet; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.equalTo; @@ -255,12 +263,18 @@ protected void assertIndexHasCorrectProperties( assertThat(thisIndexStats.getTotal().getDocs().getCount(), is((long) INDEX_DOC_COUNT)); } - public static class TestPlugin extends Plugin implements SystemIndexPlugin { + public static class TestPlugin extends Plugin implements SystemIndexPlugin, ActionPlugin { public final AtomicReference>> preMigrationHook = new AtomicReference<>(); public final AtomicReference>> postMigrationHook = new AtomicReference<>(); + private final BlockingActionFilter blockingActionFilter; public TestPlugin() { + blockingActionFilter = new BlockingActionFilter(); + } + @Override + public List getActionFilters() { + return singletonList(blockingActionFilter); } @Override @@ -299,5 +313,26 @@ public void indicesMigrationComplete( postMigrationHook.get().accept(clusterService.state(), preUpgradeMetadata); listener.onResponse(true); } + + public static class BlockingActionFilter extends org.elasticsearch.action.support.ActionFilter.Simple { + private Set blockedActions = emptySet(); + + @Override + protected boolean apply(String action, ActionRequest request, ActionListener listener) { + if (blockedActions.contains(action)) { + throw new ElasticsearchException("force exception on [" + action + "]"); + } + return true; + } + + @Override + public int order() { + return 0; + } + + public void blockActions(String... 
actions) { + blockedActions = unmodifiableSet(newHashSet(actions)); + } + } } } diff --git a/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/FeatureMigrationIT.java b/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/FeatureMigrationIT.java index cdf817a6b17b8..ee95ce5513820 100644 --- a/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/FeatureMigrationIT.java +++ b/modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/FeatureMigrationIT.java @@ -17,11 +17,14 @@ import org.elasticsearch.action.admin.cluster.migration.PostFeatureUpgradeRequest; import org.elasticsearch.action.admin.cluster.migration.PostFeatureUpgradeResponse; import org.elasticsearch.action.admin.indices.alias.Alias; +import org.elasticsearch.action.admin.indices.alias.TransportIndicesAliasesAction; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.admin.indices.template.put.PutComponentTemplateAction; import org.elasticsearch.action.admin.indices.template.put.TransportPutComposableIndexTemplateAction; import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.support.ActionFilter; +import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; @@ -36,10 +39,12 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.indices.SystemIndexDescriptor; +import org.elasticsearch.migration.AbstractFeatureMigrationIntegTest.TestPlugin.BlockingActionFilter; import org.elasticsearch.painless.PainlessPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SystemIndexPlugin; import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.upgrades.FeatureMigrationResults; import org.elasticsearch.upgrades.SingleFeatureMigrationResult; @@ -272,6 +277,60 @@ public void testMigrateIndexWithWriteBlock() throws Exception { }); } + @AwaitsFix(bugUrl = "ES-10666") // This test uncovered an existing issue + public void testIndexBlockIsRemovedWhenAliasRequestFails() throws Exception { + createSystemIndexForDescriptor(INTERNAL_UNMANAGED); + ensureGreen(); + + // Block the alias request to simulate a failure + InternalTestCluster internalTestCluster = internalCluster(); + ActionFilters actionFilters = internalTestCluster.getInstance(ActionFilters.class, internalTestCluster.getMasterName()); + BlockingActionFilter blockingActionFilter = null; + for (ActionFilter filter : actionFilters.filters()) { + if (filter instanceof BlockingActionFilter) { + blockingActionFilter = (BlockingActionFilter) filter; + break; + } + } + assertNotNull("BlockingActionFilter should exist", blockingActionFilter); + blockingActionFilter.blockActions(TransportIndicesAliasesAction.NAME); + + // Start the migration + client().execute(PostFeatureUpgradeAction.INSTANCE, new PostFeatureUpgradeRequest(TEST_REQUEST_TIMEOUT)).get(); + + // Wait till the migration fails + assertBusy(() -> { + GetFeatureUpgradeStatusResponse statusResp = client().execute( + GetFeatureUpgradeStatusAction.INSTANCE, + new GetFeatureUpgradeStatusRequest(TEST_REQUEST_TIMEOUT) + ).get(); + logger.info(Strings.toString(statusResp)); + 
assertThat(statusResp.getUpgradeStatus(), equalTo(GetFeatureUpgradeStatusResponse.UpgradeStatus.ERROR)); + }); + + // Get the settings to see if the write block was removed + var allsettings = client().admin().indices().prepareGetSettings(INTERNAL_UNMANAGED.getIndexPattern()).get().getIndexToSettings(); + var internalUnmanagedOldIndexSettings = allsettings.get(".int-unman-old"); + var writeBlock = internalUnmanagedOldIndexSettings.get(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey()); + assertThat("Write block on old index should be removed on migration ERROR status", writeBlock, equalTo("false")); + + // Unblock the alias request + blockingActionFilter.blockActions(); + + // Retry the migration + client().execute(PostFeatureUpgradeAction.INSTANCE, new PostFeatureUpgradeRequest(TEST_REQUEST_TIMEOUT)).get(); + + // Ensure that the migration is successful after the alias request is unblocked + assertBusy(() -> { + GetFeatureUpgradeStatusResponse statusResp = client().execute( + GetFeatureUpgradeStatusAction.INSTANCE, + new GetFeatureUpgradeStatusRequest(TEST_REQUEST_TIMEOUT) + ).get(); + logger.info(Strings.toString(statusResp)); + assertThat(statusResp.getUpgradeStatus(), equalTo(GetFeatureUpgradeStatusResponse.UpgradeStatus.NO_MIGRATION_NEEDED)); + }); + } + public void testMigrationWillRunAfterError() throws Exception { createSystemIndexForDescriptor(INTERNAL_MANAGED); diff --git a/muted-tests.yml b/muted-tests.yml index 381adda7da8ed..0d62ef0530c27 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -280,18 +280,18 @@ tests: - class: org.elasticsearch.xpack.esql.parser.StatementParserTests method: testNamedFunctionArgumentInMap issue: https://github.com/elastic/elasticsearch/issues/121020 -- class: org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityEsqlIT - method: testCrossClusterAsyncQuery - issue: https://github.com/elastic/elasticsearch/issues/121021 -- class: org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityEsqlIT - method: testCrossClusterAsyncQueryStop - issue: https://github.com/elastic/elasticsearch/issues/121021 - class: org.elasticsearch.xpack.security.profile.ProfileIntegTests method: testSuggestProfilesWithName issue: https://github.com/elastic/elasticsearch/issues/121022 - class: org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilterIT method: testBulkOperations {p0=true} issue: https://github.com/elastic/elasticsearch/issues/120969 +- class: org.elasticsearch.xpack.security.profile.ProfileIntegTests + method: testProfileAPIsWhenIndexNotCreated + issue: https://github.com/elastic/elasticsearch/issues/121096 +- class: org.elasticsearch.xpack.security.profile.ProfileIntegTests + method: testGetProfiles + issue: https://github.com/elastic/elasticsearch/issues/121101 # Examples: # diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.update.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.update.json index 6c458ce080aa7..133354e3ec5be 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.update.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.update.json @@ -14,7 +14,7 @@ "paths": [ { "path": "/_inference/{inference_id}/_update", - "methods": ["POST"], + "methods": ["PUT"], "parts": { "inference_id": { "type": "string", @@ -24,7 +24,7 @@ }, { "path": "/_inference/{task_type}/{inference_id}/_update", - "methods": ["POST"], + "methods": ["PUT"], "parts": { "task_type": { "type": "string", diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 096ccbce9a58b..d1c492caf9b48 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1602,6 +1602,74 @@ synthetic_source with copy_to pointing inside object: hits.hits.2.fields: c.copy: [ "100", "hello", "zap" ] +--- +synthetic_source with copy_to inside nested object: + - do: + indices.create: + index: test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + name: + type: keyword + my_values: + type: nested + properties: + k: + type: keyword + copy_to: my_values.copy + second_level: + type: nested + properties: + k2: + type: keyword + copy_to: my_values.copy + copy: + type: keyword + dummy: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + my_values: + k: "hello" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + my_values: + second_level: + k2: "hello" + + - do: + search: + index: test + sort: name + + - match: + hits.hits.0._source: + name: "A" + my_values: + k: "hello" + - match: + hits.hits.1._source: + name: "B" + my_values: + second_level: + k2: "hello" + --- synthetic_source with copy_to pointing to ambiguous field: - do: diff --git a/server/src/main/java/org/elasticsearch/ElasticsearchTimeoutException.java b/server/src/main/java/org/elasticsearch/ElasticsearchTimeoutException.java index eddce19c77888..06ae43144476e 100644 --- a/server/src/main/java/org/elasticsearch/ElasticsearchTimeoutException.java +++ b/server/src/main/java/org/elasticsearch/ElasticsearchTimeoutException.java @@ -10,6 +10,7 @@ package org.elasticsearch; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.rest.RestStatus; import java.io.IOException; @@ -34,4 +35,10 @@ public ElasticsearchTimeoutException(String message, Object... args) { public ElasticsearchTimeoutException(String message, Throwable cause, Object... args) { super(message, cause, args); } + + @Override + public RestStatus status() { + // closest thing to "your request took longer than you asked for" + return RestStatus.TOO_MANY_REQUESTS; + } } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 05c2071ad8d5f..14078fad9e20d 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -168,6 +168,7 @@ static TransportVersion def(int id) { public static final TransportVersion ILM_ADD_SEARCHABLE_SNAPSHOT_ADD_REPLICATE_FOR = def(8_834_00_0); public static final TransportVersion INGEST_REQUEST_INCLUDE_SOURCE_ON_ERROR = def(8_835_00_0); public static final TransportVersion RESOURCE_DEPRECATION_CHECKS = def(8_836_00_0); + public static final TransportVersion LINEAR_RETRIEVER_SUPPORT = def(8_837_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/IndicesAliasesResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/IndicesAliasesResponse.java index 69ab9f57d2be7..071e9b42752c0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/IndicesAliasesResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/IndicesAliasesResponse.java @@ -77,6 +77,17 @@ public boolean hasErrors() { return errors; } + /** + * Get a list of all errors from the response. If there are no errors, an empty list is returned. + */ + public List getErrors() { + if (errors == false) { + return List.of(); + } else { + return actionResults.stream().filter(a -> a.getError() != null).map(AliasActionResult::getError).toList(); + } + } + /** * Build a response from a list of action results. Sets the errors boolean based * on whether an of the individual results contain an error. @@ -165,6 +176,13 @@ public static AliasActionResult buildSuccess(List indices, AliasActions return new AliasActionResult(indices, action, null); } + /** + * The error result if the action failed, null if the action succeeded. + */ + public ElasticsearchException getError() { + return error; + } + private int getStatus() { return error == null ? 200 : error.status().getStatus(); } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/ProcessClusterEventTimeoutException.java b/server/src/main/java/org/elasticsearch/cluster/metadata/ProcessClusterEventTimeoutException.java index 1182faea81ed6..2a273f7f81e0f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/ProcessClusterEventTimeoutException.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/ProcessClusterEventTimeoutException.java @@ -28,6 +28,6 @@ public ProcessClusterEventTimeoutException(StreamInput in) throws IOException { @Override public RestStatus status() { - return RestStatus.SERVICE_UNAVAILABLE; + return RestStatus.TOO_MANY_REQUESTS; } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 51e4e9f4c1b5e..ba9e902fee5d9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -367,19 +367,6 @@ public final DocumentParserContext maybeCloneForArray(Mapper mapper) throws IOEx return this; } - /** - * Creates a sub-context from the current {@link DocumentParserContext} to indicate that the source for the sub-context has been - * recorded and avoid duplicate recording for parts of the sub-context. Applies to synthetic source only. - */ - public final DocumentParserContext cloneWithRecordedSource() throws IOException { - if (canAddIgnoredField()) { - DocumentParserContext subcontext = createChildContext(parent()); - subcontext.setRecordedSource(); // Avoids double-storing parts of the source for the same parser subtree. - return subcontext; - } - return this; - } - /** * Add the given {@code field} to the _field_names field * @@ -466,10 +453,6 @@ public boolean isCopyToDestinationField(String name) { return copyToFields.contains(name); } - public Set getCopyToFields() { - return copyToFields; - } - /** * Add a new mapper dynamically created while parsing. 
* @@ -706,6 +689,26 @@ public LuceneDocument doc() { * @param doc the document to target */ public final DocumentParserContext createCopyToContext(String copyToField, LuceneDocument doc) throws IOException { + /* + Mark field as containing copied data meaning it should not be present + in synthetic _source (to be consistent with stored _source). + Ignored source values take precedence over standard synthetic source implementation + so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source. + Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values + in copied field after reindexing. + */ + if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) { + ObjectMapper parent = root().findParentMapper(copyToField); + // There are scenarios when this is false: + // 1. all values of the field that is the source of copy_to are null + // 2. copy_to points at a field inside a disabled object + // 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly after the dynamic update + if (parent != null) { + int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1; + ignoredFieldValues.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), doc)); + } + } + ContentPath path = new ContentPath(); XContentParser parser = DotExpandingXContentParser.expandDots(new CopyToParser(copyToField, parser()), path); return new Wrapper(root(), this) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index 5f553ac8d2252..d8d8200baac31 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -25,8 +25,6 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.Set; @@ -161,33 +159,7 @@ public void postParse(DocumentParserContext context) { return; } - Collection ignoredValuesToWrite = context.getIgnoredFieldValues(); - if (context.getCopyToFields().isEmpty() == false && indexSettings.getSkipIgnoredSourceWrite() == false) { - /* - Mark fields as containing copied data meaning they should not be present - in synthetic _source (to be consistent with stored _source). - Ignored source values take precedence over standard synthetic source implementation - so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source. - Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values - in copied field after reindexing. - */ - var mutableList = new ArrayList<>(ignoredValuesToWrite); - for (String copyToField : context.getCopyToFields()) { - ObjectMapper parent = context.parent().findParentMapper(copyToField); - if (parent == null) { - // There are scenarios when this can happen: - // 1. all values of the field that is the source of copy_to are null - // 2. copy_to points at a field inside a disabled object - // 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse. - continue; - } - int offset = parent.isRoot() ? 
0 : parent.fullPath().length() + 1; - mutableList.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), context.doc())); - } - ignoredValuesToWrite = mutableList; - } - - for (NameValue nameValue : ignoredValuesToWrite) { + for (NameValue nameValue : context.getIgnoredFieldValues()) { nameValue.doc().add(new StoredField(NAME, encode(nameValue))); } } diff --git a/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java index 889fa40b79aa1..524310c547597 100644 --- a/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java @@ -70,7 +70,9 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws changed |= newQueryBuilders[i] != queryBuilders[i]; } if (changed) { - return new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs); + RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs); + clone.queryName(queryName()); + return clone; } } return super.doRewrite(queryRewriteContext); diff --git a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java index f5670ebd8a543..bb9f7ad4b7bf1 100644 --- a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java @@ -290,8 +290,7 @@ public RetrieverSpec(ParseField name, RetrieverParser parser) { /** * Specification of custom {@link RetrieverBuilder}. * - * @param name the name by which this retriever might be parsed or deserialized. Make sure that the retriever builder returns - * this name for {@link NamedWriteable#getWriteableName()}. + * @param name the name by which this retriever might be parsed or deserialized. 
* @param parser the parser the reads the retriever builder from xcontent */ public RetrieverSpec(String name, RetrieverParser parser) { diff --git a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java index e006f176ff91a..e5caa00537c67 100644 --- a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java +++ b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java @@ -31,7 +31,7 @@ public SearchTimeoutException(StreamInput in) throws IOException { @Override public RestStatus status() { - return RestStatus.GATEWAY_TIMEOUT; + return RestStatus.TOO_MANY_REQUESTS; } /** diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 8403031bc65f5..0bb5fd849bbcf 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -192,8 +192,13 @@ public void onFailure(Exception e) { } }); }); - - return new RankDocsRetrieverBuilder(rankWindowSize, newRetrievers.stream().map(s -> s.retriever).toList(), results::get); + RankDocsRetrieverBuilder rankDocsRetrieverBuilder = new RankDocsRetrieverBuilder( + rankWindowSize, + newRetrievers.stream().map(s -> s.retriever).toList(), + results::get + ); + rankDocsRetrieverBuilder.retrieverName(retrieverName()); + return rankDocsRetrieverBuilder; } @Override @@ -219,7 +224,8 @@ public ActionRequestValidationException validate( boolean allowPartialSearchResults ) { validationException = super.validate(source, validationException, isScroll, allowPartialSearchResults); - if (source.size() > rankWindowSize) { + final int size = source.size(); + if (size > rankWindowSize) { validationException = addValidationError( String.format( Locale.ROOT, @@ -227,7 +233,7 @@ public ActionRequestValidationException validate( getName(), getRankWindowSizeField().getPreferredName(), rankWindowSize, - source.size() + size ), validationException ); diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java index 4d3f3fefd4462..a77f5327fbc26 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java @@ -90,11 +90,13 @@ public QueryBuilder topDocsQuery() { @Override public QueryBuilder explainQuery() { - return new RankDocsQueryBuilder( + var explainQuery = new RankDocsQueryBuilder( rankDocs.get(), sources.stream().map(RetrieverBuilder::explainQuery).toArray(QueryBuilder[]::new), true ); + explainQuery.queryName(retrieverName()); + return explainQuery; } @Override @@ -123,8 +125,12 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder } else { rankQuery = new RankDocsQueryBuilder(rankDocResults, null, false); } + rankQuery.queryName(retrieverName()); // ignore prefilters of this level, they were already propagated to children searchSourceBuilder.query(rankQuery); + if (searchSourceBuilder.size() < 0) { + searchSourceBuilder.size(rankWindowSize); + } if (sourceHasMinScore()) { searchSourceBuilder.minScore(this.minScore() == null ? 
Float.MIN_VALUE : this.minScore()); } diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java index 4531beef7125d..83a331fd81b54 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java @@ -144,6 +144,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept protected RescorerRetrieverBuilder clone(List newChildRetrievers, List newPreFilterQueryBuilders) { var newInstance = new RescorerRetrieverBuilder(newChildRetrievers.get(0), rescorers); newInstance.preFilterQueryBuilders = newPreFilterQueryBuilders; + newInstance.retrieverName = retrieverName; return newInstance; } diff --git a/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java b/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java index 186618f3662fb..cdd466c567e8b 100644 --- a/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java +++ b/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java @@ -15,7 +15,9 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse; import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpdateRequest; +import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockRequest; import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsClusterStateUpdateRequest; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.action.support.master.AcknowledgedResponse; @@ -32,7 +34,6 @@ import org.elasticsearch.cluster.metadata.MetadataIndexTemplateService; import org.elasticsearch.cluster.metadata.MetadataUpdateSettingsService; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.IndexScopedSettings; import org.elasticsearch.common.settings.Settings; @@ -59,6 +60,7 @@ import java.util.stream.Collectors; import static org.elasticsearch.action.admin.cluster.migration.TransportGetFeatureUpgradeStatusAction.NO_UPGRADE_REQUIRED_INDEX_VERSION; +import static org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock.WRITE; import static org.elasticsearch.cluster.metadata.IndexMetadata.State.CLOSE; import static org.elasticsearch.core.Strings.format; @@ -448,12 +450,33 @@ private void migrateSingleIndex(ClusterState clusterState, Consumer { + if (aliasesResponse.hasErrors()) { + var e = new ElasticsearchException("Aliases request had errors"); + for (var error : aliasesResponse.getErrors()) { + e.addSuppressed(error); + } + throw e; + } + logger.info( + "Successfully migrated old index [{}] to new index [{}] from feature [{}]", + oldIndexName, + migrationInfo.getNextIndexName(), + migrationInfo.getFeatureName() + ); + delegate2.onResponse(bulkByScrollResponse); + }, e -> { + logger.error( + () -> format( + "An error occurred while changing aliases and removing the old index [%s] from feature [%s]", + oldIndexName, + migrationInfo.getFeatureName() + ), + e + ); + removeReadOnlyBlockOnReindexFailure(oldIndex, delegate2, e); + })); } }, e 
-> { logger.error( @@ -511,10 +534,7 @@ private void createIndex(SystemIndexMigrationInfo migrationInfo, ActionListener< ); } - private CheckedBiConsumer, AcknowledgedResponse, Exception> setAliasAndRemoveOldIndex( - SystemIndexMigrationInfo migrationInfo, - BulkByScrollResponse bulkByScrollResponse - ) { + private void setAliasAndRemoveOldIndex(SystemIndexMigrationInfo migrationInfo, ActionListener listener) { final IndicesAliasesRequestBuilder aliasesRequest = migrationInfo.createClient(baseClient).admin().indices().prepareAliases(); aliasesRequest.removeIndex(migrationInfo.getCurrentIndexName()); aliasesRequest.addAlias(migrationInfo.getNextIndexName(), migrationInfo.getCurrentIndexName()); @@ -533,30 +553,42 @@ private CheckedBiConsumer, AcknowledgedResp ); }); - // Technically this callback might have a different cluster state, but it shouldn't matter - these indices shouldn't be changing - // while we're trying to migrate them. - return (listener, unsetReadOnlyResponse) -> aliasesRequest.execute( - listener.delegateFailureAndWrap((l, deleteIndexResponse) -> l.onResponse(bulkByScrollResponse)) - ); + aliasesRequest.execute(listener); } /** - * Makes the index readonly if it's not set as a readonly yet + * Sets the write block on the index to the given value. */ private void setWriteBlock(Index index, boolean readOnlyValue, ActionListener listener) { - final Settings readOnlySettings = Settings.builder().put(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey(), readOnlyValue).build(); - - metadataUpdateSettingsService.updateSettings( - new UpdateSettingsClusterStateUpdateRequest( - MasterNodeRequest.INFINITE_MASTER_NODE_TIMEOUT, - TimeValue.ZERO, - readOnlySettings, - UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, - UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT, - index - ), - listener - ); + if (readOnlyValue) { + // Setting the Block with an AddIndexBlockRequest ensures all shards have accounted for the block and all + // in-flight writes are completed before returning. + baseClient.admin() + .indices() + .addBlock( + new AddIndexBlockRequest(WRITE, index.getName()).masterNodeTimeout(MasterNodeRequest.INFINITE_MASTER_NODE_TIMEOUT), + listener.delegateFailureAndWrap((l, response) -> { + if (response.isAcknowledged() == false) { + throw new ElasticsearchException("Failed to acknowledge read-only block index request"); + } + l.onResponse(response); + }) + ); + } else { + // The only way to remove a Block is via a settings update. 
+ final Settings readOnlySettings = Settings.builder().put(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey(), false).build(); + metadataUpdateSettingsService.updateSettings( + new UpdateSettingsClusterStateUpdateRequest( + MasterNodeRequest.INFINITE_MASTER_NODE_TIMEOUT, + TimeValue.ZERO, + readOnlySettings, + UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT, + index + ), + listener + ); + } } private void reindex(SystemIndexMigrationInfo migrationInfo, ActionListener listener) { diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index 5630c33ad559c..11787866af0d7 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -18,8 +18,7 @@ public enum FeatureFlag { TIME_SERIES_MODE("es.index_mode_feature_flag_registered=true", Version.fromString("8.0.0"), null), FAILURE_STORE_ENABLED("es.failure_store_feature_flag_enabled=true", Version.fromString("8.12.0"), null), - SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null), - INFERENCE_UNIFIED_API_ENABLED("es.inference_unified_feature_flag_enabled=true", Version.fromString("8.18.0"), null); + SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null); public final String systemProperty; public final Version from; diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java index c7007ac60fe57..cfdca56542eb2 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java @@ -350,15 +350,7 @@ private void initializeWorkingDirectory(boolean preserveWorkingDirectory) { IOUtils.deleteWithRetry(distributionDir); } - try { - IOUtils.syncWithLinks(distributionDescriptor.getDistributionDir(), distributionDir); - } catch (IOUtils.LinkCreationException e) { - // Note does not work for network drives, e.g. Vagrant - LOGGER.info("Failed to create working dir using hard links. 
Falling back to copy", e); - // ensure we get a clean copy - IOUtils.deleteWithRetry(distributionDir); - IOUtils.syncWithCopy(distributionDescriptor.getDistributionDir(), distributionDir); - } + IOUtils.syncMaybeWithLinks(distributionDescriptor.getDistributionDir(), distributionDir); } Files.createDirectories(repoDir); Files.createDirectories(dataDir); @@ -773,7 +765,8 @@ private void installModule(String moduleName, DefaultPluginInstallSpec installSp }); - IOUtils.syncWithCopy(modulePath, destination); + IOUtils.syncMaybeWithLinks(modulePath, destination); + try { if (installSpec.entitlementsOverride != null) { Path entitlementsFile = modulePath.resolve(ENTITLEMENT_POLICY_YAML); diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/util/IOUtils.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/util/IOUtils.java index 43034e502fbfb..b1e2175205594 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/util/IOUtils.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/util/IOUtils.java @@ -9,6 +9,9 @@ package org.elasticsearch.test.cluster.util; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; @@ -20,6 +23,7 @@ import java.util.stream.Stream; public final class IOUtils { + private static final Logger LOGGER = LogManager.getLogger(IOUtils.class); private static final int RETRY_DELETE_MILLIS = OS.current() == OS.WINDOWS ? 500 : 0; private static final int MAX_RETRY_DELETE_TIMES = OS.current() == OS.WINDOWS ? 15 : 0; @@ -51,6 +55,30 @@ public static void uncheckedDeleteWithRetry(Path path) { } } + /** + * Attempts to do a copy via linking, falling back to a normal copy if an exception is encountered. + * + * @see #syncWithLinks(Path, Path) + * @see #syncWithCopy(Path, Path) + * @param sourceRoot where to copy from + * @param destinationRoot destination to link to + */ + public static void syncMaybeWithLinks(Path sourceRoot, Path destinationRoot) { + try { + syncWithLinks(sourceRoot, destinationRoot); + } catch (LinkCreationException e) { + // Note does not work for network drives, e.g. Vagrant + LOGGER.info("Failed to sync using hard links. Falling back to copy.", e); + // ensure we get a clean copy + try { + deleteWithRetry(destinationRoot); + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + syncWithCopy(sourceRoot, destinationRoot); + } + } + /** * Does the equivalent of `cp -lr` and `chmod -r a-w` to save space and improve speed. * We remove write permissions to make sure files are note mistakenly edited ( e.x. 
the config file ) and changes diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/deprecation/DeprecatedIndexPredicate.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/deprecation/DeprecatedIndexPredicate.java index 48fb8ebdc577d..cba1df9b79c76 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/deprecation/DeprecatedIndexPredicate.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/deprecation/DeprecatedIndexPredicate.java @@ -50,7 +50,6 @@ public static Predicate getReindexRequiredPredicate(Metadata metadata, bo public static boolean reindexRequired(IndexMetadata indexMetadata, boolean filterToBlockedStatus) { return creationVersionBeforeMinimumWritableVersion(indexMetadata) && isNotSearchableSnapshot(indexMetadata) - && isNotClosed(indexMetadata) && matchBlockedStatus(indexMetadata, filterToBlockedStatus); } @@ -62,10 +61,6 @@ private static boolean creationVersionBeforeMinimumWritableVersion(IndexMetadata return metadata.getCreationVersion().before(MINIMUM_WRITEABLE_VERSION_AFTER_UPGRADE); } - private static boolean isNotClosed(IndexMetadata indexMetadata) { - return indexMetadata.getState().equals(IndexMetadata.State.CLOSE) == false; - } - private static boolean matchBlockedStatus(IndexMetadata indexMetadata, boolean filterToBlockedStatus) { return MetadataIndexStateService.VERIFIED_READ_ONLY_SETTING.get(indexMetadata.getSettings()) == filterToBlockedStatus; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java index 7a098d432f35b..1ab4906ed0d06 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java @@ -38,6 +38,10 @@ public static String jobResultsIndexPrefix() { return AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX; } + public static String jobResultsIndexPattern() { + return AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + "*"; + } + /** * The name of the alias pointing to the indices where the job's results are stored * @param jobId Job Id @@ -47,15 +51,26 @@ public static String jobResultsAliasedName(String jobId) { return AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + jobId; } + /** + * Extract the job Id from the alias name. 
+ * If not an results index alias null is returned + * @param jobResultsAliasedName The alias + * @return The job Id + */ + public static String jobIdFromAlias(String jobResultsAliasedName) { + if (jobResultsAliasedName.length() < AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX.length()) { + return null; + } + return jobResultsAliasedName.substring(AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX.length()); + } + /** * The name of the alias pointing to the write index for a job * @param jobId Job Id * @return The write alias */ public static String resultsWriteAlias(String jobId) { - // ".write" rather than simply "write" to avoid the danger of clashing - // with the read alias of a job whose name begins with "write-" - return AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + ".write-" + jobId; + return AnomalyDetectorsIndexFields.RESULTS_INDEX_WRITE_PREFIX + jobId; } /** diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java index 504a4b756c979..2a0fff86ba494 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java @@ -11,6 +11,9 @@ public final class AnomalyDetectorsIndexFields { public static final String STATE_INDEX_PREFIX = ".ml-state"; public static final String RESULTS_INDEX_PREFIX = ".ml-anomalies-"; + // ".write" rather than simply "write" to avoid the danger of clashing + // with the read alias of a job whose name begins with "write-" + public static final String RESULTS_INDEX_WRITE_PREFIX = RESULTS_INDEX_PREFIX + ".write-"; public static final String RESULTS_INDEX_DEFAULT = "shared"; private AnomalyDetectorsIndexFields() {} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java index e85acc159059e..06b2cfbad0105 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java @@ -31,6 +31,8 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.json.JsonXContent; @@ -64,27 +66,24 @@ public final class MlIndexAndAlias { */ public static final String BWC_MAPPINGS_VERSION = "8.11.0"; - private static final Logger logger = LogManager.getLogger(MlIndexAndAlias.class); + public static final String FIRST_INDEX_SIX_DIGIT_SUFFIX = "-000001"; - static final Comparator INDEX_NAME_COMPARATOR = new Comparator<>() { - - private final Predicate HAS_SIX_DIGIT_SUFFIX = Pattern.compile("\\d{6}").asMatchPredicate(); - - @Override - public int compare(String index1, String index2) { - String[] index1Parts = index1.split("-"); - String index1Suffix = index1Parts[index1Parts.length - 1]; - boolean index1HasSixDigitsSuffix = HAS_SIX_DIGIT_SUFFIX.test(index1Suffix); - String[] index2Parts = index2.split("-"); - String index2Suffix = 
index2Parts[index2Parts.length - 1]; - boolean index2HasSixDigitsSuffix = HAS_SIX_DIGIT_SUFFIX.test(index2Suffix); - if (index1HasSixDigitsSuffix && index2HasSixDigitsSuffix) { - return index1Suffix.compareTo(index2Suffix); - } else if (index1HasSixDigitsSuffix != index2HasSixDigitsSuffix) { - return Boolean.compare(index1HasSixDigitsSuffix, index2HasSixDigitsSuffix); - } else { - return index1.compareTo(index2); - } + private static final Logger logger = LogManager.getLogger(MlIndexAndAlias.class); + private static final Predicate HAS_SIX_DIGIT_SUFFIX = Pattern.compile("\\d{6}").asMatchPredicate(); + + static final Comparator INDEX_NAME_COMPARATOR = (index1, index2) -> { + String[] index1Parts = index1.split("-"); + String index1Suffix = index1Parts[index1Parts.length - 1]; + boolean index1HasSixDigitsSuffix = HAS_SIX_DIGIT_SUFFIX.test(index1Suffix); + String[] index2Parts = index2.split("-"); + String index2Suffix = index2Parts[index2Parts.length - 1]; + boolean index2HasSixDigitsSuffix = HAS_SIX_DIGIT_SUFFIX.test(index2Suffix); + if (index1HasSixDigitsSuffix && index2HasSixDigitsSuffix) { + return index1Suffix.compareTo(index2Suffix); + } else if (index1HasSixDigitsSuffix != index2HasSixDigitsSuffix) { + return Boolean.compare(index1HasSixDigitsSuffix, index2HasSixDigitsSuffix); + } else { + return index1.compareTo(index2); } }; @@ -126,7 +125,7 @@ public static void createIndexAndAliasIfNecessary( String legacyIndexWithoutSuffix = indexPatternPrefix; String indexPattern = indexPatternPrefix + "*"; // The initial index name must be suitable for rollover functionality. - String firstConcreteIndex = indexPatternPrefix + "-000001"; + String firstConcreteIndex = indexPatternPrefix + FIRST_INDEX_SIX_DIGIT_SUFFIX; String[] concreteIndexNames = resolver.concreteIndexNames(clusterState, IndicesOptions.lenientExpandHidden(), indexPattern); Optional indexPointedByCurrentWriteAlias = clusterState.getMetadata().hasAlias(alias) ? clusterState.getMetadata().getIndicesLookup().get(alias).getIndices().stream().map(Index::getName).findFirst() @@ -384,6 +383,10 @@ public static boolean hasIndexTemplate(ClusterState state, String templateName) return state.getMetadata().templatesV2().containsKey(templateName); } + public static boolean has6DigitSuffix(String indexName) { + return HAS_SIX_DIGIT_SUFFIX.test(indexName); + } + /** * Returns the latest index. Latest is the index with the highest * 6 digit suffix. @@ -395,4 +398,11 @@ public static String latestIndex(String[] concreteIndices) { ? 
concreteIndices[0] : Arrays.stream(concreteIndices).max(MlIndexAndAlias.INDEX_NAME_COMPARATOR).get(); } + + /** + * True if the version is read *and* write compatible not just read only compatible + */ + public static boolean indexIsReadWriteCompatibleInV9(IndexVersion version) { + return version.onOrAfter(IndexVersions.V_8_0_0); + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/user/InternalUsers.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/user/InternalUsers.java index 52f077b658d02..1229d62dce047 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/user/InternalUsers.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/user/InternalUsers.java @@ -9,9 +9,11 @@ import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.action.admin.indices.analyze.TransportReloadAnalyzersAction; +import org.elasticsearch.action.admin.indices.close.TransportCloseIndexAction; import org.elasticsearch.action.admin.indices.create.TransportCreateIndexAction; import org.elasticsearch.action.admin.indices.delete.TransportDeleteIndexAction; import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeAction; +import org.elasticsearch.action.admin.indices.open.OpenIndexAction; import org.elasticsearch.action.admin.indices.readonly.TransportAddIndexBlockAction; import org.elasticsearch.action.admin.indices.refresh.RefreshAction; import org.elasticsearch.action.admin.indices.rollover.LazyRolloverAction; @@ -206,6 +208,8 @@ public class InternalUsers { "indices:admin/data_stream/index/reindex", "indices:admin/index/create_from_source", TransportAddIndexBlockAction.TYPE.name(), + OpenIndexAction.NAME, + TransportCloseIndexAction.NAME, TransportCreateIndexAction.TYPE.name(), TransportClusterSearchShardsAction.TYPE.name(), TransportUpdateSettingsAction.TYPE.name(), diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java index 8fc1e55ec0ac5..22ec4551af2a9 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.indices.TestIndexNameExpressionResolver; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -365,8 +366,20 @@ public void testIndexNameComparator() { } public void testLatestIndex() { - var names = new String[] { "index-000001", "index-000002", "index-000003" }; - assertThat(MlIndexAndAlias.latestIndex(names), equalTo("index-000003")); + { + var names = new String[] { "index-000001", "index-000002", "index-000003" }; + assertThat(MlIndexAndAlias.latestIndex(names), equalTo("index-000003")); + } + { + var names = new String[] { "index", "index-000001", "index-000002" }; + assertThat(MlIndexAndAlias.latestIndex(names), equalTo("index-000002")); + } + } + + public void testIndexIsReadWriteCompatibleInV9() { + assertTrue(MlIndexAndAlias.indexIsReadWriteCompatibleInV9(IndexVersion.current())); + 
assertTrue(MlIndexAndAlias.indexIsReadWriteCompatibleInV9(IndexVersions.V_8_0_0)); + assertFalse(MlIndexAndAlias.indexIsReadWriteCompatibleInV9(IndexVersions.V_7_17_0)); } private void createIndexAndAliasIfNecessary(ClusterState clusterState) { diff --git a/x-pack/plugin/core/template-resources/src/main/resources/monitoring-es-mb.json b/x-pack/plugin/core/template-resources/src/main/resources/monitoring-es-mb.json index 793a8c3035d8e..6d82f6e5295e6 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/monitoring-es-mb.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/monitoring-es-mb.json @@ -464,6 +464,13 @@ } } }, + "threads": { + "properties": { + "count": { + "type": "long" + } + } + }, "gc": { "properties": { "collectors": { @@ -562,6 +569,20 @@ "type": "long" } } + }, + "fetch_total": { + "properties": { + "count": { + "type": "long" + } + } + }, + "fetch_time": { + "properties": { + "ms": { + "type": "long" + } + } } } }, @@ -586,6 +607,42 @@ } } }, + "get": { + "properties": { + "total": { + "properties": { + "count": { + "type": "long" + } + } + }, + "time": { + "properties": { + "ms": { + "type": "long" + } + } + } + } + }, + "merges": { + "properties": { + "total": { + "properties": { + "count": { + "type": "long" + } + } + }, + "total_time": { + "properties": { + "ms": { + "type": "long" + } + } + } + } + }, "fielddata": { "properties": { "memory": { @@ -594,6 +651,13 @@ "type": "long" } } + }, + "evictions": { + "properties": { + "count": { + "type": "long" + } + } } } }, @@ -651,6 +715,60 @@ } } }, + "translog": { + "properties": { + "operations": { + "properties": { + "count": { + "type": "long" + } + } + }, + "size": { + "properties": { + "bytes": { + "type": "long" + } + } + } + } + }, + "refresh": { + "properties": { + "total": { + "properties": { + "count": { + "type": "long" + } + } + }, + "total_time": { + "properties": { + "ms": { + "type": "long" + } + } + } + } + }, + "flush": { + "properties": { + "total": { + "properties": { + "count": { + "type": "long" + } + } + }, + "total_time": { + "properties": { + "ms": { + "type": "long" + } + } + } + } + }, "segments": { "properties": { "version_map": { @@ -768,6 +886,20 @@ }, "process": { "properties": { + "mem": { + "properties": { + "total_virtual": { + "properties": { + "bytes": { + "type": "long" + } + } + } + } + }, + "open_file_descriptors": { + "type": "long" + }, "cpu": { "properties": { "pct": { @@ -882,6 +1014,88 @@ } } }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + "type": "long" + } + } + } + } + }, + "force_merge": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + "type": "long" + } + } + } + } + }, + "flush": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + "type": "long" + } + } + } + } + }, + "search_worker": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, "queue": { "properties": { "count": { @@ -900,6 +1114,38 @@ } } }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + 
"type": "long" + } + } + } + } + }, + "system_read": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, "queue": { "properties": { "count": { @@ -918,6 +1164,38 @@ } } }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + "type": "long" + } + } + } + } + }, + "esql_worker": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, "queue": { "properties": { "count": { @@ -936,6 +1214,38 @@ } } }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + "type": "long" + } + } + } + } + }, + "system_write": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, "queue": { "properties": { "count": { @@ -954,6 +1264,13 @@ } } }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, "queue": { "properties": { "count": { @@ -962,6 +1279,181 @@ } } } + }, + "snapshot": { + "properties": { + "rejected": { + "properties": { + "count": { + "type": "long" + } + } + }, + "active": { + "properties": { + "count": { + "type": "long" + } + } + }, + "queue": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + }, + "transport": { + "properties": { + "tx": { + "properties": { + "size": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "count": { + "type": "long" + } + } + }, + "rx": { + "properties": { + "size": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "count": { + "type": "long" + } + } + } + } + }, + "ingest": { + "properties": { + "total": { + "properties": { + "current": { + "type": "long" + }, + "time_in_millis": { + "type": "long" + }, + "count": { + "type": "long" + }, + "failed": { + "type": "long" + } + } + } + } + }, + "indexing_pressure": { + "properties": { + "memory": { + "properties": { + "current": { + "properties": { + "all": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "coordinating": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "replica": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "combined_coordinating_and_primary": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "primary": { + "properties": { + "bytes": { + "type": "long" + } + } + } + } + }, + "total": { + "properties": { + "all": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "coordinating": { + "properties": { + "rejections": { + "type": "long" + }, + "bytes": { + "type": "long" + } + } + }, + "replica": { + "properties": { + "rejections": { + "type": "long" + }, + "bytes": { + "type": "long" + } + } + }, + "combined_coordinating_and_primary": { + "properties": { + "bytes": { + "type": "long" + } + } + }, + "primary": { + "properties": { + "rejections": { + "type": "long" + }, + "bytes": { + "type": "long" + } + } + } + } + }, + "limit_in_bytes": { + "type": "long" + } + } } } }, diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationCheckerTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationCheckerTests.java index e3c205ff8c740..2c32fb7610a18 100644 --- 
a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationCheckerTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationCheckerTests.java @@ -75,23 +75,15 @@ public void testOldIndicesCheck() { assertThat(issuesByDataStream.get(dataStream.getName()), equalTo(List.of(expected))); } - public void testOldIndicesCheckWithOnlyClosedOrNewIndices() { + public void testOldIndicesCheckWithOnlyNewIndices() { // This tests what happens when any old indices that we have are closed. We expect no deprecation warning. - int oldClosedIndexCount = randomIntBetween(1, 100); int newOpenIndexCount = randomIntBetween(0, 100); int newClosedIndexCount = randomIntBetween(0, 100); Map nameToIndexMetadata = new HashMap<>(); Set expectedIndices = new HashSet<>(); - DataStream dataStream = createTestDataStream( - 0, - oldClosedIndexCount, - newOpenIndexCount, - newClosedIndexCount, - nameToIndexMetadata, - expectedIndices - ); + DataStream dataStream = createTestDataStream(0, 0, newOpenIndexCount, newClosedIndexCount, nameToIndexMetadata, expectedIndices); Metadata metadata = Metadata.builder() .indices(nameToIndexMetadata) @@ -168,7 +160,7 @@ private DataStream createTestDataStream( allIndices.add(createOldIndex(i, false, nameToIndexMetadata, expectedIndices)); } for (int i = 0; i < oldClosedIndexCount; i++) { - allIndices.add(createOldIndex(i, true, nameToIndexMetadata, null)); + allIndices.add(createOldIndex(i, true, nameToIndexMetadata, expectedIndices)); } for (int i = 0; i < newOpenIndexCount; i++) { allIndices.add(createNewIndex(i, false, nameToIndexMetadata)); @@ -218,7 +210,7 @@ private Index createIndex( ) { Settings.Builder settingsBuilder = isOld ? settings(IndexVersion.fromId(7170099)) : settings(IndexVersion.current()); String indexName = (isOld ? "old-" : "new-") + (isClosed ? "closed-" : "") + "data-stream-index-" + suffix; - if (isOld && isClosed == false) { // we only expect warnings on open old indices + if (isOld) { if (expectedIndices.isEmpty() == false && randomIntBetween(0, 2) == 0) { settingsBuilder.put(INDEX_STORE_TYPE_SETTING.getKey(), SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOT_STORE_TYPE); } else { diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationCheckerTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationCheckerTests.java index e49a6046c5c64..edbe7562a1560 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationCheckerTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationCheckerTests.java @@ -49,6 +49,7 @@ public void testOldIndicesCheck() { .settings(settings(OLD_VERSION)) .numberOfShards(1) .numberOfReplicas(0) + .state(randomBoolean() ? 
IndexMetadata.State.OPEN : IndexMetadata.State.CLOSE) // does not matter if it's open or closed .build(); ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE) .metadata(Metadata.builder().put(indexMetadata, true)) .build(); @@ -205,24 +206,6 @@ public void testOldIndicesCheckSnapshotIgnored() { assertThat(issuesByIndex.size(), equalTo(0)); } - public void testOldIndicesCheckClosedIgnored() { - Settings.Builder settings = settings(OLD_VERSION); - IndexMetadata indexMetadata = IndexMetadata.builder("test") - .settings(settings) - .numberOfShards(1) - .numberOfReplicas(0) - .state(IndexMetadata.State.CLOSE) - .build(); - ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(indexMetadata, true)) - .build(); - Map<String, List<DeprecationIssue>> issuesByIndex = checker.check( - clusterState, - new DeprecationInfoAction.Request(TimeValue.THIRTY_SECONDS) - ); - assertThat(issuesByIndex.size(), equalTo(0)); - } - public void testOldIndicesIgnoredWarningCheck() { Settings.Builder settings = settings(OLD_VERSION).put(MetadataIndexStateService.VERIFIED_READ_ONLY_SETTING.getKey(), true); IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build(); diff --git a/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/rules/80_query_rules_retriever.yml b/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/rules/80_query_rules_retriever.yml index 089a078c62207..4ce0c55511cbd 100644 --- a/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/rules/80_query_rules_retriever.yml +++ b/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/rules/80_query_rules_retriever.yml @@ -288,10 +288,9 @@ setup: rank_window_size: 1 - match: { hits.total.value: 3 } + - length: { hits.hits: 1 } - match: { hits.hits.0._id: foo } - match: { hits.hits.0._score: 1.7014124E38 } - - match: { hits.hits.1._score: 0 } - - match: { hits.hits.2._score: 0 } - do: headers: @@ -315,12 +314,10 @@ setup: rank_window_size: 2 - match: { hits.total.value: 3 } + - length: { hits.hits: 2 } - match: { hits.hits.0._id: foo } - match: { hits.hits.0._score: 1.7014124E38 } - match: { hits.hits.1._id: foo2 } - - match: { hits.hits.1._score: 1.7014122E38 } - - match: { hits.hits.2._id: bar_no_rule } - - match: { hits.hits.2._score: 0 } - do: headers: @@ -344,6 +341,7 @@ setup: rank_window_size: 10 - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } - match: { hits.hits.0._id: foo } - match: { hits.hits.0._score: 1.7014124E38 } - match: { hits.hits.1._id: foo2 } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index e8c5edc1c8b58..548fb30a51355 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -699,7 +699,7 @@ public enum Cap { /** * LOOKUP JOIN with TEXT fields on the right (right side of the join) (#119473) */ - LOOKUP_JOIN_TEXT(Build.current().isSnapshot()), + LOOKUP_JOIN_TEXT(JOIN_LOOKUP_V12.isEnabled()), /** * LOOKUP JOIN without MV matching (https://github.com/elastic/elasticsearch/issues/118780)
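
The `Cap` change just above swaps a snapshot-only gate (`Build.current().isSnapshot()`) for a dependency on the `JOIN_LOOKUP_V12` capability, so the two flags can only move together. A minimal sketch of that gating pattern (hypothetical miniature enum, not the real `EsqlCapabilities`; the `true` for the prerequisite is assumed for the demo):

```java
// Hypothetical, stripped-down model of capability gating: a later enum constant
// derives its enabled state from an earlier one, so the dependent capability can
// never be on while its prerequisite is off.
public class CapabilityGatingSketch {
    enum Cap {
        JOIN_LOOKUP_V12(true), // "true" is assumed here purely for the demo
        LOOKUP_JOIN_TEXT(JOIN_LOOKUP_V12.isEnabled()); // follows its prerequisite

        private final boolean enabled;

        Cap(boolean enabled) {
            this.enabled = enabled;
        }

        boolean isEnabled() {
            return enabled;
        }
    }

    public static void main(String[] args) {
        // Flipping JOIN_LOOKUP_V12 off would disable LOOKUP_JOIN_TEXT with it.
        System.out.println(Cap.LOOKUP_JOIN_TEXT.isEnabled()); // prints true
    }
}
```

diff --git 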
a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/BaseMockEISAuthServerTest.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/BaseMockEISAuthServerTest.java index 230b7ff576296..d0f797e9f8fab 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/BaseMockEISAuthServerTest.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/BaseMockEISAuthServerTest.java @@ -14,7 +14,6 @@ import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.TimeValue; import org.elasticsearch.test.cluster.ElasticsearchCluster; -import org.elasticsearch.test.cluster.FeatureFlag; import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.rest.ESRestTestCase; import org.junit.ClassRule; @@ -40,12 +39,9 @@ public class BaseMockEISAuthServerTest extends ESRestTestCase { .setting("xpack.security.enabled", "true") // Adding both settings unless one feature flag is disabled in a particular environment .setting("xpack.inference.elastic.url", mockEISServer::getUrl) - // TODO remove this once we've removed DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG and EIS_GATEWAY_URL - .setting("xpack.inference.eis.gateway.url", mockEISServer::getUrl) // This plugin is located in the inference/qa/test-service-plugin package, look for TestInferenceServicePlugin .plugin("inference-service-test") .user("x_pack_rest_user", "x-pack-test-password") - .feature(FeatureFlag.INFERENCE_UNIFIED_API_ENABLED) .build(); // The reason we're doing this is to make sure the mock server is initialized first so we can get the address before communicating diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java index 5174b5bbb8cb4..bb3f3e9b46c4d 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java @@ -19,7 +19,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.cluster.ElasticsearchCluster; -import org.elasticsearch.test.cluster.FeatureFlag; import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.xcontent.XContentBuilder; @@ -50,8 +49,8 @@ public class InferenceBaseRestTest extends ESRestTestCase { .setting("xpack.security.enabled", "true") .plugin("inference-service-test") .user("x_pack_rest_user", "x-pack-test-password") - .feature(FeatureFlag.INFERENCE_UNIFIED_API_ENABLED) .build(); + @ClassRule public static MlModelServer mlModelServer = new MlModelServer(); @@ -356,7 +355,8 @@ protected Deque unifiedCompletionInferOnMockService( List input, @Nullable Consumer responseConsumerCallback ) throws Exception { - var endpoint = Strings.format("_inference/%s/%s/_unified", taskType, modelId); + var route = randomBoolean() ? 
"_stream" : "_unified"; // TODO remove unified route + var endpoint = Strings.format("_inference/%s/%s/%s", taskType, modelId, route); return callAsyncUnified(endpoint, input, "user", responseConsumerCallback); } diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java index 76483a5f62fec..3a2a003636b13 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java @@ -10,7 +10,6 @@ package org.elasticsearch.xpack.inference; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature; import java.io.IOException; @@ -24,17 +23,11 @@ public void testGetDefaultEndpoints() throws IOException { var allModels = getAllModels(); var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION); - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - assertThat(allModels, hasSize(4)); - assertThat(chatCompletionModels, hasSize(1)); - - for (var model : chatCompletionModels) { - assertEquals("chat_completion", model.get("task_type")); - } - } else { - assertThat(allModels, hasSize(3)); - assertThat(chatCompletionModels, hasSize(0)); + assertThat(allModels, hasSize(4)); + assertThat(chatCompletionModels, hasSize(1)); + + for (var model : chatCompletionModels) { + assertEquals("chat_completion", model.get("task_type")); } } diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java index 856fdeb6287e9..9d4cec798964a 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java @@ -12,11 +12,8 @@ import org.elasticsearch.client.Request; import org.elasticsearch.common.Strings; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; @@ -28,12 +25,7 @@ public class InferenceGetServicesIT extends BaseMockEISAuthServerTest { @SuppressWarnings("unchecked") public void testGetServicesWithoutTaskType() throws IOException { List services = getAllServices(); - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - assertThat(services.size(), equalTo(19)); - } else { - assertThat(services.size(), equalTo(18)); - } + assertThat(services.size(), equalTo(19)); String[] 
providers = new String[services.size()]; for (int i = 0; i < services.size(); i++) { @@ -41,14 +33,15 @@ public void testGetServicesWithoutTaskType() throws IOException { providers[i] = (String) serviceConfig.get("service"); } - var providerList = new ArrayList<>( - Arrays.asList( + assertArrayEquals( + List.of( "alibabacloud-ai-search", "amazonbedrock", "anthropic", "azureaistudio", "azureopenai", "cohere", + "elastic", "elasticsearch", "googleaistudio", "googlevertexai", @@ -61,13 +54,9 @@ public void testGetServicesWithoutTaskType() throws IOException { "test_service", "text_embedding_test_service", "watsonxai" - ) + ).toArray(), + providers ); - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - providerList.add(6, "elastic"); - } - assertArrayEquals(providerList.toArray(), providers); } @SuppressWarnings("unchecked") @@ -130,7 +119,7 @@ public void testGetServicesWithCompletionTaskType() throws IOException { providers[i] = (String) serviceConfig.get("service"); } - var providerList = new ArrayList<>( + assertArrayEquals( List.of( "alibabacloud-ai-search", "amazonbedrock", @@ -141,21 +130,15 @@ public void testGetServicesWithCompletionTaskType() throws IOException { "googleaistudio", "openai", "streaming_completion_test_service" - ) + ).toArray(), + providers ); - - assertArrayEquals(providers, providerList.toArray()); } @SuppressWarnings("unchecked") public void testGetServicesWithChatCompletionTaskType() throws IOException { List services = getServices(TaskType.CHAT_COMPLETION); - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - assertThat(services.size(), equalTo(3)); - } else { - assertThat(services.size(), equalTo(2)); - } + assertThat(services.size(), equalTo(3)); String[] providers = new String[services.size()]; for (int i = 0; i < services.size(); i++) { @@ -163,26 +146,13 @@ public void testGetServicesWithChatCompletionTaskType() throws IOException { providers[i] = (String) serviceConfig.get("service"); } - var providerList = new ArrayList<>(List.of("openai", "streaming_completion_test_service")); - - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - providerList.addFirst("elastic"); - } - - assertArrayEquals(providers, providerList.toArray()); + assertArrayEquals(List.of("elastic", "openai", "streaming_completion_test_service").toArray(), providers); } @SuppressWarnings("unchecked") public void testGetServicesWithSparseEmbeddingTaskType() throws IOException { List services = getServices(TaskType.SPARSE_EMBEDDING); - - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - assertThat(services.size(), equalTo(5)); - } else { - assertThat(services.size(), equalTo(4)); - } + assertThat(services.size(), equalTo(5)); String[] providers = new String[services.size()]; for (int i = 0; i < services.size(); i++) { @@ -190,12 +160,10 @@ public void testGetServicesWithSparseEmbeddingTaskType() throws IOException { providers[i] = (String) serviceConfig.get("service"); } - var providerList = new 
ArrayList<>(Arrays.asList("alibabacloud-ai-search", "elasticsearch", "hugging_face", "test_service")); - if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() - || ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) { - providerList.add(1, "elastic"); - } - assertArrayEquals(providers, providerList.toArray()); + assertArrayEquals( + List.of("alibabacloud-ai-search", "elastic", "elasticsearch", "hugging_face", "test_service").toArray(), + providers + ); } private List getAllServices() throws IOException { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index b007aa8bfa1f8..adea09adb8afc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -131,12 +131,8 @@ import java.util.Map; import java.util.function.Predicate; import java.util.function.Supplier; -import java.util.stream.Stream; import static java.util.Collections.singletonList; -import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService.ELASTIC_INFERENCE_SERVICE_IDENTIFIER; -import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG; -import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG; public class InferencePlugin extends Plugin implements @@ -193,24 +189,17 @@ public InferencePlugin(Settings settings) { @Override public List> getActions() { - var availableActions = List.of( + return List.of( new ActionHandler<>(InferenceAction.INSTANCE, TransportInferenceAction.class), - new ActionHandler<>(GetInferenceModelAction.INSTANCE, TransportGetInferenceModelAction.class), new ActionHandler<>(PutInferenceModelAction.INSTANCE, TransportPutInferenceModelAction.class), new ActionHandler<>(UpdateInferenceModelAction.INSTANCE, TransportUpdateInferenceModelAction.class), new ActionHandler<>(DeleteInferenceEndpointAction.INSTANCE, TransportDeleteInferenceEndpointAction.class), new ActionHandler<>(XPackUsageFeatureAction.INFERENCE, TransportInferenceUsageAction.class), new ActionHandler<>(GetInferenceDiagnosticsAction.INSTANCE, TransportGetInferenceDiagnosticsAction.class), - new ActionHandler<>(GetInferenceServicesAction.INSTANCE, TransportGetInferenceServicesAction.class) + new ActionHandler<>(GetInferenceServicesAction.INSTANCE, TransportGetInferenceServicesAction.class), + new ActionHandler<>(UnifiedCompletionAction.INSTANCE, TransportUnifiedCompletionInferenceAction.class) ); - - List> conditionalActions = - UnifiedCompletionFeature.UNIFIED_COMPLETION_FEATURE_FLAG.isEnabled() - ? 
List.of(new ActionHandler<>(UnifiedCompletionAction.INSTANCE, TransportUnifiedCompletionInferenceAction.class)) - : List.of(); - - return Stream.concat(availableActions.stream(), conditionalActions.stream()).toList(); } @Override @@ -225,7 +214,7 @@ public List getRestHandlers( Supplier nodesInCluster, Predicate clusterSupportsFeature ) { - var availableRestActions = List.of( + return List.of( new RestInferenceAction(), new RestStreamInferenceAction(threadPoolSetOnce), new RestGetInferenceModelAction(), @@ -233,13 +222,9 @@ public List getRestHandlers( new RestUpdateInferenceModelAction(), new RestDeleteInferenceEndpointAction(), new RestGetInferenceDiagnosticsAction(), - new RestGetInferenceServicesAction() + new RestGetInferenceServicesAction(), + new RestUnifiedCompletionInferenceAction(threadPoolSetOnce) ); - List conditionalRestActions = UnifiedCompletionFeature.UNIFIED_COMPLETION_FEATURE_FLAG.isEnabled() - ? List.of(new RestUnifiedCompletionInferenceAction(threadPoolSetOnce)) - : List.of(); - - return Stream.concat(availableRestActions.stream(), conditionalRestActions.stream()).toList(); } @Override @@ -264,46 +249,44 @@ public Collection createComponents(PluginServices services) { var inferenceServices = new ArrayList<>(inferenceServiceExtensions); inferenceServices.add(this::getInferenceServiceFactories); - if (isElasticInferenceServiceEnabled()) { - // Create a separate instance of HTTPClientManager with its own SSL configuration (`xpack.inference.elastic.http.ssl.*`). - var elasticInferenceServiceHttpClientManager = HttpClientManager.create( - settings, - services.threadPool(), - services.clusterService(), - throttlerManager, - getSslService() - ); - - var elasticInferenceServiceRequestSenderFactory = new HttpRequestSender.Factory( - serviceComponents.get(), - elasticInferenceServiceHttpClientManager, - services.clusterService() - ); - elasicInferenceServiceFactory.set(elasticInferenceServiceRequestSenderFactory); - - ElasticInferenceServiceSettings inferenceServiceSettings = new ElasticInferenceServiceSettings(settings); - String elasticInferenceUrl = this.getElasticInferenceServiceUrl(inferenceServiceSettings); - - var elasticInferenceServiceComponentsInstance = new ElasticInferenceServiceComponents(elasticInferenceUrl); - elasticInferenceServiceComponents.set(elasticInferenceServiceComponentsInstance); - - var authorizationHandler = new ElasticInferenceServiceAuthorizationHandler( - elasticInferenceServiceComponentsInstance.elasticInferenceServiceUrl(), - services.threadPool() - ); - - inferenceServices.add( - () -> List.of( - context -> new ElasticInferenceService( - elasicInferenceServiceFactory.get(), - serviceComponents.get(), - elasticInferenceServiceComponentsInstance, - modelRegistry, - authorizationHandler - ) + // Create a separate instance of HTTPClientManager with its own SSL configuration (`xpack.inference.elastic.http.ssl.*`). 
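+ // Now that the EIS feature flags are gone, this wiring runs unconditionally on every node.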
+ var elasticInferenceServiceHttpClientManager = HttpClientManager.create( + settings, + services.threadPool(), + services.clusterService(), + throttlerManager, + getSslService() + ); + + var elasticInferenceServiceRequestSenderFactory = new HttpRequestSender.Factory( + serviceComponents.get(), + elasticInferenceServiceHttpClientManager, + services.clusterService() + ); + elasicInferenceServiceFactory.set(elasticInferenceServiceRequestSenderFactory); + + ElasticInferenceServiceSettings inferenceServiceSettings = new ElasticInferenceServiceSettings(settings); + String elasticInferenceUrl = inferenceServiceSettings.getElasticInferenceServiceUrl(); + + var elasticInferenceServiceComponentsInstance = new ElasticInferenceServiceComponents(elasticInferenceUrl); + elasticInferenceServiceComponents.set(elasticInferenceServiceComponentsInstance); + + var authorizationHandler = new ElasticInferenceServiceAuthorizationHandler( + elasticInferenceServiceComponentsInstance.elasticInferenceServiceUrl(), + services.threadPool() + ); + + inferenceServices.add( + () -> List.of( + context -> new ElasticInferenceService( + elasicInferenceServiceFactory.get(), + serviceComponents.get(), + elasticInferenceServiceComponentsInstance, + modelRegistry, + authorizationHandler ) - ); - } + ) + ); var factoryContext = new InferenceServiceExtension.InferenceServiceFactoryContext( services.client(), @@ -429,11 +412,7 @@ public List> getSettings() { settings.addAll(Truncator.getSettingsDefinitions()); settings.addAll(RequestExecutorServiceSettings.getSettingsDefinitions()); settings.add(SKIP_VALIDATE_AND_START); - - // Register Elastic Inference Service settings definitions if the corresponding feature flag is enabled. - if (isElasticInferenceServiceEnabled()) { - settings.addAll(ElasticInferenceServiceSettings.getSettingsDefinitions()); - } + settings.addAll(ElasticInferenceServiceSettings.getSettingsDefinitions()); return settings; } @@ -505,30 +484,6 @@ public Map getHighlighters() { return Map.of(SemanticTextHighlighter.NAME, new SemanticTextHighlighter()); } - // Get Elastic Inference service URL based on feature flags to support transitioning - // to the new Elastic Inference Service URL. - private String getElasticInferenceServiceUrl(ElasticInferenceServiceSettings settings) { - String elasticInferenceUrl = null; - - if (ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()) { - elasticInferenceUrl = settings.getElasticInferenceServiceUrl(); - } else if (DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()) { - log.warn( - "Deprecated flag {} detected for enabling {}. 
Please use {}.", - ELASTIC_INFERENCE_SERVICE_IDENTIFIER, - DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG, - ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG - ); - elasticInferenceUrl = settings.getEisGatewayUrl(); - } - - return elasticInferenceUrl; - } - - protected Boolean isElasticInferenceServiceEnabled() { - return (ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() || DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()); - } - protected SSLService getSslService() { return XPackPlugin.getSharedSslService(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/UnifiedCompletionFeature.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/UnifiedCompletionFeature.java deleted file mode 100644 index 3e13d0c1e39de..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/UnifiedCompletionFeature.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference; - -import org.elasticsearch.common.util.FeatureFlag; - -/** - * Unified Completion feature flag. When the feature is complete, this flag will be removed. - * Enable feature via JVM option: `-Des.inference_unified_feature_flag_enabled=true`. - */ -public class UnifiedCompletionFeature { - public static final FeatureFlag UNIFIED_COMPLETION_FEATURE_FLAG = new FeatureFlag("inference_unified"); - - private UnifiedCompletionFeature() {} -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 165c42fdb7d1f..fa6cc3db0ef9f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -29,6 +29,7 @@ import static org.elasticsearch.search.rank.RankBuilder.DEFAULT_RANK_WINDOW_SIZE; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.DEFAULT_RERANK_ID; /** * A {@code RetrieverBuilder} for parsing and constructing a text similarity reranker retriever. @@ -47,10 +48,11 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(TextSimilarityRankBuilder.NAME, args -> { RetrieverBuilder retrieverBuilder = (RetrieverBuilder) args[0]; - String inferenceId = (String) args[1]; + String inferenceId = args[1] == null ? DEFAULT_RERANK_ID : (String) args[1]; String inferenceText = (String) args[2]; String field = (String) args[3]; int rankWindowSize = args[4] == null ? 
DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; + return new TextSimilarityRankRetrieverBuilder(retrieverBuilder, inferenceId, inferenceText, field, rankWindowSize); }); @@ -60,7 +62,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder c.trackRetrieverUsage(innerRetriever.getName()); return innerRetriever; }, RETRIEVER_FIELD); - PARSER.declareString(constructorArg(), INFERENCE_ID_FIELD); + PARSER.declareString(optionalConstructorArg(), INFERENCE_ID_FIELD); PARSER.declareString(constructorArg(), INFERENCE_TEXT_FIELD); PARSER.declareString(constructorArg(), FIELD_FIELD); PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); @@ -171,6 +173,10 @@ public String getName() { return TextSimilarityRankBuilder.NAME; } + public String inferenceId() { + return inferenceId; + } + public int rankWindowSize() { return rankWindowSize; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java index 57c06df8d8dfe..7f43676dfb5f0 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java @@ -31,6 +31,7 @@ public final class Paths { + INFERENCE_ID + "}/_stream"; + // TODO remove the _unified path public static final String UNIFIED_SUFFIX = "_unified"; static final String UNIFIED_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/" + UNIFIED_SUFFIX; static final String UNIFIED_TASK_TYPE_INFERENCE_ID_PATH = "_inference/{" diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java index 881af435b29b6..518056365d88b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java @@ -9,12 +9,17 @@ import org.apache.lucene.util.SetOnce; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestChannel; +import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.action.UnifiedCompletionAction; +import java.io.IOException; import java.util.List; import java.util.Objects; @@ -50,4 +55,32 @@ protected InferenceAction.Request prepareInferenceRequest(InferenceAction.Reques protected ActionListener listener(RestChannel channel) { return new ServerSentEventsRestActionListener(channel, threadPool); } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + var params = parseParams(restRequest); + var inferTimeout = parseTimeout(restRequest); + + if (params.taskType() == TaskType.CHAT_COMPLETION) { + UnifiedCompletionAction.Request request; + try (var parser = restRequest.contentParser()) { + request = UnifiedCompletionAction.Request.parseRequest(params.inferenceEntityId(), params.taskType(), inferTimeout, parser); + } + + return channel -> 
client.execute( + UnifiedCompletionAction.INSTANCE, + request, + new ServerSentEventsRestActionListener(channel, threadPool) + ); + } else { + InferenceAction.Request.Builder requestBuilder; + try (var parser = restRequest.contentParser()) { + requestBuilder = InferenceAction.Request.parseRequest(params.inferenceEntityId(), params.taskType(), parser); + } + + requestBuilder.setInferenceTimeout(inferTimeout); + var request = prepareInferenceRequest(requestBuilder); + return channel -> client.execute(InferenceAction.INSTANCE, request, listener(channel)); + } + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceFeature.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceFeature.java index 623c25222446c..4ec270eef3a62 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceFeature.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceFeature.java @@ -10,14 +10,12 @@ import org.elasticsearch.common.util.FeatureFlag; /** - * Elastic Inference Service (EIS) feature flag. When the feature is complete, this flag will be removed. - * Enable feature via JVM option: `-Des.elastic_inference_service_feature_flag_enabled=true`. + * Elastic Inference Service feature flag. Not being used anymore, but we'll keep it until the controller is no longer + * passing -Des.elastic_inference_service_feature_flag_enabled=true at startup. */ public class ElasticInferenceServiceFeature { - // TODO when we remove this also look in InferenceGetServicesIT and remove references to the deprecated URL setting @Deprecated - public static final FeatureFlag DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG = new FeatureFlag("eis"); - public static final FeatureFlag ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG = new FeatureFlag("elastic_inference_service"); + } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSettings.java index 3b6b1088cc9cf..fd4a70da01fda 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSettings.java @@ -20,10 +20,6 @@ */ public class ElasticInferenceServiceSettings { - // TODO when we remove this look at InferenceGetServicesIT and remove the setting there as well - @Deprecated - static final Setting EIS_GATEWAY_URL = Setting.simpleString("xpack.inference.eis.gateway.url", Setting.Property.NodeScope); - public static final String ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_PREFIX = "xpack.inference.elastic.http.ssl."; static final Setting ELASTIC_INFERENCE_SERVICE_URL = Setting.simpleString( @@ -31,14 +27,9 @@ public class ElasticInferenceServiceSettings { Setting.Property.NodeScope ); - // Adjust this variable to be volatile, if the setting can be updated at some point in time - @Deprecated - private final String eisGatewayUrl; - private final String elasticInferenceServiceUrl; public ElasticInferenceServiceSettings(Settings settings) { - eisGatewayUrl = EIS_GATEWAY_URL.get(settings); elasticInferenceServiceUrl = 
ELASTIC_INFERENCE_SERVICE_URL.get(settings); } @@ -55,7 +46,6 @@ public ElasticInferenceServiceSettings(Settings settings) { public static List> getSettingsDefinitions() { ArrayList> settings = new ArrayList<>(); - settings.add(EIS_GATEWAY_URL); settings.add(ELASTIC_INFERENCE_SERVICE_URL); settings.add(ELASTIC_INFERENCE_SERVICE_SSL_ENABLED); settings.addAll(ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_SETTINGS.getEnabledSettings()); @@ -63,11 +53,6 @@ public static List> getSettingsDefinitions() { return settings; } - @Deprecated - public String getEisGatewayUrl() { - return eisGatewayUrl; - } - public String getElasticInferenceServiceUrl() { return elasticInferenceServiceUrl; } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/InferencePluginTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/InferencePluginTests.java deleted file mode 100644 index d1db5b8b12cc6..0000000000000 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/InferencePluginTests.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference; - -import org.elasticsearch.common.settings.Setting; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSettings; -import org.junit.After; -import org.junit.Before; - -import static org.hamcrest.Matchers.is; - -public class InferencePluginTests extends ESTestCase { - private InferencePlugin inferencePlugin; - - private Boolean elasticInferenceServiceEnabled = true; - - private void setElasticInferenceServiceEnabled(Boolean elasticInferenceServiceEnabled) { - this.elasticInferenceServiceEnabled = elasticInferenceServiceEnabled; - } - - @Before - public void setUp() throws Exception { - super.setUp(); - - Settings settings = Settings.builder().build(); - inferencePlugin = new InferencePlugin(settings) { - @Override - protected Boolean isElasticInferenceServiceEnabled() { - return elasticInferenceServiceEnabled; - } - }; - } - - @After - public void tearDown() throws Exception { - super.tearDown(); - } - - public void testElasticInferenceServiceSettingsPresent() throws Exception { - setElasticInferenceServiceEnabled(true); // enable elastic inference service - boolean anyMatch = inferencePlugin.getSettings() - .stream() - .map(Setting::getKey) - .anyMatch(key -> key.startsWith(ElasticInferenceServiceSettings.ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_PREFIX)); - - assertThat("xpack.inference.elastic settings are present", anyMatch, is(true)); - } - - public void testElasticInferenceServiceSettingsNotPresent() throws Exception { - setElasticInferenceServiceEnabled(false); // disable elastic inference service - boolean noneMatch = inferencePlugin.getSettings() - .stream() - .map(Setting::getKey) - .noneMatch(key -> key.startsWith(ElasticInferenceServiceSettings.ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_PREFIX)); - - assertThat("xpack.inference.elastic settings are not present", noneMatch, is(true)); - } -} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilderTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilderTests.java index b6d455dd233ba..93c3ffe5d14fb 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilderTests.java @@ -31,6 +31,7 @@ import java.util.List; import static org.elasticsearch.search.rank.RankBuilder.DEFAULT_RANK_WINDOW_SIZE; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.DEFAULT_RERANK_ID; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -108,7 +109,6 @@ public void testParserDefaults() throws IOException { } }, "field": "my-field", - "inference_id": "my-inference-id", "inference_text": "my-inference-text" }"""; @@ -118,6 +118,7 @@ public void testParserDefaults() throws IOException { new RetrieverParserContext(new SearchUsage(), nf -> true) ); assertEquals(DEFAULT_RANK_WINDOW_SIZE, parsed.rankWindowSize()); + assertEquals(DEFAULT_RERANK_ID, parsed.inferenceId()); } } diff --git a/x-pack/plugin/migrate/src/internalClusterTest/java/org/elasticsearch/xpack/migrate/action/ReindexDatastreamIndexTransportActionIT.java b/x-pack/plugin/migrate/src/internalClusterTest/java/org/elasticsearch/xpack/migrate/action/ReindexDatastreamIndexTransportActionIT.java index 0ad7dc45d4df8..1c9d85af8d5bd 100644 --- a/x-pack/plugin/migrate/src/internalClusterTest/java/org/elasticsearch/xpack/migrate/action/ReindexDatastreamIndexTransportActionIT.java +++ b/x-pack/plugin/migrate/src/internalClusterTest/java/org/elasticsearch/xpack/migrate/action/ReindexDatastreamIndexTransportActionIT.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.migrate.action; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.admin.indices.get.GetIndexRequest; @@ -152,6 +153,15 @@ public void testSetSourceToBlockWrites() throws Exception { assertHitCount(prepareSearch(sourceIndex).setSize(0), 0); } + public void testMissingSourceIndex() { + var nonExistentSourceIndex = randomAlphaOfLength(20).toLowerCase(Locale.ROOT); + assertThrows( + ResourceNotFoundException.class, + () -> client().execute(ReindexDataStreamIndexAction.INSTANCE, new ReindexDataStreamIndexAction.Request(nonExistentSourceIndex)) + .actionGet() + ); + } + public void testSettingsAddedBeforeReindex() throws Exception { // start with a static setting var numShards = randomIntBetween(1, 10); diff --git a/x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java b/x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java index d3fe27006e82e..8c12011ca4bb1 100644 --- a/x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java +++ b/x-pack/plugin/migrate/src/main/java/org/elasticsearch/xpack/migrate/action/ReindexDataStreamIndexTransportAction.java @@ -10,8 +10,15 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.elasticsearch.ElasticsearchException; +import 
org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.close.CloseIndexRequest; +import org.elasticsearch.action.admin.indices.close.CloseIndexResponse; +import org.elasticsearch.action.admin.indices.close.TransportCloseIndexAction; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; +import org.elasticsearch.action.admin.indices.open.OpenIndexAction; +import org.elasticsearch.action.admin.indices.open.OpenIndexRequest; +import org.elasticsearch.action.admin.indices.open.OpenIndexResponse; import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockRequest; import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse; import org.elasticsearch.action.admin.indices.readonly.TransportAddIndexBlockAction; @@ -117,6 +124,11 @@ protected void doExecute( var destIndexName = generateDestIndexName(sourceIndexName); TaskId taskId = new TaskId(clusterService.localNode().getId(), task.getId()); IndexMetadata sourceIndex = clusterService.state().getMetadata().index(sourceIndexName); + if (sourceIndex == null) { + listener.onFailure(new ResourceNotFoundException("source index [{}] does not exist", sourceIndexName)); + return; + } + Settings settingsBefore = sourceIndex.getSettings(); var hasOldVersion = DeprecatedIndexPredicate.getReindexRequiredPredicate(clusterService.state().metadata(), false); @@ -139,18 +151,51 @@ protected void doExecute( listener.onFailure(new ElasticsearchException(errorMessage)); return; } - + final boolean wasClosed = isClosed(sourceIndex); SubscribableListener.newForked(l -> setBlockWrites(sourceIndexName, l, taskId)) + .andThen(l -> openIndexIfClosed(sourceIndexName, wasClosed, l, taskId)) .andThen(l -> refresh(sourceIndexName, l, taskId)) .andThen(l -> deleteDestIfExists(destIndexName, l, taskId)) .andThen(l -> createIndex(sourceIndex, destIndexName, l, taskId)) .andThen(l -> reindex(sourceIndexName, destIndexName, l, taskId)) .andThen(l -> copyOldSourceSettingsToDest(settingsBefore, destIndexName, l, taskId)) .andThen(l -> sanityCheck(sourceIndexName, destIndexName, l, taskId)) + .andThen(l -> closeIndexIfWasClosed(destIndexName, wasClosed, l, taskId)) .andThenApply(ignored -> new ReindexDataStreamIndexAction.Response(destIndexName)) .addListener(listener); } + private void openIndexIfClosed(String indexName, boolean isClosed, ActionListener listener, TaskId parentTaskId) { + if (isClosed) { + logger.debug("Opening index [{}]", indexName); + var request = new OpenIndexRequest(indexName); + request.setParentTask(parentTaskId); + client.execute(OpenIndexAction.INSTANCE, request, listener); + } else { + listener.onResponse(null); + } + } + + private void closeIndexIfWasClosed( + String indexName, + boolean wasClosed, + ActionListener listener, + TaskId parentTaskId + ) { + if (wasClosed) { + logger.debug("Closing index [{}]", indexName); + var request = new CloseIndexRequest(indexName); + request.setParentTask(parentTaskId); + client.execute(TransportCloseIndexAction.TYPE, request, listener); + } else { + listener.onResponse(null); + } + } + + private static boolean isClosed(IndexMetadata indexMetadata) { + return indexMetadata.getState().equals(IndexMetadata.State.CLOSE); + } + private void setBlockWrites(String sourceIndexName, ActionListener listener, TaskId parentTaskId) { logger.debug("Setting write block on source index [{}]", sourceIndexName); addBlockToIndex(WRITE, sourceIndexName, new ActionListener<>() { diff --git 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 043a27b7cd147..01127c97ba90c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -1240,7 +1240,8 @@ public Collection createComponents(PluginServices services) { ), indexNameExpressionResolver, client - ) + ), + new MlAnomaliesIndexUpdate(indexNameExpressionResolver, client) ) ); clusterService.addListener(mlAutoUpdateService); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java new file mode 100644 index 0000000000000..27bce6747b32f --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java @@ -0,0 +1,235 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse; +import org.elasticsearch.action.admin.indices.rollover.RolloverRequest; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.SubscribableListener; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.routing.IndexRoutingTable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex; +import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndexFields; +import org.elasticsearch.xpack.core.ml.utils.MlIndexAndAlias; +import org.elasticsearch.xpack.core.ml.utils.MlStrings; + +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN; + +/** + * Rollover the various .ml-anomalies result indices + * updating the read and write aliases + */ +public class MlAnomaliesIndexUpdate implements MlAutoUpdateService.UpdateAction { + + private static final Logger logger = LogManager.getLogger(MlAnomaliesIndexUpdate.class); + + private final IndexNameExpressionResolver expressionResolver; + private final OriginSettingClient client; + + public MlAnomaliesIndexUpdate(IndexNameExpressionResolver expressionResolver, 
Client client) { + this.expressionResolver = expressionResolver; + this.client = new OriginSettingClient(client, ML_ORIGIN); + } + + @Override + public boolean isMinTransportVersionSupported(TransportVersion minTransportVersion) { + // Automatic rollover does not require any new features + // but wait for all nodes to be upgraded anyway + return minTransportVersion.onOrAfter(TransportVersions.ML_ROLLOVER_LEGACY_INDICES); + } + + @Override + public boolean isAbleToRun(ClusterState latestState) { + // Find the .ml-anomalies-shared and all custom results indices + String[] indices = expressionResolver.concreteIndexNames( + latestState, + IndicesOptions.lenientExpandOpenHidden(), + AnomalyDetectorsIndex.jobResultsIndexPattern() + ); + + for (String index : indices) { + IndexRoutingTable routingTable = latestState.getRoutingTable().index(index); + if (routingTable == null || routingTable.allPrimaryShardsActive() == false) { + return false; + } + } + return true; + } + + @Override + public String getName() { + return "ml_anomalies_index_update"; + } + + @Override + public void runUpdate(ClusterState latestState) { + List failures = new ArrayList<>(); + + // list all indices starting .ml-anomalies- + // this includes the shared index and all custom results indices + String[] indices = expressionResolver.concreteIndexNames( + latestState, + IndicesOptions.lenientExpandOpenHidden(), + AnomalyDetectorsIndex.jobResultsIndexPattern() + ); + + for (String index : indices) { + boolean isCompatibleIndexVersion = MlIndexAndAlias.indexIsReadWriteCompatibleInV9( + latestState.metadata().index(index).getCreationVersion() + ); + + if (isCompatibleIndexVersion) { + continue; + } + + PlainActionFuture updated = new PlainActionFuture<>(); + rollAndUpdateAliases(latestState, index, updated); + try { + updated.actionGet(); + } catch (Exception ex) { + var message = "failed rolling over legacy ml anomalies index [" + index + "]"; + logger.warn(message, ex); + if (ex instanceof ElasticsearchException elasticsearchException) { + failures.add(new ElasticsearchStatusException(message, elasticsearchException.status(), elasticsearchException)); + } else { + failures.add(new ElasticsearchStatusException(message, RestStatus.REQUEST_TIMEOUT, ex)); + } + + break; + } + } + + if (failures.isEmpty()) { + logger.info("legacy ml anomalies indices rolled over and aliases updated"); + return; + } + + var exception = new ElasticsearchStatusException("failed to roll over legacy ml anomalies", RestStatus.CONFLICT); + failures.forEach(exception::addSuppressed); + throw exception; + } + + private void rollAndUpdateAliases(ClusterState clusterState, String index, ActionListener listener) { + // Create an alias specifically for rolling over. + // The ml-anomalies index has aliases for each job anyone + // of which could be used but that means one alias is + // treated differently. + // Using a `.` in the alias name avoids any conflicts + // as AD job Ids cannot start with `.` + String rolloverAlias = index + ".rollover_alias"; + + // If the index does not end in a digit then rollover does not know + // what to name the new index so it must be specified in the request. + // Otherwise leave null and rollover will calculate the new name + String newIndexName = MlIndexAndAlias.has6DigitSuffix(index) ? 
null : index + MlIndexAndAlias.FIRST_INDEX_SIX_DIGIT_SUFFIX; + IndicesAliasesRequestBuilder aliasRequestBuilder = client.admin().indices().prepareAliases(); + + SubscribableListener.newForked( + l -> { createAliasForRollover(index, rolloverAlias, l.map(AcknowledgedResponse::isAcknowledged)); } + ).andThen((l, success) -> { + rollover(rolloverAlias, newIndexName, l); + }).andThen((l, newIndexNameResponse) -> { + addIndexAliasesRequests(aliasRequestBuilder, index, newIndexNameResponse, clusterState); + // Delete the new alias created for the rollover action + aliasRequestBuilder.removeAlias(newIndexNameResponse, rolloverAlias); + updateAliases(aliasRequestBuilder, l); + }).addListener(listener); + } + + private void rollover(String alias, @Nullable String newIndexName, ActionListener listener) { + client.admin().indices().rolloverIndex(new RolloverRequest(alias, newIndexName), listener.delegateFailure((l, response) -> { + l.onResponse(response.getNewIndex()); + })); + } + + private void createAliasForRollover(String indexName, String aliasName, ActionListener listener) { + logger.info("creating alias for rollover [{}]", aliasName); + client.admin() + .indices() + .prepareAliases() + .addAliasAction(IndicesAliasesRequest.AliasActions.add().index(indexName).alias(aliasName).isHidden(true)) + .execute(listener); + } + + private void updateAliases(IndicesAliasesRequestBuilder request, ActionListener listener) { + request.execute(listener.delegateFailure((l, response) -> l.onResponse(Boolean.TRUE))); + } + + IndicesAliasesRequestBuilder addIndexAliasesRequests( + IndicesAliasesRequestBuilder aliasRequestBuilder, + String oldIndex, + String newIndex, + ClusterState clusterState + ) { + // Multiple jobs can share the same index each job + // has a read and write alias that needs updating + // after the rollover + var meta = clusterState.metadata().index(oldIndex); + assert meta != null; + if (meta == null) { + return aliasRequestBuilder; + } + + for (var alias : meta.getAliases().values()) { + if (isAnomaliesWriteAlias(alias.alias())) { + aliasRequestBuilder.addAliasAction( + IndicesAliasesRequest.AliasActions.add().index(newIndex).alias(alias.alias()).isHidden(true).writeIndex(true) + ); + aliasRequestBuilder.addAliasAction(IndicesAliasesRequest.AliasActions.remove().index(oldIndex).alias(alias.alias())); + } else if (isAnomaliesReadAlias(alias.alias())) { + String jobId = AnomalyDetectorsIndex.jobIdFromAlias(alias.alias()); + aliasRequestBuilder.addAliasAction( + IndicesAliasesRequest.AliasActions.add() + .index(newIndex) + .alias(alias.alias()) + .isHidden(true) + .filter(QueryBuilders.termQuery(Job.ID.getPreferredName(), jobId)) + ); + } + } + + return aliasRequestBuilder; + } + + static boolean isAnomaliesWriteAlias(String aliasName) { + return aliasName.startsWith(AnomalyDetectorsIndexFields.RESULTS_INDEX_WRITE_PREFIX); + } + + static boolean isAnomaliesReadAlias(String aliasName) { + if (aliasName.startsWith(AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX) == false) { + return false; + } + + // See {@link AnomalyDetectorsIndex#jobResultsAliasedName} + String jobIdPart = aliasName.substring(AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX.length()); + // If this is a write alias it will start with a `.` character + // which is not a valid job id. 
+ return MlStrings.isValidId(jobIdPart); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java index 7dbafdc2676ba..c079e5dfde737 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java @@ -117,7 +117,7 @@ public void runUpdate(ClusterState latestState) { } if (failures.isEmpty()) { - logger.info("ML legacy indies rolled over"); + logger.info("ML legacy indices rolled over"); return; } @@ -136,7 +136,10 @@ private void rolloverLegacyIndices(ClusterState clusterState, String indexPatter } String latestIndex = MlIndexAndAlias.latestIndex(concreteIndices); - boolean isCompatibleIndexVersion = isCompatibleIndexVersion(clusterState.metadata().index(latestIndex).getCreationVersion()); + // Indices created before 8.0 are read only in 9 + boolean isCompatibleIndexVersion = MlIndexAndAlias.indexIsReadWriteCompatibleInV9( + clusterState.metadata().index(latestIndex).getCreationVersion() + ); boolean hasAlias = clusterState.getMetadata().hasAlias(alias); if (isCompatibleIndexVersion && hasAlias) { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdateTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdateTests.java new file mode 100644 index 0000000000000..b203d756c3214 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdateTests.java @@ -0,0 +1,254 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.ml; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder; +import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse; +import org.elasticsearch.action.admin.indices.alias.TransportIndicesAliasesAction; +import org.elasticsearch.action.admin.indices.rollover.RolloverAction; +import org.elasticsearch.action.admin.indices.rollover.RolloverRequest; +import org.elasticsearch.action.admin.indices.rollover.RolloverResponse; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.ElasticsearchClient; +import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.AliasMetadata; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.indices.TestIndexNameExpressionResolver; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.same; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +public class MlAnomaliesIndexUpdateTests extends ESTestCase { + + public void testIsAnomaliesWriteAlias() { + assertTrue(MlAnomaliesIndexUpdate.isAnomaliesWriteAlias(AnomalyDetectorsIndex.resultsWriteAlias("foo"))); + assertFalse(MlAnomaliesIndexUpdate.isAnomaliesWriteAlias(AnomalyDetectorsIndex.jobResultsAliasedName("foo"))); + assertFalse(MlAnomaliesIndexUpdate.isAnomaliesWriteAlias("some-index")); + } + + public void testIsAnomaliesAlias() { + assertTrue(MlAnomaliesIndexUpdate.isAnomaliesReadAlias(AnomalyDetectorsIndex.jobResultsAliasedName("foo"))); + assertFalse(MlAnomaliesIndexUpdate.isAnomaliesReadAlias(AnomalyDetectorsIndex.resultsWriteAlias("foo"))); + assertFalse(MlAnomaliesIndexUpdate.isAnomaliesReadAlias("some-index")); + } + + public void testIsAbleToRun_IndicesDoNotExist() { + RoutingTable.Builder routingTable = RoutingTable.builder(); + var updater = new MlAnomaliesIndexUpdate(TestIndexNameExpressionResolver.newInstance(), mock(Client.class)); + + ClusterState.Builder csBuilder = ClusterState.builder(new ClusterName("_name")); + csBuilder.routingTable(routingTable.build()); + assertTrue(updater.isAbleToRun(csBuilder.build())); + } + + public void testIsAbleToRun_IndicesHaveNoRouting() { + IndexMetadata.Builder indexMetadata = IndexMetadata.builder(".ml-anomalies-shared"); + indexMetadata.settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, 
IndexVersion.current()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_INDEX_UUID, "_uuid") + ); + + Metadata.Builder metadata = Metadata.builder(); + metadata.put(indexMetadata); + ClusterState.Builder csBuilder = ClusterState.builder(new ClusterName("_name")); + csBuilder.routingTable(RoutingTable.builder().build()); // no routing to index + csBuilder.metadata(metadata); + + var updater = new MlAnomaliesIndexUpdate(TestIndexNameExpressionResolver.newInstance(), mock(Client.class)); + + assertFalse(updater.isAbleToRun(csBuilder.build())); + } + + public void testBuildIndexAliasesRequest() { + var anomaliesIndex = ".ml-anomalies-sharedindex"; + var jobs = List.of("job1", "job2"); + IndexMetadata.Builder indexMetadata = createSharedResultsIndex(anomaliesIndex, IndexVersion.current(), jobs); + Metadata.Builder metadata = Metadata.builder(); + metadata.put(indexMetadata); + ClusterState.Builder csBuilder = ClusterState.builder(new ClusterName("_name")); + csBuilder.metadata(metadata); + + var updater = new MlAnomaliesIndexUpdate( + TestIndexNameExpressionResolver.newInstance(), + new OriginSettingClient(mock(Client.class), "doesn't matter") + ); + + IndicesAliasesRequestBuilder aliasRequestBuilder = new IndicesAliasesRequestBuilder(mock(ElasticsearchClient.class)); + + var newIndex = anomaliesIndex + "-000001"; + var request = updater.addIndexAliasesRequests(aliasRequestBuilder, anomaliesIndex, newIndex, csBuilder.build()); + var actions = request.request().getAliasActions(); + assertThat(actions, hasSize(6)); + + // The order in which the alias actions are created + // is not preserved so look for the item in the list + for (var job : jobs) { + var expected = new AliasActionMatcher( + AnomalyDetectorsIndex.resultsWriteAlias(job), + newIndex, + IndicesAliasesRequest.AliasActions.Type.ADD + ); + assertThat(actions.stream().filter(expected::matches).count(), equalTo(1L)); + + expected = new AliasActionMatcher( + AnomalyDetectorsIndex.resultsWriteAlias(job), + anomaliesIndex, + IndicesAliasesRequest.AliasActions.Type.REMOVE + ); + assertThat(actions.stream().filter(expected::matches).count(), equalTo(1L)); + + expected = new AliasActionMatcher( + AnomalyDetectorsIndex.jobResultsAliasedName(job), + newIndex, + IndicesAliasesRequest.AliasActions.Type.ADD + ); + assertThat(actions.stream().filter(expected::matches).count(), equalTo(1L)); + } + } + + public void testRunUpdate_UpToDateIndices() { + String indexName = ".ml-anomalies-sharedindex"; + var jobs = List.of("job1", "job2"); + IndexMetadata.Builder indexMetadata = createSharedResultsIndex(indexName, IndexVersion.current(), jobs); + + Metadata.Builder metadata = Metadata.builder(); + metadata.put(indexMetadata); + ClusterState.Builder csBuilder = ClusterState.builder(new ClusterName("_name")); + csBuilder.metadata(metadata); + + var client = mock(Client.class); + var updater = new MlAnomaliesIndexUpdate(TestIndexNameExpressionResolver.newInstance(), client); + updater.runUpdate(csBuilder.build()); + // everything up to date so no action for the client + verify(client).settings(); + verify(client).threadPool(); + verifyNoMoreInteractions(client); + } + + public void testRunUpdate_LegacyIndex() { + String indexName = ".ml-anomalies-sharedindex"; + var jobs = List.of("job1", "job2"); + IndexMetadata.Builder indexMetadata = createSharedResultsIndex(indexName, IndexVersions.V_7_17_0, jobs); + + Metadata.Builder metadata = Metadata.builder(); + 
metadata.put(indexMetadata); + ClusterState.Builder csBuilder = ClusterState.builder(new ClusterName("_name")); + csBuilder.metadata(metadata); + + var client = mockClientWithRolloverAndAlias(indexName); + var updater = new MlAnomaliesIndexUpdate(TestIndexNameExpressionResolver.newInstance(), client); + + updater.runUpdate(csBuilder.build()); + verify(client).settings(); + verify(client, times(7)).threadPool(); + verify(client, times(2)).execute(same(TransportIndicesAliasesAction.TYPE), any(), any()); // create rollover alias and update + verify(client).execute(same(RolloverAction.INSTANCE), any(), any()); // index rolled over + verifyNoMoreInteractions(client); + } + + private record AliasActionMatcher(String aliasName, String index, IndicesAliasesRequest.AliasActions.Type actionType) { + boolean matches(IndicesAliasesRequest.AliasActions aliasAction) { + return aliasAction.actionType() == actionType + && aliasAction.aliases()[0].equals(aliasName) + && aliasAction.indices()[0].equals(index); + } + } + + private IndexMetadata.Builder createSharedResultsIndex(String indexName, IndexVersion indexVersion, List jobs) { + IndexMetadata.Builder indexMetadata = IndexMetadata.builder(indexName); + indexMetadata.settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_INDEX_UUID, "_uuid") + ); + + for (var jobId : jobs) { + indexMetadata.putAlias(AliasMetadata.builder(AnomalyDetectorsIndex.jobResultsAliasedName(jobId)).isHidden(true).build()); + indexMetadata.putAlias( + AliasMetadata.builder(AnomalyDetectorsIndex.resultsWriteAlias(jobId)).writeIndex(true).isHidden(true).build() + ); + } + + return indexMetadata; + } + + @SuppressWarnings("unchecked") + static Client mockClientWithRolloverAndAlias(String indexName) { + var client = mock(Client.class); + + var aliasRequestCount = new AtomicInteger(0); + + doAnswer(invocationOnMock -> { + ActionListener actionListener = (ActionListener) invocationOnMock.getArguments()[2]; + actionListener.onResponse(new RolloverResponse(indexName, indexName + "-new", Map.of(), false, true, true, true, true)); + return null; + }).when(client).execute(same(RolloverAction.INSTANCE), any(RolloverRequest.class), any(ActionListener.class)); + + doAnswer(invocationOnMock -> { + ActionListener actionListener = (ActionListener) invocationOnMock + .getArguments()[2]; + var request = (IndicesAliasesRequest) invocationOnMock.getArguments()[1]; + // Check the rollover alias is create and deleted + if (aliasRequestCount.getAndIncrement() == 0) { + var addAliasAction = new AliasActionMatcher( + indexName + ".rollover_alias", + indexName, + IndicesAliasesRequest.AliasActions.Type.ADD + ); + assertEquals(1L, request.getAliasActions().stream().filter(addAliasAction::matches).count()); + } else { + var removeAliasAction = new AliasActionMatcher( + indexName + ".rollover_alias", + indexName + "-new", + IndicesAliasesRequest.AliasActions.Type.REMOVE + ); + assertEquals(1L, request.getAliasActions().stream().filter(removeAliasAction::matches).count()); + } + + actionListener.onResponse(IndicesAliasesResponse.ACKNOWLEDGED_NO_ERRORS); + + return null; + }).when(client).execute(same(TransportIndicesAliasesAction.TYPE), any(IndicesAliasesRequest.class), any(ActionListener.class)); + + var threadPool = mock(ThreadPool.class); + when(threadPool.getThreadContext()).thenReturn(new ThreadContext(Settings.EMPTY)); + 
when(client.threadPool()).thenReturn(threadPool); + + return client; + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlIndexRolloverTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlIndexRolloverTests.java index aa59028a4cc0d..491b20f0a2d3e 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlIndexRolloverTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlIndexRolloverTests.java @@ -258,7 +258,7 @@ public void testIsCompatibleIndexVersion() { } @SuppressWarnings("unchecked") - private Client mockClientWithRolloverAndAlias() { + static Client mockClientWithRolloverAndAlias() { var client = mock(Client.class); doAnswer(invocationOnMock -> { diff --git a/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java b/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java index cfd322d04e92f..0605177b2c2e5 100644 --- a/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java +++ b/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java @@ -77,7 +77,7 @@ public class MonitoringTemplateRegistry extends IndexTemplateRegistry { * writes monitoring data in ECS format as of 8.0. These templates define the ECS schema as well as alias fields for the old monitoring * mappings that point to the corresponding ECS fields. */ - public static final int STACK_MONITORING_REGISTRY_VERSION = 8_00_00_99 + 19; + public static final int STACK_MONITORING_REGISTRY_VERSION = 8_00_00_99 + 20; private static final String STACK_MONITORING_REGISTRY_VERSION_VARIABLE = "xpack.stack.monitoring.template.release.version"; private static final String STACK_TEMPLATE_VERSION = "8"; private static final String STACK_TEMPLATE_VERSION_VARIABLE = "xpack.stack.monitoring.template.version"; diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java new file mode 100644 index 0000000000000..f98231a647470 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java @@ -0,0 +1,838 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.rank.linear; + +import org.apache.lucene.search.TotalHits; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.query.InnerHitBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.collapse.CollapseBuilder; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.KnnRetrieverBuilder; +import org.elasticsearch.search.retriever.StandardRetrieverBuilder; +import org.elasticsearch.search.retriever.TestRetrieverBuilder; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.search.vectors.QueryVectorBuilder; +import org.elasticsearch.search.vectors.TestQueryVectorBuilderPlugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.rank.rrf.RRFRankPlugin; +import org.junit.Before; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +@ESIntegTestCase.ClusterScope(minNumDataNodes = 2) +public class LinearRetrieverIT extends ESIntegTestCase { + + protected static String INDEX = "test_index"; + protected static final String DOC_FIELD = "doc"; + protected static final String TEXT_FIELD = "text"; + protected static final String VECTOR_FIELD = "vector"; + protected static final String TOPIC_FIELD = "topic"; + + @Override + protected Collection> nodePlugins() { + return List.of(RRFRankPlugin.class); + } + + @Before + public void setup() throws Exception { + setupIndex(); + } + + protected void setupIndex() { + String mapping = """ + { + "properties": { + "vector": { + "type": "dense_vector", + "dims": 1, + "element_type": "float", + "similarity": "l2_norm", + "index": true, + "index_options": { + "type": "flat" + } + }, + "text": { + "type": "text" + }, + "doc": { + "type": "keyword" + }, + "topic": { + "type": "keyword" + }, + "views": { + "type": "nested", + "properties": { + "last30d": { + "type": "integer" + }, + "all": { + "type": "integer" + } + } + } + } + } + """; + createIndex(INDEX, 
Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)).build()); + admin().indices().preparePutMapping(INDEX).setSource(mapping, XContentType.JSON).get(); + indexDoc(INDEX, "doc_1", DOC_FIELD, "doc_1", TOPIC_FIELD, "technology", TEXT_FIELD, "term"); + indexDoc( + INDEX, + "doc_2", + DOC_FIELD, + "doc_2", + TOPIC_FIELD, + "astronomy", + TEXT_FIELD, + "search term term", + VECTOR_FIELD, + new float[] { 2.0f } + ); + indexDoc(INDEX, "doc_3", DOC_FIELD, "doc_3", TOPIC_FIELD, "technology", VECTOR_FIELD, new float[] { 3.0f }); + indexDoc(INDEX, "doc_4", DOC_FIELD, "doc_4", TOPIC_FIELD, "technology", TEXT_FIELD, "term term term term"); + indexDoc(INDEX, "doc_5", DOC_FIELD, "doc_5", TOPIC_FIELD, "science", TEXT_FIELD, "irrelevant stuff"); + indexDoc( + INDEX, + "doc_6", + DOC_FIELD, + "doc_6", + TEXT_FIELD, + "search term term term term term term", + VECTOR_FIELD, + new float[] { 6.0f } + ); + indexDoc( + INDEX, + "doc_7", + DOC_FIELD, + "doc_7", + TOPIC_FIELD, + "biology", + TEXT_FIELD, + "term term term term term term term", + VECTOR_FIELD, + new float[] { 7.0f } + ); + refresh(INDEX); + } + + public void testLinearRetrieverWithAggs() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this one retrieves docs 1, 2, 4, 6, and 7 + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(9L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(8L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(7L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_7")).boost(6L)) + ); + // this one retrieves docs 2 and 6 due to prefilter + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + // this one retrieves docs 2, 3, 6, and 7 + KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null, null); + + // all requests would have an equal weight and use the identity normalizer + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetrieverBuilder, null) + ), + rankWindowSize + ) + ); + source.size(1); + source.aggregation(AggregationBuilders.terms("topic_agg").field(TOPIC_FIELD)); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(6L)); + assertThat(resp.getHits().getTotalHits().relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(resp.getHits().getHits().length, equalTo(1)); + 
assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2")); + + assertNotNull(resp.getAggregations()); + assertNotNull(resp.getAggregations().get("topic_agg")); + Terms terms = resp.getAggregations().get("topic_agg"); + + assertThat(terms.getBucketByKey("technology").getDocCount(), equalTo(3L)); + assertThat(terms.getBucketByKey("astronomy").getDocCount(), equalTo(1L)); + assertThat(terms.getBucketByKey("biology").getDocCount(), equalTo(1L)); + }); + } + + public void testLinearWithCollapse() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this one retrieves docs 1, 2, 4, 6, and 7 + // with scores 10, 9, 8, 7, 6 + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(9L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(8L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(7L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_7")).boost(6L)) + ); + // this one retrieves docs 2 and 6 due to prefilter + // with scores 20, 5 + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + // this one retrieves docs 2, 3, 6, and 7 + // with scores 1, 0.5, 0.05882353, 0.03846154 + KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null, null); + // final ranking with no-normalizer would be: doc 2, 6, 1, 4, 7, 3 + // doc 1: 10 + // doc 2: 9 + 20 + 1 = 30 + // doc 3: 0.5 + // doc 4: 8 + // doc 6: 7 + 5 + 0.05882353 = 12.05882353 + // doc 7: 6 + 0.03846154 = 6.03846154 + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetrieverBuilder, null) + ), + rankWindowSize + ) + ); + source.collapse( + new CollapseBuilder(TOPIC_FIELD).setInnerHits( + new InnerHitBuilder("a").addSort(new FieldSortBuilder(DOC_FIELD).order(SortOrder.DESC)).setSize(10) + ) + ); + source.fetchField(TOPIC_FIELD); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(6L)); + assertThat(resp.getHits().getTotalHits().relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(resp.getHits().getHits().length, equalTo(4)); + assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2")); + assertThat(resp.getHits().getAt(0).getScore(), equalTo(30f)); + assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_6")); + assertThat((double) resp.getHits().getAt(1).getScore(), closeTo(12.0588f, 0.0001f)); + 
assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_1")); + assertThat(resp.getHits().getAt(2).getScore(), equalTo(10f)); + assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(0).getId(), equalTo("doc_4")); + assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(1).getId(), equalTo("doc_3")); + assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(2).getId(), equalTo("doc_1")); + assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_7")); + assertThat((double) resp.getHits().getAt(3).getScore(), closeTo(6.0384f, 0.0001f)); + }); + } + + public void testLinearRetrieverWithCollapseAndAggs() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this one retrieves docs 1, 2, 4, 6, and 7 + // with scores 10, 9, 8, 7, 6 + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(9L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(8L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(7L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_7")).boost(6L)) + ); + // this one retrieves docs 2 and 6 due to prefilter + // with scores 20, 5 + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + // this one retrieves docs 2, 3, 6, and 7 + // with scores 1, 0.5, 0.05882353, 0.03846154 + KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null, null); + // final ranking with no-normalizer would be: doc 2, 6, 1, 4, 7, 3 + // doc 1: 10 + // doc 2: 9 + 20 + 1 = 30 + // doc 3: 0.5 + // doc 4: 8 + // doc 6: 7 + 5 + 0.05882353 = 12.05882353 + // doc 7: 6 + 0.03846154 = 6.03846154 + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetrieverBuilder, null) + ), + rankWindowSize + ) + ); + source.collapse( + new CollapseBuilder(TOPIC_FIELD).setInnerHits( + new InnerHitBuilder("a").addSort(new FieldSortBuilder(DOC_FIELD).order(SortOrder.DESC)).setSize(10) + ) + ); + source.fetchField(TOPIC_FIELD); + source.aggregation(AggregationBuilders.terms("topic_agg").field(TOPIC_FIELD)); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(6L)); + assertThat(resp.getHits().getTotalHits().relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(resp.getHits().getHits().length, equalTo(4)); + assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2")); + 
assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_6")); + assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_1")); + assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(0).getId(), equalTo("doc_4")); + assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(1).getId(), equalTo("doc_3")); + assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(2).getId(), equalTo("doc_1")); + assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_7")); + + assertNotNull(resp.getAggregations()); + assertNotNull(resp.getAggregations().get("topic_agg")); + Terms terms = resp.getAggregations().get("topic_agg"); + + assertThat(terms.getBucketByKey("technology").getDocCount(), equalTo(3L)); + assertThat(terms.getBucketByKey("astronomy").getDocCount(), equalTo(1L)); + assertThat(terms.getBucketByKey("biology").getDocCount(), equalTo(1L)); + }); + } + + public void testMultipleLinearRetrievers() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this one retrieves docs 1, 2, 4, 6, and 7 + // with scores 10, 9, 8, 7, 6 + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(9L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(8L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(7L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_7")).boost(6L)) + ); + // this one retrieves docs 2 and 6 due to prefilter + // with scores 20, 5 + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource( + // this one returns docs doc 2, 1, 6, 4, 7 + // with scores 38, 20, 19, 16, 12 + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null) + ), + rankWindowSize, + new float[] { 2.0f, 1.0f }, + new ScoreNormalizer[] { IdentityScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } + ), + null + ), + // this one bring just doc 7 which should be ranked first eventually with a score of 100 + new CompoundRetrieverBuilder.RetrieverSource( + new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 7.0f }, null, 1, 100, null, null), + null + ) + ), + rankWindowSize, + new float[] { 1.0f, 100.0f }, + new ScoreNormalizer[] { IdentityScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } + ) + ); + + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(5L)); + assertThat(resp.getHits().getTotalHits().relation(), 
equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_7")); + assertThat(resp.getHits().getAt(0).getScore(), equalTo(112f)); + assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_2")); + assertThat(resp.getHits().getAt(1).getScore(), equalTo(38f)); + assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_1")); + assertThat(resp.getHits().getAt(2).getScore(), equalTo(20f)); + assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_6")); + assertThat(resp.getHits().getAt(3).getScore(), equalTo(19f)); + assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_4")); + assertThat(resp.getHits().getAt(4).getScore(), equalTo(16f)); + }); + } + + public void testLinearExplainWithNamedRetrievers() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this one retrieves docs 1, 2, 4, 6, and 7 + // with scores 10, 9, 8, 7, 6 + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(9L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(8L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(7L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_7")).boost(6L)) + ); + standard0.retrieverName("my_custom_retriever"); + // this one retrieves docs 2 and 6 due to prefilter + // with scores 20, 5 + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + // this one retrieves docs 2, 3, 6, and 7 + // with scores 1, 0.5, 0.05882353, 0.03846154 + KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null, null); + // final ranking with no-normalizer would be: doc 2, 6, 1, 4, 7, 3 + // doc 1: 10 + // doc 2: 9 + 20 + 1 = 30 + // doc 3: 0.5 + // doc 4: 8 + // doc 6: 7 + 5 + 0.05882353 = 12.05882353 + // doc 7: 6 + 0.03846154 = 6.03846154 + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetrieverBuilder, null) + ), + rankWindowSize + ) + ); + source.explain(true); + source.size(1); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(6L)); + assertThat(resp.getHits().getTotalHits().relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(resp.getHits().getHits().length, equalTo(1)); + assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2")); + assertThat(resp.getHits().getAt(0).getExplanation().isMatch(), equalTo(true)); + 
assertThat(resp.getHits().getAt(0).getExplanation().getDescription(), containsString("sum of:")); + assertThat(resp.getHits().getAt(0).getExplanation().getDetails().length, equalTo(2)); + var rrfDetails = resp.getHits().getAt(0).getExplanation().getDetails()[0]; + assertThat(rrfDetails.getDetails().length, equalTo(3)); + assertThat( + rrfDetails.getDescription(), + equalTo( + "weighted linear combination score: [30.0] computed for normalized scores [9.0, 20.0, 1.0] " + + "and weights [1.0, 1.0, 1.0] as sum of (weight[i] * score[i]) for each query." + ) + ); + + assertThat( + rrfDetails.getDetails()[0].getDescription(), + containsString( + "weighted score: [9.0] in query at index [0] [my_custom_retriever] computed as [1.0 * 9.0] " + + "using score normalizer [none] for original matching query with score" + ) + ); + assertThat( + rrfDetails.getDetails()[1].getDescription(), + containsString( + "weighted score: [20.0] in query at index [1] computed as [1.0 * 20.0] using score normalizer [none] " + + "for original matching query with score:" + ) + ); + assertThat( + rrfDetails.getDetails()[2].getDescription(), + containsString( + "weighted score: [1.0] in query at index [2] computed as [1.0 * 1.0] using score normalizer [none] " + + "for original matching query with score" + ) + ); + }); + } + + public void testLinearExplainWithAnotherNestedLinear() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this one retrieves docs 1, 2, 4, 6, and 7 + // with scores 10, 9, 8, 7, 6 + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(9L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(8L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(7L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_7")).boost(6L)) + ); + standard0.retrieverName("my_custom_retriever"); + // this one retrieves docs 2 and 6 due to prefilter + // with scores 20, 5 + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + // this one retrieves docs 2, 3, 6, and 7 + // with scores 1, 0.5, 0.05882353, 0.03846154 + KnnRetrieverBuilder knnRetrieverBuilder = new KnnRetrieverBuilder(VECTOR_FIELD, new float[] { 2.0f }, null, 10, 100, null, null); + // final ranking with no-normalizer would be: doc 2, 6, 1, 4, 7, 3 + // doc 1: 10 + // doc 2: 9 + 20 + 1 = 30 + // doc 3: 0.5 + // doc 4: 8 + // doc 6: 7 + 5 + 0.05882353 = 12.05882353 + // doc 7: 6 + 0.03846154 = 6.03846154 + LinearRetrieverBuilder nestedLinear = new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetrieverBuilder, null) + ), + rankWindowSize + ); + 
nestedLinear.retrieverName("nested_linear"); + // this one retrieves docs 6 with a score of 100 + StandardRetrieverBuilder standard2 = new StandardRetrieverBuilder( + QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(20L) + ); + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(nestedLinear, null), + new CompoundRetrieverBuilder.RetrieverSource(standard2, null) + ), + rankWindowSize, + new float[] { 1, 5f }, + new ScoreNormalizer[] { IdentityScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } + ) + ); + source.explain(true); + source.size(1); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(6L)); + assertThat(resp.getHits().getTotalHits().relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(resp.getHits().getHits().length, equalTo(1)); + assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_6")); + assertThat(resp.getHits().getAt(0).getExplanation().isMatch(), equalTo(true)); + assertThat(resp.getHits().getAt(0).getExplanation().getDescription(), containsString("sum of:")); + assertThat(resp.getHits().getAt(0).getExplanation().getDetails().length, equalTo(2)); + var linearTopLevel = resp.getHits().getAt(0).getExplanation().getDetails()[0]; + assertThat(linearTopLevel.getDetails().length, equalTo(2)); + assertThat( + linearTopLevel.getDescription(), + containsString( + "weighted linear combination score: [112.05882] computed for normalized scores [12.058824, 20.0] " + + "and weights [1.0, 5.0] as sum of (weight[i] * score[i]) for each query." 
+ ) + ); + assertThat(linearTopLevel.getDetails()[0].getDescription(), containsString("weighted score: [12.058824]")); + assertThat(linearTopLevel.getDetails()[0].getDescription(), containsString("nested_linear")); + assertThat(linearTopLevel.getDetails()[1].getDescription(), containsString("weighted score: [100.0]")); + + var linearNested = linearTopLevel.getDetails()[0]; + assertThat(linearNested.getDetails()[0].getDetails().length, equalTo(3)); + assertThat(linearNested.getDetails()[0].getDetails()[0].getDescription(), containsString("weighted score: [7.0]")); + assertThat(linearNested.getDetails()[0].getDetails()[1].getDescription(), containsString("weighted score: [5.0]")); + assertThat(linearNested.getDetails()[0].getDetails()[2].getDescription(), containsString("weighted score: [0.05882353]")); + + var standard0Details = linearTopLevel.getDetails()[1]; + assertThat(standard0Details.getDetails()[0].getDescription(), containsString("ConstantScore")); + }); + } + + public void testLinearInnerRetrieverAll4xxSearchErrors() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this will throw a 4xx error during evaluation + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.constantScoreQuery(QueryBuilders.rangeQuery(VECTOR_FIELD).gte(10)) + ); + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null) + ), + rankWindowSize + ) + ); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + Exception ex = expectThrows(ElasticsearchStatusException.class, req::get); + assertThat(ex, instanceOf(ElasticsearchStatusException.class)); + assertThat( + ex.getMessage(), + containsString( + "[linear] search failed - retrievers '[standard]' returned errors. All failures are attached as suppressed exceptions." 
+ ) + ); + assertThat(ExceptionsHelper.status(ex), equalTo(RestStatus.BAD_REQUEST)); + assertThat(ex.getSuppressed().length, equalTo(1)); + assertThat(ex.getSuppressed()[0].getCause().getCause(), instanceOf(IllegalArgumentException.class)); + } + + public void testLinearInnerRetrieverMultipleErrorsOne5xx() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this will throw a 4xx error during evaluation + StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder( + QueryBuilders.constantScoreQuery(QueryBuilders.rangeQuery(VECTOR_FIELD).gte(10)) + ); + // this will throw a 5xx error + TestRetrieverBuilder testRetrieverBuilder = new TestRetrieverBuilder("val") { + @Override + public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) { + searchSourceBuilder.aggregation(AggregationBuilders.avg("some_invalid_param")); + } + }; + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standard0, null), + new CompoundRetrieverBuilder.RetrieverSource(testRetrieverBuilder, null) + ), + rankWindowSize + ) + ); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + Exception ex = expectThrows(ElasticsearchStatusException.class, req::get); + assertThat(ex, instanceOf(ElasticsearchStatusException.class)); + assertThat( + ex.getMessage(), + containsString( + "[linear] search failed - retrievers '[standard, test]' returned errors. " + + "All failures are attached as suppressed exceptions." + ) + ); + assertThat(ExceptionsHelper.status(ex), equalTo(RestStatus.INTERNAL_SERVER_ERROR)); + assertThat(ex.getSuppressed().length, equalTo(2)); + assertThat(ex.getSuppressed()[0].getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat(ex.getSuppressed()[1].getCause().getCause(), instanceOf(IllegalStateException.class)); + } + + public void testLinearInnerRetrieverErrorWhenExtractingToSource() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + TestRetrieverBuilder failingRetriever = new TestRetrieverBuilder("some value") { + @Override + public QueryBuilder topDocsQuery() { + return QueryBuilders.matchAllQuery(); + } + + @Override + public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) { + throw new UnsupportedOperationException("simulated failure"); + } + }; + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(failingRetriever, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null) + ), + rankWindowSize + ) + ); + source.size(1); + expectThrows(UnsupportedOperationException.class, () -> client().prepareSearch(INDEX).setSource(source).get()); + } + + public void testLinearInnerRetrieverErrorOnTopDocs() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + TestRetrieverBuilder failingRetriever = new 
TestRetrieverBuilder("some value") { + @Override + public QueryBuilder topDocsQuery() { + throw new UnsupportedOperationException("simulated failure"); + } + + @Override + public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) { + searchSourceBuilder.query(QueryBuilders.matchAllQuery()); + } + }; + StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder( + QueryBuilders.boolQuery() + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_2")).boost(20L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_3")).boost(10L)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(5L)) + ); + standard1.getPreFilterQueryBuilders().add(QueryBuilders.queryStringQuery("search").defaultField(TEXT_FIELD)); + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(failingRetriever, null), + new CompoundRetrieverBuilder.RetrieverSource(standard1, null) + ), + rankWindowSize + ) + ); + source.size(1); + source.aggregation(AggregationBuilders.terms("topic_agg").field(TOPIC_FIELD)); + expectThrows(UnsupportedOperationException.class, () -> client().prepareSearch(INDEX).setSource(source).get()); + } + + public void testLinearFiltersPropagatedToKnnQueryVectorBuilder() { + final int rankWindowSize = 100; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this will retriever all but 7 only due to top-level filter + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(QueryBuilders.matchAllQuery()); + // this will too retrieve just doc 7 + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "vector", + null, + new TestQueryVectorBuilderPlugin.TestQueryVectorBuilder(new float[] { 3 }), + 10, + 10, + null, + null + ); + source.retriever( + new LinearRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standardRetriever, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetriever, null) + ), + rankWindowSize + ) + ); + source.retriever().getPreFilterQueryBuilders().add(QueryBuilders.boolQuery().must(QueryBuilders.termQuery(DOC_FIELD, "doc_7"))); + source.size(10); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(resp.getHits().getHits()[0].getId(), equalTo("doc_7")); + }); + } + + public void testRewriteOnce() { + final float[] vector = new float[] { 1 }; + AtomicInteger numAsyncCalls = new AtomicInteger(); + QueryVectorBuilder vectorBuilder = new QueryVectorBuilder() { + @Override + public void buildVector(Client client, ActionListener listener) { + numAsyncCalls.incrementAndGet(); + listener.onResponse(vector); + } + + @Override + public String getWriteableName() { + throw new IllegalStateException("Should not be called"); + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + throw new IllegalStateException("Should not be called"); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + throw new IllegalStateException("Should not be called"); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + throw new IllegalStateException("Should not be called"); + } + }; + var knn = new 
KnnRetrieverBuilder("vector", null, vectorBuilder, 10, 10, null, null); + var standard = new StandardRetrieverBuilder(new KnnVectorQueryBuilder("vector", vectorBuilder, 10, 10, null)); + var rrf = new LinearRetrieverBuilder( + List.of(new CompoundRetrieverBuilder.RetrieverSource(knn, null), new CompoundRetrieverBuilder.RetrieverSource(standard, null)), + 10 + ); + assertResponse( + client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(rrf)), + searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), is(4L)) + ); + assertThat(numAsyncCalls.get(), equalTo(2)); + + // check that we use the rewritten vector to build the explain query + assertResponse( + client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(rrf).explain(true)), + searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), is(4L)) + ); + assertThat(numAsyncCalls.get(), equalTo(4)); + } +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/module-info.java b/x-pack/plugin/rank-rrf/src/main/java/module-info.java index 4fd2a7e4d54f3..fbe467fdf3eae 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/module-info.java +++ b/x-pack/plugin/rank-rrf/src/main/java/module-info.java @@ -5,7 +5,7 @@ * 2.0. */ -import org.elasticsearch.xpack.rank.rrf.RRFFeatures; +import org.elasticsearch.xpack.rank.RankRRFFeatures; module org.elasticsearch.rank.rrf { requires org.apache.lucene.core; @@ -14,7 +14,9 @@ requires org.elasticsearch.server; requires org.elasticsearch.xcore; + exports org.elasticsearch.xpack.rank; exports org.elasticsearch.xpack.rank.rrf; + exports org.elasticsearch.xpack.rank.linear; - provides org.elasticsearch.features.FeatureSpecification with RRFFeatures; + provides org.elasticsearch.features.FeatureSpecification with RankRRFFeatures; } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java similarity index 65% rename from x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java rename to x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 494eaa508c14a..5966e17f20429 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.rank.rrf; +package org.elasticsearch.xpack.rank; import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; @@ -14,10 +14,14 @@ import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT; -/** - * A set of features specifically for the rrf plugin. 
- */ -public class RRFFeatures implements FeatureSpecification { +public class RankRRFFeatures implements FeatureSpecification { + + public static final NodeFeature LINEAR_RETRIEVER_SUPPORTED = new NodeFeature("linear_retriever_supported"); + + @Override + public Set getFeatures() { + return Set.of(LINEAR_RETRIEVER_SUPPORTED); + } @Override public Set getTestFeatures() { diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/IdentityScoreNormalizer.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/IdentityScoreNormalizer.java new file mode 100644 index 0000000000000..15af17a1db4ef --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/IdentityScoreNormalizer.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.linear; + +import org.apache.lucene.search.ScoreDoc; + +public class IdentityScoreNormalizer extends ScoreNormalizer { + + public static final IdentityScoreNormalizer INSTANCE = new IdentityScoreNormalizer(); + + public static final String NAME = "none"; + + @Override + public String getName() { + return NAME; + } + + @Override + public ScoreDoc[] normalizeScores(ScoreDoc[] docs) { + return docs; + } +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRankDoc.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRankDoc.java new file mode 100644 index 0000000000000..bb1c420bbd06c --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRankDoc.java @@ -0,0 +1,143 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.rank.linear; + +import org.apache.lucene.search.Explanation; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.search.rank.RankDoc; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; + +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder.DEFAULT_SCORE; +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_NORMALIZER; +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; + +public class LinearRankDoc extends RankDoc { + + public static final String NAME = "linear_rank_doc"; + + final float[] weights; + final String[] normalizers; + public float[] normalizedScores; + + public LinearRankDoc(int doc, float score, int shardIndex) { + super(doc, score, shardIndex); + this.weights = null; + this.normalizers = null; + } + + public LinearRankDoc(int doc, float score, int shardIndex, float[] weights, String[] normalizers) { + super(doc, score, shardIndex); + this.weights = weights; + this.normalizers = normalizers; + } + + public LinearRankDoc(StreamInput in) throws IOException { + super(in); + weights = in.readOptionalFloatArray(); + normalizedScores = in.readOptionalFloatArray(); + normalizers = in.readOptionalStringArray(); + } + + @Override + public Explanation explain(Explanation[] sources, String[] queryNames) { + assert normalizedScores != null && weights != null && normalizers != null; + assert normalizedScores.length == sources.length; + + Explanation[] details = new Explanation[sources.length]; + for (int i = 0; i < sources.length; i++) { + final String queryAlias = queryNames[i] == null ? "" : " [" + queryNames[i] + "]"; + final String queryIdentifier = "at index [" + i + "]" + queryAlias; + final float weight = weights == null ? DEFAULT_WEIGHT : weights[i]; + final float normalizedScore = normalizedScores == null ? DEFAULT_SCORE : normalizedScores[i]; + final String normalizer = normalizers == null ? DEFAULT_NORMALIZER.getName() : normalizers[i]; + if (normalizedScore > 0) { + details[i] = Explanation.match( + weight * normalizedScore, + "weighted score: [" + + weight * normalizedScore + + "] in query " + + queryIdentifier + + " computed as [" + + weight + + " * " + + normalizedScore + + "]" + + " using score normalizer [" + + normalizer + + "]" + + " for original matching query with score:", + sources[i] + ); + } else { + final String description = "weighted score: [0], result not found in query " + queryIdentifier; + details[i] = Explanation.noMatch(description); + } + } + return Explanation.match( + score, + "weighted linear combination score: [" + + score + + "] computed for normalized scores " + + Arrays.toString(normalizedScores) + + (weights == null ? 
"" : " and weights " + Arrays.toString(weights)) + + " as sum of (weight[i] * score[i]) for each query.", + details + ); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeOptionalFloatArray(weights); + out.writeOptionalFloatArray(normalizedScores); + out.writeOptionalStringArray(normalizers); + } + + @Override + protected void doToXContent(XContentBuilder builder, Params params) throws IOException { + if (weights != null) { + builder.field("weights", weights); + } + if (normalizedScores != null) { + builder.field("normalizedScores", normalizedScores); + } + if (normalizers != null) { + builder.field("normalizers", normalizers); + } + } + + @Override + public boolean doEquals(RankDoc rd) { + LinearRankDoc lrd = (LinearRankDoc) rd; + return Arrays.equals(weights, lrd.weights) + && Arrays.equals(normalizedScores, lrd.normalizedScores) + && Arrays.equals(normalizers, lrd.normalizers); + } + + @Override + public int doHashCode() { + int result = Objects.hash(Arrays.hashCode(weights), Arrays.hashCode(normalizedScores), Arrays.hashCode(normalizers)); + return 31 * result; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.LINEAR_RETRIEVER_SUPPORT; + } +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java new file mode 100644 index 0000000000000..66bbbf95bc9d6 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -0,0 +1,208 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.rank.linear; + +import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.util.Maps; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.license.LicenseUtils; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.rank.RankBuilder; +import org.elasticsearch.search.rank.RankDoc; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverParserContext; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.rank.rrf.RRFRankPlugin; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; +import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; + +/** + * The {@code LinearRetrieverBuilder} supports the combination of different retrievers through a weighted linear combination. + * For example, assume that we have retrievers r1 and r2, the final score of the {@code LinearRetrieverBuilder} is defined as + * {@code score(r)=w1*score(r1) + w2*score(r2)}. + * Each sub-retriever score can be normalized before being considered for the weighted linear sum, by setting the appropriate + * normalizer parameter. + * + */ +public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder { + + public static final String NAME = "linear"; + + public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers"); + + public static final float DEFAULT_SCORE = 0f; + + private final float[] weights; + private final ScoreNormalizer[] normalizers; + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + NAME, + false, + args -> { + List retrieverComponents = (List) args[0]; + int rankWindowSize = args[1] == null ? 
RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[1]; + List innerRetrievers = new ArrayList<>(); + float[] weights = new float[retrieverComponents.size()]; + ScoreNormalizer[] normalizers = new ScoreNormalizer[retrieverComponents.size()]; + int index = 0; + for (LinearRetrieverComponent component : retrieverComponents) { + innerRetrievers.add(new RetrieverSource(component.retriever, null)); + weights[index] = component.weight; + normalizers[index] = component.normalizer; + index++; + } + return new LinearRetrieverBuilder(innerRetrievers, rankWindowSize, weights, normalizers); + } + ); + + static { + PARSER.declareObjectArray(constructorArg(), LinearRetrieverComponent::fromXContent, RETRIEVERS_FIELD); + PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); + RetrieverBuilder.declareBaseParserFields(NAME, PARSER); + } + + private static float[] getDefaultWeight(int size) { + float[] weights = new float[size]; + Arrays.fill(weights, DEFAULT_WEIGHT); + return weights; + } + + private static ScoreNormalizer[] getDefaultNormalizers(int size) { + ScoreNormalizer[] normalizers = new ScoreNormalizer[size]; + Arrays.fill(normalizers, IdentityScoreNormalizer.INSTANCE); + return normalizers; + } + + public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { + if (context.clusterSupportsFeature(LINEAR_RETRIEVER_SUPPORTED) == false) { + throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]"); + } + if (RRFRankPlugin.LINEAR_RETRIEVER_FEATURE.check(XPackPlugin.getSharedLicenseState()) == false) { + throw LicenseUtils.newComplianceException("linear retriever"); + } + return PARSER.apply(parser, context); + } + + LinearRetrieverBuilder(List innerRetrievers, int rankWindowSize) { + this(innerRetrievers, rankWindowSize, getDefaultWeight(innerRetrievers.size()), getDefaultNormalizers(innerRetrievers.size())); + } + + public LinearRetrieverBuilder( + List innerRetrievers, + int rankWindowSize, + float[] weights, + ScoreNormalizer[] normalizers + ) { + super(innerRetrievers, rankWindowSize); + if (weights.length != innerRetrievers.size()) { + throw new IllegalArgumentException("The number of weights must match the number of inner retrievers"); + } + if (normalizers.length != innerRetrievers.size()) { + throw new IllegalArgumentException("The number of normalizers must match the number of inner retrievers"); + } + this.weights = weights; + this.normalizers = normalizers; + } + + @Override + protected LinearRetrieverBuilder clone(List newChildRetrievers, List newPreFilterQueryBuilders) { + LinearRetrieverBuilder clone = new LinearRetrieverBuilder(newChildRetrievers, rankWindowSize, weights, normalizers); + clone.preFilterQueryBuilders = newPreFilterQueryBuilders; + clone.retrieverName = retrieverName; + return clone; + } + + @Override + protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBuilder) { + sourceBuilder.trackScores(true); + return sourceBuilder; + } + + @Override + protected RankDoc[] combineInnerRetrieverResults(List rankResults, boolean isExplain) { + Map docsToRankResults = Maps.newMapWithExpectedSize(rankWindowSize); + final String[] normalizerNames = Arrays.stream(normalizers).map(ScoreNormalizer::getName).toArray(String[]::new); + for (int result = 0; result < rankResults.size(); result++) { + final ScoreNormalizer normalizer = normalizers[result] == null ? 
IdentityScoreNormalizer.INSTANCE : normalizers[result]; + ScoreDoc[] originalScoreDocs = rankResults.get(result); + ScoreDoc[] normalizedScoreDocs = normalizer.normalizeScores(originalScoreDocs); + for (int scoreDocIndex = 0; scoreDocIndex < normalizedScoreDocs.length; scoreDocIndex++) { + LinearRankDoc rankDoc = docsToRankResults.computeIfAbsent( + new RankDoc.RankKey(originalScoreDocs[scoreDocIndex].doc, originalScoreDocs[scoreDocIndex].shardIndex), + key -> { + if (isExplain) { + LinearRankDoc doc = new LinearRankDoc(key.doc(), 0f, key.shardIndex(), weights, normalizerNames); + doc.normalizedScores = new float[rankResults.size()]; + return doc; + } else { + return new LinearRankDoc(key.doc(), 0f, key.shardIndex()); + } + } + ); + if (isExplain) { + rankDoc.normalizedScores[result] = normalizedScoreDocs[scoreDocIndex].score; + } + // if we do not have scores associated with this result set, just ignore its contribution to the final + // score computation by setting its score to 0. + final float docScore = false == Float.isNaN(normalizedScoreDocs[scoreDocIndex].score) + ? normalizedScoreDocs[scoreDocIndex].score + : DEFAULT_SCORE; + final float weight = Float.isNaN(weights[result]) ? DEFAULT_WEIGHT : weights[result]; + rankDoc.score += weight * docScore; + } + } + // sort the results based on the final score, tiebreaker based on smaller doc id + LinearRankDoc[] sortedResults = docsToRankResults.values().toArray(LinearRankDoc[]::new); + Arrays.sort(sortedResults); + // trim the results if needed, otherwise each shard will always return `rank_window_size` results. + LinearRankDoc[] topResults = new LinearRankDoc[Math.min(rankWindowSize, sortedResults.length)]; + for (int rank = 0; rank < topResults.length; ++rank) { + topResults[rank] = sortedResults[rank]; + topResults[rank].rank = rank + 1; + } + return topResults; + } + + @Override + public String getName() { + return NAME; + } + + public void doToXContent(XContentBuilder builder, Params params) throws IOException { + int index = 0; + if (innerRetrievers.isEmpty() == false) { + builder.startArray(RETRIEVERS_FIELD.getPreferredName()); + for (var entry : innerRetrievers) { + builder.startObject(); + builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); + builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); + builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); + builder.endObject(); + index++; + } + builder.endArray(); + } + builder.field(RANK_WINDOW_SIZE_FIELD.getPreferredName(), rankWindowSize); + } +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java new file mode 100644 index 0000000000000..bb0d79d3fe488 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.rank.linear; + +import org.elasticsearch.search.retriever.RetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverParserContext; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public class LinearRetrieverComponent implements ToXContentObject { + + public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); + public static final ParseField WEIGHT_FIELD = new ParseField("weight"); + public static final ParseField NORMALIZER_FIELD = new ParseField("normalizer"); + + static final float DEFAULT_WEIGHT = 1f; + static final ScoreNormalizer DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE; + + RetrieverBuilder retriever; + float weight; + ScoreNormalizer normalizer; + + public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, ScoreNormalizer normalizer) { + assert retrieverBuilder != null; + this.retriever = retrieverBuilder; + this.weight = weight == null ? DEFAULT_WEIGHT : weight; + this.normalizer = normalizer == null ? DEFAULT_NORMALIZER : normalizer; + if (this.weight < 0) { + throw new IllegalArgumentException("[weight] must be non-negative"); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(RETRIEVER_FIELD.getPreferredName(), retriever); + builder.field(WEIGHT_FIELD.getPreferredName(), weight); + builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); + return builder; + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "retriever-component", + false, + args -> { + RetrieverBuilder retrieverBuilder = (RetrieverBuilder) args[0]; + Float weight = (Float) args[1]; + ScoreNormalizer normalizer = (ScoreNormalizer) args[2]; + return new LinearRetrieverComponent(retrieverBuilder, weight, normalizer); + } + ); + + static { + PARSER.declareNamedObject(constructorArg(), (p, c, n) -> { + RetrieverBuilder innerRetriever = p.namedObject(RetrieverBuilder.class, n, c); + c.trackRetrieverUsage(innerRetriever.getName()); + return innerRetriever; + }, RETRIEVER_FIELD); + PARSER.declareFloat(optionalConstructorArg(), WEIGHT_FIELD); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ScoreNormalizer.valueOf(p.text()), + NORMALIZER_FIELD, + ObjectParser.ValueType.STRING + ); + } + + public static LinearRetrieverComponent fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { + return PARSER.apply(parser, context); + } +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java new file mode 100644 index 0000000000000..56b42b48a5d47 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.linear; + +import org.apache.lucene.search.ScoreDoc; + +public class MinMaxScoreNormalizer extends ScoreNormalizer { + + public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer(); + + public static final String NAME = "minmax"; + + private static final float EPSILON = 1e-6f; + + public MinMaxScoreNormalizer() {} + + @Override + public String getName() { + return NAME; + } + + @Override + public ScoreDoc[] normalizeScores(ScoreDoc[] docs) { + if (docs.length == 0) { + return docs; + } + // create a new array to avoid changing ScoreDocs in place + ScoreDoc[] scoreDocs = new ScoreDoc[docs.length]; + float min = Float.MAX_VALUE; + float max = Float.MIN_VALUE; + boolean atLeastOneValidScore = false; + for (ScoreDoc rd : docs) { + if (false == atLeastOneValidScore && false == Float.isNaN(rd.score)) { + atLeastOneValidScore = true; + } + if (rd.score > max) { + max = rd.score; + } + if (rd.score < min) { + min = rd.score; + } + } + if (false == atLeastOneValidScore) { + // we do not have any scores to normalize, so we just return the original array + return docs; + } + + boolean minEqualsMax = Math.abs(min - max) < EPSILON; + for (int i = 0; i < docs.length; i++) { + float score; + if (minEqualsMax) { + score = min; + } else { + score = (docs[i].score - min) / (max - min); + } + scoreDocs[i] = new ScoreDoc(docs[i].doc, score, docs[i].shardIndex); + } + return scoreDocs; + } +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java new file mode 100644 index 0000000000000..48334b9adf957 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.linear; + +import org.apache.lucene.search.ScoreDoc; + +/** + * A no-op {@link ScoreNormalizer} that does not modify the scores. 
+ */ +public abstract class ScoreNormalizer { + + public static ScoreNormalizer valueOf(String normalizer) { + if (MinMaxScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) { + return MinMaxScoreNormalizer.INSTANCE; + } else if (IdentityScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) { + return IdentityScoreNormalizer.INSTANCE; + + } else { + throw new IllegalArgumentException("Unknown normalizer [" + normalizer + "]"); + } + } + + public abstract String getName(); + + public abstract ScoreDoc[] normalizeScores(ScoreDoc[] docs); +} diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankPlugin.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankPlugin.java index 9404d863f1d28..251015b21ff50 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankPlugin.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRankPlugin.java @@ -17,6 +17,8 @@ import org.elasticsearch.search.rank.RankShardResult; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xpack.rank.linear.LinearRankDoc; +import org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder; import java.util.List; @@ -28,6 +30,12 @@ public class RRFRankPlugin extends Plugin implements SearchPlugin { License.OperationMode.ENTERPRISE ); + public static final LicensedFeature.Momentary LINEAR_RETRIEVER_FEATURE = LicensedFeature.momentary( + null, + "linear-retriever", + License.OperationMode.ENTERPRISE + ); + public static final String NAME = "rrf"; @Override @@ -35,7 +43,8 @@ public List getNamedWriteables() { return List.of( new NamedWriteableRegistry.Entry(RankBuilder.class, NAME, RRFRankBuilder::new), new NamedWriteableRegistry.Entry(RankShardResult.class, NAME, RRFRankShardResult::new), - new NamedWriteableRegistry.Entry(RankDoc.class, RRFRankDoc.NAME, RRFRankDoc::new) + new NamedWriteableRegistry.Entry(RankDoc.class, RRFRankDoc.NAME, RRFRankDoc::new), + new NamedWriteableRegistry.Entry(RankDoc.class, LinearRankDoc.NAME, LinearRankDoc::new) ); } @@ -46,6 +55,9 @@ public List getNamedXContent() { @Override public List> getRetrievers() { - return List.of(new RetrieverSpec<>(new ParseField(NAME), RRFRetrieverBuilder::fromXContent)); + return List.of( + new RetrieverSpec<>(new ParseField(NAME), RRFRetrieverBuilder::fromXContent), + new RetrieverSpec<>(new ParseField(LinearRetrieverBuilder.NAME), LinearRetrieverBuilder::fromXContent) + ); } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 93445a9ce5ac9..a32f7ba1f923d 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -101,6 +101,7 @@ public String getName() { protected RRFRetrieverBuilder clone(List newRetrievers, List newPreFilterQueryBuilders) { RRFRetrieverBuilder clone = new RRFRetrieverBuilder(newRetrievers, this.rankWindowSize, this.rankConstant); clone.preFilterQueryBuilders = newPreFilterQueryBuilders; + clone.retrieverName = retrieverName; return clone; } diff --git a/x-pack/plugin/rank-rrf/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification 
b/x-pack/plugin/rank-rrf/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index 605e999b66c66..528b7e35bee65 100644 --- a/x-pack/plugin/rank-rrf/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification +++ b/x-pack/plugin/rank-rrf/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -5,4 +5,4 @@ # 2.0. # -org.elasticsearch.xpack.rank.rrf.RRFFeatures +org.elasticsearch.xpack.rank.RankRRFFeatures diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRankDocTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRankDocTests.java new file mode 100644 index 0000000000000..051aa6bddb4d7 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRankDocTests.java @@ -0,0 +1,97 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.linear; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.search.rank.AbstractRankDocWireSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.rank.rrf.RRFRankPlugin; + +import java.io.IOException; +import java.util.List; + +public class LinearRankDocTests extends AbstractRankDocWireSerializingTestCase { + + protected LinearRankDoc createTestRankDoc() { + int queries = randomIntBetween(2, 20); + float[] weights = new float[queries]; + String[] normalizers = new String[queries]; + float[] normalizedScores = new float[queries]; + for (int i = 0; i < queries; i++) { + weights[i] = randomFloat(); + normalizers[i] = randomAlphaOfLengthBetween(1, 10); + normalizedScores[i] = randomFloat(); + } + LinearRankDoc rankDoc = new LinearRankDoc(randomNonNegativeInt(), randomFloat(), randomIntBetween(0, 1), weights, normalizers); + rankDoc.rank = randomNonNegativeInt(); + rankDoc.normalizedScores = normalizedScores; + return rankDoc; + } + + @Override + protected List getAdditionalNamedWriteables() { + try (RRFRankPlugin rrfRankPlugin = new RRFRankPlugin()) { + return rrfRankPlugin.getNamedWriteables(); + } catch (IOException ex) { + throw new AssertionError("Failed to create RRFRankPlugin", ex); + } + } + + @Override + protected Writeable.Reader instanceReader() { + return LinearRankDoc::new; + } + + @Override + protected LinearRankDoc mutateInstance(LinearRankDoc instance) throws IOException { + LinearRankDoc mutated = new LinearRankDoc( + instance.doc, + instance.score, + instance.shardIndex, + instance.weights, + instance.normalizers + ); + mutated.normalizedScores = instance.normalizedScores; + mutated.rank = instance.rank; + if (frequently()) { + mutated.doc = randomValueOtherThan(instance.doc, ESTestCase::randomNonNegativeInt); + } + if (frequently()) { + mutated.score = randomValueOtherThan(instance.score, ESTestCase::randomFloat); + } + if (frequently()) { + mutated.shardIndex = randomValueOtherThan(instance.shardIndex, ESTestCase::randomNonNegativeInt); + } + if (frequently()) { + mutated.rank = randomValueOtherThan(instance.rank, ESTestCase::randomNonNegativeInt); + } + if (frequently()) { + for (int i = 0; i < mutated.normalizedScores.length; i++) { + if (frequently()) { + 
mutated.normalizedScores[i] = randomFloat(); + } + } + } + if (frequently()) { + for (int i = 0; i < mutated.weights.length; i++) { + if (frequently()) { + mutated.weights[i] = randomFloat(); + } + } + } + if (frequently()) { + for (int i = 0; i < mutated.normalizers.length; i++) { + if (frequently()) { + mutated.normalizers[i] = randomValueOtherThan(instance.normalizers[i], () -> randomAlphaOfLengthBetween(1, 10)); + } + } + } + return mutated; + } +} diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java new file mode 100644 index 0000000000000..5cc66c6f50d3c --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.linear; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverBuilder; +import org.elasticsearch.search.retriever.RetrieverParserContext; +import org.elasticsearch.search.retriever.TestRetrieverBuilder; +import org.elasticsearch.test.AbstractXContentTestCase; +import org.elasticsearch.usage.SearchUsage; +import org.elasticsearch.xcontent.NamedXContentRegistry; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentParser; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static java.util.Collections.emptyList; + +public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase { + private static List xContentRegistryEntries; + + @BeforeClass + public static void init() { + xContentRegistryEntries = new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents(); + } + + @AfterClass + public static void afterClass() throws Exception { + xContentRegistryEntries = null; + } + + @Override + protected LinearRetrieverBuilder createTestInstance() { + int rankWindowSize = randomInt(100); + int num = randomIntBetween(1, 3); + List innerRetrievers = new ArrayList<>(); + float[] weights = new float[num]; + ScoreNormalizer[] normalizers = new ScoreNormalizer[num]; + for (int i = 0; i < num; i++) { + innerRetrievers.add( + new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) + ); + weights[i] = randomFloat(); + normalizers[i] = randomScoreNormalizer(); + } + return new LinearRetrieverBuilder(innerRetrievers, rankWindowSize, weights, normalizers); + } + + @Override + protected LinearRetrieverBuilder doParseInstance(XContentParser parser) throws IOException { + return (LinearRetrieverBuilder) RetrieverBuilder.parseTopLevelRetrieverBuilder( + parser, + new RetrieverParserContext(new SearchUsage(), n -> true) + ); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } + + @Override + protected NamedXContentRegistry xContentRegistry() { + List entries = new ArrayList<>(xContentRegistryEntries); + entries.add( + new 
NamedXContentRegistry.Entry( + RetrieverBuilder.class, + TestRetrieverBuilder.TEST_SPEC.getName(), + (p, c) -> TestRetrieverBuilder.TEST_SPEC.getParser().fromXContent(p, (RetrieverParserContext) c), + TestRetrieverBuilder.TEST_SPEC.getName().getForRestApiVersion() + ) + ); + entries.add( + new NamedXContentRegistry.Entry( + RetrieverBuilder.class, + new ParseField(LinearRetrieverBuilder.NAME), + (p, c) -> LinearRetrieverBuilder.PARSER.apply(p, (RetrieverParserContext) c) + ) + ); + return new NamedXContentRegistry(entries); + } + + private static ScoreNormalizer randomScoreNormalizer() { + if (randomBoolean()) { + return MinMaxScoreNormalizer.INSTANCE; + } else { + return IdentityScoreNormalizer.INSTANCE; + } + } +} diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java b/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java new file mode 100644 index 0000000000000..8af4ae307a51a --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/java/org/elasticsearch/xpack/rank/rrf/LinearRankClientYamlTestSuiteIT.java @@ -0,0 +1,45 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.rank.rrf; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; +import org.junit.ClassRule; + +/** Runs yaml rest tests. 
*/ +public class LinearRankClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .nodes(2) + .module("mapper-extras") + .module("rank-rrf") + .module("lang-painless") + .module("x-pack-inference") + .setting("xpack.license.self_generated.type", "trial") + .plugin("inference-service-test") + .build(); + + public LinearRankClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return ESClientYamlSuiteTestCase.createParameters(new String[] { "linear" }); + } + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/license/100_license.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/license/100_license.yml index cd227eec4e227..42d0fa1998246 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/license/100_license.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/license/100_license.yml @@ -111,3 +111,43 @@ setup: - match: { status: 403 } - match: { error.type: security_exception } - match: { error.reason: "current license is non-compliant for [Reciprocal Rank Fusion (RRF)]" } + + +--- +"linear retriever invalid license": + - requires: + cluster_features: [ "linear_retriever_supported" ] + reason: "Support for linear retriever" + + - do: + catch: forbidden + search: + index: test + body: + track_total_hits: false + fields: [ "text" ] + retriever: + linear: + retrievers: [ + { + knn: { + field: vector, + query_vector: [ 0.0 ], + k: 3, + num_candidates: 3 + } + }, + { + standard: { + query: { + term: { + text: term + } + } + } + } + ] + + - match: { status: 403 } + - match: { error.type: security_exception } + - match: { error.reason: "current license is non-compliant for [linear retriever]" } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml new file mode 100644 index 0000000000000..70db6c1543365 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -0,0 +1,1065 @@ +setup: + - requires: + cluster_features: [ "linear_retriever_supported" ] + reason: "Support for linear retriever" + test_runner_features: close_to + + - do: + indices.create: + index: test + body: + mappings: + properties: + vector: + type: dense_vector + dims: 1 + index: true + similarity: l2_norm + index_options: + type: flat + keyword: + type: keyword + other_keyword: + type: keyword + timestamp: + type: date + + - do: + bulk: + refresh: true + index: test + body: + - '{"index": {"_id": 1 }}' + - '{"vector": [1], "keyword": "one", "other_keyword": "other", "timestamp": "2021-01-01T00:00:00"}' + - '{"index": {"_id": 2 }}' + - '{"vector": [2], "keyword": "two", "timestamp": "2022-01-01T00:00:00"}' + - '{"index": {"_id": 3 }}' + - '{"vector": [3], "keyword": "three", "timestamp": "2023-01-01T00:00:00"}' + - '{"index": {"_id": 4 }}' + - '{"vector": [4], "keyword": "four", "other_keyword": "other", "timestamp": "2024-01-01T00:00:00"}' + +--- +"basic linear weighted combination of a standard and knn retrievers": + - do: + search: + index: test + body: + retriever: + linear: + 
retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 0.5 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._score: 5.0 } + - match: { hits.hits.1._id: "4" } + - match: { hits.hits.1._score: 2.0 } + +--- +"basic linear weighted combination - interleaved results": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + # this one will return docs 1 and doc 2 with scores 20 and 10 respectively + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 5.0 + } + } + ] + } + } + } + }, + weight: 2 + }, + { + # this one will return docs 3 and doc 4 with scores 15 and 12 respectively + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 5.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 4.0 + } + } + ] + } + } + } + }, + weight: 3 + } + ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._score: 20.0 } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1._score: 15.0 } + - match: { hits.hits.2._id: "4" } + - match: { hits.hits.2._score: 12.0 } + - match: { hits.hits.3._id: "2" } + - match: { hits.hits.3._score: 10.0 } + +--- +"should normalize initial scores": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 9.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 5.0 + } + } + ] + } + } + } + }, + weight: 10.0, + normalizer: "minmax" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "1" } + - match: {hits.hits.0._score: 10.0} + - match: { hits.hits.1._id: "2" } + - match: {hits.hits.1._score: 8.0} + - match: { hits.hits.2._id: "4" } + - match: {hits.hits.2._score: 2.0} + - match: { hits.hits.2._score: 2.0 } + - match: { hits.hits.3._id: "3" } + - close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } } + +--- +"should throw on unknown normalizer": + - do: + catch: /Unknown normalizer \[aardvark\]/ + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: "aardvark" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + +--- +"should throw on negative weights": + - do: + catch: /\[weight\] must be non-negative/ + search: + index: test + body: + retriever: + 
linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: -10 + } + ] + +--- +"pagination within a consistent rank_window_size": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 9.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 5.0 + } + } + ] + } + } + } + }, + weight: 10.0, + normalizer: "minmax" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + from: 2 + size: 1 + + - match: { hits.total.value: 4 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._score: 2.0 } + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 9.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 5.0 + } + } + ] + } + } + } + }, + weight: 10.0, + normalizer: "minmax" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + from: 3 + size: 1 + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "3" } + - close_to: { hits.hits.0._score: { value: 0.0, error: 0.001 } } + +--- +"should throw when rank_window_size less than size": + - do: + catch: "/\\[linear\\] requires \\[rank_window_size: 2\\] be greater than or equal to \\[size: 10\\]/" + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + match_all: { } + } + } + }, + weight: 10.0, + normalizer: "minmax" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + rank_window_size: 2 + size: 10 +--- +"should respect rank_window_size for normalization and returned hits": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 9.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 5.0 + } + } + ] + } + } + } + }, + weight: 1.0, + normalizer: "minmax" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + rank_window_size: 2 + size: 2 + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._score: 2.0 } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._score: 1.0 } + +--- +"explain 
should provide info on weights and inner retrievers": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 1.0 + } + } + ] + } + }, + _name: "my_standard_retriever" + } + }, + weight: 10.0, + normalizer: "minmax" + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 20.0 + } + ] + explain: true + size: 2 + + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._explanation.description: "/weighted.linear.combination.score:.\\[20.0].computed.for.normalized.scores.\\[.*,.1.0\\].and.weights.\\[10.0,.20.0\\].as.sum.of.\\(weight\\[i\\].*.score\\[i\\]\\).for.each.query./"} + - match: { hits.hits.0._explanation.details.0.value: 0.0 } + - match: { hits.hits.0._explanation.details.0.description: "/.*weighted.score.*result.not.found.in.query.at.index.\\[0\\].\\[my_standard_retriever\\]/" } + - match: { hits.hits.0._explanation.details.1.value: 20.0 } + - match: { hits.hits.0._explanation.details.1.description: "/.*weighted.score.*using.score.normalizer.\\[none\\].*/" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._explanation.description: "/weighted.linear.combination.score:.\\[10.0].computed.for.normalized.scores.\\[1.0,.0.0\\].and.weights.\\[10.0,.20.0\\].as.sum.of.\\(weight\\[i\\].*.score\\[i\\]\\).for.each.query./"} + - match: { hits.hits.1._explanation.details.0.value: 10.0 } + - match: { hits.hits.1._explanation.details.0.description: "/.*weighted.score.*\\[my_standard_retriever\\].*using.score.normalizer.\\[minmax\\].*/" } + - match: { hits.hits.1._explanation.details.1.value: 0.0 } + - match: { hits.hits.1._explanation.details.1.description: "/.*weighted.score.*result.not.found.in.query.at.index.\\[1\\]/" } + +--- +"collapsing results": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 0.5 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + collapse: + field: other_keyword + inner_hits: { + name: sub_hits, + sort: + { + keyword: { + order: desc + } + } + } + - match: { hits.hits.0._id: "1" } + - length: { hits.hits.0.inner_hits.sub_hits.hits.hits : 2 } + - match: { hits.hits.0.inner_hits.sub_hits.hits.hits.0._id: "1" } + - match: { hits.hits.0.inner_hits.sub_hits.hits.hits.1._id: "4" } + +--- +"multiple nested linear retrievers": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 0.5 + }, + { + retriever: { + linear: { + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 20.0 + } + } + } + } + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + } + } + ] + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0._score: 
40.0 } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._score: 5.0 } + - match: { hits.hits.2._id: "4" } + - match: { hits.hits.2._score: 2.0 } + +--- +"linear retriever with filters": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 0.5 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + filter: + term: + keyword: "four" + + + - match: { hits.total.value: 1 } + - length: {hits.hits: 1} + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._score: 2.0 } + +--- +"linear retriever with filters on nested retrievers": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + filter: { + term: { + keyword: "four" + } + } + } + }, + weight: 0.5 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 1 } + - length: {hits.hits: 1} + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._score: 2.0 } + + +--- +"linear retriever with custom sort and score for nested retrievers": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + bool: { + should: [ + { + term: { + keyword: { + value: "one" # this will give doc 1 a normalized score of 10 because min == max + } + } + }, + { + term: { + keyword: { + value: "two" # this will give doc 2 a normalized score of 10 because min == max + } + } + } ] + } + }, + boost: 10.0 + } + }, + sort: { + timestamp: { + order: "asc" + } + } + } + }, + weight: 1.0, + normalizer: "minmax" + }, + { + # because we're sorting on timestamp and use a rank window size of 3, we will only get to see + # docs 3 and 2. + # their `scores` (which are the timestamps) are: + # doc 3: 1672531200000 (2023-01-01T00:00:00) + # doc 2: 1640995200000 (2022-01-01T00:00:00) + # doc 1: 1609459200000 (2021-01-01T00:00:00) + # and their normalized scores based on the provided conf + # will be: + # normalized(doc3) = 1. 
+ # normalized(doc2) = 0.5 + # normalized(doc1) = 0 + retriever: { + standard: { + query: { + function_score: { + query: { + bool: { + should: [ + { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 9.0 + } + }, + { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 1.0 + } + } + ] + } + }, + functions: [ { + script_score: { + script: { + source: "doc['timestamp'].value.millis" + } + } + } ], + "boost_mode": "replace" + } + }, + sort: { + timestamp: { + order: "desc" + } + } + } + }, + weight: 1.0, + normalizer: "minmax" + } + ] + rank_window_size: 3 + size: 2 + + - match: { hits.total.value: 3 } + - length: {hits.hits: 2} + - match: { hits.hits.0._id: "2" } + - close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._score: 10 } diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java index 09dda0f708a86..dcf993ea4ce7a 100644 --- a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java @@ -344,13 +344,24 @@ private static String populateOtherUser() throws IOException { return otherUser; } + private void performRequestWithAdminUserIgnoreNotFound(RestClient targetFulfillingClusterClient, Request request) throws IOException { + try { + performRequestWithAdminUser(targetFulfillingClusterClient, request); + } catch (ResponseException e) { + if (e.getResponse().getStatusLine().getStatusCode() != 404) { + throw e; + } + logger.info("Ignored \"not found\" exception", e); + } + } + @After public void wipeData() throws Exception { CheckedConsumer wipe = client -> { - performRequestWithAdminUser(client, new Request("DELETE", "/employees")); - performRequestWithAdminUser(client, new Request("DELETE", "/employees2")); - performRequestWithAdminUser(client, new Request("DELETE", "/employees3")); - performRequestWithAdminUser(client, new Request("DELETE", "/_enrich/policy/countries")); + performRequestWithAdminUserIgnoreNotFound(client, new Request("DELETE", "/employees")); + performRequestWithAdminUserIgnoreNotFound(client, new Request("DELETE", "/employees2")); + performRequestWithAdminUserIgnoreNotFound(client, new Request("DELETE", "/employees3")); + performRequestWithAdminUserIgnoreNotFound(client, new Request("DELETE", "/_enrich/policy/countries")); }; wipe.accept(fulfillingClusterClient); wipe.accept(client()); diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/DataStreamsUpgradeIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/DataStreamsUpgradeIT.java index 746c8c926086e..2d229d7ffece5 100644 --- a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/DataStreamsUpgradeIT.java +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/DataStreamsUpgradeIT.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.time.Instant; -import java.util.HashSet; import java.util.List; 
import java.util.Map; import java.util.Set; @@ -269,15 +268,10 @@ private static void createAndRolloverDataStream(String dataStreamName, int numRo private void upgradeDataStream(String dataStreamName, int numRolloversOnOldCluster) throws Exception { Set indicesNeedingUpgrade = getDataStreamIndices(dataStreamName); - Set closedOldIndices = getClosedIndices(dataStreamName); final int explicitRolloverOnNewClusterCount = randomIntBetween(0, 2); for (int i = 0; i < explicitRolloverOnNewClusterCount; i++) { String oldIndexName = rollover(dataStreamName); if (randomBoolean()) { - if (i == 0) { - // Since this is the first rollover on the new cluster, the old index came from the old cluster - closedOldIndices.add(oldIndexName); - } closeIndex(oldIndexName); } } @@ -305,39 +299,51 @@ private void upgradeDataStream(String dataStreamName, int numRolloversOnOldClust statusResponse.getEntity().getContent(), false ); + String statusResponseString = statusResponseMap.keySet() + .stream() + .map(key -> key + "=" + statusResponseMap.get(key)) + .collect(Collectors.joining(", ", "{", "}")); assertOK(statusResponse); - assertThat(statusResponseMap.get("complete"), equalTo(true)); + assertThat(statusResponseString, statusResponseMap.get("complete"), equalTo(true)); final int originalWriteIndex = 1; if (isOriginalClusterSameMajorVersionAsCurrent()) { assertThat( + statusResponseString, statusResponseMap.get("total_indices_in_data_stream"), equalTo(originalWriteIndex + numRolloversOnOldCluster + explicitRolloverOnNewClusterCount) ); // If the original cluster was the same as this one, we don't want any indices reindexed: - assertThat(statusResponseMap.get("total_indices_requiring_upgrade"), equalTo(0)); - assertThat(statusResponseMap.get("successes"), equalTo(0)); + assertThat(statusResponseString, statusResponseMap.get("total_indices_requiring_upgrade"), equalTo(0)); + assertThat(statusResponseString, statusResponseMap.get("successes"), equalTo(0)); } else { // The number of rollovers that will have happened when we call reindex: final int rolloversPerformedByReindex = explicitRolloverOnNewClusterCount == 0 ? 1 : 0; final int expectedTotalIndicesInDataStream = originalWriteIndex + numRolloversOnOldCluster + explicitRolloverOnNewClusterCount + rolloversPerformedByReindex; - assertThat(statusResponseMap.get("total_indices_in_data_stream"), equalTo(expectedTotalIndicesInDataStream)); + assertThat( + statusResponseString, + statusResponseMap.get("total_indices_in_data_stream"), + equalTo(expectedTotalIndicesInDataStream) + ); /* * total_indices_requiring_upgrade is made up of: (the original write index) + numRolloversOnOldCluster. The number of * rollovers on the upgraded cluster is irrelevant since those will not be reindexed. 
*/ assertThat( + statusResponseString, statusResponseMap.get("total_indices_requiring_upgrade"), - equalTo(originalWriteIndex + numRolloversOnOldCluster - closedOldIndices.size()) + equalTo(originalWriteIndex + numRolloversOnOldCluster) ); - assertThat(statusResponseMap.get("successes"), equalTo(numRolloversOnOldCluster + 1 - closedOldIndices.size())); + assertThat(statusResponseString, statusResponseMap.get("successes"), equalTo(numRolloversOnOldCluster + 1)); // We expect all the original indices to have been deleted for (String oldIndex : indicesNeedingUpgrade) { - if (closedOldIndices.contains(oldIndex) == false) { - assertThat(indexExists(oldIndex), equalTo(false)); - } + assertThat(statusResponseString, indexExists(oldIndex), equalTo(false)); } - assertThat(getDataStreamIndices(dataStreamName).size(), equalTo(expectedTotalIndicesInDataStream)); + assertThat( + statusResponseString, + getDataStreamIndices(dataStreamName).size(), + equalTo(expectedTotalIndicesInDataStream) + ); } }, 60, TimeUnit.SECONDS); Request cancelRequest = new Request("POST", "_migration/reindex/" + dataStreamName + "/_cancel"); @@ -356,29 +362,6 @@ private Set getDataStreamIndices(String dataStreamName) throws IOExcepti return indices.stream().map(index -> index.get("index_name").toString()).collect(Collectors.toSet()); } - @SuppressWarnings("unchecked") - private Set getClosedIndices(String dataStreamName) throws IOException { - Set allIndices = getDataStreamIndices(dataStreamName); - Set closedIndices = new HashSet<>(); - Response response = client().performRequest(new Request("GET", "_cluster/state/blocks/indices")); - Map responseMap = XContentHelper.convertToMap(JsonXContent.jsonXContent, response.getEntity().getContent(), false); - Map blocks = (Map) responseMap.get("blocks"); - Map indices = (Map) blocks.get("indices"); - for (Map.Entry indexEntry : indices.entrySet()) { - String indexName = indexEntry.getKey(); - if (allIndices.contains(indexName)) { - Map blocksForIndex = (Map) indexEntry.getValue(); - for (Map.Entry blockEntry : blocksForIndex.entrySet()) { - Map block = (Map) blockEntry.getValue(); - if ("index closed".equals(block.get("description"))) { - closedIndices.add(indexName); - } - } - } - } - return closedIndices; - } - /* * Similar to isOriginalClusterCurrent, but returns true if the major versions of the clusters are the same. So true * for 8.6 and 8.17, but false for 7.17 and 8.18.