diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java index 5b990e8b30cd8..3ee230ddbdffd 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java @@ -83,7 +83,6 @@ import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; -import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.tasks.TaskId; import java.util.Collections; @@ -833,10 +832,6 @@ public void testReindex() throws Exception { // tag::reindex-request-pipeline request.setDestPipeline("my_pipeline"); // <1> // end::reindex-request-pipeline - // tag::reindex-request-sort - request.addSortField("field1", SortOrder.DESC); // <1> - request.addSortField("field2", SortOrder.ASC); // <2> - // end::reindex-request-sort // tag::reindex-request-script request.setScript( new Script( diff --git a/docs/java-rest/high-level/document/reindex.asciidoc b/docs/java-rest/high-level/document/reindex.asciidoc index f30b9eef4a41b..c094a5f1ab7eb 100644 --- a/docs/java-rest/high-level/document/reindex.asciidoc +++ b/docs/java-rest/high-level/document/reindex.asciidoc @@ -89,16 +89,6 @@ include-tagged::{doc-tests-file}[{api}-request-pipeline] -------------------------------------------------- <1> set pipeline to `my_pipeline` -If you want a particular set of documents from the source index you’ll need to use sort. If possible, prefer a more -selective query to maxDocs and sort. 
- -["source","java",subs="attributes,callouts,macros"] --------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-request-sort] --------------------------------------------------- -<1> add descending sort to`field1` -<2> add ascending sort to `field2` - +{request}+ also supports a `script` that modifies the document. It allows you to also change the document's metadata. The following example illustrates that. diff --git a/docs/reference/docs/reindex.asciidoc b/docs/reference/docs/reindex.asciidoc index 688f340f2b33d..8e2110651a148 100644 --- a/docs/reference/docs/reindex.asciidoc +++ b/docs/reference/docs/reindex.asciidoc @@ -476,9 +476,14 @@ which defaults to a maximum size of 100 MB. (Optional, integer) Total number of slices. `sort`::: ++ +-- (Optional, list) A comma-separated list of `<field>:<direction>` pairs to sort by before indexing. Use in conjunction with `max_docs` to control what documents are reindexed. +deprecated::[7.6, Sort in reindex is deprecated. Sorting in reindex was never guaranteed to index documents in order and prevents further development of reindex such as resilience and performance improvements. If used in combination with `max_docs`, consider using a query filter instead.] +-- + `_source`::: (Optional, string) If `true` reindexes all source fields. Set to a list to reindex select fields. @@ -602,8 +607,8 @@ POST _reindex -------------------------------------------------- // TEST[setup:twitter] -[[docs-reindex-select-sort]] -===== Reindex select documents with sort +[[docs-reindex-select-max-docs]] +===== Reindex select documents with `max_docs` You can limit the number of processed documents by setting `max_docs`. For example, this request copies a single document from `twitter` to @@ -624,28 +629,6 @@ POST _reindex -------------------------------------------------- // TEST[setup:twitter] -You can use `sort` in conjunction with `max_docs` to select the documents you want to reindex. 
-Sorting makes the scroll less efficient but in some contexts it's worth it. -If possible, it's better to use a more selective query instead of `max_docs` and `sort`. - -For example, following request copies 10000 documents from `twitter` into `new_twitter`: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "max_docs": 10000, - "source": { - "index": "twitter", - "sort": { "date": "desc" } - }, - "dest": { - "index": "new_twitter" - } -} --------------------------------------------------- -// TEST[setup:twitter] - [[docs-reindex-multiple-indices]] ===== Reindex from multiple indices @@ -825,11 +808,10 @@ POST _reindex "index": "twitter", "query": { "function_score" : { - "query" : { "match_all": {} }, - "random_score" : {} + "random_score" : {}, + "min_score" : 0.9 <1> } - }, - "sort": "_score" <1> + } }, "dest": { "index": "random_twitter" @@ -838,8 +820,8 @@ POST _reindex ---------------------------------------------------------------- // TEST[setup:big_twitter] -<1> `_reindex` defaults to sorting by `_doc` so `random_score` will not have any -effect unless you override the sort to `_score`. +<1> You may need to adjust the `min_score` depending on the relative amount of +data extracted from source. [[reindex-scripts]] ===== Modify documents during reindexing diff --git a/docs/reference/ilm/ilm-with-existing-indices.asciidoc b/docs/reference/ilm/ilm-with-existing-indices.asciidoc index cff82df838806..96c1e4589de9a 100644 --- a/docs/reference/ilm/ilm-with-existing-indices.asciidoc +++ b/docs/reference/ilm/ilm-with-existing-indices.asciidoc @@ -352,6 +352,10 @@ will mean that all documents in `ilm-mylogs-000001` come before all documents in `ilm-mylogs-000002`, and so on. However, if this is not a requirement, omitting the sort will allow the data to be reindexed more quickly. +NOTE: Sorting in reindex is deprecated, see +<<docs-reindex-api-request-body, reindex request body>>. Instead use timestamp +ranges to partition data in separate reindex runs. 
+ IMPORTANT: If your data uses document IDs generated by means other than Elasticsearch's automatic ID generation, you may need to do additional processing to ensure that the document IDs don't conflict during the reindex, as @@ -404,4 +408,4 @@ PUT _cluster/settings All of the reindexed data should now be accessible via the alias set up above, in this case `mylogs`. Once you have verified that all the data has been reindexed and is available in the new indices, the existing indices can be -safely removed. \ No newline at end of file +safely removed. diff --git a/modules/reindex/src/main/java/org/elasticsearch/index/reindex/Reindexer.java b/modules/reindex/src/main/java/org/elasticsearch/index/reindex/Reindexer.java index 695b659443d98..6193d541c6ee4 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/index/reindex/Reindexer.java +++ b/modules/reindex/src/main/java/org/elasticsearch/index/reindex/Reindexer.java @@ -40,6 +40,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; @@ -51,6 +52,7 @@ import org.elasticsearch.index.reindex.remote.RemoteScrollableHitSource; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; +import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; @@ -71,6 +73,9 @@ public class Reindexer { private static final Logger logger = LogManager.getLogger(Reindexer.class); + private static final DeprecationLogger deprecationLogger = new DeprecationLogger(logger); + static final String SORT_DEPRECATED_MESSAGE = "The sort option in reindex is deprecated. 
" + + "Instead consider using query filtering to find the desired subset of data."; private final ClusterService clusterService; private final Client client; @@ -88,6 +93,10 @@ public class Reindexer { } public void initTask(BulkByScrollTask task, ReindexRequest request, ActionListener listener) { + SearchSourceBuilder searchSource = request.getSearchRequest().source(); + if (searchSource != null && searchSource.sorts() != null && searchSource.sorts().isEmpty() == false) { + deprecationLogger.deprecated(SORT_DEPRECATED_MESSAGE); + } BulkByScrollParallelizationHelper.initTaskState(task, request, client, listener); } diff --git a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexSingleNodeTests.java b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexSingleNodeTests.java new file mode 100644 index 0000000000000..1eaa42c296e3b --- /dev/null +++ b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexSingleNodeTests.java @@ -0,0 +1,60 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.reindex; + +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.test.ESSingleNodeTestCase; + +import java.util.Arrays; +import java.util.Collection; + +import static org.elasticsearch.index.reindex.ReindexTestCase.matcher; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; + +public class ReindexSingleNodeTests extends ESSingleNodeTestCase { + @Override + protected Collection<Class<? extends Plugin>> getPlugins() { + return Arrays.asList(ReindexPlugin.class); + } + + public void testDeprecatedSort() { + int max = between(2, 20); + for (int i = 0; i < max; i++) { + client().prepareIndex("source", "_doc").setId(Integer.toString(i)).setSource("foo", i).get(); + } + + client().admin().indices().prepareRefresh("source").get(); + assertHitCount(client().prepareSearch("source").setSize(0).get(), max); + + // Copy a subset of the docs sorted + int subsetSize = randomIntBetween(1, max - 1); + ReindexRequestBuilder copy = new ReindexRequestBuilder(client(), ReindexAction.INSTANCE) + .source("source").destination("dest").refresh(true); + copy.maxDocs(subsetSize); + copy.request().addSortField("foo", SortOrder.DESC); + assertThat(copy.get(), matcher().created(subsetSize)); + + assertHitCount(client().prepareSearch("dest").setSize(0).get(), subsetSize); + assertHitCount(client().prepareSearch("dest").setQuery(new RangeQueryBuilder("foo").gte(0).lt(max-subsetSize)).get(), 0); + assertWarnings(Reindexer.SORT_DEPRECATED_MESSAGE); + } +} diff --git a/modules/reindex/src/test/resources/rest-api-spec/test/reindex/30_search.yml b/modules/reindex/src/test/resources/rest-api-spec/test/reindex/30_search.yml index 908ab55673c56..fc2537dbcfb70 100644 --- a/modules/reindex/src/test/resources/rest-api-spec/test/reindex/30_search.yml +++ b/modules/reindex/src/test/resources/rest-api-spec/test/reindex/30_search.yml @@ -35,7 
+35,7 @@ "Sorting and size combined": - skip: version: " - 7.2.99" - reason: "deprecation warnings only emitted on 7.3+" + reason: "size deprecation warnings only emitted on 7.3+, but sort deprecated in 7.6" features: warnings - do: @@ -54,6 +54,8 @@ - do: warnings: - Deprecated field [size] used, expected [max_docs] instead + - The sort option in reindex is deprecated. Instead consider using query + filtering to find the desired subset of data. reindex: refresh: true body: @@ -123,8 +125,9 @@ --- "Sorting and max_docs in body combined": - skip: - version: " - 7.2.99" - reason: "max_docs introduced in 7.3.0" + version: " - 7.5.99" + reason: "max_docs introduced in 7.3.0, but sort deprecated in 7.6" + features: "warnings" - do: index: @@ -140,6 +143,9 @@ indices.refresh: {} - do: + warnings: + - The sort option in reindex is deprecated. Instead consider using query + filtering to find the desired subset of data. reindex: refresh: true body: diff --git a/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java b/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java index cb9a765f30da8..65d1c3d73d383 100644 --- a/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java +++ b/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java @@ -189,7 +189,10 @@ public ReindexRequest setSourceQuery(QueryBuilder queryBuilder) { * * @param name The name of the field to sort by * @param order The order in which to sort + * @deprecated Specifying a sort field for reindex is deprecated. If using this in combination with maxDocs, consider using a + * query filter instead. */ + @Deprecated public ReindexRequest addSortField(String name, SortOrder order) { this.getSearchRequest().source().sort(name, order); return this;