Skip to content

Commit

Permalink
Add pre-upgrade check to test cluster routing allocation is enabled (#…
Browse files Browse the repository at this point in the history
…39340)

When following the steps mentioned in upgrade guide
https://www.elastic.co/guide/en/elastic-stack/6.6/upgrading-elastic-stack.html
if we disable the cluster shard allocation but fail to enable it after
upgrading the nodes and plugins, the next step of upgrading internal
indices fails. As we did not check the bulk request response for reindexing,
we delete the old index assuming it has been created. This is fatal
as we cannot recover from this state.

This commit adds a pre-upgrade check to test the cluster shard
allocation setting and fail upgrade if it is disabled. In case there
are search or bulk failures then we remove the read-only block and
fail the upgrade index request.

Closes #39339
  • Loading branch information
bizybot authored Mar 8, 2019
1 parent 19b184d commit bae7e71
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
package org.elasticsearch.xpack.core.upgrade;

public final class IndexUpgradeCheckVersion {
public static final int UPRADE_VERSION = 6;
public static final int UPGRADE_VERSION = 6;

private IndexUpgradeCheckVersion() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
*/
package org.elasticsearch.xpack.upgrade;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.Allocation;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.protocol.xpack.migration.UpgradeActionRequired;
Expand All @@ -18,7 +21,6 @@
import org.elasticsearch.xpack.core.upgrade.IndexUpgradeCheckVersion;

import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -51,7 +53,17 @@ public IndexUpgradeCheck(String name,
Function<IndexMetaData, UpgradeActionRequired> actionRequired,
Client client, ClusterService clusterService, String[] types, Script updateScript) {
this(name, actionRequired, client, clusterService, types, updateScript,
listener -> listener.onResponse(null), (t, listener) -> listener.onResponse(TransportResponse.Empty.INSTANCE));
(cs, listener) -> {
Allocation clusterRoutingAllocation = EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING
.get(cs.getMetaData().settings());
if (Allocation.NONE == clusterRoutingAllocation) {
listener.onFailure(new ElasticsearchException(
"pre-upgrade check failed, please enable cluster routing allocation using setting [{}]",
EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey()));
} else {
listener.onResponse(null);
}
}, (t, listener) -> listener.onResponse(TransportResponse.Empty.INSTANCE));
}

/**
Expand All @@ -69,11 +81,11 @@ public IndexUpgradeCheck(String name,
public IndexUpgradeCheck(String name,
Function<IndexMetaData, UpgradeActionRequired> actionRequired,
Client client, ClusterService clusterService, String[] types, Script updateScript,
Consumer<ActionListener<T>> preUpgrade,
BiConsumer<ClusterState, ActionListener<T>> preUpgrade,
BiConsumer<T, ActionListener<TransportResponse.Empty>> postUpgrade) {
this.name = name;
this.actionRequired = actionRequired;
this.reindexer = new InternalIndexReindexer<>(client, clusterService, IndexUpgradeCheckVersion.UPRADE_VERSION, updateScript,
this.reindexer = new InternalIndexReindexer<>(client, clusterService, IndexUpgradeCheckVersion.UPGRADE_VERSION, updateScript,
types, preUpgrade, postUpgrade);
}

Expand Down Expand Up @@ -106,4 +118,9 @@ public void upgrade(TaskId task, IndexMetaData indexMetaData, ClusterState state
ActionListener<BulkByScrollResponse> listener) {
reindexer.upgrade(task, indexMetaData.getIndex().getName(), state, listener);
}

// pkg scope for testing
InternalIndexReindexer getInternalIndexReindexer() {
return reindexer;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
*/
package org.elasticsearch.xpack.upgrade;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.Client;
Expand All @@ -15,6 +18,7 @@
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
Expand All @@ -25,7 +29,6 @@
import org.elasticsearch.transport.TransportResponse;

import java.util.function.BiConsumer;
import java.util.function.Consumer;

import static org.elasticsearch.index.IndexSettings.same;

Expand All @@ -39,17 +42,18 @@
* - Delete index .{name} and add alias .{name} to .{name}-6
*/
public class InternalIndexReindexer<T> {
private static final Logger logger = LogManager.getLogger(InternalIndexReindexer.class);

private final Client client;
private final ClusterService clusterService;
private final Script transformScript;
private final String[] types;
private final int version;
private final Consumer<ActionListener<T>> preUpgrade;
private final BiConsumer<ClusterState, ActionListener<T>> preUpgrade;
private final BiConsumer<T, ActionListener<TransportResponse.Empty>> postUpgrade;

public InternalIndexReindexer(Client client, ClusterService clusterService, int version, Script transformScript, String[] types,
Consumer<ActionListener<T>> preUpgrade,
BiConsumer<ClusterState,ActionListener<T>> preUpgrade,
BiConsumer<T, ActionListener<TransportResponse.Empty>> postUpgrade) {
this.client = client;
this.clusterService = clusterService;
Expand All @@ -62,7 +66,7 @@ public InternalIndexReindexer(Client client, ClusterService clusterService, int

public void upgrade(TaskId task, String index, ClusterState clusterState, ActionListener<BulkByScrollResponse> listener) {
ParentTaskAssigningClient parentAwareClient = new ParentTaskAssigningClient(client, task);
preUpgrade.accept(ActionListener.wrap(
preUpgrade.accept(clusterState, ActionListener.wrap(
t -> innerUpgrade(parentAwareClient, index, clusterState, ActionListener.wrap(
response -> postUpgrade.accept(t, ActionListener.wrap(
empty -> listener.onResponse(response),
Expand All @@ -76,32 +80,61 @@ public void upgrade(TaskId task, String index, ClusterState clusterState, Action
private void innerUpgrade(ParentTaskAssigningClient parentAwareClient, String index, ClusterState clusterState,
ActionListener<BulkByScrollResponse> listener) {
String newIndex = index + "-" + version;
logger.trace("upgrading index {} to new index {}", index, newIndex);
try {
checkMasterAndDataNodeVersion(clusterState);
parentAwareClient.admin().indices().prepareCreate(newIndex).execute(ActionListener.wrap(createIndexResponse ->
setReadOnlyBlock(index, ActionListener.wrap(setReadOnlyResponse ->
reindex(parentAwareClient, index, newIndex, ActionListener.wrap(
bulkByScrollResponse -> // Successful completion of reindexing - delete old index
removeReadOnlyBlock(parentAwareClient, index, ActionListener.wrap(unsetReadOnlyResponse ->
parentAwareClient.admin().indices().prepareAliases().removeIndex(index)
.addAlias(newIndex, index).execute(ActionListener.wrap(deleteIndexResponse ->
listener.onResponse(bulkByScrollResponse), listener::onFailure
)), listener::onFailure
)),
e -> // Something went wrong during reindexing - remove readonly flag and report the error
removeReadOnlyBlock(parentAwareClient, index, ActionListener.wrap(unsetReadOnlyResponse -> {
listener.onFailure(e);
}, e1 -> {
listener.onFailure(e);
}))
)), listener::onFailure
)), listener::onFailure
));
parentAwareClient.admin().indices().prepareCreate(newIndex).execute(ActionListener.wrap(createIndexResponse -> {
setReadOnlyBlock(index, ActionListener.wrap(
setReadOnlyResponse -> reindex(parentAwareClient, index, newIndex, ActionListener.wrap(bulkByScrollResponse -> {
if ((bulkByScrollResponse.getBulkFailures() != null
&& bulkByScrollResponse.getBulkFailures().isEmpty() == false)
|| (bulkByScrollResponse.getSearchFailures() != null
&& bulkByScrollResponse.getSearchFailures().isEmpty() == false)) {
ElasticsearchException ex = logAndThrowExceptionForFailures(bulkByScrollResponse);
removeReadOnlyBlockOnReindexFailure(parentAwareClient, index, listener, ex);
} else {
// Successful completion of reindexing - remove read only and delete old index
removeReadOnlyBlock(parentAwareClient, index,
ActionListener.wrap(unsetReadOnlyResponse -> parentAwareClient.admin().indices().prepareAliases()
.removeIndex(index).addAlias(newIndex, index)
.execute(ActionListener.wrap(
deleteIndexResponse -> listener.onResponse(bulkByScrollResponse),
listener::onFailure)),
listener::onFailure));
}
}, e -> {
logger.error("error occurred while reindexing", e);
removeReadOnlyBlockOnReindexFailure(parentAwareClient, index, listener, e);
})), listener::onFailure));
}, listener::onFailure));
} catch (Exception ex) {
logger.error("error occurred while upgrading index", ex);
removeReadOnlyBlockOnReindexFailure(parentAwareClient, index, listener, ex);
listener.onFailure(ex);
}
}

private void removeReadOnlyBlockOnReindexFailure(ParentTaskAssigningClient parentAwareClient, String index,
ActionListener<BulkByScrollResponse> listener, Exception ex) {
removeReadOnlyBlock(parentAwareClient, index, ActionListener.wrap(unsetReadOnlyResponse -> {
listener.onFailure(ex);
}, e1 -> {
listener.onFailure(ex);
}));
}

private ElasticsearchException logAndThrowExceptionForFailures(BulkByScrollResponse bulkByScrollResponse) {
String bulkFailures = (bulkByScrollResponse.getBulkFailures() != null)
? Strings.collectionToCommaDelimitedString(bulkByScrollResponse.getBulkFailures())
: "";
String searchFailures = (bulkByScrollResponse.getSearchFailures() != null)
? Strings.collectionToCommaDelimitedString(bulkByScrollResponse.getSearchFailures())
: "";
logger.error("error occurred while reindexing, bulk failures [{}], search failures [{}]", bulkFailures, searchFailures);
return new ElasticsearchException("error occurred while reindexing, bulk failures [{}], search failures [{}]", bulkFailures,
searchFailures);
}

private void checkMasterAndDataNodeVersion(ClusterState clusterState) {
if (clusterState.nodes().getMinNodeVersion().before(Upgrade.UPGRADE_INTRODUCED)) {
throw new IllegalStateException("All nodes should have at least version [" + Upgrade.UPGRADE_INTRODUCED + "] to upgrade");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public void testInternalUpgradePrePostChecks() throws Exception {
}
},
client(), internalCluster().clusterService(internalCluster().getMasterName()), Strings.EMPTY_ARRAY, null,
listener -> {
(cs, listener) -> {
assertFalse(preUpgradeIsCalled.getAndSet(true));
assertFalse(postUpgradeIsCalled.get());
listener.onResponse(val);
Expand Down
Loading

0 comments on commit bae7e71

Please sign in to comment.