Skip to content

Commit

Permalink
Describe STALE_STATE_CONFIG in ClusterFormationFH (elastic#53878)
Browse files Browse the repository at this point in the history
We mark cluster states persisted on master-ineligible nodes as
potentially stale using the voting configuration `{STALE_STATE_CONFIG}`, which
prevents these nodes from being elected as master if they are restarted as
master-eligible. Today we do not handle this special voting configuration
differently in the `ClusterFormationFailureHelper`, leading to a mysterious
message `an election requires a node with id [STALE_STATE_CONFIG]` if the
election does not succeed.

This commit adds a special-case description of this situation to better
explain why this node cannot win an election.

Closes elastic#53734
  • Loading branch information
DaveCTurner committed Mar 20, 2020
1 parent 0cfe6d9 commit 879e26e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.gateway.GatewayMetaState;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.threadpool.ThreadPool.Names;

Expand Down Expand Up @@ -210,7 +211,12 @@ private String describeQuorum(VotingConfiguration votingConfiguration) {
assert requiredNodes <= realNodeIds.size() : nodeIds;

if (nodeIds.size() == 1) {
return "a node with id " + realNodeIds;
if (nodeIds.contains(GatewayMetaState.STALE_STATE_CONFIG_NODE_ID)) {
return "one or more nodes that have already participated as master-eligible nodes in the cluster but this node was " +
"not master-eligible the last time it joined the cluster";
} else {
return "a node with id " + realNodeIds;
}
} else if (nodeIds.size() == 2) {
return "two nodes with ids " + realNodeIds;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@
*/
public class GatewayMetaState implements Closeable {

/**
* Fake node ID for a voting configuration written by a master-ineligible data node to indicate that its on-disk state is potentially
* stale (since it is written asynchronously after application, rather than before acceptance). This node ID means that if the node is
* restarted as a master-eligible node then it does not win any elections until it has received a fresh cluster state.
*/
public static final String STALE_STATE_CONFIG_NODE_ID = "STALE_STATE_CONFIG";

// Set by calling start()
private final SetOnce<PersistedState> persistedState = new SetOnce<>();

Expand Down Expand Up @@ -425,7 +432,7 @@ protected void doRun() {
}

static final CoordinationMetaData.VotingConfiguration staleStateConfiguration =
new CoordinationMetaData.VotingConfiguration(Collections.singleton("STALE_STATE_CONFIG"));
new CoordinationMetaData.VotingConfiguration(Collections.singleton(STALE_STATE_CONFIG_NODE_ID));

static ClusterState resetVotingConfiguration(ClusterState clusterState) {
CoordinationMetaData newCoordinationMetaData = CoordinationMetaData.builder(clusterState.coordinationMetaData())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.gateway.GatewayMetaState;
import org.elasticsearch.test.ESTestCase;

import java.util.Arrays;
Expand Down Expand Up @@ -412,5 +413,14 @@ public void testDescriptionAfterBootstrapping() {
"have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + otherMasterNode + ", " + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0")));

assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, GatewayMetaState.STALE_STATE_CONFIG_NODE_ID), emptyList(),
emptyList(), 0L, electionStrategy).getDescription(),
is("master not discovered or elected yet, an election requires one or more nodes that have already participated as " +
"master-eligible nodes in the cluster but this node was not master-eligible the last time it joined the cluster, " +
"have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));

}
}

0 comments on commit 879e26e

Please sign in to comment.