Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7.x] Add target_node_name for REPLACE shutdown type (#77151) #77278

Merged
merged 5 commits into from
Sep 7, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.elasticsearch.cluster.metadata;

import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.AbstractDiffable;
import org.elasticsearch.cluster.Diffable;
import org.elasticsearch.common.io.stream.StreamInput;
Expand All @@ -33,13 +35,16 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
ToXContentObject,
Diffable<SingleNodeShutdownMetadata> {

public static final Version REPLACE_SHUTDOWN_TYPE_ADDED_VERSION = Version.V_7_16_0;

public static final ParseField NODE_ID_FIELD = new ParseField("node_id");
public static final ParseField TYPE_FIELD = new ParseField("type");
public static final ParseField REASON_FIELD = new ParseField("reason");
public static final String STARTED_AT_READABLE_FIELD = "shutdown_started";
public static final ParseField STARTED_AT_MILLIS_FIELD = new ParseField(STARTED_AT_READABLE_FIELD + "millis");
public static final ParseField ALLOCATION_DELAY_FIELD = new ParseField("allocation_delay");
public static final ParseField NODE_SEEN_FIELD = new ParseField("node_seen");
public static final ParseField TARGET_NODE_NAME_FIELD = new ParseField("target_node_name");

public static final ConstructingObjectParser<SingleNodeShutdownMetadata, Void> PARSER = new ConstructingObjectParser<>(
"node_shutdown_info",
Expand All @@ -49,7 +54,8 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
(String) a[2],
(long) a[3],
(boolean) a[4],
(TimeValue) a[5]
(TimeValue) a[5],
(String) a[6]
)
);

Expand All @@ -64,6 +70,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
(p, c) -> TimeValue.parseTimeValue(p.textOrNull(), ALLOCATION_DELAY_FIELD.getPreferredName()), ALLOCATION_DELAY_FIELD,
ObjectParser.ValueType.STRING_OR_NULL
);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TARGET_NODE_NAME_FIELD);
}

public static SingleNodeShutdownMetadata parse(XContentParser parser) {
Expand All @@ -78,6 +85,7 @@ public static SingleNodeShutdownMetadata parse(XContentParser parser) {
private final long startedAtMillis;
private final boolean nodeSeen;
@Nullable private final TimeValue allocationDelay;
@Nullable private final String targetNodeName;

/**
* @param nodeId The node ID that this shutdown metadata refers to.
Expand All @@ -91,7 +99,8 @@ private SingleNodeShutdownMetadata(
String reason,
long startedAtMillis,
boolean nodeSeen,
@Nullable TimeValue allocationDelay
@Nullable TimeValue allocationDelay,
@Nullable String targetNodeName
) {
this.nodeId = Objects.requireNonNull(nodeId, "node ID must not be null");
this.type = Objects.requireNonNull(type, "shutdown type must not be null");
Expand All @@ -102,6 +111,13 @@ private SingleNodeShutdownMetadata(
throw new IllegalArgumentException("shard allocation delay is only valid for RESTART-type shutdowns");
}
this.allocationDelay = allocationDelay;
if (targetNodeName != null && type != Type.REPLACE) {
throw new IllegalArgumentException(new ParameterizedMessage("target node name is only valid for REPLACE type shutdowns, " +
"but was given type [{}] and target node name [{}]", type, targetNodeName).getFormattedMessage());
} else if (targetNodeName == null && type == Type.REPLACE) {
throw new IllegalArgumentException("target node name is required for REPLACE type shutdowns");
}
this.targetNodeName = targetNodeName;
}

public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
Expand All @@ -111,6 +127,11 @@ public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
this.startedAtMillis = in.readVLong();
this.nodeSeen = in.readBoolean();
this.allocationDelay = in.readOptionalTimeValue();
if (in.getVersion().onOrAfter(REPLACE_SHUTDOWN_TYPE_ADDED_VERSION)) {
this.targetNodeName = in.readOptionalString();
} else {
this.targetNodeName = null;
}
}

/**
Expand Down Expand Up @@ -148,6 +169,13 @@ public boolean getNodeSeen() {
return nodeSeen;
}

/**
* @return The name of the node to be used as a replacement for this node, or null.
*/
public String getTargetNodeName() {
return targetNodeName;
}

/**
* @return The amount of time shard reallocation should be delayed for shards on this node, so that they will not be automatically
* reassigned while the node is restarting. Will be {@code null} for non-restart shutdowns.
Expand All @@ -165,11 +193,18 @@ public TimeValue getAllocationDelay() {
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(nodeId);
out.writeEnum(type);
if (out.getVersion().before(REPLACE_SHUTDOWN_TYPE_ADDED_VERSION) && this.type == SingleNodeShutdownMetadata.Type.REPLACE) {
out.writeEnum(SingleNodeShutdownMetadata.Type.REMOVE);
} else {
out.writeEnum(type);
}
out.writeString(reason);
out.writeVLong(startedAtMillis);
out.writeBoolean(nodeSeen);
out.writeOptionalTimeValue(allocationDelay);
if (out.getVersion().onOrAfter(REPLACE_SHUTDOWN_TYPE_ADDED_VERSION)) {
out.writeOptionalString(targetNodeName);
}
}

@Override
Expand All @@ -184,6 +219,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (allocationDelay != null) {
builder.field(ALLOCATION_DELAY_FIELD.getPreferredName(), allocationDelay.getStringRep());
}
if (targetNodeName != null) {
builder.field(TARGET_NODE_NAME_FIELD.getPreferredName(), targetNodeName);
}
}
builder.endObject();

Expand All @@ -200,7 +238,8 @@ && getNodeId().equals(that.getNodeId())
&& getType() == that.getType()
&& getReason().equals(that.getReason())
&& getNodeSeen() == that.getNodeSeen()
&& Objects.equals(allocationDelay, that.allocationDelay);
&& Objects.equals(allocationDelay, that.allocationDelay)
&& Objects.equals(targetNodeName, that.targetNodeName);
}

@Override
Expand All @@ -211,7 +250,8 @@ public int hashCode() {
getReason(),
getStartedAtMillis(),
getNodeSeen(),
allocationDelay
allocationDelay,
targetNodeName
);
}

Expand All @@ -228,7 +268,8 @@ public static Builder builder(SingleNodeShutdownMetadata original) {
.setType(original.getType())
.setReason(original.getReason())
.setStartedAtMillis(original.getStartedAtMillis())
.setNodeSeen(original.getNodeSeen());
.setNodeSeen(original.getNodeSeen())
.setTargetNodeName(original.getTargetNodeName());
}

public static class Builder {
Expand All @@ -238,6 +279,7 @@ public static class Builder {
private long startedAtMillis = -1;
private boolean nodeSeen = false;
private TimeValue allocationDelay;
private String targetNodeName;

private Builder() {}

Expand Down Expand Up @@ -295,6 +337,15 @@ public Builder setAllocationDelay(TimeValue allocationDelay) {
return this;
}

/**
* @param targetNodeName The name of the node which should be used to replcae this one. Only valid if the shutdown type is REPLACE.
* @return This builder.
*/
public Builder setTargetNodeName(String targetNodeName) {
this.targetNodeName = targetNodeName;
return this;
}

public SingleNodeShutdownMetadata build() {
if (startedAtMillis == -1) {
throw new IllegalArgumentException("start timestamp must be set");
Expand All @@ -306,7 +357,8 @@ public SingleNodeShutdownMetadata build() {
reason,
startedAtMillis,
nodeSeen,
allocationDelay
allocationDelay,
targetNodeName
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ private SingleNodeShutdownMetadata randomNodeShutdownInfo() {
.setStartedAtMillis(randomNonNegativeLong());
if (type.equals(SingleNodeShutdownMetadata.Type.RESTART) && randomBoolean()) {
builder.setAllocationDelay(TimeValue.parseTimeValue(randomTimeValue(), this.getTestName()));
} else if (type.equals(SingleNodeShutdownMetadata.Type.REPLACE)) {
builder.setTargetNodeName(randomAlphaOfLengthBetween(5,10));
}
return builder.setNodeSeen(randomBoolean())
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,14 @@ public void testRemainingDelayCalculationsWithUnrelatedShutdowns() throws Except
Map<String, SingleNodeShutdownMetadata> shutdowns = new HashMap<>();
int numberOfShutdowns = randomIntBetween(1,15);
for (int i = 0; i <= numberOfShutdowns; i++) {
final SingleNodeShutdownMetadata.Type type = randomFrom(EnumSet.allOf(SingleNodeShutdownMetadata.Type.class));
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
SingleNodeShutdownMetadata shutdown = SingleNodeShutdownMetadata.builder()
.setNodeId(randomValueOtherThan(lastNodeId, () -> randomAlphaOfLengthBetween(5,10)))
.setReason(this.getTestName())
.setStartedAtMillis(randomNonNegativeLong())
.setType(randomFrom(EnumSet.allOf(SingleNodeShutdownMetadata.Type.class)))
.setType(type)
.setTargetNodeName(targetNodeName)
.build();
shutdowns.put(shutdown.getNodeId(), shutdown);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;

Expand Down Expand Up @@ -176,13 +175,13 @@ public void testCannotAutoExpandToRemovingNode() {
}

private ClusterState prepareState(ClusterState initialState, SingleNodeShutdownMetadata.Type shutdownType) {
Map<String, SingleNodeShutdownMetadata> nodesShutdownInfo = new HashMap<>();

final String targetNodeName = shutdownType == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
final SingleNodeShutdownMetadata nodeShutdownMetadata = SingleNodeShutdownMetadata.builder()
.setNodeId(DATA_NODE.getId())
.setType(shutdownType)
.setReason(this.getTestName())
.setStartedAtMillis(1L)
.setTargetNodeName(targetNodeName)
.build();
NodesShutdownMetadata nodesShutdownMetadata = new NodesShutdownMetadata(new HashMap<>()).putSingleNodeMetadata(
nodeShutdownMetadata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,15 +356,21 @@ public void testStepCompletableIfAllShardsActive() {
ImmutableOpenMap.Builder<String, IndexMetadata> indices = ImmutableOpenMap.<String, IndexMetadata> builder().fPut(index.getName(),
indexMetadata);

final SingleNodeShutdownMetadata.Type type = randomFrom(
SingleNodeShutdownMetadata.Type.REMOVE,
SingleNodeShutdownMetadata.Type.REPLACE
);
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE)
.metadata(Metadata.builder()
.indices(indices.build())
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Collections.singletonMap("node1",
SingleNodeShutdownMetadata.builder()
.setType(randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.REPLACE))
.setType(type)
.setStartedAtMillis(randomNonNegativeLong())
.setReason("test")
.setNodeId("node1")
.setTargetNodeName(targetNodeName)
.build()))))
.nodes(DiscoveryNodes.builder()
.add(DiscoveryNode.createLocal(Settings.builder().put(node1Settings.build())
Expand Down Expand Up @@ -407,15 +413,21 @@ public void testStepBecomesUncompletable() {
ImmutableOpenMap.Builder<String, IndexMetadata> indices = ImmutableOpenMap.<String, IndexMetadata> builder().fPut(index.getName(),
indexMetadata);

final SingleNodeShutdownMetadata.Type type = randomFrom(
SingleNodeShutdownMetadata.Type.REMOVE,
SingleNodeShutdownMetadata.Type.REPLACE
);
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE)
.metadata(Metadata.builder()
.indices(indices.build())
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Collections.singletonMap("node1",
SingleNodeShutdownMetadata.builder()
.setType(randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.REPLACE))
.setType(type)
.setStartedAtMillis(randomNonNegativeLong())
.setReason("test")
.setNodeId("node1")
.setTargetNodeName(targetNodeName)
.build()))))
.nodes(DiscoveryNodes.builder()
.add(DiscoveryNode.createLocal(Settings.builder().put(node1Settings.build())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -580,14 +580,20 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"),
equalTo(Collections.emptySet()));

final SingleNodeShutdownMetadata.Type type = randomFrom(
SingleNodeShutdownMetadata.Type.REMOVE,
SingleNodeShutdownMetadata.Type.REPLACE
);
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
state = ClusterState.builder(state)
.metadata(Metadata.builder(state.metadata())
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Collections.singletonMap("shutdown_node",
SingleNodeShutdownMetadata.builder()
.setNodeId("shutdown_node")
.setReason("shut down for test")
.setStartedAtMillis(randomNonNegativeLong())
.setType(randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.REPLACE))
.setType(type)
.setTargetNodeName(targetNodeName)
.build())))
.build())
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,16 @@ public void testJobsVacateShuttingDownNode() throws Exception {
});

// Call the shutdown API for the chosen node.
final SingleNodeShutdownMetadata.Type type = randomFrom(SingleNodeShutdownMetadata.Type.values());
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
client().execute(
PutShutdownNodeAction.INSTANCE,
new PutShutdownNodeAction.Request(
nodeIdToShutdown.get(),
randomFrom(SingleNodeShutdownMetadata.Type.values()),
gwbrown marked this conversation as resolved.
Show resolved Hide resolved
"just testing",
null
)
null,
targetNodeName)
).actionGet();

// Wait for the desired end state of all 6 jobs running on nodes that are not shutting down.
Expand Down Expand Up @@ -144,14 +146,15 @@ public void testCloseJobVacatingShuttingDownNode() throws Exception {
});

// Call the shutdown API for the chosen node.
final SingleNodeShutdownMetadata.Type type = randomFrom(SingleNodeShutdownMetadata.Type.values());
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
client().execute(
PutShutdownNodeAction.INSTANCE,
new PutShutdownNodeAction.Request(
nodeIdToShutdown.get(),
randomFrom(SingleNodeShutdownMetadata.Type.values()),
nodeIdToShutdown.get(), type,
"just testing",
null
)
null,
targetNodeName)
)
.actionGet();

Expand Down
Loading