Skip to content

Commit

Permalink
Improve Node Shutdown Observability (elastic#78727)
Browse files Browse the repository at this point in the history
* Increase node shutdown logging level when stalled

* Add allocation explanation to STALLED response

* Adjust logging levels per review

* Include all SingleNodeShutdownMetadata fields in logs

* Depluralize `node_shutdown_decisions` per review

* Repluralize node_allocation_decisions when we're actually reading from the Allocation Explain API
  • Loading branch information
gwbrown authored and Gordon Brown committed Oct 8, 2021
1 parent ffa96e6 commit 8e76fc6
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.elasticsearch.cluster.metadata;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.routing.allocation.ShardAllocationDecision;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
Expand All @@ -20,25 +22,42 @@
import java.util.Objects;

public class ShutdownShardMigrationStatus implements Writeable, ToXContentObject {
private static final Version ALLOCATION_DECISION_ADDED_VERSION = Version.V_8_0_0;

private final SingleNodeShutdownMetadata.Status status;
private final long shardsRemaining;
@Nullable private final String explanation;
@Nullable private final ShardAllocationDecision allocationDecision;

public ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status status, long shardsRemaining) {
this(status, shardsRemaining, null);
this(status, shardsRemaining, null, null);
}

public ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status status, long shardsRemaining, @Nullable String explanation) {
this(status, shardsRemaining, explanation, null);
}

public ShutdownShardMigrationStatus(
SingleNodeShutdownMetadata.Status status,
long shardsRemaining,
@Nullable String explanation,
@Nullable ShardAllocationDecision allocationDecision
) {
this.status = Objects.requireNonNull(status, "status must not be null");
this.shardsRemaining = shardsRemaining;
this.explanation = explanation;
this.allocationDecision = allocationDecision;
}

public ShutdownShardMigrationStatus(StreamInput in) throws IOException {
this.status = in.readEnum(SingleNodeShutdownMetadata.Status.class);
this.shardsRemaining = in.readLong();
this.explanation = in.readOptionalString();
if (in.getVersion().onOrAfter(ALLOCATION_DECISION_ADDED_VERSION)) {
this.allocationDecision = in.readOptionalWriteable(ShardAllocationDecision::new);
} else {
this.allocationDecision = null;
}
}

public long getShardsRemaining() {
Expand All @@ -61,6 +80,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (Objects.nonNull(explanation)) {
builder.field("explanation", explanation);
}
if (Objects.nonNull(allocationDecision)) {
builder.startObject("node_allocation_decision");
{
allocationDecision.toXContent(builder, params);
}
builder.endObject();
}
builder.endObject();
return builder;
}
Expand All @@ -70,19 +96,25 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeEnum(status);
out.writeLong(shardsRemaining);
out.writeOptionalString(explanation);
if (out.getVersion().onOrAfter(ALLOCATION_DECISION_ADDED_VERSION)) {
out.writeOptionalWriteable(allocationDecision);
}
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if ((o instanceof ShutdownShardMigrationStatus) == false) return false;
ShutdownShardMigrationStatus that = (ShutdownShardMigrationStatus) o;
return shardsRemaining == that.shardsRemaining && status == that.status && Objects.equals(explanation, that.explanation);
return shardsRemaining == that.shardsRemaining
&& status == that.status
&& Objects.equals(explanation, that.explanation)
&& Objects.equals(allocationDecision, that.allocationDecision);
}

@Override
public int hashCode() {
return Objects.hash(status, shardsRemaining, explanation);
return Objects.hash(status, shardsRemaining, explanation, allocationDecision);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,34 @@ public int hashCode() {
);
}

@Override public String toString() {
final StringBuilder stringBuilder = new StringBuilder();
stringBuilder
.append("{")
.append("nodeId=[")
.append(nodeId)
.append(']')
.append(", type=[")
.append(type)
.append("], reason=[")
.append(reason)
.append(']');
if (allocationDelay != null) {
stringBuilder
.append(", allocationDelay=[")
.append(allocationDelay)
.append("]");
}
if (targetNodeName != null) {
stringBuilder
.append(", targetNodeName=[")
.append(targetNodeName)
.append("]");
}
stringBuilder.append("}");
return stringBuilder.toString();
}

public static Builder builder() {
return new Builder();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ protected void masterOperation(
public ClusterState execute(ClusterState currentState) throws Exception {
NodesShutdownMetadata currentShutdownMetadata = currentState.metadata().custom(NodesShutdownMetadata.TYPE);

logger.info("removing shutdown record for node [{}]", request.getNodeId());

return ClusterState.builder(currentState)
.metadata(
Metadata.builder(currentState.metadata())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,17 +260,15 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
return hasShardCopyOnOtherNode == false;
})
.peek(pair -> {
if (logger.isTraceEnabled()) { // don't serialize the decision unless we have to
logger.trace(
"node [{}] shutdown of type [{}] stalled: found shard [{}][{}] from index [{}] with negative decision: [{}]",
nodeId,
shutdownType,
pair.v1().getId(),
pair.v1().primary() ? "primary" : "replica",
pair.v1().shardId().getIndexName(),
Strings.toString(pair.v2())
);
}
logger.debug(
"node [{}] shutdown of type [{}] stalled: found shard [{}][{}] from index [{}] with negative decision: [{}]",
nodeId,
shutdownType,
pair.v1().getId(),
pair.v1().primary() ? "primary" : "replica",
pair.v1().shardId().getIndexName(),
Strings.toString(pair.v2())
);
})
.findFirst();

Expand All @@ -285,6 +283,7 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
} else if (unmovableShard.isPresent()) {
// We found a shard that can't be moved, so shard relocation is stalled. Blame the unmovable shard.
ShardRouting shardRouting = unmovableShard.get().v1();
ShardAllocationDecision decision = unmovableShard.get().v2();

return new ShutdownShardMigrationStatus(
SingleNodeShutdownMetadata.Status.STALLED,
Expand All @@ -294,7 +293,8 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
shardRouting.shardId().getId(),
shardRouting.primary() ? "primary" : "replica",
shardRouting.index().getName()
).getFormattedMessage()
).getFormattedMessage(),
decision
);
} else {
return new ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status.IN_PROGRESS, totalRemainingShards);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,6 @@ public ClusterState execute(ClusterState currentState) {
currentShutdownMetadata = new NodesShutdownMetadata(new HashMap<>());
}

// Verify that there's not already a shutdown metadata for this node
SingleNodeShutdownMetadata existingRecord = currentShutdownMetadata.getAllNodeMetadataMap().get(request.getNodeId());
if (existingRecord != null) {
logger.info(
"updating existing shutdown record for node [{}] of type [{}] with reason [{}] with new type [{}] and reason [{}]",
existingRecord.getNodeId(),
existingRecord.getType(),
existingRecord.getReason(),
request.getType(),
request.getReason()
);
}

final boolean nodeSeen = currentState.getNodes().nodeExists(request.getNodeId());

SingleNodeShutdownMetadata newNodeMetadata = SingleNodeShutdownMetadata.builder()
Expand All @@ -88,6 +75,14 @@ public ClusterState execute(ClusterState currentState) {
.setAllocationDelay(request.getAllocationDelay())
.build();

// Verify that there's not already a shutdown metadata for this node
SingleNodeShutdownMetadata existingRecord = currentShutdownMetadata.getAllNodeMetadataMap().get(request.getNodeId());
if (existingRecord != null) {
logger.info("updating existing shutdown record {} with new record {}", existingRecord, newNodeMetadata);
} else {
logger.info("creating shutdown record {}", newNodeMetadata);
}

return ClusterState.builder(currentState)
.metadata(
Metadata.builder(currentState.metadata())
Expand Down

0 comments on commit 8e76fc6

Please sign in to comment.