Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle master failure in NodeSeenService #77220

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ public void testShardStatusStaysCompleteAfterNodeLeaves() throws Exception {
* Similar to the previous test, but ensures that the status stays at `COMPLETE` when the node is offline at the time the shutdown is
* registered. This may happen if {@link NodeSeenService} isn't working as expected.
*/
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76689")
public void testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToRestartName = internalCluster().startNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

package org.elasticsearch.xpack.shutdown;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
Expand Down Expand Up @@ -46,8 +46,11 @@ public void clusterChanged(ClusterChangedEvent event) {
return;
}

if (event.nodesAdded() == false) {
// If there's no new nodes this cluster state update, nothing to do.
final boolean thisNodeJustBecameMaster = event.previousState().nodes().isLocalNodeElectedMaster() == false
&& event.state().nodes().isLocalNodeElectedMaster();
if ((event.nodesAdded() || thisNodeJustBecameMaster) == false) {
logger.error("GWB> Bailing early");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably remove this line :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, I had those in there to help verify my reading that the == false was missing 🤦

Removed.

// If 1) no new nodes were added in this cluster state update and 2) this node has not just become the master node, there is nothing to do
return;
}

Expand All @@ -67,6 +70,7 @@ public void clusterChanged(ClusterChangedEvent event) {
.collect(Collectors.toUnmodifiableSet());

if (nodesNotPreviouslySeen.isEmpty() == false) {
logger.error("GWB> Submitting update task for nodes [{}]", nodesNotPreviouslySeen);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this one :)

clusterService.submitStateUpdateTask("shutdown-seen-nodes-updater", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
Expand All @@ -86,6 +90,7 @@ public ClusterState execute(ClusterState currentState) throws Exception {

final NodesShutdownMetadata newNodesMetadata = new NodesShutdownMetadata(newShutdownMetadataMap);
if (newNodesMetadata.equals(currentShutdownMetadata)) {
logger.error("GWB> Bailing update task as it's a no-op");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this one :)

// Turns out the update was a no-op
return currentState;
}
Expand Down