-
Notifications
You must be signed in to change notification settings - Fork 24.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move monitoring collection timeouts to coordinator (#67084)
With #66993 there is now support for coordinator-side timeouts on a `BroadcastRequest`, which includes requests for node stats and recoveries. This commit adjusts Monitoring to use these coordinator-side timeouts where applicable, which will prevent partial stats responses from accumulating on the master while one or more nodes are not responding quickly enough. It also enhances the message logged on a timeout to include the IDs of the nodes which did not respond in time. Closes #60188.
- Loading branch information
1 parent
1cbccb1
commit 1d2462e
Showing
15 changed files
with
359 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
...n/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/collector/TimeoutUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.monitoring.collector; | ||
|
||
import org.elasticsearch.ElasticsearchException; | ||
import org.elasticsearch.ElasticsearchTimeoutException; | ||
import org.elasticsearch.action.FailedNodeException; | ||
import org.elasticsearch.action.support.DefaultShardOperationFailedException; | ||
import org.elasticsearch.action.support.broadcast.BroadcastResponse; | ||
import org.elasticsearch.action.support.nodes.BaseNodeResponse; | ||
import org.elasticsearch.action.support.nodes.BaseNodesResponse; | ||
import org.elasticsearch.action.support.tasks.BaseTasksResponse; | ||
import org.elasticsearch.common.unit.TimeValue; | ||
import org.elasticsearch.transport.ReceiveTimeoutTransportException; | ||
|
||
import java.util.HashSet; | ||
import java.util.concurrent.TimeoutException; | ||
|
||
/** | ||
* Utilities for identifying timeouts in responses to collection requests, since we prefer to fail the whole collection attempt if any of | ||
* the involved nodes times out. | ||
*/ | ||
public final class TimeoutUtils { | ||
private TimeoutUtils() { | ||
} | ||
|
||
/** | ||
* @throws ElasticsearchTimeoutException iff the {@code response} contains any node-level timeout. The exception message identifies the | ||
* nodes that timed out and mentions {@code collectionTimeout}. | ||
*/ | ||
public static <T extends BaseNodeResponse> void ensureNoTimeouts(TimeValue collectionTimeout, BaseNodesResponse<T> response) { | ||
HashSet<String> timedOutNodeIds = null; | ||
for (FailedNodeException failedNodeException : response.failures()) { | ||
if (isTimeoutFailure(failedNodeException)) { | ||
if (timedOutNodeIds == null) { | ||
timedOutNodeIds = new HashSet<>(); | ||
} | ||
timedOutNodeIds.add(failedNodeException.nodeId()); | ||
} | ||
} | ||
ensureNoTimeouts(collectionTimeout, timedOutNodeIds); | ||
} | ||
|
||
/** | ||
* @throws ElasticsearchTimeoutException iff the {@code response} contains any node-level timeout. The exception message identifies the | ||
* nodes that timed out and mentions {@code collectionTimeout}. | ||
*/ | ||
public static void ensureNoTimeouts(TimeValue collectionTimeout, BaseTasksResponse response) { | ||
HashSet<String> timedOutNodeIds = null; | ||
for (ElasticsearchException nodeFailure : response.getNodeFailures()) { | ||
if (nodeFailure instanceof FailedNodeException) { | ||
FailedNodeException failedNodeException = (FailedNodeException) nodeFailure; | ||
if (isTimeoutFailure(failedNodeException)) { | ||
if (timedOutNodeIds == null) { | ||
timedOutNodeIds = new HashSet<>(); | ||
} | ||
timedOutNodeIds.add(failedNodeException.nodeId()); | ||
} | ||
} | ||
} | ||
ensureNoTimeouts(collectionTimeout, timedOutNodeIds); | ||
} | ||
|
||
/** | ||
* @throws ElasticsearchTimeoutException iff the {@code response} contains any node-level timeout. The exception message identifies the | ||
* nodes that timed out and mentions {@code collectionTimeout}. | ||
*/ | ||
public static void ensureNoTimeouts(TimeValue collectionTimeout, BroadcastResponse response) { | ||
HashSet<String> timedOutNodeIds = null; | ||
for (DefaultShardOperationFailedException shardFailure : response.getShardFailures()) { | ||
final Throwable shardFailureCause = shardFailure.getCause(); | ||
if (shardFailureCause instanceof FailedNodeException) { | ||
FailedNodeException failedNodeException = (FailedNodeException) shardFailureCause; | ||
if (isTimeoutFailure(failedNodeException)) { | ||
if (timedOutNodeIds == null) { | ||
timedOutNodeIds = new HashSet<>(); | ||
} | ||
timedOutNodeIds.add(failedNodeException.nodeId()); | ||
} | ||
} | ||
} | ||
ensureNoTimeouts(collectionTimeout, timedOutNodeIds); | ||
} | ||
|
||
private static boolean isTimeoutFailure(FailedNodeException failedNodeException) { | ||
final Throwable cause = failedNodeException.getCause(); | ||
return cause instanceof ElasticsearchTimeoutException | ||
|| cause instanceof TimeoutException | ||
|| cause instanceof ReceiveTimeoutTransportException; | ||
} | ||
|
||
private static void ensureNoTimeouts(TimeValue collectionTimeout, HashSet<String> timedOutNodeIds) { | ||
if (timedOutNodeIds != null) { | ||
throw new ElasticsearchTimeoutException((timedOutNodeIds.size() == 1 ? "node " : "nodes ") + timedOutNodeIds + | ||
" did not respond within [" + collectionTimeout + "]"); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.