-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Decommission retry/pr #66
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,9 @@ | |
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; | ||
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; | ||
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; | ||
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction; | ||
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; | ||
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; | ||
import org.opensearch.cluster.ClusterState; | ||
import org.opensearch.cluster.ClusterStateObserver; | ||
import org.opensearch.cluster.ClusterStateTaskConfig; | ||
|
@@ -72,6 +75,57 @@ public class DecommissionController { | |
this.threadPool = threadPool; | ||
} | ||
|
||
public void retryDecommissionAction( | ||
DecommissionRequest decommissionRequest, | ||
long startTime, | ||
ActionListener<DecommissionResponse> listener | ||
) { | ||
final long remainingTimeoutMS = decommissionRequest.getRetryTimeout().millis() - (threadPool.relativeTimeInMillis() - startTime); | ||
if (remainingTimeoutMS <= 0) { | ||
logger.debug( | ||
"timed out before retrying [{}] for attribute [{}] after cluster manager change", | ||
DecommissionAction.NAME, | ||
decommissionRequest.getDecommissionAttribute() | ||
); | ||
listener.onFailure( | ||
new OpenSearchTimeoutException( | ||
"timed out before retrying [{}] for attribute [{}] after cluster manager change", | ||
DecommissionAction.NAME, | ||
decommissionRequest.getDecommissionAttribute() | ||
) | ||
); | ||
return; | ||
} | ||
Comment on lines
+83
to
+98
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This timeout is a check for retry eligibility only. Other actions as part of decommission has their own timeouts. This is the place where we are actually triggering the retry and hence the check. If timed out, request will not be eligible for a retry There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we do without this ? In worst case, the retried action will timeout and we will throw a timeout exception . This part of the code is looking out of place from readability POV. |
||
decommissionRequest.setRetryOnClusterManagerChange(true); | ||
decommissionRequest.setRetryTimeout(TimeValue.timeValueMillis(remainingTimeoutMS)); | ||
transportService.sendRequest( | ||
transportService.getLocalNode(), | ||
DecommissionAction.NAME, | ||
decommissionRequest, | ||
new TransportResponseHandler<DecommissionResponse>() { | ||
@Override | ||
public void handleResponse(DecommissionResponse response) { | ||
listener.onResponse(response); | ||
} | ||
|
||
@Override | ||
public void handleException(TransportException exp) { | ||
listener.onFailure(exp); | ||
} | ||
|
||
@Override | ||
public String executor() { | ||
return ThreadPool.Names.SAME; | ||
} | ||
|
||
@Override | ||
public DecommissionResponse read(StreamInput in) throws IOException { | ||
return new DecommissionResponse(in); | ||
} | ||
} | ||
); | ||
} | ||
|
||
/** | ||
* Transport call to add nodes to voting config exclusion | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,11 +13,11 @@ | |
import org.apache.logging.log4j.message.ParameterizedMessage; | ||
import org.opensearch.OpenSearchTimeoutException; | ||
import org.opensearch.action.ActionListener; | ||
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; | ||
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; | ||
import org.opensearch.cluster.ClusterState; | ||
import org.opensearch.cluster.ClusterStateObserver; | ||
import org.opensearch.cluster.ClusterStateUpdateTask; | ||
import org.opensearch.cluster.NotClusterManagerException; | ||
import org.opensearch.cluster.metadata.Metadata; | ||
import org.opensearch.cluster.node.DiscoveryNode; | ||
import org.opensearch.cluster.routing.allocation.AllocationService; | ||
|
@@ -66,6 +66,7 @@ public class DecommissionService { | |
private final TransportService transportService; | ||
private final ThreadPool threadPool; | ||
private final DecommissionController decommissionController; | ||
private final long startTime; | ||
private volatile List<String> awarenessAttributes; | ||
private volatile Map<String, List<String>> forcedAwarenessAttributes; | ||
|
||
|
@@ -82,6 +83,7 @@ public DecommissionService( | |
this.transportService = transportService; | ||
this.threadPool = threadPool; | ||
this.decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); | ||
this.startTime = threadPool.relativeTimeInMillis(); | ||
this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); | ||
clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes); | ||
|
||
|
@@ -112,13 +114,14 @@ private void setForcedAwarenessAttributes(Settings forceSettings) { | |
* Starts the new decommission request and registers the metadata with status as {@link DecommissionStatus#INIT} | ||
* Once the status is updated, it tries to exclude to-be-decommissioned cluster manager eligible nodes from Voting Configuration | ||
* | ||
* @param decommissionAttribute register decommission attribute in the metadata request | ||
* @param decommissionRequest request for decommission action | ||
* @param listener register decommission listener | ||
*/ | ||
public void startDecommissionAction( | ||
final DecommissionAttribute decommissionAttribute, | ||
final DecommissionRequest decommissionRequest, | ||
final ActionListener<DecommissionResponse> listener | ||
) { | ||
final DecommissionAttribute decommissionAttribute = decommissionRequest.getDecommissionAttribute(); | ||
// register the metadata with status as INIT as first step | ||
clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { | ||
@Override | ||
|
@@ -128,6 +131,7 @@ public ClusterState execute(ClusterState currentState) { | |
DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); | ||
// check that request is eligible to proceed | ||
ensureEligibleRequest(decommissionAttributeMetadata, decommissionAttribute); | ||
ensureEligibleRetry(decommissionRequest, decommissionAttributeMetadata); | ||
decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); | ||
logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); | ||
return ClusterState.builder(currentState) | ||
|
@@ -156,15 +160,17 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS | |
decommissionAttributeMetadata.decommissionAttribute(), | ||
decommissionAttributeMetadata.status() | ||
); | ||
decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); | ||
assert decommissionAttributeMetadata.decommissionAttribute().equals(decommissionRequest.getDecommissionAttribute()); | ||
decommissionClusterManagerNodes(decommissionRequest, listener); | ||
} | ||
}); | ||
} | ||
|
||
private synchronized void decommissionClusterManagerNodes( | ||
final DecommissionAttribute decommissionAttribute, | ||
final DecommissionRequest decommissionRequest, | ||
ActionListener<DecommissionResponse> listener | ||
) { | ||
final DecommissionAttribute decommissionAttribute = decommissionRequest.getDecommissionAttribute(); | ||
ClusterState state = clusterService.getClusterApplierService().state(); | ||
// since here metadata is already registered with INIT, we can guarantee that no new node with decommission attribute can further | ||
// join the cluster | ||
|
@@ -210,18 +216,23 @@ public void onResponse(Void unused) { | |
failDecommissionedNodes(clusterService.getClusterApplierService().state()); | ||
} | ||
} else { | ||
// explicitly calling listener.onFailure with NotClusterManagerException as the local node is not the cluster manager | ||
// this will ensures that request is retried until cluster manager times out | ||
logger.info( | ||
"local node is not eligible to process the request, " | ||
+ "throwing NotClusterManagerException to attempt a retry on an eligible node" | ||
); | ||
listener.onFailure( | ||
new NotClusterManagerException( | ||
"node [" | ||
+ transportService.getLocalNode().toString() | ||
+ "] not eligible to execute decommission request. Will retry until timeout." | ||
) | ||
// since the local node is no longer cluster manager which could've happened due to leader abdication, | ||
// hence retrying the decommission action until it times out | ||
logger.info("local node is not eligible to process the request, " + "retrying the transport action until it times out"); | ||
decommissionController.retryDecommissionAction( | ||
decommissionRequest, | ||
startTime, | ||
ActionListener.delegateResponse(listener, (delegatedListener, t) -> { | ||
logger.debug( | ||
() -> new ParameterizedMessage( | ||
"failed to retry decommission action for attribute [{}]", | ||
decommissionRequest.getDecommissionAttribute() | ||
), | ||
t | ||
); | ||
clearVotingConfigExclusionAndUpdateStatus(false, false); // TODO - need to test this | ||
delegatedListener.onFailure(t); | ||
}) | ||
); | ||
Comment on lines
-213
to
236
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how are we responding to the user API call now? Earlier, the retried request on the newly elected cluster manager would respond to it. Would the same hold true now as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the response to the retried request is attached to the listener of the original request. The chain will continue if multiple retries get executed. |
||
} | ||
} | ||
|
@@ -468,6 +479,18 @@ private static void ensureEligibleRequest( | |
} | ||
} | ||
|
||
public void ensureEligibleRetry(DecommissionRequest decommissionRequest, DecommissionAttributeMetadata decommissionAttributeMetadata) { | ||
if (decommissionAttributeMetadata != null) { | ||
if (decommissionAttributeMetadata.status().equals(DecommissionStatus.INIT) | ||
&& decommissionRequest.retryOnClusterManagerChange() == false) { | ||
throw new DecommissioningFailedException( | ||
decommissionRequest.getDecommissionAttribute(), | ||
"concurrent request received to decommission attribute" | ||
); | ||
} | ||
} | ||
} | ||
|
||
private ActionListener<DecommissionStatus> statusUpdateListener() { | ||
return new ActionListener<>() { | ||
@Override | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how are we making sure that the user doesn't specify this parameter? did we evaluate putting this in the request context, as it is an internal detail of a request and not a user-facing one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you talking about the retry flag or the timeout?
For the flag: the REST action doesn't support accepting the retry flag from the user, and hence any request created at the REST layer will always have it set to false. Today, it is only set to true when we trigger the retry action from the service.
For the timeout: the default is set, and the user can set one according to their use case.
Let me know if this answers your question