diff --git a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java index d270f651..e3e713a8 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java @@ -34,6 +34,7 @@ import hudson.Util; import hudson.init.Terminator; import hudson.model.Node; +import hudson.model.Result; import hudson.model.TaskListener; import hudson.remoting.Channel; import hudson.remoting.ChannelClosedException; @@ -69,12 +70,14 @@ import org.jenkinsci.plugins.workflow.actions.LabelAction; import org.jenkinsci.plugins.workflow.graph.FlowNode; import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl; +import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; import org.jenkinsci.plugins.workflow.steps.Step; import org.jenkinsci.plugins.workflow.steps.StepContext; import org.jenkinsci.plugins.workflow.steps.StepDescriptor; import org.jenkinsci.plugins.workflow.steps.StepExecution; import org.jenkinsci.plugins.workflow.support.concurrent.Timeout; import org.jenkinsci.plugins.workflow.support.concurrent.WithThreadName; +import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution; import org.kohsuke.accmod.Restricted; import org.kohsuke.accmod.restrictions.DoNotUse; import org.kohsuke.accmod.restrictions.NoExternalUse; @@ -318,7 +321,7 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab return false; } - private @CheckForNull FilePath getWorkspace() throws AbortException { + private @CheckForNull FilePath getWorkspace() throws IOException, InterruptedException { if (ws == null) { ws = FilePathUtils.find(node, remote); if (ws == null) { @@ -326,10 +329,10 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab // (Note that a Computer may be missing because a Node is offline, // and conversely after removing a Node its Computer may remain for a while. // Therefore we only fail here if _both_ are absent.) + // ExecutorStepExecution.NodeListener will normally do this first, so this is a fallback. Jenkins j = Jenkins.getInstanceOrNull(); if (!node.isEmpty() && j != null && j.getNode(node) == null) { - // TODO or FlowInterruptedException? - throw new AbortException("Agent " + node + " was removed"); + throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeCause()); } LOGGER.log(Level.FINE, "Jenkins is not running, no such node {0}, or it is offline", node); return null; @@ -478,9 +481,11 @@ private void check() { final FilePath workspace; try { workspace = getWorkspace(); - } catch (AbortException x) { + } catch (IOException | InterruptedException x) { recurrencePeriod = 0; - getContext().onFailure(x); + if (causeOfStoppage == null) { // do not doubly terminate + getContext().onFailure(x); + } return; } if (workspace == null) { diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index 13264417..6b58086b 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -48,8 +48,10 @@ import java.util.logging.Logger; import javax.annotation.CheckForNull; import javax.annotation.Nullable; +import jenkins.model.CauseOfInterruption; import jenkins.model.Jenkins; import jenkins.model.Jenkins.MasterComputer; +import jenkins.model.NodeListener; import jenkins.model.queue.AsynchronousExecution; import jenkins.security.QueueItemAuthenticator; import jenkins.security.QueueItemAuthenticatorProvider; @@ -68,11 +70,14 @@ import org.jenkinsci.plugins.workflow.graph.FlowNode; import org.jenkinsci.plugins.workflow.graphanalysis.FlowScanningUtils; import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl; +import org.jenkinsci.plugins.workflow.steps.BodyExecution; import org.jenkinsci.plugins.workflow.steps.BodyExecutionCallback; import org.jenkinsci.plugins.workflow.steps.StepContext; import org.jenkinsci.plugins.workflow.steps.durable_task.Messages; import org.jenkinsci.plugins.workflow.support.actions.WorkspaceActionImpl; import org.jenkinsci.plugins.workflow.support.concurrent.Timeout; +import org.jenkinsci.plugins.workflow.support.pickles.ExecutorPickle; +import org.jenkinsci.plugins.workflow.support.pickles.WorkspaceListLeasePickle; import org.kohsuke.accmod.Restricted; import org.kohsuke.accmod.restrictions.DoNotUse; import org.kohsuke.accmod.restrictions.NoExternalUse; @@ -248,6 +253,40 @@ public void stop(Throwable cause) throws Exception { } + @Extension public static final class RemovedNodeListener extends NodeListener { + @Override protected void onDeleted(Node node) { + Computer c = node.toComputer(); + if (c == null || c.isOnline()) { + LOGGER.fine(() -> "computer for " + node.getNodeName() + " was missing or online, skipping"); + return; + } + for (Executor e : c.getExecutors()) { + Queue.Executable exec = e.getCurrentExecutable(); + if (exec instanceof PlaceholderTask.PlaceholderExecutable) { + PlaceholderTask task = ((PlaceholderTask.PlaceholderExecutable) exec).getParent(); + TaskListener listener = TaskListener.NULL; + try { + listener = task.context.get(TaskListener.class); + } catch (Exception x) { + LOGGER.log(Level.WARNING, null, x); + } + if (task.body == null) { + listener.getLogger().println("Agent " + node.getNodeName() + " was deleted, but do not have a node body to cancel"); + continue; + } + listener.getLogger().println("Agent " + node.getNodeName() + " was deleted; cancelling node body"); + task.body.cancel(new RemovedNodeCause()); + } + } + } + } + + public static final class RemovedNodeCause extends CauseOfInterruption { + @Override public String getShortDescription() { + return "Agent was removed"; + } + } + /** Transient handle of a running executor task. */ private static final class RunningTask { /** null until placeholder executable runs */ @@ -278,6 +317,17 @@ public static final class PlaceholderTask implements ContinuedTask, Serializable */ private String cookie; + /** + * Needed for {@link BodyExecution#cancel}. + * {@code transient} because we cannot save a {@link BodyExecution} in {@link PlaceholderTask}: + * {@link ExecutorPickle} is written to the stream first, which holds a {@link PlaceholderTask}, + * and the {@link BodyExecution} holds {@link PlaceholderTask.Callback} whose {@link WorkspaceList.Lease} + * is not processed by {@link WorkspaceListLeasePickle} since pickles are not recursive. + * So we make a best effort and only try to cancel a body within the current session. + * @see RemovedNodeListener + */ + private transient @CheckForNull BodyExecution body; + /** {@link Authentication#getName} of user of build, if known. */ private final @CheckForNull String auth; @@ -781,7 +831,7 @@ private final class PlaceholderExecutable implements ContinuableExecutable, Acce flowNode.addAction(new WorkspaceActionImpl(workspace, flowNode)); } listener.getLogger().println("Running on " + ModelHyperlinkNote.encodeTo(node) + " in " + workspace); - context.newBodyInvoker() + body = context.newBodyInvoker() .withContexts(exec, computer, env, workspace) .withCallback(new Callback(cookie, lease)) .start(); diff --git a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java index ac75cded..8d9afccc 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java @@ -77,6 +77,7 @@ import org.jenkinsci.plugins.workflow.steps.StepContext; import org.jenkinsci.plugins.workflow.steps.StepDescriptor; import org.jenkinsci.plugins.workflow.steps.StepExecution; +import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution; import org.jenkinsci.plugins.workflow.support.visualization.table.FlowGraphTable; import org.jenkinsci.plugins.workflow.support.visualization.table.FlowGraphTable.Row; import static org.junit.Assert.*; @@ -614,15 +615,15 @@ private static final class HelloNote extends ConsoleNote { @Issue("JENKINS-49707") @Test public void removingAgentIsFatal() throws Exception { - logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE); + logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE).record(ExecutorStepExecution.class, Level.FINE); DumbSlave s = j.createSlave("remote", null, null); WorkflowJob p = j.jenkins.createProject(WorkflowJob.class, "p"); p.setDefinition(new CpsFlowDefinition("node('remote') {isUnix() ? sh('sleep 1000000') : bat('ping -t 127.0.0.1 > nul')}", true)); WorkflowRun b = p.scheduleBuild2(0).waitForStart(); j.waitForMessage(Functions.isWindows() ? ">ping" : "+ sleep", b); j.jenkins.removeNode(s); - j.assertBuildStatus(Result.FAILURE, j.waitForCompletion(b)); - j.assertLogContains("Agent remote was removed", b); + j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); + j.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b); } @Issue("JENKINS-44521")