From 99ad1f2e15a48a9c92cb7d4bd45f1e903f6096be Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Mon, 29 Apr 2019 20:22:16 -0400
Subject: [PATCH 01/17] [JENKINS-49707] If a KubernetesComputer disconnects,
 remove the KubernetesSlave so that DurableTaskStep knows to clean up.

---
 pom.xml                                        | 13 +++++++------
 .../plugins/kubernetes/KubernetesLauncher.java | 18 ++++++++++++++++++
 .../pipeline/KubernetesPipelineTest.java       | 11 +++++++++++
 .../kubernetes/pipeline/terminatedPod.groovy   |  9 +++++++++
 4 files changed, 45 insertions(+), 6 deletions(-)
 create mode 100644 src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy

diff --git a/pom.xml b/pom.xml
index b1b66ccc45..5bb14714cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -45,10 +45,11 @@
     <connectorHost />
     <jenkins.host.address />
     <java.level>8</java.level>
-    <jenkins.version>2.138.4</jenkins.version>
+    <jenkins.version>2.150.1</jenkins.version>
     <no-test-jar>false</no-test-jar>
     <surefire.rerunFailingTestsCount>1</surefire.rerunFailingTestsCount>
     <pipeline-model-definition.version>1.3.7</pipeline-model-definition.version>
+    <workflow-support-plugin.version>3.1</workflow-support-plugin.version>
   </properties>
 
   <dependencies>
@@ -94,7 +95,7 @@
     <dependency> <!-- OnceRetentionStrategy -->
       <groupId>org.jenkins-ci.plugins</groupId>
       <artifactId>durable-task</artifactId>
-      <version>1.28</version>
+      <version>1.29</version>
     </dependency>
     <dependency>
       <groupId>org.jenkins-ci.plugins</groupId>
@@ -104,7 +105,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-step-api</artifactId>
-      <version>2.17</version>
+      <version>2.18</version>
     </dependency>
     <dependency> <!-- DeclarativeAgent -->
       <groupId>org.jenkinsci.plugins</groupId>
@@ -136,19 +137,19 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-support</artifactId>
-      <version>3.0</version>
+      <version>${workflow-support-plugin.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-durable-task-step</artifactId>
-      <version>2.27</version>
+      <version>2.31-SNAPSHOT</version> <!-- https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
       <scope>test</scope>
     </dependency>
     <dependency> <!-- SemaphoreStep -->
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-support</artifactId>
-      <version>3.0</version>
+      <version>${workflow-support-plugin.version}</version>
       <classifier>tests</classifier>
       <scope>test</scope>
     </dependency>
diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java
index 9e0e7f6936..1365c4514a 100644
--- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java
+++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java
@@ -39,6 +39,7 @@
 import org.kohsuke.stapler.DataBoundConstructor;
 
 import com.google.common.base.Throwables;
+import hudson.model.Slave;
 
 import hudson.model.TaskListener;
 import hudson.slaves.JNLPLauncher;
@@ -46,6 +47,7 @@
 
 import io.fabric8.kubernetes.api.model.Pod;
 import io.fabric8.kubernetes.client.KubernetesClient;
+import jenkins.model.Jenkins;
 
 /**
  * Launches on Kubernetes the specified {@link KubernetesComputer} instance.
@@ -159,4 +161,20 @@ public AllContainersRunningPodWatcher getWatcher() {
         return watcher;
     }
 
+    @Override
+    public void afterDisconnect(SlaveComputer computer, TaskListener listener) {
+        // TODO can this be done only if the pod is dead?
+        Slave node = computer.getNode();
+        if (node != null) {
+            try {
+                Jenkins.get().removeNode(node);
+                LOGGER.info(() -> "Removed disconnected agent " + node.getNodeName());
+            } catch (IOException x) {
+                LOGGER.log(Level.WARNING, x, () -> "Unable to remove Jenkins node " + node.getNodeName());
+            }
+        } else {
+            LOGGER.fine(() -> "Could not find disconnected agent " + computer.getName() + " to remove");
+        }
+    }
+
 }
diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
index 3305465a92..3cb190b461 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
@@ -50,6 +50,7 @@
 import org.jvnet.hudson.test.JenkinsRuleNonLocalhost;
 
 import hudson.model.Result;
+import org.jvnet.hudson.test.Issue;
 
 /**
  * @author Carlos Sanchez
@@ -334,4 +335,14 @@ public void runInPodWithRetention() throws Exception {
         r.assertBuildStatusSuccess(r.waitForCompletion(b));
         assertTrue(deletePods(cloud.connect(), getLabels(this, name), true));
     }
+
+    @Issue("JENKINS-49707")
+    @Test
+    public void terminatedPod() throws Exception {
+        r.waitForMessage("+ sleep", b);
+        deletePods(cloud.connect(), getLabels(this, name), false);
+        r.assertBuildStatus(Result.FAILURE, r.waitForCompletion(b));
+        r.assertLogContains(" was removed", b);
+    }
+
 }
diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
new file mode 100644
index 0000000000..6789d6127e
--- /dev/null
+++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
@@ -0,0 +1,9 @@
+podTemplate(label: 'runInPod', containers: [
+        containerTemplate(name: 'busybox', image: 'busybox', ttyEnabled: true, command: '/bin/cat'),
+    ]) {
+    node ('runInPod') {
+        container('busybox') {
+            sh 'sleep 9999999'
+        }
+    }
+}

From cae2572a63ad059c420d2f90965355b53fd7677c Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Mon, 29 Apr 2019 21:39:55 -0400
Subject: [PATCH 02/17] workflow-step-api-plugin.version

---
 pom.xml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index 5bb14714cd..ca658eea31 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,6 +50,7 @@
     <surefire.rerunFailingTestsCount>1</surefire.rerunFailingTestsCount>
     <pipeline-model-definition.version>1.3.7</pipeline-model-definition.version>
     <workflow-support-plugin.version>3.1</workflow-support-plugin.version>
+    <workflow-step-api-plugin.version>2.18</workflow-step-api-plugin.version>
   </properties>
 
   <dependencies>
@@ -105,7 +106,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-step-api</artifactId>
-      <version>2.18</version>
+      <version>${workflow-step-api-plugin.version}</version>
     </dependency>
     <dependency> <!-- DeclarativeAgent -->
       <groupId>org.jenkinsci.plugins</groupId>
@@ -130,7 +131,7 @@
     <dependency> <!-- StepConfigTester -->
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-step-api</artifactId>
-      <version>2.17</version>
+      <version>${workflow-step-api-plugin.version}</version>
       <classifier>tests</classifier>
       <scope>test</scope>
     </dependency>

From eed014f57d1f84b9cc7297614483c0b347c255c2 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Mon, 29 Apr 2019 21:41:35 -0400
Subject: [PATCH 03/17] Updated to
 https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104/commits/58b127fa34cd10bfc76659b86292832965fe7c95.

---
 .../plugins/kubernetes/pipeline/KubernetesPipelineTest.java  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
index 3cb190b461..1255dcfa4d 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
@@ -50,6 +50,7 @@
 import org.jvnet.hudson.test.JenkinsRuleNonLocalhost;
 
 import hudson.model.Result;
+import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution;
 import org.jvnet.hudson.test.Issue;
 
 /**
@@ -341,8 +342,8 @@ public void runInPodWithRetention() throws Exception {
     public void terminatedPod() throws Exception {
         r.waitForMessage("+ sleep", b);
         deletePods(cloud.connect(), getLabels(this, name), false);
-        r.assertBuildStatus(Result.FAILURE, r.waitForCompletion(b));
-        r.assertLogContains(" was removed", b);
+        r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
+        r.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b);
     }
 
 }

From c3c98a1ee17ad3a10d42bd939a07127c59767e8c Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Tue, 30 Apr 2019 17:25:09 -0400
Subject: [PATCH 04/17] Incremental deployment.

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index ca658eea31..e8e1fd4b9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-durable-task-step</artifactId>
-      <version>2.31-SNAPSHOT</version> <!-- https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
+      <version>2.31-rc898.994233111902</version> <!-- TODO https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
       <scope>test</scope>
     </dependency>
     <dependency> <!-- SemaphoreStep -->

From 4510310f594a2962c105a931de0c7893b97b3d02 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Tue, 30 Apr 2019 17:41:19 -0400
Subject: [PATCH 05/17] JDK 11 Javadoc failure.

---
 pom.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index e8e1fd4b9f..6a6d90ca2f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,8 @@
   <parent>
     <groupId>org.jenkins-ci.plugins</groupId>
     <artifactId>plugin</artifactId>
-    <version>3.42</version>
+    <version>3.43</version>
+    <relativePath />
   </parent>
 
   <groupId>org.csanchez.jenkins.plugins</groupId>

From 8569e835bce42d815e6c3fcc0c47531fdc203c0f Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 1 May 2019 09:06:58 -0400
Subject: [PATCH 06/17] =?UTF-8?q?Setting=20surefire.rerunFailingTestsCount?=
 =?UTF-8?q?=20to=200=20not=201.=20while=20this=20plugin=20had=20tried=20to?=
 =?UTF-8?q?=20turn=20off=20the=20rerun=20feature,=20what=20it=20actually?=
 =?UTF-8?q?=20did=20was=20make=20it=20so=20that=20failing=20tests=20are=20?=
 =?UTF-8?q?rerun=20once=E2=80=94i.e.,=20run=20twice=20before=20finally=20f?=
 =?UTF-8?q?ailing.=20This=20happens=20even=20when=20-Dtest=3D=E2=80=A6=20i?=
 =?UTF-8?q?s=20specified,=20which=20disables=20the=20profile=20in=20the=20?=
 =?UTF-8?q?parent=20POM,=20annoyingly=20making=20all=20failing=20local=20t?=
 =?UTF-8?q?ests=20run=20twice.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 6a6d90ca2f..9dc52fff4f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,7 +48,7 @@
     <java.level>8</java.level>
     <jenkins.version>2.150.1</jenkins.version>
     <no-test-jar>false</no-test-jar>
-    <surefire.rerunFailingTestsCount>1</surefire.rerunFailingTestsCount>
+    <surefire.rerunFailingTestsCount>0</surefire.rerunFailingTestsCount>
     <pipeline-model-definition.version>1.3.7</pipeline-model-definition.version>
     <workflow-support-plugin.version>3.1</workflow-support-plugin.version>
     <workflow-step-api-plugin.version>2.18</workflow-step-api-plugin.version>

From f11a18226d8dd95ab695f6a1ac38dd49e2aa59b3 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 1 May 2019 09:08:36 -0400
Subject: [PATCH 07/17] Copy-pasta.

---
 .../jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
index 6789d6127e..8e54c40221 100644
--- a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
+++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
@@ -1,7 +1,7 @@
-podTemplate(label: 'runInPod', containers: [
+podTemplate(label: 'terminatedPod', containers: [
         containerTemplate(name: 'busybox', image: 'busybox', ttyEnabled: true, command: '/bin/cat'),
     ]) {
-    node ('runInPod') {
+    node ('terminatedPod') {
         container('busybox') {
             sh 'sleep 9999999'
         }

From ce032c60f93fd9078e9780489400387f3331073f Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 1 May 2019 09:42:59 -0400
Subject: [PATCH 08/17] Redesigned fix to remove agents when a pod is deleted,
 rather than merely due to a Remoting channel being disconnected. Among other
 things, this fixes failures in RestartPipelineTest. (Perhaps attributable to
 org.jvnet.hudson.test.ChannelShutdownListener rather than production
 behavior, but still.)

---
 .../kubernetes/KubernetesLauncher.java        |  18 ---
 .../kubernetes/pod/retention/Reaper.java      | 137 ++++++++++++++++++
 .../pipeline/RestartPipelineTest.java         |  35 +++++
 .../pipeline/terminatedPodAfterRestart.groovy |  11 ++
 4 files changed, 183 insertions(+), 18 deletions(-)
 create mode 100644 src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
 create mode 100644 src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy

diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java
index 1365c4514a..9e0e7f6936 100644
--- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java
+++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java
@@ -39,7 +39,6 @@
 import org.kohsuke.stapler.DataBoundConstructor;
 
 import com.google.common.base.Throwables;
-import hudson.model.Slave;
 
 import hudson.model.TaskListener;
 import hudson.slaves.JNLPLauncher;
@@ -47,7 +46,6 @@
 
 import io.fabric8.kubernetes.api.model.Pod;
 import io.fabric8.kubernetes.client.KubernetesClient;
-import jenkins.model.Jenkins;
 
 /**
  * Launches on Kubernetes the specified {@link KubernetesComputer} instance.
@@ -161,20 +159,4 @@ public AllContainersRunningPodWatcher getWatcher() {
         return watcher;
     }
 
-    @Override
-    public void afterDisconnect(SlaveComputer computer, TaskListener listener) {
-        // TODO can this be done only if the pod is dead?
-        Slave node = computer.getNode();
-        if (node != null) {
-            try {
-                Jenkins.get().removeNode(node);
-                LOGGER.info(() -> "Removed disconnected agent " + node.getNodeName());
-            } catch (IOException x) {
-                LOGGER.log(Level.WARNING, x, () -> "Unable to remove Jenkins node " + node.getNodeName());
-            }
-        } else {
-            LOGGER.fine(() -> "Could not find disconnected agent " + computer.getName() + " to remove");
-        }
-    }
-
 }
diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
new file mode 100644
index 0000000000..15277a170c
--- /dev/null
+++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2019 CloudBees, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.csanchez.jenkins.plugins.kubernetes.pod.retention;
+
+import hudson.Extension;
+import hudson.model.Computer;
+import hudson.model.Node;
+import hudson.model.TaskListener;
+import hudson.slaves.Cloud;
+import hudson.slaves.ComputerListener;
+import hudson.slaves.EphemeralNode;
+import io.fabric8.kubernetes.api.model.Pod;
+import io.fabric8.kubernetes.client.KubernetesClientException;
+import io.fabric8.kubernetes.client.Watcher;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import jenkins.model.Jenkins;
+import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud;
+import org.csanchez.jenkins.plugins.kubernetes.KubernetesComputer;
+import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave;
+
+/**
+ * Checks for deleted pods corresponding to {@link KubernetesSlave} and ensures the node is removed from Jenkins too.
+ * <p>If the pod has been deleted, all of the associated state (running user processes, workspace, etc.) must also be gone;
+ * so there is no point in retaining this agent definition any further.
+ * ({@link KubernetesSlave} is not an {@link EphemeralNode}: it <em>does</em> support running across Jenkins restarts.)
+ * <p>Note that pod retention policies other than the default {@link Never} may disable this system,
+ * unless some external process or garbage collection policy results in pod deletion.
+ */
+@Extension
+public class Reaper extends ComputerListener implements Watcher<Pod> {
+    
+    private static final Logger LOGGER = Logger.getLogger(Reaper.class.getName());
+
+    /**
+     * Activate this feature only if and when some Kubernetes agent is actually used.
+     * Avoids touching the API server when this plugin is not even in use.
+     */
+    private final AtomicBoolean activated = new AtomicBoolean();
+
+    @Override
+    public void onOnline(Computer c, TaskListener listener) throws IOException, InterruptedException {
+        if (c instanceof KubernetesComputer && activated.compareAndSet(false, true)) {
+            activate();
+        }
+    }
+
+    private void activate() {
+        LOGGER.fine("Activating reaper");
+        // First check all existing nodes to see if they still have active pods.
+        // (We may have missed deletion events while Jenkins was shut off,
+        // or pods may have been deleted before any Kubernetes agent was brought online.)
+        for (Node n : new ArrayList<>(Jenkins.get().getNodes())) {
+            if (!(n instanceof KubernetesSlave)) {
+                continue;
+            }
+            KubernetesSlave ks = (KubernetesSlave) n;
+            String ns = ks.getNamespace();
+            String name = ks.getPodName();
+            try {
+                // TODO more efficient to do a single (or paged) list request, but tricky since there may be multiple clouds,
+                // and even within a single cloud an agent pod is permitted to use a nondefault namespace
+                if (ks.getKubernetesCloud().connect().pods().inNamespace(ns).withName(name).get() == null) {
+                    LOGGER.info(() -> ns + "/" + name + " seems to have been deleted, so removing corresponding Jenkins agent");
+                    Jenkins.get().removeNode(ks);
+                } else {
+                    LOGGER.fine(() -> ns + "/" + name + " still seems to exist, OK");
+                }
+            } catch (Exception x) {
+                LOGGER.log(Level.WARNING, "failed to do initial reap check for " + ns + "/" + name, x);
+            }
+        }
+        // Now set up a watch for any subsequent pod deletions.
+        for (Cloud c : Jenkins.get().clouds) {
+            if (!(c instanceof KubernetesCloud)) {
+                continue;
+            }
+            KubernetesCloud kc = (KubernetesCloud) c;
+            try {
+                kc.connect().pods().inAnyNamespace().watch(this);
+            } catch (Exception x) {
+                LOGGER.log(Level.WARNING, "failed to set up watcher on " + kc.getDisplayName(), x);
+            }
+        }
+    }
+
+    @Override
+    public void eventReceived(Watcher.Action action, Pod pod) {
+        if (action == Watcher.Action.DELETED) {
+            String ns = pod.getMetadata().getNamespace();
+            String name = pod.getMetadata().getName();
+            for (Node n : new ArrayList<>(Jenkins.get().getNodes())) {
+                if (!(n instanceof KubernetesSlave)) {
+                    continue;
+                }
+                KubernetesSlave ks = (KubernetesSlave) n;
+                if (ks.getNamespace().equals(ns) && ks.getPodName().equals(name)) {
+                    LOGGER.info(() -> ns + "/" + name + " was just deleted, so removing corresponding Jenkins agent");
+                    try {
+                        Jenkins.get().removeNode(ks);
+                        return;
+                    } catch (Exception x) {
+                        LOGGER.log(Level.WARNING, "failed to reap " + ns + "/" + name, x);
+                    }
+                }
+            }
+            LOGGER.fine(() -> "received deletion notice for " + ns + "/" + name + " which does not seem to correspond to any Jenkins agent");
+        }
+    }
+
+    @Override
+    public void onClose(KubernetesClientException cause) {
+        // TODO ignore, or do we need to manually reattach the watcher?
+        // AllContainersRunningPodWatcher is not reattached, but this is expected to be short-lived,
+        // useful only until the containers of a single pod start running.
+        // (At least when using kubernetes-client/java, the connection gets closed after 2m on GKE
+        // and you need to rerun the watch. Does the fabric8io client wrap this?)
+    }
+
+}
diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
index 40314eec22..a892f19581 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
@@ -56,11 +56,15 @@
 import org.jvnet.hudson.test.RestartableJenkinsNonLocalhostRule;
 
 import hudson.model.Node;
+import hudson.model.Result;
 import hudson.slaves.DumbSlave;
 import hudson.slaves.JNLPLauncher;
 import hudson.slaves.NodeProperty;
 import hudson.slaves.RetentionStrategy;
 import jenkins.model.JenkinsLocationConfiguration;
+import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution;
+import org.jvnet.hudson.test.Issue;
+import org.jvnet.hudson.test.JenkinsRule;
 
 public class RestartPipelineTest {
     protected static final String CONTAINER_ENV_VAR_VALUE = "container-env-var-value";
@@ -187,6 +191,37 @@ public void runInPodWithRestart() throws Exception {
         });
     }
 
+    @Issue("JENKINS-49707")
+    @Test
+    public void terminatedPodAfterRestart() throws Exception {
+        story.then(r -> {
+            configureCloud();
+            WorkflowJob p = r.jenkins.createProject(WorkflowJob.class, "p");
+            p.setDefinition(new CpsFlowDefinition(loadPipelineScript("terminatedPodAfterRestart.groovy"), true));
+            WorkflowRun b = p.scheduleBuild2(0).waitForStart();
+            r.waitForMessage("+ sleep", b);
+        });
+        story.then(r -> {
+            WorkflowRun b = r.jenkins.getItemByFullName("p", WorkflowJob.class).getBuildByNumber(1);
+            r.waitForMessage("Ready to run", b);
+            // Note that the test is cheating here slightly.
+            // The watch in Reaper is still running across the in-JVM restarts,
+            // whereas in production it would have been cancelled during the shutdown.
+            // But it does not matter since we are waiting for the agent to come back online after the restart,
+            // which is sufficient trigger to reactivate the reaper.
+            // Indeed we get two Reaper instances running, which independently remove the node.
+            deletePods(cloud.connect(), getLabels(this, name), false);
+            r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
+            while (!JenkinsRule.getLog(b).contains(new ExecutorStepExecution.RemovedNodeCause().getShortDescription())) {
+                // TODO JenkinsRule.waitForMessage has a race condition w.r.t. the termination cause printed by WorkflowRun.finish
+                Thread.sleep(100);
+            }
+            // Currently the logic in ExecutorStepExecution cannot handle a Jenkins restart so it prints the following.
+            // It does not matter since DurableTaskStep redundantly implements the same check.
+            r.assertLogContains(" was deleted, but do not have a node body to cancel", b);
+        });
+    }
+
     @Test
     public void getContainerLogWithRestart() throws Exception {
         story.then(r -> {
diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy
new file mode 100644
index 0000000000..c87a0ec733
--- /dev/null
+++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy
@@ -0,0 +1,11 @@
+package org.csanchez.jenkins.plugins.kubernetes.pipeline
+
+podTemplate(label: 'terminatedPodAfterRestart', containers: [
+        containerTemplate(name: 'busybox', image: 'busybox', ttyEnabled: true, command: '/bin/cat'),
+]) {
+    node ('terminatedPodAfterRestart') {
+        container('busybox') {
+                sh 'sleep 9999999'
+        }
+    }
+}

From 61307bfa0a667915f2dee4a3166bdd5742fc790e Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 1 May 2019 18:34:07 -0400
Subject: [PATCH 09/17] We will not in general be permitted to watch pods at
 cluster scope, so do not even try.

---
 .../jenkins/plugins/kubernetes/pod/retention/Reaper.java | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
index 15277a170c..8621731706 100644
--- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
+++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
@@ -24,6 +24,7 @@
 import hudson.slaves.ComputerListener;
 import hudson.slaves.EphemeralNode;
 import io.fabric8.kubernetes.api.model.Pod;
+import io.fabric8.kubernetes.client.KubernetesClient;
 import io.fabric8.kubernetes.client.KubernetesClientException;
 import io.fabric8.kubernetes.client.Watcher;
 import java.io.IOException;
@@ -76,7 +77,10 @@ private void activate() {
             String name = ks.getPodName();
             try {
                 // TODO more efficient to do a single (or paged) list request, but tricky since there may be multiple clouds,
-                // and even within a single cloud an agent pod is permitted to use a nondefault namespace
+                // and even within a single cloud an agent pod is permitted to use a nondefault namespace,
+                // yet we do not want to do an unnamespaced pod list for RBAC reasons.
+                // Could use a hybrid approach: first list all pods in the configured namespace for all clouds;
+                // then go back and individually check any unmatched agents with their configured namespace.
                 if (ks.getKubernetesCloud().connect().pods().inNamespace(ns).withName(name).get() == null) {
                     LOGGER.info(() -> ns + "/" + name + " seems to have been deleted, so removing corresponding Jenkins agent");
                     Jenkins.get().removeNode(ks);
@@ -94,7 +98,8 @@ private void activate() {
             }
             KubernetesCloud kc = (KubernetesCloud) c;
             try {
-                kc.connect().pods().inAnyNamespace().watch(this);
+                KubernetesClient client = kc.connect();
+                client.pods().inNamespace(client.getNamespace()).watch(this);
             } catch (Exception x) {
                 LOGGER.log(Level.WARNING, "failed to set up watcher on " + kc.getDisplayName(), x);
             }

From 2c3b39e41599c684916b10f2c76c7dc500202e44 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 5 Jun 2019 13:46:43 -0400
Subject: [PATCH 10/17] Bump.

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index ca3d70f04b..6db9be6126 100644
--- a/pom.xml
+++ b/pom.xml
@@ -145,7 +145,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-durable-task-step</artifactId>
-      <version>2.31-rc898.994233111902</version> <!-- TODO https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
+      <version>2.32-rc917.b70f8edaf896</version> <!-- TODO https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
       <scope>test</scope>
     </dependency>
     <dependency> <!-- SemaphoreStep -->

From 5b30251b59e6d14d9d3997479ce200bf9615267f Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 5 Jun 2019 15:24:06 -0400
Subject: [PATCH 11/17] RequireUpperBoundDeps

---
 pom.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pom.xml b/pom.xml
index 6db9be6126..c4b8d3008b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,8 +50,8 @@
     <no-test-jar>false</no-test-jar>
     <surefire.rerunFailingTestsCount>0</surefire.rerunFailingTestsCount>
     <pipeline-model-definition.version>1.3.7</pipeline-model-definition.version>
-    <workflow-support-plugin.version>3.1</workflow-support-plugin.version>
-    <workflow-step-api-plugin.version>2.18</workflow-step-api-plugin.version>
+    <workflow-support-plugin.version>3.3</workflow-support-plugin.version>
+    <workflow-step-api-plugin.version>2.20</workflow-step-api-plugin.version>
   </properties>
 
   <dependencies>
@@ -248,7 +248,7 @@
       <dependency>
         <groupId>org.jenkins-ci.plugins</groupId>
         <artifactId>script-security</artifactId>
-        <version>1.50</version>
+        <version>1.58</version>
       </dependency>
       <dependency>
         <groupId>org.jenkins-ci.plugins</groupId>
@@ -263,7 +263,7 @@
       <dependency>
         <groupId>org.jenkins-ci.plugins</groupId>
         <artifactId>structs</artifactId>
-        <version>1.17</version>
+        <version>1.18</version>
       </dependency>
       <dependency>
         <groupId>org.jenkins-ci.plugins</groupId>

From 3f00eb155773cf41d630f11467c881a54a307dc2 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Wed, 5 Jun 2019 19:39:18 -0400
Subject: [PATCH 12/17] Need to update workflow-cps to interpret
 DynamicContext.

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index c4b8d3008b..6c9a66a9ed 100644
--- a/pom.xml
+++ b/pom.xml
@@ -182,7 +182,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-cps</artifactId>
-      <version>2.61.1</version>
+      <version>2.70</version>
       <scope>test</scope>
     </dependency>
     <dependency>

From 210e0f724f0a9d0f6d13a35baea03d5dc010d0d7 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Tue, 11 Jun 2019 16:55:59 -0400
Subject: [PATCH 13/17] Test flake pending #496.

---
 .../plugins/kubernetes/pipeline/KubernetesPipelineTest.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
index e647781c12..762989c66b 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
@@ -51,6 +51,7 @@
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 import org.jvnet.hudson.test.Issue;
+import org.jvnet.hudson.test.JenkinsRule;
 import org.jvnet.hudson.test.JenkinsRuleNonLocalhost;
 
 /**
@@ -362,7 +363,10 @@ public void terminatedPod() throws Exception {
         r.waitForMessage("+ sleep", b);
         deletePods(cloud.connect(), getLabels(this, name), false);
         r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
-        r.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b);
+        // TODO could use waitForMessage after #496
+        while (!JenkinsRule.getLog(b).contains(new ExecutorStepExecution.RemovedNodeCause().getShortDescription())) {
+            Thread.sleep(100);
+        }
     }
 
 }

From a25d24f7cc8f73b33fd2b91f87d0be2de534d5dd Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Tue, 11 Jun 2019 17:05:15 -0400
Subject: [PATCH 14/17] Bump.

---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index 9ed49d20e1..90b5c4f604 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,7 +46,7 @@
     <connectorHost />
     <jenkins.host.address />
     <java.level>8</java.level>
-    <jenkins.version>2.150.1</jenkins.version>
+    <jenkins.version>2.176.1</jenkins.version>
     <no-test-jar>false</no-test-jar>
     <useBeta>true</useBeta>
     <surefire.rerunFailingTestsCount>0</surefire.rerunFailingTestsCount>
@@ -152,7 +152,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-durable-task-step</artifactId>
-      <version>2.32-rc917.b70f8edaf896</version> <!-- TODO https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
+      <version>2.32-rc934.23119201f0b5</version> <!-- TODO https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
       <scope>test</scope>
     </dependency>
     <dependency> <!-- SemaphoreStep -->

From e71a5c5950ee976da51bf3046775ff2b22e4c170 Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Tue, 11 Jun 2019 17:08:18 -0400
Subject: [PATCH 15/17] Senseless to even try to build against _older_ LTS
 lines than our minimum. Unfortunately I lack write access to the repo so this
 Jenkinsfile change will be ignored.

---
 Jenkinsfile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index b3e75c465c..3063ec25ed 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1 +1,4 @@
-buildPlugin(configurations: buildPlugin.recommendedConfigurations().findAll { it.platform == 'linux' })
+buildPlugin(configurations: [
+    [platform: 'linux', jdk: '8', jenkins: null],
+    [platform: 'linux', jdk: '11', jenkins: null],
+])

From 70153a9224217001bfc65d9a8fac4a86405786ac Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Fri, 5 Jul 2019 15:43:05 -0400
Subject: [PATCH 16/17] workflow-durable-task-step 2.32

Co-Authored-By: Devin Nusbaum <dwnusbaum@users.noreply.github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index b93e640e23..cf10a862cf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -152,7 +152,7 @@
     <dependency>
       <groupId>org.jenkins-ci.plugins.workflow</groupId>
       <artifactId>workflow-durable-task-step</artifactId>
-      <version>2.32-rc934.23119201f0b5</version> <!-- TODO https://github.com/jenkinsci/workflow-durable-task-step-plugin/pull/104 -->
+      <version>2.32</version>
       <scope>test</scope>
     </dependency>
     <dependency> <!-- SemaphoreStep -->

From bb4e2978fe5ffffd5fdfa8dc0f70e23bf9fae4cc Mon Sep 17 00:00:00 2001
From: Jesse Glick <jglick@cloudbees.com>
Date: Fri, 5 Jul 2019 15:52:33 -0400
Subject: [PATCH 17/17] Using improved assertion after #496.

---
 .../plugins/kubernetes/pipeline/KubernetesPipelineTest.java  | 5 +----
 .../plugins/kubernetes/pipeline/RestartPipelineTest.java     | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
index cef66b283a..acf89efeef 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
@@ -353,10 +353,7 @@ public void terminatedPod() throws Exception {
         r.waitForMessage("+ sleep", b);
         deletePods(cloud.connect(), getLabels(this, name), false);
         r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
-        // TODO could use waitForMessage after #496
-        while (!JenkinsRule.getLog(b).contains(new ExecutorStepExecution.RemovedNodeCause().getShortDescription())) {
-            Thread.sleep(100);
-        }
+        r.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b);
     }
 
     @Test
diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
index a95e1d5055..ffb3129d94 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
@@ -212,10 +212,7 @@ public void terminatedPodAfterRestart() throws Exception {
             // Indeed we get two Reaper instances running, which independently remove the node.
             deletePods(cloud.connect(), getLabels(this, name), false);
             r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
-            while (!JenkinsRule.getLog(b).contains(new ExecutorStepExecution.RemovedNodeCause().getShortDescription())) {
-                // TODO JenkinsRule.waitForMessage has a race condition w.r.t. the termination cause printed by WorkflowRun.finish
-                Thread.sleep(100);
-            }
+            r.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b);
             // Currently the logic in ExecutorStepExecution cannot handle a Jenkins restart so it prints the following.
             // It does not matter since DurableTaskStep redundantly implements the same check.
             r.assertLogContains(" was deleted, but do not have a node body to cancel", b);