From 3b52b39c50ec6c9ab66b0c7aa79f2c3991e97127 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 16 Jul 2020 13:00:08 -0400 Subject: [PATCH] mrd: reconcile should treat pending deployments as paused (#8446) If a job update includes a task group that has no changes, those allocations have their version bumped in-place. This ends up triggering an eval from `deploymentwatcher` when it verifies their health. Although this eval is a no-op, we were only treating pending deployments the same as paused when the deployment was a new MRD. This means that any eval after the initial one will kick off the deployment, and that caused pending deployments to "jump the queue" and run ahead of schedule, breaking MRD invariants and resulting in a state with all regions blocked. This behavior can be replicated even in the case of job updates with no in-place updates by patching `deploymentwatcher` to inject a spurious no-op eval. This changeset fixes the behavior by treating pending deployments the same as paused in all cases in the reconciler. --- scheduler/reconcile.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 9d825ac4964..faa260a6ea9 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -197,7 +197,8 @@ func (a *allocReconciler) Compute() *reconcileResults { // Detect if the deployment is paused if a.deployment != nil { - a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused + a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused || + a.deployment.Status == structs.DeploymentStatusPending a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed } if a.deployment == nil {