From 5a98dfa493702aa5dd7260616d470f7b077e97b6 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 5 Dec 2018 13:01:12 -0800 Subject: [PATCH] Don't GC running but desired stop allocations This PR fixes an edge case where we could GC an allocation that was in a desired stop state but had not terminated yet. This can be hit if the client hasn't shut down the allocation yet or if the allocation is still shutting down (long kill_timeout). Fixes https://github.com/hashicorp/nomad/issues/4940 --- nomad/core_sched.go | 7 ++++++- nomad/core_sched_test.go | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/nomad/core_sched.go b/nomad/core_sched.go index a3fbe4010da..396ef641f99 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -7,7 +7,6 @@ import ( log "github.com/hashicorp/go-hclog" memdb "github.com/hashicorp/go-memdb" - "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/scheduler" @@ -623,6 +622,12 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, return false } + // If the allocation is still running on the client we can not garbage + // collect it. 
+ if a.ClientStatus == structs.AllocClientStatusRunning { + return false + } + // If the job is deleted, stopped or dead all allocs can be removed if job == nil || job.Stop || job.Status == structs.JobStatusDead { return true diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 02ea29ea367..000f1a72d90 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -1972,6 +1972,16 @@ func TestAllocation_GCEligible(t *testing.T) { ThresholdIndex: 90, ShouldGC: false, }, + { + Desc: "Don't GC when non terminal on client and job dead", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusStop, + JobStatus: structs.JobStatusDead, + GCTime: fail, + ModifyIndex: 90, + ThresholdIndex: 90, + ShouldGC: false, + }, { Desc: "GC when terminal but not failed ", ClientStatus: structs.AllocClientStatusComplete,