Skip to content

Commit

Permalink
test: add e2e for non-overlapping placements (#14646)
Browse files Browse the repository at this point in the history
* test: add e2e for non-overlapping placements

Followup to #10446

Fails (as expected) against 1.3.x at the wait for blocked eval (because
the allocs are allowed to overlap).

Passes against 1.4.0-beta.1 (as expected).

* Update e2e/overlap/overlap_test.go

Co-authored-by: James Rasell <[email protected]>
  • Loading branch information
schmichael and jrasell authored Sep 22, 2022
1 parent ed4e15d commit 62d30c0
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
130 changes: 130 additions & 0 deletions e2e/overlap/overlap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package overlap

import (
"fmt"
"testing"
"time"

"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
)

// TestOverlap asserts that the resources used by an allocation are not
// considered free until their ClientStatus is terminal.
//
// The test picks a ready node, registers a job sized to consume all of that
// node's unreserved CPU, and stops it. While the original allocation is still
// running out its shutdown_delay (DesiredStatus "stop", ClientStatus
// "running"), an identical job is registered against the same node and must
// produce a blocked eval. Once the original allocation is complete the
// replacement must be placed and reach "running".
//
// See: https://github.com/hashicorp/nomad/issues/10440
func TestOverlap(t *testing.T) {
	nomadClient := e2eutil.NomadClient(t)
	e2eutil.WaitForLeader(t, nomadClient)

	// Wait for at least 1 node to be ready and get its ID
	var node *api.Node
	testutil.Wait(t, func() (bool, error) {
		nodesList, _, err := nomadClient.Nodes().List(nil)
		if err != nil {
			return false, fmt.Errorf("error listing nodes: %v", err)
		}

		for _, n := range nodesList {
			if n.Status != "ready" {
				continue
			}

			// Return lookup errors so they are retried just like the List
			// error above instead of failing the test on the first hiccup.
			info, _, err := nomadClient.Nodes().Info(n.ID, nil)
			if err != nil {
				return false, fmt.Errorf("error getting node %s: %w", n.ID, err)
			}
			node = info
			return true, nil
		}

		return false, fmt.Errorf("no nodes ready before timeout; need at least 1 ready")
	})

	// Force job to fill one exact node
	getJob := func() (*api.Job, string) {
		job, err := e2eutil.Parse2(t, "testdata/overlap.nomad")
		must.NoError(t, err)

		// Unique ID per call so both jobs can be registered from one jobspec
		jobID := *job.ID + uuid.Short()
		job.ID = &jobID
		job.Datacenters = []string{node.Datacenter}

		// Constraints[1] is the ${node.unique.id} constraint in the jobspec
		job.Constraints[1].RTarget = node.ID

		// Ask for every CPU share the node has not reserved so that a second
		// copy of the job cannot fit alongside the first.
		availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
		job.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU
		return job, *job.ID
	}

	// waitForRunningAlloc blocks until jobID has exactly one allocation and
	// that allocation's ClientStatus is running, then returns it.
	waitForRunningAlloc := func(jobID string) *api.AllocationListStub {
		var alloc *api.AllocationListStub
		testutil.Wait(t, func() (bool, error) {
			allocs, _, err := nomadClient.Jobs().Allocations(jobID, false, nil)
			if err != nil {
				return false, fmt.Errorf("error listing allocations for %s: %w", jobID, err)
			}
			if len(allocs) == 0 {
				return false, fmt.Errorf("timed out before an allocation was found for %s", jobID)
			}
			// More than one allocation is a real failure, not something to retry
			must.Len(t, 1, allocs)

			alloc = allocs[0]
			if alloc.ClientStatus != "running" {
				return false, fmt.Errorf("timed out before alloc %s for %s was running: %s",
					alloc.ID, jobID, alloc.ClientStatus)
			}
			return true, nil
		})
		return alloc
	}

	job1, jobID1 := getJob()
	shutdownDelay := job1.TaskGroups[0].Tasks[0].ShutdownDelay

	_, _, err := nomadClient.Jobs().Register(job1, nil)
	must.NoError(t, err)
	defer e2eutil.WaitForJobStopped(t, nomadClient, jobID1)

	origAlloc := waitForRunningAlloc(jobID1)

	// Stop job but don't wait for ClientStatus terminal
	_, _, err = nomadClient.Jobs().Deregister(jobID1, false, nil)
	must.NoError(t, err)
	minStopTime := time.Now().Add(shutdownDelay)

	testutil.Wait(t, func() (bool, error) {
		a, _, err := nomadClient.Allocations().Info(origAlloc.ID, nil)
		if err != nil {
			return false, fmt.Errorf("error getting alloc %s: %w", origAlloc.ID, err)
		}
		ds, cs := a.DesiredStatus, a.ClientStatus
		if ds != "stop" || cs != "running" {
			return false, fmt.Errorf("expected alloc %s to be stop|running but found %s|%s",
				a.ID, ds, cs)
		}
		return true, nil
	})

	// Start replacement job and assert it is blocked
	job2, jobID2 := getJob()
	job2.TaskGroups[0].Tasks[0].ShutdownDelay = 0 // no need on the followup
	resp, _, err := nomadClient.Jobs().Register(job2, nil)
	must.NoError(t, err)
	defer e2eutil.WaitForJobStopped(t, nomadClient, jobID2)

	testutil.Wait(t, func() (bool, error) {
		e, _, err := nomadClient.Evaluations().Info(resp.EvalID, nil)
		if err != nil {
			// Retry rather than abort: the lookup may fail until the eval is
			// visible, which is the case the nil check below was meant for.
			return false, fmt.Errorf("error getting eval %s: %w", resp.EvalID, err)
		}
		if e == nil {
			return false, fmt.Errorf("eval %s does not exist yet", resp.EvalID)
		}
		if e.BlockedEval == "" {
			return false, fmt.Errorf("expected a blocked eval to be created but found: %#v", *e)
		}
		return true, nil
	})

	// Wait for job1's ShutdownDelay for origAlloc.ClientStatus to go terminal
	if sleepyTime := time.Until(minStopTime); sleepyTime > 0 {
		// Round instead of truncating the string form so the unit survives
		t.Logf("Sleeping for the rest of the shutdown_delay (%s/%s)",
			sleepyTime.Round(time.Millisecond), shutdownDelay)
		time.Sleep(sleepyTime)
	}

	testutil.Wait(t, func() (bool, error) {
		a, _, err := nomadClient.Allocations().Info(origAlloc.ID, nil)
		if err != nil {
			return false, fmt.Errorf("error getting alloc %s: %w", origAlloc.ID, err)
		}
		if a.ClientStatus != "complete" {
			return false, fmt.Errorf("expected original alloc %s to be complete but is %s",
				a.ID, a.ClientStatus)
		}
		return true, nil
	})

	// Assert replacement job unblocked and running
	waitForRunningAlloc(jobID2)
}
37 changes: 37 additions & 0 deletions e2e/overlap/testdata/overlap.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Jobspec parsed by TestOverlap (e2e/overlap/overlap_test.go). Before
# registering it, the test overwrites the job ID, the datacenters, the value
# of the node constraint, and the task's cpu.
job "overlap" {
datacenters = ["dc1"]
type = "service"

constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

# Pins the job to a single node. This must remain the second constraint:
# the test sets Constraints[1].RTarget to the ID of the node it selected.
constraint {
attribute = "${node.unique.id}"
value = "<<Must be filled in by test>>"
}

group "overlap" {
count = 1

task "test" {
driver = "raw_exec"

# Delay shutdown to delay next placement. The test reads this value to work
# out how long the stopped allocation is expected to keep running.
shutdown_delay = "10s"

config {
command = "bash"
# Long sleep; presumably just keeps the task alive until the job is stopped.
args = ["-c", "sleep 15000"]
}

resources {
# Must be filled in by test: cpu is replaced with the selected node's CPU
# shares minus its reserved CPU shares.
cpu = "0"
memory = "50"
}
}
}
}

0 comments on commit 62d30c0

Please sign in to comment.