Skip to content

Commit

Permalink
Merge pull request #7072 from hashicorp/system-sched-e2e
Browse files Browse the repository at this point in the history
System sched e2e
  • Loading branch information
drewbailey authored Feb 4, 2020
2 parents ed41d7b + 84cc906 commit f944959
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 0 deletions.
1 change: 1 addition & 0 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
_ "github.com/hashicorp/nomad/e2e/nomad09upgrade"
_ "github.com/hashicorp/nomad/e2e/nomadexec"
_ "github.com/hashicorp/nomad/e2e/spread"
_ "github.com/hashicorp/nomad/e2e/systemsched"
_ "github.com/hashicorp/nomad/e2e/taskevents"
)

Expand Down
20 changes: 20 additions & 0 deletions e2e/e2eutil/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,26 @@ func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []stri
}
}

func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
for _, allocID := range allocIDs {
WaitForAllocNotPending(t, nomadClient, allocID)
}
}

func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID string) {
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
if err != nil {
return false, err
}

return alloc.ClientStatus != structs.AllocClientStatusPending, fmt.Errorf("expected status not pending, but was: %s", alloc.ClientStatus)
}, func(err error) {
t.Fatalf("failed to wait on alloc: %v", err)
})
}

func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
allocIDs := make([]string, 0, len(allocs))
for _, alloc := range allocs {
Expand Down
37 changes: 37 additions & 0 deletions e2e/systemsched/input/system_job0.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
job "system_job" {
datacenters = ["dc1"]

type = "system"

constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "system_job_group" {
count = 1

restart {
attempts = 10
interval = "1m"

delay = "2s"
mode = "delay"
}

task "system_task" {
driver = "docker"

config {
image = "bash:latest"

command = "bash"
args = ["-c", "sleep 15000"]
}

env {
version = "1"
}
}
}
}
37 changes: 37 additions & 0 deletions e2e/systemsched/input/system_job1.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
job "system_job" {
datacenters = ["dc1"]

type = "system"

constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "system_job_group" {
count = 1

restart {
attempts = 10
interval = "1m"

delay = "2s"
mode = "delay"
}

task "system_task" {
driver = "docker"

config {
image = "bash:latest"

command = "bash"
args = ["-c", "sleep 15000"]
}

env {
version = "2"
}
}
}
}
147 changes: 147 additions & 0 deletions e2e/systemsched/systemsched.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package systemsched

import (
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)

type SystemSchedTest struct {
framework.TC
jobIDs []string
}

func init() {
framework.AddSuites(&framework.TestSuite{
Component: "SystemScheduler",
CanRunLocal: true,
Cases: []framework.TestCase{
new(SystemSchedTest),
},
})
}

func (tc *SystemSchedTest) BeforeAll(f *framework.F) {
// Ensure cluster has leader before running tests
e2eutil.WaitForLeader(f.T(), tc.Nomad())
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
}

func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
t := f.T()
nomadClient := tc.Nomad()

jobID := "system_deployment"
tc.jobIDs = append(tc.jobIDs, jobID)
e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job0.nomad", jobID, "")

jobs := nomadClient.Jobs()
allocs, _, err := jobs.Allocations(jobID, true, nil)
require.NoError(t, err)

var allocIDs []string
for _, alloc := range allocs {
allocIDs = append(allocIDs, alloc.ID)
}

// Wait for allocations to get past initial pending state
e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

// Mark one node as ineligible
nodesAPI := tc.Nomad().Nodes()
disabledNodeID := allocs[0].NodeID
_, err = nodesAPI.ToggleEligibility(disabledNodeID, false, nil)
require.NoError(t, err)

// Assert all jobs still running
jobs = nomadClient.Jobs()
allocs, _, err = jobs.Allocations(jobID, true, nil)

allocIDs = nil
for _, alloc := range allocs {
allocIDs = append(allocIDs, alloc.ID)
}

require.NoError(t, err)
allocForDisabledNode := make(map[string]*api.AllocationListStub)

// Wait for allocs to run and collect allocs on ineligible node
// Allocation could have failed, ensure there is one thats running
// and that it is the correct version (0)
e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
for _, alloc := range allocs {
if alloc.NodeID == disabledNodeID {
allocForDisabledNode[alloc.ID] = alloc
}
}

// Filter down to only our latest running alloc
for _, alloc := range allocForDisabledNode {
require.Equal(t, uint64(0), alloc.JobVersion)
if alloc.ClientStatus == structs.AllocClientStatusComplete {
// remove the old complete alloc from map
delete(allocForDisabledNode, alloc.ID)
}
}
require.NotEmpty(t, allocForDisabledNode)
require.Len(t, allocForDisabledNode, 1)

// Update job
e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job1.nomad", jobID, "")

// Get updated allocations
jobs = nomadClient.Jobs()
allocs, _, err = jobs.Allocations(jobID, false, nil)
require.NoError(t, err)

allocIDs = nil
for _, alloc := range allocs {
allocIDs = append(allocIDs, alloc.ID)
}

// Wait for allocs to start
e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

// Get latest alloc status now that they are no longer pending
allocs, _, err = jobs.Allocations(jobID, false, nil)
require.NoError(t, err)

var foundPreviousAlloc bool
for _, dAlloc := range allocForDisabledNode {
for _, alloc := range allocs {
if alloc.ID == dAlloc.ID {
foundPreviousAlloc = true
require.Equal(t, uint64(0), alloc.JobVersion)
} else {
// Ensure allocs running on non disabled node are
// newer version
if alloc.ClientStatus == structs.AllocClientStatusRunning {
require.Equal(t, uint64(1), alloc.JobVersion)
}
}
}
}
require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node")
}

func (tc *SystemSchedTest) AfterEach(f *framework.F) {
nomadClient := tc.Nomad()

// Mark all nodes eligible
nodesAPI := tc.Nomad().Nodes()
nodes, _, _ := nodesAPI.List(nil)
for _, node := range nodes {
nodesAPI.ToggleEligibility(node.ID, true, nil)
}

jobs := nomadClient.Jobs()
// Stop all jobs in test
for _, id := range tc.jobIDs {
jobs.Deregister(id, true, nil)
}
tc.jobIDs = []string{}
// Garbage collect
nomadClient.System().GarbageCollect()
}

0 comments on commit f944959

Please sign in to comment.