-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7072 from hashicorp/system-sched-e2e
System sched e2e
- Loading branch information
Showing
5 changed files
with
242 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
job "system_job" { | ||
datacenters = ["dc1"] | ||
|
||
type = "system" | ||
|
||
constraint { | ||
attribute = "${attr.kernel.name}" | ||
value = "linux" | ||
} | ||
|
||
group "system_job_group" { | ||
count = 1 | ||
|
||
restart { | ||
attempts = 10 | ||
interval = "1m" | ||
|
||
delay = "2s" | ||
mode = "delay" | ||
} | ||
|
||
task "system_task" { | ||
driver = "docker" | ||
|
||
config { | ||
image = "bash:latest" | ||
|
||
command = "bash" | ||
args = ["-c", "sleep 15000"] | ||
} | ||
|
||
env { | ||
version = "1" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
job "system_job" { | ||
datacenters = ["dc1"] | ||
|
||
type = "system" | ||
|
||
constraint { | ||
attribute = "${attr.kernel.name}" | ||
value = "linux" | ||
} | ||
|
||
group "system_job_group" { | ||
count = 1 | ||
|
||
restart { | ||
attempts = 10 | ||
interval = "1m" | ||
|
||
delay = "2s" | ||
mode = "delay" | ||
} | ||
|
||
task "system_task" { | ||
driver = "docker" | ||
|
||
config { | ||
image = "bash:latest" | ||
|
||
command = "bash" | ||
args = ["-c", "sleep 15000"] | ||
} | ||
|
||
env { | ||
version = "2" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
package systemsched | ||
|
||
import ( | ||
"github.com/hashicorp/nomad/api" | ||
"github.com/hashicorp/nomad/e2e/e2eutil" | ||
"github.com/hashicorp/nomad/e2e/framework" | ||
"github.com/hashicorp/nomad/nomad/structs" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
type SystemSchedTest struct { | ||
framework.TC | ||
jobIDs []string | ||
} | ||
|
||
func init() { | ||
framework.AddSuites(&framework.TestSuite{ | ||
Component: "SystemScheduler", | ||
CanRunLocal: true, | ||
Cases: []framework.TestCase{ | ||
new(SystemSchedTest), | ||
}, | ||
}) | ||
} | ||
|
||
func (tc *SystemSchedTest) BeforeAll(f *framework.F) { | ||
// Ensure cluster has leader before running tests | ||
e2eutil.WaitForLeader(f.T(), tc.Nomad()) | ||
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4) | ||
} | ||
|
||
func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) { | ||
t := f.T() | ||
nomadClient := tc.Nomad() | ||
|
||
jobID := "system_deployment" | ||
tc.jobIDs = append(tc.jobIDs, jobID) | ||
e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job0.nomad", jobID, "") | ||
|
||
jobs := nomadClient.Jobs() | ||
allocs, _, err := jobs.Allocations(jobID, true, nil) | ||
require.NoError(t, err) | ||
|
||
var allocIDs []string | ||
for _, alloc := range allocs { | ||
allocIDs = append(allocIDs, alloc.ID) | ||
} | ||
|
||
// Wait for allocations to get past initial pending state | ||
e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs) | ||
|
||
// Mark one node as ineligible | ||
nodesAPI := tc.Nomad().Nodes() | ||
disabledNodeID := allocs[0].NodeID | ||
_, err = nodesAPI.ToggleEligibility(disabledNodeID, false, nil) | ||
require.NoError(t, err) | ||
|
||
// Assert all jobs still running | ||
jobs = nomadClient.Jobs() | ||
allocs, _, err = jobs.Allocations(jobID, true, nil) | ||
|
||
allocIDs = nil | ||
for _, alloc := range allocs { | ||
allocIDs = append(allocIDs, alloc.ID) | ||
} | ||
|
||
require.NoError(t, err) | ||
allocForDisabledNode := make(map[string]*api.AllocationListStub) | ||
|
||
// Wait for allocs to run and collect allocs on ineligible node | ||
// Allocation could have failed, ensure there is one thats running | ||
// and that it is the correct version (0) | ||
e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs) | ||
for _, alloc := range allocs { | ||
if alloc.NodeID == disabledNodeID { | ||
allocForDisabledNode[alloc.ID] = alloc | ||
} | ||
} | ||
|
||
// Filter down to only our latest running alloc | ||
for _, alloc := range allocForDisabledNode { | ||
require.Equal(t, uint64(0), alloc.JobVersion) | ||
if alloc.ClientStatus == structs.AllocClientStatusComplete { | ||
// remove the old complete alloc from map | ||
delete(allocForDisabledNode, alloc.ID) | ||
} | ||
} | ||
require.NotEmpty(t, allocForDisabledNode) | ||
require.Len(t, allocForDisabledNode, 1) | ||
|
||
// Update job | ||
e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job1.nomad", jobID, "") | ||
|
||
// Get updated allocations | ||
jobs = nomadClient.Jobs() | ||
allocs, _, err = jobs.Allocations(jobID, false, nil) | ||
require.NoError(t, err) | ||
|
||
allocIDs = nil | ||
for _, alloc := range allocs { | ||
allocIDs = append(allocIDs, alloc.ID) | ||
} | ||
|
||
// Wait for allocs to start | ||
e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs) | ||
|
||
// Get latest alloc status now that they are no longer pending | ||
allocs, _, err = jobs.Allocations(jobID, false, nil) | ||
require.NoError(t, err) | ||
|
||
var foundPreviousAlloc bool | ||
for _, dAlloc := range allocForDisabledNode { | ||
for _, alloc := range allocs { | ||
if alloc.ID == dAlloc.ID { | ||
foundPreviousAlloc = true | ||
require.Equal(t, uint64(0), alloc.JobVersion) | ||
} else { | ||
// Ensure allocs running on non disabled node are | ||
// newer version | ||
if alloc.ClientStatus == structs.AllocClientStatusRunning { | ||
require.Equal(t, uint64(1), alloc.JobVersion) | ||
} | ||
} | ||
} | ||
} | ||
require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node") | ||
} | ||
|
||
func (tc *SystemSchedTest) AfterEach(f *framework.F) { | ||
nomadClient := tc.Nomad() | ||
|
||
// Mark all nodes eligible | ||
nodesAPI := tc.Nomad().Nodes() | ||
nodes, _, _ := nodesAPI.List(nil) | ||
for _, node := range nodes { | ||
nodesAPI.ToggleEligibility(node.ID, true, nil) | ||
} | ||
|
||
jobs := nomadClient.Jobs() | ||
// Stop all jobs in test | ||
for _, id := range tc.jobIDs { | ||
jobs.Deregister(id, true, nil) | ||
} | ||
tc.jobIDs = []string{} | ||
// Garbage collect | ||
nomadClient.System().GarbageCollect() | ||
} |