From aef9bfeb12b6fd79bcd474b64704b337e566b3b8 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Wed, 18 Aug 2021 09:50:37 -0400 Subject: [PATCH] Consider all system jobs for a new node (#11054) When a node becomes ready, create an eval for all system jobs across namespaces. The previous code uses `job.ID` to deduplicate evals, but that ignores the job namespace. Thus if there are multiple jobs in different namespaces sharing the same ID/Name, only one will be considered for running in the new node. Thus, Nomad may skip running some system jobs in that node. --- .changelog/11054.txt | 3 ++ nomad/node_endpoint.go | 10 +++--- nomad/node_endpoint_test.go | 61 +++++++++++++++++++++++++++++++++++++ nomad/structs/structs.go | 4 +-- scheduler/rank.go | 4 +-- 5 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 .changelog/11054.txt diff --git a/.changelog/11054.txt b/.changelog/11054.txt new file mode 100644 index 00000000000..c949fc34239 --- /dev/null +++ b/.changelog/11054.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where system jobs with non-unique IDs may not be placed on new nodes +``` diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 36a18a26f64..7775fcef29d 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -1352,15 +1352,15 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6 // Create an eval for each JobID affected var evals []*structs.Evaluation var evalIDs []string - jobIDs := make(map[string]struct{}) + jobIDs := map[structs.NamespacedID]struct{}{} now := time.Now().UTC().UnixNano() for _, alloc := range allocs { // Deduplicate on JobID - if _, ok := jobIDs[alloc.JobID]; ok { + if _, ok := jobIDs[alloc.JobNamespacedID()]; ok { continue } - jobIDs[alloc.JobID] = struct{}{} + jobIDs[alloc.JobNamespacedID()] = struct{}{} // Create a new eval eval := &structs.Evaluation{ @@ -1383,10 +1383,10 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6 // Create an evaluation for each system job. for _, job := range sysJobs { // Still dedup on JobID as the node may already have the system job. - if _, ok := jobIDs[job.ID]; ok { + if _, ok := jobIDs[job.NamespacedID()]; ok { continue } - jobIDs[job.ID] = struct{}{} + jobIDs[job.NamespacedID()] = struct{}{} // Create a new eval eval := &structs.Evaluation{ diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index fbc82d8b7a0..36969ddd2a0 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2492,6 +2492,67 @@ func TestClientEndpoint_CreateNodeEvals(t *testing.T) { } } +// TestClientEndpoint_CreateNodeEvals_MultipleNSes asserts that evals are made +// for all jobs across namespaces +func TestClientEndpoint_CreateNodeEvals_MultipleNSes(t *testing.T) { + t.Parallel() + + s1, cleanupS1 := TestServer(t, nil) + defer cleanupS1() + testutil.WaitForLeader(t, s1.RPC) + + state := s1.fsm.State() + + idx := uint64(3) + ns1 := mock.Namespace() + err := state.UpsertNamespaces(idx, []*structs.Namespace{ns1}) + require.NoError(t, err) + idx++ + + node := mock.Node() + err = state.UpsertNode(structs.MsgTypeTestSetup, idx, node) + require.NoError(t, err) + idx++ + + // Inject a fake system job. + defaultJob := mock.SystemJob() + err = state.UpsertJob(structs.MsgTypeTestSetup, idx, defaultJob) + require.NoError(t, err) + idx++ + + nsJob := mock.SystemJob() + nsJob.ID = defaultJob.ID + nsJob.Namespace = ns1.Name + err = state.UpsertJob(structs.MsgTypeTestSetup, idx, nsJob) + require.NoError(t, err) + idx++ + + // Create some evaluations + evalIDs, index, err := s1.staticEndpoints.Node.createNodeEvals(node.ID, 1) + require.NoError(t, err) + require.NotZero(t, index) + require.Len(t, evalIDs, 2) + + byNS := map[string]*structs.Evaluation{} + for _, evalID := range evalIDs { + eval, err := state.EvalByID(nil, evalID) + require.NoError(t, err) + byNS[eval.Namespace] = eval + } + + require.Len(t, byNS, 2) + + defaultNSEval := byNS[defaultJob.Namespace] + require.NotNil(t, defaultNSEval) + require.Equal(t, defaultJob.ID, defaultNSEval.JobID) + require.Equal(t, defaultJob.Namespace, defaultNSEval.Namespace) + + otherNSEval := byNS[nsJob.Namespace] + require.NotNil(t, otherNSEval) + require.Equal(t, nsJob.ID, otherNSEval.JobID) + require.Equal(t, nsJob.Namespace, otherNSEval.Namespace) +} + func TestClientEndpoint_Evaluate(t *testing.T) { t.Parallel() diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 3cff6ffc5f9..240c524e3ef 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3986,8 +3986,8 @@ type Job struct { } // NamespacedID returns the namespaced id useful for logging -func (j *Job) NamespacedID() *NamespacedID { - return &NamespacedID{ +func (j *Job) NamespacedID() NamespacedID { + return NamespacedID{ ID: j.ID, Namespace: j.Namespace, } diff --git a/scheduler/rank.go b/scheduler/rank.go index efe586efaaa..799a29c4692 100644 --- a/scheduler/rank.go +++ b/scheduler/rank.go @@ -151,7 +151,7 @@ type BinPackIterator struct { source RankIterator evict bool priority int - jobId *structs.NamespacedID + jobId structs.NamespacedID taskGroup *structs.TaskGroup scoreFit func(*structs.Node, *structs.ComparableResources) float64 } @@ -228,7 +228,7 @@ OUTER: var allocsToPreempt []*structs.Allocation // Initialize preemptor with node - preemptor := NewPreemptor(iter.priority, iter.ctx, iter.jobId) + preemptor := NewPreemptor(iter.priority, iter.ctx, &iter.jobId) preemptor.SetNode(option.Node) // Count the number of existing preemptions