From 2bb78ac2c189e1f15e3047d81cf7dbecba32139f Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Thu, 10 Sep 2020 14:18:55 -0400 Subject: [PATCH 1/2] test for rescheduling non-canaries --- scheduler/generic_sched_test.go | 68 +++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index e8dc50e6944..344d902a2bf 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -5343,6 +5343,74 @@ func TestServiceSched_Preemption(t *testing.T) { require.Equal(expectedPreemptedAllocs, actualPreemptedAllocs) } +// TestServiceSched_Migrate_NonCanary asserts that when rescheduling +// non-canary allocations, a single allocation is migrated +func TestServiceSched_Migrate_NonCanary(t *testing.T) { + h := NewHarness(t) + + node1 := mock.Node() + require.NoError(t, h.State.UpsertNode(h.NextIndex(), node1)) + + job := mock.Job() + job.Stable = true + job.TaskGroups[0].Count = 1 + job.TaskGroups[0].Update = &structs.UpdateStrategy{ + MaxParallel: 1, + Canary: 1, + } + require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) + + deployment := &structs.Deployment{ + ID: uuid.Generate(), + JobID: job.ID, + Namespace: job.Namespace, + JobVersion: job.Version, + JobModifyIndex: job.JobModifyIndex, + JobCreateIndex: job.CreateIndex, + TaskGroups: map[string]*structs.DeploymentState{ + "web": {DesiredTotal: 1}, + }, + Status: structs.DeploymentStatusSuccessful, + StatusDescription: structs.DeploymentStatusDescriptionSuccessful, + } + require.NoError(t, h.State.UpsertDeployment(h.NextIndex(), deployment)) + + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node1.ID + alloc.DeploymentID = deployment.ID + alloc.Name = "my-job.web[0]" + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) + require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) + + // Create a mock evaluation + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerAllocStop, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewServiceScheduler, eval) + require.NoError(t, err) + + // Ensure a single plan + require.Len(t, h.Plans, 1) + plan := h.Plans[0] + + require.Contains(t, plan.NodeAllocation, node1.ID) + allocs := plan.NodeAllocation[node1.ID] + require.Len(t, allocs, 1) + +} + // TestServiceSched_Migrate_CanaryStatus asserts that migrations/rescheduling // of allocations use the proper versions of allocs rather than latest: // Canaries should be replaced by canaries, and non-canaries should be replaced From 64175dcceef3b0682314fc9e330ecd2c0ce1e9cb Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Thu, 10 Sep 2020 12:44:24 -0400 Subject: [PATCH 2/2] Revert the `requireCanary` check introduced in https://github.com/hashicorp/nomad/pull/8691/files#diff-1801138ac4d10f2064ba6f2e434ac9b4L430-R431 . The change was intended to fix a case where a canary alloc may fail to be rescheduled if all the other allocs fail as well (e.g. if all allocs happen to be placed on a node that died). However, it introduced some unintended side-effects. Reverting the change for now and will investigate further. --- scheduler/generic_sched_test.go | 22 ++++++++++++++++------ scheduler/reconcile.go | 4 +--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 344d902a2bf..9f164baceff 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -5537,10 +5537,18 @@ func TestServiceSched_Migrate_CanaryStatus(t *testing.T) { // Now test that all node1 allocs are migrated while preserving Version and Canary info { + // FIXME: This is a bug, we ought to reschedule canaries in this case but don't + rescheduleCanary := false + + expectedMigrations := 3 + if rescheduleCanary { + expectedMigrations++ + } + ws := memdb.NewWatchSet() allocs, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, true) require.NoError(t, err) - require.Len(t, allocs, 8) + require.Len(t, allocs, 4+expectedMigrations) nodeAllocs := map[string][]*structs.Allocation{} for _, a := range allocs { @@ -5554,7 +5562,7 @@ func TestServiceSched_Migrate_CanaryStatus(t *testing.T) { } node2Allocs := nodeAllocs[node2.ID] - require.Len(t, node2Allocs, 4) + require.Len(t, node2Allocs, expectedMigrations) sort.Slice(node2Allocs, func(i, j int) bool { return node2Allocs[i].Job.Version < node2Allocs[j].Job.Version }) for _, a := range node2Allocs[:3] { @@ -5563,9 +5571,11 @@ func TestServiceSched_Migrate_CanaryStatus(t *testing.T) { require.Equal(t, node2.ID, a.NodeID) require.Equal(t, deployment.ID, a.DeploymentID) } - require.Equal(t, structs.AllocDesiredStatusRun, node2Allocs[3].DesiredStatus) - require.Equal(t, uint64(1), node2Allocs[3].Job.Version) - require.Equal(t, node2.ID, node2Allocs[3].NodeID) - require.Equal(t, updateDeployment, node2Allocs[3].DeploymentID) + if rescheduleCanary { + require.Equal(t, structs.AllocDesiredStatusRun, node2Allocs[3].DesiredStatus) + require.Equal(t, uint64(1), node2Allocs[3].Job.Version) + require.Equal(t, node2.ID, node2Allocs[3].NodeID) + require.Equal(t, updateDeployment, node2Allocs[3].DeploymentID) + } } } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 029fe3b3124..151d846537c 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -426,9 +426,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // desired means we need to create canaries strategy := tg.Update canariesPromoted := dstate != nil && dstate.Promoted - replaceAllAllocs := len(untainted) == 0 && len(migrate)+len(lost) != 0 - requireCanary := (len(destructive) != 0 || replaceAllAllocs) && - strategy != nil && len(canaries) < strategy.Canary && !canariesPromoted + requireCanary := len(destructive) != 0 && strategy != nil && len(canaries) < strategy.Canary && !canariesPromoted if requireCanary { dstate.DesiredCanaries = strategy.Canary }