From fcf9f1e2ecfd04b9b3285fe20ae632aad60977a2 Mon Sep 17 00:00:00 2001 From: Dom Lavery Date: Wed, 12 Apr 2023 16:14:55 +0100 Subject: [PATCH 01/50] Add the ability to disable rescheduling on lost. Implements issue #10366 --- .changelog/16867.txt | 3 + api/jobs_test.go | 28 +++++--- api/tasks.go | 5 +- command/agent/job_endpoint.go | 6 ++ command/agent/job_endpoint_test.go | 6 +- nomad/core_sched.go | 23 +++++-- nomad/core_sched_test.go | 21 ++++++ nomad/mock/job.go | 5 +- nomad/structs/diff_test.go | 67 +++++++++++++++---- nomad/structs/structs.go | 5 ++ scheduler/reconcile.go | 55 ++++++++------- scheduler/reconcile_test.go | 49 ++++++++++++++ scheduler/util.go | 5 ++ scheduler/util_test.go | 7 +- .../content/docs/job-specification/group.mdx | 4 ++ 15 files changed, 231 insertions(+), 58 deletions(-) create mode 100644 .changelog/16867.txt diff --git a/.changelog/16867.txt b/.changelog/16867.txt new file mode 100644 index 00000000000..dd39808bc60 --- /dev/null +++ b/.changelog/16867.txt @@ -0,0 +1,3 @@ +```release-note:feature +**Reschedule on Lost**: Adds the ability to prevent tasks on down nodes from being rescheduled +``` diff --git a/api/jobs_test.go b/api/jobs_test.go index 88569f9a22f..5cc2777c5b8 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -312,8 +312,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), + Name: pointerOf(""), + Count: pointerOf(1), + RescheduleOnLost: pointerOf(true), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -399,8 +400,9 @@ func TestJobs_Canonicalize(t *testing.T) { JobModifyIndex: pointerOf(uint64(0)), TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), + Name: pointerOf(""), + Count: pointerOf(1), + RescheduleOnLost: pointerOf(true), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -555,8 +557,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), + Name: pointerOf("cache"), + Count: pointerOf(1), + RescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -666,8 +669,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), + Name: pointerOf("cache"), + Count: pointerOf(1), + RescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -930,8 +934,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - Count: pointerOf(1), + Name: pointerOf("bar"), + Count: pointerOf(1), + RescheduleOnLost: pointerOf(true), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1038,7 +1043,8 @@ func TestJobs_Canonicalize(t *testing.T) { ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), + Name: pointerOf("bar"), + RescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Delay: pointerOf(15 * time.Second), Attempts: pointerOf(2), diff --git a/api/tasks.go b/api/tasks.go index 5d828a46d4a..dbcc1bc0b1d 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -459,6 +459,7 @@ type TaskGroup struct { MaxClientDisconnect *time.Duration `mapstructure:"max_client_disconnect" hcl:"max_client_disconnect,optional"` Scaling *ScalingPolicy `hcl:"scaling,block"` Consul *Consul `hcl:"consul,block"` + 
RescheduleOnLost *bool `hcl:"reschedule_on_lost,optional"` } // NewTaskGroup creates a new TaskGroup. @@ -577,7 +578,9 @@ func (g *TaskGroup) Canonicalize(job *Job) { for _, s := range g.Services { s.Canonicalize(nil, g, job) } - + if g.RescheduleOnLost == nil { + g.RescheduleOnLost = pointerOf(true) + } } // These needs to be in sync with DefaultServiceJobRestartPolicy in diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index db7d24279c6..3e3169578df 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1135,6 +1135,12 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta RenderTemplates: *taskGroup.RestartPolicy.RenderTemplates, } + if taskGroup.RescheduleOnLost == nil { + tg.RescheduleOnLost = true + } else { + tg.RescheduleOnLost = *taskGroup.RescheduleOnLost + } + if taskGroup.ShutdownDelay != nil { tg.ShutdownDelay = taskGroup.ShutdownDelay } diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 3dc25b5bc0e..96883f812ef 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -3058,6 +3058,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Operand: "z", }, }, + RescheduleOnLost: true, Affinities: []*structs.Affinity{ { LTarget: "x", @@ -3552,8 +3553,9 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { }, TaskGroups: []*structs.TaskGroup{ { - Name: "group1", - Count: 5, + Name: "group1", + Count: 5, + RescheduleOnLost: true, Constraints: []*structs.Constraint{ { LTarget: "x", diff --git a/nomad/core_sched.go b/nomad/core_sched.go index dbd90a6b4b9..1a52f804bad 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -629,8 +629,23 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, return false } + // If the job is deleted all allocs can be removed + if job == nil { + return true + } + + tg := job.LookupTaskGroup(a.TaskGroup) + if tg == nil { + return true + } + + // Don't GC lost allocs when RescheduleOnLost is disabled + if !job.Stop && !tg.RescheduleOnLost && a.ClientStatus == structs.AllocClientStatusLost { + return false + } + // If the job is deleted, stopped or dead all allocs can be removed - if job == nil || job.Stop || job.Status == structs.JobStatusDead { + if job.Stop || job.Status == structs.JobStatusDead { return true } @@ -647,12 +662,8 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, return true } - var reschedulePolicy *structs.ReschedulePolicy - tg := job.LookupTaskGroup(a.TaskGroup) + reschedulePolicy := tg.ReschedulePolicy - if tg != nil { - reschedulePolicy = tg.ReschedulePolicy - } // No reschedule policy or rescheduling is disabled if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { return true diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 72f8368c669..76a32afa395 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -1946,6 +1946,7 @@ func TestAllocation_GCEligible(t *testing.T) { DesiredStatus string JobStatus string JobStop bool + RescheduleOnLost *bool AllocJobModifyIndex uint64 JobModifyIndex uint64 ModifyIndex uint64 @@ -2120,6 +2121,23 @@ func TestAllocation_GCEligible(t *testing.T) { JobStop: true, ShouldGC: true, }, + { + Desc: "GC when alloc is lost and eligible for reschedule", + ClientStatus: structs.AllocClientStatusLost, + DesiredStatus: structs.AllocDesiredStatusStop, + GCTime: fail, + JobStatus: structs.JobStatusDead, + ShouldGC: 
true, + }, + { + Desc: "Don't GC when alloc is lost and not being rescheduled", + ClientStatus: structs.AllocClientStatusLost, + DesiredStatus: structs.AllocDesiredStatusStop, + RescheduleOnLost: pointer.Of(false), + GCTime: fail, + JobStatus: structs.JobStatusDead, + ShouldGC: false, + }, { Desc: "GC when job status is dead", ClientStatus: structs.AllocClientStatusFailed, @@ -2166,6 +2184,9 @@ func TestAllocation_GCEligible(t *testing.T) { alloc.NextAllocation = tc.NextAllocID job := mock.Job() alloc.TaskGroup = job.TaskGroups[0].Name + if tc.RescheduleOnLost != nil { + job.TaskGroups[0].RescheduleOnLost = *tc.RescheduleOnLost + } job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy if tc.JobStatus != "" { job.Status = tc.JobStatus diff --git a/nomad/mock/job.go b/nomad/mock/job.go index 0e4600ec920..845c7fd9c81 100644 --- a/nomad/mock/job.go +++ b/nomad/mock/job.go @@ -31,8 +31,9 @@ func Job() *structs.Job { }, TaskGroups: []*structs.TaskGroup{ { - Name: "web", - Count: 10, + Name: "web", + Count: 10, + RescheduleOnLost: true, Constraints: []*structs.Constraint{ { LTarget: "${attr.consul.version}", diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index c9caa4a4887..0d2837cdfdd 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1253,32 +1253,38 @@ func TestJobDiff(t *testing.T) { Old: &Job{ TaskGroups: []*TaskGroup{ { - Name: "foo", - Count: 1, + Name: "foo", + Count: 1, + RescheduleOnLost: true, }, { - Name: "bar", - Count: 1, + Name: "bar", + Count: 1, + RescheduleOnLost: false, }, { - Name: "baz", - Count: 1, + Name: "baz", + Count: 1, + RescheduleOnLost: true, }, }, }, New: &Job{ TaskGroups: []*TaskGroup{ { - Name: "bar", - Count: 1, + Name: "bar", + Count: 1, + RescheduleOnLost: false, }, { - Name: "baz", - Count: 2, + Name: "baz", + Count: 2, + RescheduleOnLost: true, }, { - Name: "bam", - Count: 1, + Name: "bam", + Count: 1, + RescheduleOnLost: true, }, }, }, @@ -1295,6 +1301,12 @@ func TestJobDiff(t *testing.T) { Old: "", New: "1", }, + { + Type: DiffTypeAdded, + Name: "RescheduleOnLost", + Old: "", + New: "true", + }, }, }, { @@ -1323,6 +1335,12 @@ func TestJobDiff(t *testing.T) { Old: "1", New: "", }, + { + Type: DiffTypeDeleted, + Name: "RescheduleOnLost", + Old: "true", + New: "", + }, }, }, }, @@ -1841,6 +1859,31 @@ func TestTaskGroupDiff(t *testing.T) { }, }, }, + { + TestCase: "Reschedule on lost diff", + Old: &TaskGroup{ + Name: "foo", + Count: 100, + RescheduleOnLost: true, + }, + New: &TaskGroup{ + Name: "foo", + Count: 100, + RescheduleOnLost: false, + }, + Expected: &TaskGroupDiff{ + Type: DiffTypeEdited, + Name: "foo", + Fields: []*FieldDiff{ + { + Type: DiffTypeEdited, + Name: "RescheduleOnLost", + Old: "true", + New: "false", + }, + }, + }, + }, { TestCase: "Map diff", Old: &TaskGroup{ diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 05b53f2ff3f..3b69c58b6b5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6641,6 +6641,11 @@ type TaskGroup struct { // MaxClientDisconnect, if set, configures the client to allow placed // allocations for tasks in this group to attempt to resume running without a restart. MaxClientDisconnect *time.Duration + + // RescheduleOnLost is used to control how allocations on disconnected + // nodes are handled. For backwards compatibility, it defaults to true. + // When true, such jobs are rescheduled. 
+ RescheduleOnLost bool } func (tg *TaskGroup) Copy() *TaskGroup { diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index f6675aff221..bf3e484f039 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -554,7 +554,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // placements can be made without any other consideration. deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !isCanarying - underProvisionedBy = a.computeReplacements(deploymentPlaceReady, desiredChanges, place, rescheduleNow, lost, underProvisionedBy) + underProvisionedBy = a.computeReplacements(tg, deploymentPlaceReady, desiredChanges, place, rescheduleNow, lost, underProvisionedBy) if deploymentPlaceReady { a.computeDestructiveUpdates(destructive, underProvisionedBy, desiredChanges, tg) @@ -772,25 +772,30 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // Add replacements for disconnected and lost allocs up to group.Count existing := len(untainted) + len(migrate) + len(reschedule) - // Add replacements for lost - for _, alloc := range lost { - if existing >= group.Count { - // Reached desired count, do not replace remaining lost - // allocs - break - } + if group.RescheduleOnLost { + // Add replacements for lost + for _, alloc := range lost { + if existing >= group.Count { + // Reached desired count, do not replace remaining lost + // allocs + break + } - existing++ - place = append(place, allocPlaceResult{ - name: alloc.Name, - taskGroup: group, - previousAlloc: alloc, - reschedule: false, - canary: alloc.DeploymentStatus.IsCanary(), - downgradeNonCanary: isCanarying && !alloc.DeploymentStatus.IsCanary(), - minJobVersion: alloc.Job.Version, - lost: true, - }) + existing++ + place = append(place, allocPlaceResult{ + name: alloc.Name, + taskGroup: group, + previousAlloc: alloc, + reschedule: false, + canary: alloc.DeploymentStatus.IsCanary(), + downgradeNonCanary: isCanarying && !alloc.DeploymentStatus.IsCanary(), + minJobVersion: alloc.Job.Version, + lost: true, + }) + } + } else { + //Don't add placements for lost where RescheduleOnLost is not enabled + existing += len(lost) } // Add remaining placement results @@ -812,7 +817,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // and if the placement is already rescheduling or part of a failed deployment. // The input deploymentPlaceReady is calculated as the deployment is not paused, failed, or canarying. // It returns the number of allocs still needed. -func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, +func (a *allocReconciler) computeReplacements(tg *structs.TaskGroup, deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, place []allocPlaceResult, rescheduleNow, lost allocSet, underProvisionedBy int) int { // Disconnecting allocs are not failing, but are included in rescheduleNow. @@ -847,8 +852,8 @@ func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desired // If allocs have been lost, determine the number of replacements that are needed // and add placements to the result for the lost allocs. - if len(lost) != 0 { - allowed := min(len(lost), len(place)) + if len(lost) != 0 && tg.RescheduleOnLost { + allowed := helper.Min(len(lost), len(place)) desiredChanges.Place += uint64(allowed) a.result.place = append(a.result.place, place[:allowed]...) 
} @@ -986,7 +991,11 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc // Mark all lost allocations for stop. var stop allocSet stop = stop.union(lost) - a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) + if group.RescheduleOnLost { + a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) + } else { + a.markStop(lost, structs.AllocClientStatusLost, allocLost) + } // If we are still deploying or creating canaries, don't stop them if isCanarying { diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 94c268bd4c1..30c5a52a376 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -887,6 +887,55 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate)) } +// Tests the reconciler properly handles lost nodes with allocations +func TestReconciler_LostNode_RescheduleOff(t *testing.T) { + ci.Parallel(t) + + job := mock.Job() + job.TaskGroups[0].RescheduleOnLost = false + // Create 10 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + } + + // Build a map of tainted nodes + tainted := make(map[string]*structs.Node, 2) + for i := 0; i < 2; i++ { + n := mock.Node() + n.ID = allocs[i].NodeID + n.Status = structs.NodeStatusDown + tainted[n.ID] = n + } + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, tainted, "", 50, true) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 0, + Stop: 2, + Ignore: 8, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) +} + // Tests the reconciler properly handles lost nodes with allocations func TestReconciler_LostNode(t *testing.T) { ci.Parallel(t) diff --git a/scheduler/util.go b/scheduler/util.go index 0dade6571ef..cf3a22a349c 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -222,6 +222,11 @@ func tasksUpdated(jobA, jobB *structs.Job, taskGroup string) comparison { return difference("number of tasks", lenA, lenB) } + // Check for rescheduleOnLost changes + if a.RescheduleOnLost != b.RescheduleOnLost { + return difference("reschedule on lost", a.RescheduleOnLost, b.RescheduleOnLost) + } + // Check ephemeral disk if !a.EphemeralDisk.Equal(b.EphemeralDisk) { return difference("ephemeral disk", a.EphemeralDisk, b.EphemeralDisk) diff --git a/scheduler/util_test.go b/scheduler/util_test.go index a7049333845..135881dbe0c 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -546,7 +546,12 @@ func TestTasksUpdated(t *testing.T) { // Compare changed Template ErrMissingKey j30.TaskGroups[0].Tasks[0].Templates[0].ErrMissingKey = true - must.True(t, tasksUpdated(j29, j30, name).modified) + require.True(t, tasksUpdated(j29, j30, name).modified) + + // Change rescheduleOnLost mode + j31 := mock.Job() + j31.TaskGroups[0].RescheduleOnLost = false + require.True(t, tasksUpdated(j1, j31, name).modified) } func TestTasksUpdated_connectServiceUpdated(t *testing.T) { diff --git 
a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 790f910cf4e..566dce606b4 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,6 +63,10 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". +- `reschedule_on_lost` `(bool: true)` - Specifies if a group's tasks can be + rescheduled when their allocations become lost. If set to false, jobs with + lost tasks will be left in a running state until an operator intervenes. + - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks in this group. If omitted, a default policy exists for each job type, which can be found in the [restart block documentation][restart]. From ccbdc9b50ba1994398ad14c98104b14f2d18b8a4 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Fri, 3 Nov 2023 17:57:38 +0100 Subject: [PATCH 02/50] func: add lost allocs that won't be rescheduled to the count --- nomad/structs/structs.go | 13 +++++++++++++ scheduler/reconcile.go | 8 ++++---- scheduler/reconcile_util.go | 23 ++++++++++++++++++----- scheduler/reconcile_util_test.go | 16 ++++++++++++++++ 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 3b69c58b6b5..86ed4a2c90e 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -11025,6 +11025,19 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien return false } +// RescheduleOnLost determines if an alloc is configured to be rescheduled if +// the client becomes lost +func (a *Allocation) RescheduleOnLost() bool { + if a.Job != nil { + tg := a.Job.LookupTaskGroup(a.TaskGroup) + if tg != nil { + return tg.RescheduleOnLost + } + } + + return false +} + // NextDelay returns a duration after which the allocation can be rescheduled. // It is calculated according to the delay function and previous reschedule attempts. func (a *Allocation) NextDelay() time.Duration { diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index bf3e484f039..64de5edb9d8 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -485,7 +485,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // Find delays for any lost allocs that have stop_after_client_disconnect lostLaterEvals := map[string]string{} lostLater := []*delayedRescheduleInfo{} - if len(lost) > 0 { + if len(lost) > 0 && tg.RescheduleOnLost { lostLater = lost.delayByStopAfterClientDisconnect() lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name) } @@ -853,7 +853,7 @@ func (a *allocReconciler) computeReplacements(tg *structs.TaskGroup, deploymentP // If allocs have been lost, determine the number of replacements that are needed // and add placements to the result for the lost allocs. if len(lost) != 0 && tg.RescheduleOnLost { - allowed := helper.Min(len(lost), len(place)) + allowed := min(len(lost), len(place)) desiredChanges.Place += uint64(allowed) a.result.place = append(a.result.place, place[:allowed]...)
} @@ -1002,8 +1002,8 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc untainted = untainted.difference(canaries) } - // Remove disconnected allocations so they won't be stopped - knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown) + // Remove disconnected and lost allocations so they won't be stopped + knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown, structs.AllocClientStatusLost) // Hot path the nothing to do case // diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 7cf7c3fb14c..b8fb2113a5f 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -16,6 +16,7 @@ import ( "time" "github.com/hashicorp/nomad/nomad/structs" + "golang.org/x/exp/slices" ) // placementResult is an allocation that must be placed. It potentially has a @@ -295,6 +296,12 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS reconnecting[alloc.ID] = alloc continue } + + case structs.NodeStatusDown: + if alloc.ClientStatus == structs.AllocClientStatusLost && !alloc.RescheduleOnLost() { + untainted[alloc.ID] = alloc + continue + } default: } } @@ -448,12 +455,13 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo return true, false } - return false, true case structs.AllocDesiredStatusEvict: return false, true } switch alloc.ClientStatus { + case structs.AllocClientStatusComplete: + return false, true case structs.AllocClientStatusFailed: return false, false } @@ -463,12 +471,17 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo // Handle service jobs switch alloc.DesiredStatus { - case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: + case structs.AllocDesiredStatusEvict: return false, true } switch alloc.ClientStatus { - case structs.AllocClientStatusComplete, structs.AllocClientStatusLost: + case structs.AllocClientStatusComplete: + return false, true + case structs.AllocClientStatusLost: + if alloc.DesiredStatus == structs.AllocDesiredStatusStop { + return true, false + } return false, true } @@ -585,10 +598,10 @@ func (a allocSet) delayByMaxClientDisconnect(now time.Time) ([]*delayedReschedul } // filterOutByClientStatus returns all allocs from the set without the specified client status. 
-func (a allocSet) filterOutByClientStatus(clientStatus string) allocSet { +func (a allocSet) filterOutByClientStatus(clientStatuses ...string) allocSet { allocs := make(allocSet) for _, alloc := range a { - if alloc.ClientStatus != clientStatus { + if slices.Contains(clientStatuses, alloc.ClientStatus) { allocs[alloc.ID] = alloc } } diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 600b9b4918a..36daf69e879 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -881,6 +881,14 @@ func TestReconcile_shouldFilter(t *testing.T) { untainted: false, ignore: false, }, + { + description: "batch lost", + batch: true, + desiredStatus: structs.AllocDesiredStatusStop, + clientStatus: structs.AllocClientStatusLost, + untainted: true, + ignore: false, + }, { description: "service running", batch: false, @@ -917,6 +925,14 @@ func TestReconcile_shouldFilter(t *testing.T) { untainted: false, ignore: true, }, + { + description: "service lost", + batch: false, + desiredStatus: structs.AllocDesiredStatusStop, + clientStatus: structs.AllocClientStatusLost, + untainted: true, + ignore: false, + }, } for _, tc := range testCases { From d9ea2385b58e3ba7e100bfefb9813e20501aaf41 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Mon, 6 Nov 2023 09:26:20 +0100 Subject: [PATCH 03/50] fix: update api tests to include new reschedule on lost option --- api/jobs_test.go | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index 5cc2777c5b8..895c9a238ff 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -493,8 +493,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - Count: pointerOf(1), + Name: pointerOf("bar"), + RescheduleOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -799,7 +800,6 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, }, - { name: "periodic", input: &Job{ @@ -869,7 +869,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), + Name: pointerOf("bar"), + RescheduleOnLost: pointerOf(true), Consul: &Consul{ Namespace: "", }, @@ -889,7 +890,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), + Name: pointerOf("baz"), + RescheduleOnLost: pointerOf(true), Tasks: []*Task{ { Name: "task1", @@ -984,8 +986,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - Count: pointerOf(1), + Name: pointerOf("baz"), + RescheduleOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1117,8 +1120,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - Count: pointerOf(1), + Name: pointerOf("bar"), + RescheduleOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1172,8 +1176,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - Count: pointerOf(1), + Name: pointerOf("baz"), + RescheduleOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1229,7 +1234,6 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, }, - { name: "multiregion", input: &Job{ From cd5c88c403e12b386275df8e584c171630654bd7 Mon Sep 17 
00:00:00 2001 From: Juanadelacuesta Date: Mon, 6 Nov 2023 12:43:03 +0100 Subject: [PATCH 04/50] func: mark non reschedule allocs as unknown --- scheduler/reconcile.go | 2 +- scheduler/reconcile_util.go | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 64de5edb9d8..6086a193169 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -994,7 +994,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc if group.RescheduleOnLost { a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) } else { - a.markStop(lost, structs.AllocClientStatusLost, allocLost) + a.markStop(lost, structs.AllocClientStatusUnknown, allocLost) } // If we are still deploying or creating canaries, don't stop them diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index b8fb2113a5f..691c965ec2d 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -298,9 +298,14 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } case structs.NodeStatusDown: - if alloc.ClientStatus == structs.AllocClientStatusLost && !alloc.RescheduleOnLost() { - untainted[alloc.ID] = alloc - continue + if !alloc.RescheduleOnLost() { + if alloc.ClientStatus == structs.AllocClientStatusLost { + untainted[alloc.ID] = alloc + continue + } else if alloc.ClientStatus == structs.AllocClientStatusRunning { + lost[alloc.ID] = alloc + continue + } } default: } From fd1f54fc2b30f11784970df335921ae5a19c2574 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Mon, 6 Nov 2023 13:32:56 +0100 Subject: [PATCH 05/50] func: remove the garbage collect error --- nomad/core_sched.go | 18 ++---------------- scheduler/reconcile_util.go | 1 - 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 1a52f804bad..97b311246d6 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -629,23 +629,8 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, return false } - // If the job is deleted all allocs can be removed - if job == nil { - return true - } - - tg := job.LookupTaskGroup(a.TaskGroup) - if tg == nil { - return true - } - - // Don't GC lost allocs when RescheduleOnLost is disabled - if !job.Stop && !tg.RescheduleOnLost && a.ClientStatus == structs.AllocClientStatusLost { - return false - } - // If the job is deleted, stopped or dead all allocs can be removed - if job.Stop || job.Status == structs.JobStatusDead { + if job == nil || job.Stop || job.Status == structs.JobStatusDead { return true } @@ -662,6 +647,7 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, return true } + tg := job.LookupTaskGroup(a.TaskGroup) reschedulePolicy := tg.ReschedulePolicy // No reschedule policy or rescheduling is disabled diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 691c965ec2d..d8296953299 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -307,7 +307,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS continue } } - default: } } From b72b36498b080b1156608fe051104786ad5b17ef Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Mon, 6 Nov 2023 14:00:27 +0100 Subject: [PATCH 06/50] Update core_sched.go --- nomad/core_sched.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 
97b311246d6..56dd9790c91 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -647,9 +647,13 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, return true } + var reschedulePolicy *structs.ReschedulePolicy tg := job.LookupTaskGroup(a.TaskGroup) - reschedulePolicy := tg.ReschedulePolicy + if tg != nil { + reschedulePolicy = tg.ReschedulePolicy + } + // No reschedule policy or rescheduling is disabled if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { return true From 866d3d3d1bfd00f78f09edf591802340fed0ade3 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 8 Nov 2023 12:16:43 +0100 Subject: [PATCH 07/50] temp: recognize the lost alloc as reconnecting --- client/allocrunner/alloc_runner.go | 5 +-- client/client.go | 5 +-- nomad/core_sched.go | 23 ++++++++++---- nomad/job_endpoint_test.go | 2 ++ nomad/structs/structs.go | 10 ++++-- scheduler/reconcile.go | 51 ++++++++++++++++++++---------- scheduler/reconcile_util.go | 7 ++++ 7 files changed, 73 insertions(+), 30 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 97b0f6ea942..1c1d6abd2ae 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -725,8 +725,10 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { wg.Add(1) go func(name string, tr *taskrunner.TaskRunner) { defer wg.Done() + taskEvent := structs.NewTaskEvent(structs.TaskKilling) taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping task", "error", err, "task_name", name) @@ -999,6 +1001,7 @@ func (ar *allocRunner) handleAllocUpdates() { // the latest update. func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { // Detect Stop updates + stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus() // Update ar.alloc @@ -1009,7 +1012,6 @@ func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { if err := ar.update(update); err != nil { ar.logger.Error("error running update hooks", "error", err) } - } // Update task runners @@ -1021,7 +1023,6 @@ func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { if stopping { ar.killTasks() } - } func (ar *allocRunner) Listener() *cstructs.AllocListener { diff --git a/client/client.go b/client/client.go index 2322c0515e0..bbe300dba93 100644 --- a/client/client.go +++ b/client/client.go @@ -2666,10 +2666,11 @@ func (c *Client) updateAlloc(update *structs.Allocation) { return } + alloc := ar.Alloc() // Reconnect unknown allocations if they were updated and are not terminal.
reconnect := update.ClientStatus == structs.AllocClientStatusUnknown && - update.AllocModifyIndex > ar.Alloc().AllocModifyIndex && - !update.ServerTerminalStatus() + update.AllocModifyIndex > alloc.AllocModifyIndex && + (!update.ServerTerminalStatus() || (!alloc.RescheduleOnLost() && update.ServerTerminalStatus())) if reconnect { err = ar.Reconnect(update) if err != nil { diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 56dd9790c91..82c481f34b9 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -630,13 +630,28 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, } // If the job is deleted, stopped or dead all allocs can be removed - if job == nil || job.Stop || job.Status == structs.JobStatusDead { + if job == nil || job.Stop { + return true + } + + tg := job.LookupTaskGroup(a.TaskGroup) + if tg == nil { return true } // If the allocation's desired state is Stop, it can be GCed even if it // has failed and hasn't been rescheduled. This can happen during job updates if a.DesiredStatus == structs.AllocDesiredStatusStop { + // Don't GC lost allocs when RescheduleOnLost is disabled + if !tg.RescheduleOnLost && a.ClientStatus == structs.AllocClientStatusUnknown { + return false + } + + return true + } + + // If the job is deleted, stopped or dead all allocs can be removed + if job.Status == structs.JobStatusDead { return true } @@ -648,12 +663,6 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, } var reschedulePolicy *structs.ReschedulePolicy - tg := job.LookupTaskGroup(a.TaskGroup) - - if tg != nil { - reschedulePolicy = tg.ReschedulePolicy - } - // No reschedule policy or rescheduling is disabled if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { return true diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 1d6c5109935..ea0536e7bbb 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -172,6 +172,7 @@ func TestJobEndpoint_Register_NonOverlapping(t *testing.T) { Namespace: structs.DefaultNamespace, }, } + var alloc *structs.AllocListStub testutil.Wait(t, func() (bool, error) { resp := structs.JobAllocationsResponse{} @@ -183,6 +184,7 @@ func TestJobEndpoint_Register_NonOverlapping(t *testing.T) { alloc = resp.Allocations[0] return true, nil }) + must.Eq(t, alloc.NodeID, node.ID) must.Eq(t, alloc.DesiredStatus, structs.AllocDesiredStatusRun) must.Eq(t, alloc.ClientStatus, structs.AllocClientStatusPending) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 86ed4a2c90e..733c11521b5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -8876,7 +8876,7 @@ const ( // TaskPluginHealthy indicates that a plugin managed by Nomad became healthy TaskPluginHealthy = "Plugin became healthy" - // TaskClientReconnected indicates that the client running the task disconnected. + // TaskClientReconnected indicates that the client running the task reconnected. TaskClientReconnected = "Reconnected" // TaskWaitingShuttingDownDelay indicates that the task is waiting for @@ -10999,7 +10999,6 @@ func (a *Allocation) DisconnectTimeout(now time.Time) time.Time { tg := a.Job.LookupTaskGroup(a.TaskGroup) timeout := tg.MaxClientDisconnect - if timeout == nil { return now } @@ -11296,6 +11295,13 @@ func (a *Allocation) NeedsToReconnect() bool { return disconnected } +// TaskIsReconnecting returns true if the last task event value is +// "Reconnected". 
+func (a *Allocation) TaskIsReconnected() bool { + events := len(a.TaskStates[a.TaskGroup].Events) + return a.TaskStates[a.TaskGroup].Events[events-1].Type == TaskClientReconnected +} + // IdentityClaims are the input to a JWT identifying a workload. It // should never be serialized to msgpack unsigned. type IdentityClaims struct { diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 6086a193169..2fea5958c92 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -460,11 +460,12 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // Validate and add reconnecting allocations to the plan so they are // logged. - a.computeReconnecting(reconnect) - - // The rest of the reconnecting allocations is now untainted and will - // be further reconciled below. - untainted = untainted.union(reconnect) + if len(reconnect) > 0 { + a.computeReconnecting(reconnect) + // The rest of the reconnecting allocations is now untainted and will + // be further reconciled below. + untainted = untainted.union(reconnect) + } } // Determine what set of disconnecting allocations need to be rescheduled now, @@ -485,19 +486,23 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // Find delays for any lost allocs that have stop_after_client_disconnect lostLaterEvals := map[string]string{} lostLater := []*delayedRescheduleInfo{} - if len(lost) > 0 && tg.RescheduleOnLost { - lostLater = lost.delayByStopAfterClientDisconnect() - lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name) + + if len(lost) > 0 { + if tg.RescheduleOnLost { + lostLater = lost.delayByStopAfterClientDisconnect() + lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name) + } } // Merge disconnecting with the stop_after_client_disconnect set into the // lostLaterEvals so that computeStop can add them to the stop set. lostLaterEvals = helper.MergeMapStringString(lostLaterEvals, timeoutLaterEvals) - // Create batched follow-up evaluations for allocations that are - // reschedulable later and mark the allocations for in place updating - a.createRescheduleLaterEvals(rescheduleLater, all, tg.Name) - + if len(rescheduleLater) > 0 { + // Create batched follow-up evaluations for allocations that are + // reschedulable later and mark the allocations for in place updating + a.createRescheduleLaterEvals(rescheduleLater, all, tg.Name) + } // Create a structure for choosing names. Seed with the taken names // which is the union of untainted, rescheduled, allocs on migrating // nodes, and allocs on down nodes (includes canaries) @@ -507,6 +512,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // Stop any unneeded allocations and update the untainted set to not // include stopped allocations. 
isCanarying := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted + stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, isCanarying, lostLaterEvals) desiredChanges.Stop += uint64(len(stop)) @@ -994,7 +1000,17 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc if group.RescheduleOnLost { a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) } else { - a.markStop(lost, structs.AllocClientStatusUnknown, allocLost) + for _, alloc := range lost { + updatedAlloc := alloc.Copy() + updatedAlloc.ClientStatus = structs.AllocClientStatusUnknown + updatedAlloc.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) + updatedAlloc.ClientDescription = allocUnknown + a.result.stop = append(a.result.stop, allocStopResult{ + alloc: updatedAlloc, + clientStatus: structs.AllocClientStatusUnknown, + statusDescription: allocUnknown, + }) + } } // If we are still deploying or creating canaries, don't stop them @@ -1003,7 +1019,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc } // Remove disconnected and lost allocations so they won't be stopped - knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown, structs.AllocClientStatusLost) + knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown) // Hot path the nothing to do case // @@ -1121,6 +1137,10 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc reconnect := make(allocSet) for _, reconnectingAlloc := range reconnecting { + if !reconnectingAlloc.RescheduleOnLost() { + continue + } + // Stop allocations that failed to reconnect. reconnectFailed := !reconnectingAlloc.ServerTerminalStatus() && reconnectingAlloc.ClientStatus == structs.AllocClientStatusFailed @@ -1298,9 +1318,6 @@ func (a *allocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedR // updates. Clients are responsible for reconciling the DesiredState with the // actual state as the node comes back online. func (a *allocReconciler) computeReconnecting(reconnecting allocSet) { - if len(reconnecting) == 0 { - return - } // Create updates that will be appended to the plan. 
for _, alloc := range reconnecting { diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index d8296953299..11ced44f8d5 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -264,6 +264,13 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS continue } + if alloc.TaskIsReconnected() && serverSupportsDisconnectedClients && + alloc.ClientStatus == structs.AllocClientStatusRunning && + alloc.DesiredStatus == structs.AllocDesiredStatusStop { + reconnecting[alloc.ID] = alloc + continue + } + taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID] if taintedNode != nil { // Group disconnecting From 2a028d6244991ff1e6c687fd88851c04a3db4819 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 8 Nov 2023 16:07:32 +0100 Subject: [PATCH 08/50] func: allow plans for lost nodes --- nomad/plan_apply.go | 23 +++++++++++++++ nomad/structs/structs.go | 7 ----- scheduler/reconcile.go | 57 +++++++++++++++++++------------------ scheduler/reconcile_util.go | 9 +----- 4 files changed, 53 insertions(+), 43 deletions(-) diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index aa02b40feed..3e0d4bc7c02 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -737,6 +737,17 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri return true, "", nil } return false, "node is disconnected and contains invalid updates", nil + } else if node.Status == structs.NodeStatusDown { + if isValidForLostNode(plan, node.ID) { + return true, "", nil + } + return false, "node is lost and contains invalid updates", nil + + } else if node.Status == structs.NodeStatusDown { + if isValidForDisconnectedNode(plan, node.ID) { + return true, "", nil + } + return false, "node is lost and contains invalid updates", nil } else if node.Status != structs.NodeStatusReady { return false, "node is not ready for placements", nil } @@ -790,3 +801,15 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool { return true } + +// The plan is only valid for disconnected nodes if it only contains +// updates to mark allocations as unknown. +func isValidForLostNode(plan *structs.Plan, nodeID string) bool { + for _, alloc := range plan.NodeAllocation[nodeID] { + if alloc.ClientStatus != structs.AllocClientStatusUnknown && !alloc.RescheduleOnLost() { + return false + } + } + + return true +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 733c11521b5..20ecd1336db 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -11295,13 +11295,6 @@ func (a *Allocation) NeedsToReconnect() bool { return disconnected } -// TaskIsReconnecting returns true if the last task event value is -// "Reconnected". -func (a *Allocation) TaskIsReconnected() bool { - events := len(a.TaskStates[a.TaskGroup].Events) - return a.TaskStates[a.TaskGroup].Events[events-1].Type == TaskClientReconnected -} - // IdentityClaims are the input to a JWT identifying a workload. It // should never be serialized to msgpack unsigned. type IdentityClaims struct { diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 2fea5958c92..ca7cf2691e4 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -472,15 +472,22 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // which ones later and which ones can't be rescheduled at all. 
timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { - untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) + if tg.RescheduleOnLost { + untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) + + rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) + untainted = untainted.union(untaintedDisconnecting) + rescheduleLater = append(rescheduleLater, laterDisconnecting...) + + // Find delays for any disconnecting allocs that have max_client_disconnect, + // create followup evals, and update the ClientStatus to unknown. + timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) - rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) - untainted = untainted.union(untaintedDisconnecting) - rescheduleLater = append(rescheduleLater, laterDisconnecting...) + } else { + untainted = untainted.union(disconnecting) + } - // Find delays for any disconnecting allocs that have max_client_disconnect, - // create followup evals, and update the ClientStatus to unknown. - timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) + a.appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals) } // Find delays for any lost allocs that have stop_after_client_disconnect @@ -1000,17 +1007,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc if group.RescheduleOnLost { a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) } else { - for _, alloc := range lost { - updatedAlloc := alloc.Copy() - updatedAlloc.ClientStatus = structs.AllocClientStatusUnknown - updatedAlloc.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) - updatedAlloc.ClientDescription = allocUnknown - a.result.stop = append(a.result.stop, allocStopResult{ - alloc: updatedAlloc, - clientStatus: structs.AllocClientStatusUnknown, - statusDescription: allocUnknown, - }) - } + //a.markStop() } // If we are still deploying or creating canaries, don't stop them @@ -1413,8 +1410,7 @@ func (a *allocReconciler) createLostLaterEvals(rescheduleLater []*delayedResched // createTimeoutLaterEvals creates followup evaluations with the // WaitUntil field set for allocations in an unknown state on disconnected nodes. -// Followup Evals are appended to a.result as a side effect. It returns a map of -// allocIDs to their associated followUpEvalIDs. +// It returns a map of allocIDs to their associated followUpEvalIDs. func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName string) map[string]string { if len(disconnecting) == 0 { return map[string]string{} @@ -1478,14 +1474,6 @@ func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName emitRescheduleInfo(timeoutInfo.alloc, eval) - // Create updates that will be applied to the allocs to mark the FollowupEvalID - // and the unknown ClientStatus and AllocState. 
- updatedAlloc := timeoutInfo.alloc.Copy() - updatedAlloc.ClientStatus = structs.AllocClientStatusUnknown - updatedAlloc.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) - updatedAlloc.ClientDescription = allocUnknown - updatedAlloc.FollowupEvalID = eval.ID - a.result.disconnectUpdates[updatedAlloc.ID] = updatedAlloc } a.appendFollowupEvals(tgName, evals) @@ -1493,6 +1481,19 @@ func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName return allocIDToFollowupEvalID } +// Create updates that will be applied to the allocs to mark the FollowupEvalID +// and the unknown ClientStatus and AllocState. +func (a *allocReconciler) appendUnknownDisconnectingUpdates(disconnecting allocSet, allocIDToFollowupEvalID map[string]string) { + for id, alloc := range disconnecting { + updatedAlloc := alloc.Copy() + updatedAlloc.ClientStatus = structs.AllocClientStatusUnknown + updatedAlloc.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown) + updatedAlloc.ClientDescription = allocUnknown + updatedAlloc.FollowupEvalID = allocIDToFollowupEvalID[id] + a.result.disconnectUpdates[updatedAlloc.ID] = updatedAlloc + } +} + // appendFollowupEvals appends a set of followup evals for a task group to the // desiredFollowupEvals map which is later added to the scheduler's followUpEvals set. func (a *allocReconciler) appendFollowupEvals(tgName string, evals []*structs.Evaluation) { diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 11ced44f8d5..228a7bec61c 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -264,13 +264,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS continue } - if alloc.TaskIsReconnected() && serverSupportsDisconnectedClients && - alloc.ClientStatus == structs.AllocClientStatusRunning && - alloc.DesiredStatus == structs.AllocDesiredStatusStop { - reconnecting[alloc.ID] = alloc - continue - } - taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID] if taintedNode != nil { // Group disconnecting @@ -310,7 +303,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS untainted[alloc.ID] = alloc continue } else if alloc.ClientStatus == structs.AllocClientStatusRunning { - lost[alloc.ID] = alloc + disconnecting[alloc.ID] = alloc continue } } From 4a57aafbf327aed26f42099d5e85c9b69f2d3cef Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Tue, 21 Nov 2023 18:23:33 +0100 Subject: [PATCH 09/50] style: rename option --- api/jobs_test.go | 66 +++++++++---------- api/tasks.go | 6 +- client/client.go | 2 +- command/agent/job_endpoint.go | 6 +- command/agent/job_endpoint_test.go | 8 +-- nomad/core_sched.go | 23 ++----- nomad/core_sched_test.go | 43 +++++------- nomad/mock/job.go | 6 +- nomad/plan_apply.go | 2 +- nomad/structs/diff_test.go | 54 +++++++-------- nomad/structs/structs.go | 16 ++--- scheduler/reconcile.go | 21 ++---- scheduler/reconcile_test.go | 2 +- scheduler/reconcile_util.go | 2 +- scheduler/util.go | 6 +- scheduler/util_test.go | 4 +- .../content/docs/job-specification/group.mdx | 2 +- 17 files changed, 122 insertions(+), 147 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index 895c9a238ff..0c9c6e829fa 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -312,9 +312,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), - RescheduleOnLost: pointerOf(true), + Name: pointerOf(""), + Count: pointerOf(1), + 
SingleInstanceOnLost: pointerOf(false), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -400,9 +400,9 @@ func TestJobs_Canonicalize(t *testing.T) { JobModifyIndex: pointerOf(uint64(0)), TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), - RescheduleOnLost: pointerOf(true), + Name: pointerOf(""), + Count: pointerOf(1), + SingleInstanceOnLost: pointerOf(false), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -493,9 +493,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - RescheduleOnLost: pointerOf(true), - Count: pointerOf(1), + Name: pointerOf("bar"), + SingleInstanceOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -558,9 +558,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), - RescheduleOnLost: pointerOf(true), + Name: pointerOf("cache"), + Count: pointerOf(1), + SingleInstanceOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -670,9 +670,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), - RescheduleOnLost: pointerOf(true), + Name: pointerOf("cache"), + Count: pointerOf(1), + SingleInstanceOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -869,8 +869,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - RescheduleOnLost: pointerOf(true), + Name: pointerOf("bar"), + SingleInstanceOnLost: pointerOf(true), Consul: &Consul{ Namespace: "", }, @@ -890,8 +890,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - RescheduleOnLost: pointerOf(true), + Name: pointerOf("baz"), + SingleInstanceOnLost: pointerOf(false), Tasks: []*Task{ { Name: "task1", @@ -936,9 +936,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - Count: pointerOf(1), - RescheduleOnLost: pointerOf(true), + Name: pointerOf("bar"), + Count: pointerOf(1), + SingleInstanceOnLost: pointerOf(true), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -986,9 +986,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - RescheduleOnLost: pointerOf(true), - Count: pointerOf(1), + Name: pointerOf("baz"), + SingleInstanceOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1046,8 +1046,8 @@ func TestJobs_Canonicalize(t *testing.T) { ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - RescheduleOnLost: pointerOf(true), + Name: pointerOf("bar"), + SingleInstanceOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Delay: pointerOf(15 * time.Second), Attempts: pointerOf(2), @@ -1120,9 +1120,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - RescheduleOnLost: pointerOf(true), - Count: pointerOf(1), + Name: pointerOf("bar"), + SingleInstanceOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1176,9 +1176,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - 
Name: pointerOf("baz"), - RescheduleOnLost: pointerOf(true), - Count: pointerOf(1), + Name: pointerOf("baz"), + SingleInstanceOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), diff --git a/api/tasks.go b/api/tasks.go index dbcc1bc0b1d..89091854f6d 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -459,7 +459,7 @@ type TaskGroup struct { MaxClientDisconnect *time.Duration `mapstructure:"max_client_disconnect" hcl:"max_client_disconnect,optional"` Scaling *ScalingPolicy `hcl:"scaling,block"` Consul *Consul `hcl:"consul,block"` - RescheduleOnLost *bool `hcl:"reschedule_on_lost,optional"` + SingleInstanceOnLost *bool `hcl:"single_instance_on_lost,optional"` } // NewTaskGroup creates a new TaskGroup. @@ -578,8 +578,8 @@ func (g *TaskGroup) Canonicalize(job *Job) { for _, s := range g.Services { s.Canonicalize(nil, g, job) } - if g.RescheduleOnLost == nil { - g.RescheduleOnLost = pointerOf(true) + if g.SingleInstanceOnLost == nil { + g.SingleInstanceOnLost = pointerOf(false) } } diff --git a/client/client.go b/client/client.go index bbe300dba93..b9095cc3db1 100644 --- a/client/client.go +++ b/client/client.go @@ -2670,7 +2670,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) { // Reconnect unknown allocations if they were updated and are not terminal. reconnect := update.ClientStatus == structs.AllocClientStatusUnknown && update.AllocModifyIndex > alloc.AllocModifyIndex && - (!update.ServerTerminalStatus() || (!alloc.RescheduleOnLost() && update.ServerTerminalStatus())) + (!update.ServerTerminalStatus() || (!alloc.SingleInstanceOnLost() && update.ServerTerminalStatus())) if reconnect { err = ar.Reconnect(update) if err != nil { diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 3e3169578df..7a5f8c2dae1 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1135,10 +1135,10 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta RenderTemplates: *taskGroup.RestartPolicy.RenderTemplates, } - if taskGroup.RescheduleOnLost == nil { - tg.RescheduleOnLost = true + if taskGroup.SingleInstanceOnLost == nil { + tg.SingleInstanceOnLost = false } else { - tg.RescheduleOnLost = *taskGroup.RescheduleOnLost + tg.SingleInstanceOnLost = *taskGroup.SingleInstanceOnLost } if taskGroup.ShutdownDelay != nil { diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 96883f812ef..8611c385376 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -3058,7 +3058,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Operand: "z", }, }, - RescheduleOnLost: true, + SingleInstanceOnLost: false, Affinities: []*structs.Affinity{ { LTarget: "x", @@ -3553,9 +3553,9 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { }, TaskGroups: []*structs.TaskGroup{ { - Name: "group1", - Count: 5, - RescheduleOnLost: true, + Name: "group1", + Count: 5, + SingleInstanceOnLost: false, Constraints: []*structs.Constraint{ { LTarget: "x", diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 82c481f34b9..2b115fcecb4 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -630,28 +630,13 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, } // If the job is deleted, stopped or dead all allocs can be removed - if job == nil || job.Stop { - return true - } - - tg := job.LookupTaskGroup(a.TaskGroup) - if tg == nil { + if job == nil || job.Stop || job.Status == 
structs.JobStatusDead { return true } // If the allocation's desired state is Stop, it can be GCed even if it // has failed and hasn't been rescheduled. This can happen during job updates if a.DesiredStatus == structs.AllocDesiredStatusStop { - // Don't GC lost allocs when RescheduleOnLost is disabled - if !tg.RescheduleOnLost && a.ClientStatus == structs.AllocClientStatusUnknown { - return false - } - - return true - } - - // If the job is deleted, stopped or dead all allocs can be removed - if job.Status == structs.JobStatusDead { return true } @@ -663,6 +648,12 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, } var reschedulePolicy *structs.ReschedulePolicy + tg := job.LookupTaskGroup(a.TaskGroup) + + if tg != nil { + reschedulePolicy = tg.ReschedulePolicy + } + // No reschedule policy or rescheduling is disabled if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { return true diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 76a32afa395..745ed8c9ef0 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -1940,21 +1940,21 @@ func TestCoreScheduler_PartitionJobReap(t *testing.T) { // Tests various scenarios when allocations are eligible to be GCed func TestAllocation_GCEligible(t *testing.T) { type testCase struct { - Desc string - GCTime time.Time - ClientStatus string - DesiredStatus string - JobStatus string - JobStop bool - RescheduleOnLost *bool - AllocJobModifyIndex uint64 - JobModifyIndex uint64 - ModifyIndex uint64 - NextAllocID string - ReschedulePolicy *structs.ReschedulePolicy - RescheduleTrackers []*structs.RescheduleEvent - ThresholdIndex uint64 - ShouldGC bool + Desc string + GCTime time.Time + ClientStatus string + DesiredStatus string + JobStatus string + JobStop bool + SingleInstanceOnLost *bool + AllocJobModifyIndex uint64 + JobModifyIndex uint64 + ModifyIndex uint64 + NextAllocID string + ReschedulePolicy *structs.ReschedulePolicy + RescheduleTrackers []*structs.RescheduleEvent + ThresholdIndex uint64 + ShouldGC bool } fail := time.Now() @@ -2129,15 +2129,6 @@ func TestAllocation_GCEligible(t *testing.T) { JobStatus: structs.JobStatusDead, ShouldGC: true, }, - { - Desc: "Don't GC when alloc is lost and not being rescheduled", - ClientStatus: structs.AllocClientStatusLost, - DesiredStatus: structs.AllocDesiredStatusStop, - RescheduleOnLost: pointer.Of(false), - GCTime: fail, - JobStatus: structs.JobStatusDead, - ShouldGC: false, - }, { Desc: "GC when job status is dead", ClientStatus: structs.AllocClientStatusFailed, @@ -2184,8 +2175,8 @@ func TestAllocation_GCEligible(t *testing.T) { alloc.NextAllocation = tc.NextAllocID job := mock.Job() alloc.TaskGroup = job.TaskGroups[0].Name - if tc.RescheduleOnLost != nil { - job.TaskGroups[0].RescheduleOnLost = *tc.RescheduleOnLost + if tc.SingleInstanceOnLost != nil { + job.TaskGroups[0].SingleInstanceOnLost = *tc.SingleInstanceOnLost } job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy if tc.JobStatus != "" { diff --git a/nomad/mock/job.go b/nomad/mock/job.go index 845c7fd9c81..58c0ccb7534 100644 --- a/nomad/mock/job.go +++ b/nomad/mock/job.go @@ -31,9 +31,9 @@ func Job() *structs.Job { }, TaskGroups: []*structs.TaskGroup{ { - Name: "web", - Count: 10, - RescheduleOnLost: true, + Name: "web", + Count: 10, + SingleInstanceOnLost: true, Constraints: []*structs.Constraint{ { LTarget: "${attr.consul.version}", diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 3e0d4bc7c02..1c3dcd87cf2 100644 --- 
a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -806,7 +806,7 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool { // updates to mark allocations as unknown. func isValidForLostNode(plan *structs.Plan, nodeID string) bool { for _, alloc := range plan.NodeAllocation[nodeID] { - if alloc.ClientStatus != structs.AllocClientStatusUnknown && !alloc.RescheduleOnLost() { + if alloc.ClientStatus != structs.AllocClientStatusUnknown && !alloc.SingleInstanceOnLost() { return false } } diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 0d2837cdfdd..3f80bdfcfa8 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1253,38 +1253,38 @@ func TestJobDiff(t *testing.T) { Old: &Job{ TaskGroups: []*TaskGroup{ { - Name: "foo", - Count: 1, - RescheduleOnLost: true, + Name: "foo", + Count: 1, + SingleInstanceOnLost: true, }, { - Name: "bar", - Count: 1, - RescheduleOnLost: false, + Name: "bar", + Count: 1, + SingleInstanceOnLost: false, }, { - Name: "baz", - Count: 1, - RescheduleOnLost: true, + Name: "baz", + Count: 1, + SingleInstanceOnLost: true, }, }, }, New: &Job{ TaskGroups: []*TaskGroup{ { - Name: "bar", - Count: 1, - RescheduleOnLost: false, + Name: "bar", + Count: 1, + SingleInstanceOnLost: false, }, { - Name: "baz", - Count: 2, - RescheduleOnLost: true, + Name: "baz", + Count: 2, + SingleInstanceOnLost: true, }, { - Name: "bam", - Count: 1, - RescheduleOnLost: true, + Name: "bam", + Count: 1, + SingleInstanceOnLost: true, }, }, }, @@ -1303,7 +1303,7 @@ func TestJobDiff(t *testing.T) { }, { Type: DiffTypeAdded, - Name: "RescheduleOnLost", + Name: "SingleInstanceOnLost", Old: "", New: "true", }, @@ -1337,7 +1337,7 @@ func TestJobDiff(t *testing.T) { }, { Type: DiffTypeDeleted, - Name: "RescheduleOnLost", + Name: "SingleInstanceOnLost", Old: "true", New: "", }, @@ -1862,14 +1862,14 @@ func TestTaskGroupDiff(t *testing.T) { { TestCase: "Reschedule on lost diff", Old: &TaskGroup{ - Name: "foo", - Count: 100, - RescheduleOnLost: true, + Name: "foo", + Count: 100, + SingleInstanceOnLost: true, }, New: &TaskGroup{ - Name: "foo", - Count: 100, - RescheduleOnLost: false, + Name: "foo", + Count: 100, + SingleInstanceOnLost: false, }, Expected: &TaskGroupDiff{ Type: DiffTypeEdited, @@ -1877,7 +1877,7 @@ func TestTaskGroupDiff(t *testing.T) { Fields: []*FieldDiff{ { Type: DiffTypeEdited, - Name: "RescheduleOnLost", + Name: "SingleInstanceOnLost", Old: "true", New: "false", }, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 20ecd1336db..5ec3e42929f 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6642,10 +6642,10 @@ type TaskGroup struct { // allocations for tasks in this group to attempt to resume running without a restart. MaxClientDisconnect *time.Duration - // RescheduleOnLost is used to control how allocations on disconnected - // nodes are handled. For backwards compatibility, it defaults to true. - // When true, such jobs are rescheduled. - RescheduleOnLost bool + // SingleInstanceOnLost is used to control if a lost allocation will be replaced + // or not. When true, no new allocations will be scheduled to replace the lost + // a lost one. 
+ SingleInstanceOnLost bool } func (tg *TaskGroup) Copy() *TaskGroup { @@ -11024,13 +11024,13 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien return false } -// RescheduleOnLost determines if an alloc is configured to restart if -// the client becomes lost -func (a *Allocation) RescheduleOnLost() bool { +// SingleInstanceOnLost determines if an alloc allows to have a replacement +// when lost. +func (a *Allocation) SingleInstanceOnLost() bool { if a.Job != nil { tg := a.Job.LookupTaskGroup(a.TaskGroup) if tg != nil { - return tg.RescheduleOnLost + return tg.SingleInstanceOnLost } } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index ca7cf2691e4..004b0e55ccf 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -472,7 +472,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // which ones later and which ones can't be rescheduled at all. timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { - if tg.RescheduleOnLost { + if tg.SingleInstanceOnLost { untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) @@ -495,10 +495,8 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { lostLater := []*delayedRescheduleInfo{} if len(lost) > 0 { - if tg.RescheduleOnLost { - lostLater = lost.delayByStopAfterClientDisconnect() - lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name) - } + lostLater = lost.delayByStopAfterClientDisconnect() + lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name) } // Merge disconnecting with the stop_after_client_disconnect set into the @@ -785,7 +783,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // Add replacements for disconnected and lost allocs up to group.Count existing := len(untainted) + len(migrate) + len(reschedule) - if group.RescheduleOnLost { + if group.SingleInstanceOnLost { // Add replacements for lost for _, alloc := range lost { if existing >= group.Count { @@ -807,7 +805,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, }) } } else { - //Don't add placements for lost where RescheduleOnLost is not enabled + //Don't add placements for lost where SingleInstanceOnLost is not enabled existing += len(lost) } @@ -865,7 +863,7 @@ func (a *allocReconciler) computeReplacements(tg *structs.TaskGroup, deploymentP // If allocs have been lost, determine the number of replacements that are needed // and add placements to the result for the lost allocs. - if len(lost) != 0 && tg.RescheduleOnLost { + if len(lost) != 0 && tg.SingleInstanceOnLost { allowed := min(len(lost), len(place)) desiredChanges.Place += uint64(allowed) a.result.place = append(a.result.place, place[:allowed]...) @@ -1004,11 +1002,6 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc // Mark all lost allocations for stop. 
var stop allocSet stop = stop.union(lost) - if group.RescheduleOnLost { - a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) - } else { - //a.markStop() - } // If we are still deploying or creating canaries, don't stop them if isCanarying { @@ -1134,7 +1127,7 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc reconnect := make(allocSet) for _, reconnectingAlloc := range reconnecting { - if !reconnectingAlloc.RescheduleOnLost() { + if !reconnectingAlloc.SingleInstanceOnLost() { continue } diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 30c5a52a376..f4df5b88198 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -892,7 +892,7 @@ func TestReconciler_LostNode_RescheduleOff(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].RescheduleOnLost = false + job.TaskGroups[0].SingleInstanceOnLost = false // Create 10 existing allocations var allocs []*structs.Allocation for i := 0; i < 10; i++ { diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 228a7bec61c..3d934e398ef 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -298,7 +298,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } case structs.NodeStatusDown: - if !alloc.RescheduleOnLost() { + if !alloc.SingleInstanceOnLost() { if alloc.ClientStatus == structs.AllocClientStatusLost { untainted[alloc.ID] = alloc continue diff --git a/scheduler/util.go b/scheduler/util.go index cf3a22a349c..2156fd15e4b 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -222,9 +222,9 @@ func tasksUpdated(jobA, jobB *structs.Job, taskGroup string) comparison { return difference("number of tasks", lenA, lenB) } - // Check for rescheduleOnLost changes - if a.RescheduleOnLost != b.RescheduleOnLost { - return difference("reschedule on lost", a.RescheduleOnLost, b.RescheduleOnLost) + // Check for SingleInstanceOnLost changes + if a.SingleInstanceOnLost != b.SingleInstanceOnLost { + return difference("reschedule on lost", a.SingleInstanceOnLost, b.SingleInstanceOnLost) } // Check ephemeral disk diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 135881dbe0c..a25a1c964a7 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -548,9 +548,9 @@ func TestTasksUpdated(t *testing.T) { j30.TaskGroups[0].Tasks[0].Templates[0].ErrMissingKey = true require.True(t, tasksUpdated(j29, j30, name).modified) - // Change rescheduleOnLost mode + // Change SingleInstanceOnLost mode j31 := mock.Job() - j31.TaskGroups[0].RescheduleOnLost = false + j31.TaskGroups[0].SingleInstanceOnLost = false require.True(t, tasksUpdated(j1, j31, name).modified) } diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 566dce606b4..1766ace11f6 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,7 +63,7 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". -- `reschedule_on_lost` `(bool: true)` - Specifies if a groups tasks can be +- `single_instance_on_lost` `(bool: true)` - Specifies if a groups tasks can be rescheduled when their allocations become lost. If set to false, jobs with lost tasks will be left in a running state until an operator intervenes. 
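For illustration, here is a minimal job-specification sketch of where the new group-level attribute sits. The job, group, and task names and the Docker driver config are placeholders, and the attribute's default value and exact wording are still being reworked in the commits that follow, so treat this as assumed usage rather than final documentation:

```hcl
job "docs" {
  group "example" {
    # Assumed semantics per the later commits in this series: when true, the
    # group keeps at most a single instance per allocation name. If the node
    # running it goes down or disconnects, the allocation is not rescheduled
    # elsewhere and is reported as "unknown" until the node returns or an
    # operator intervenes. When false, lost allocations are replaced as today.
    single_instance_on_lost = true

    task "server" {
      driver = "docker"

      config {
        image = "nginx:1.25"
      }
    }
  }
}
```

Note that the default is still in flux at this point in the series: the API canonicalization above fills in `false` when the field is unset (keeping the existing rescheduling behaviour for jobs that do not opt in), while this docs hunk still advertises `(bool: true)`.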
From 3d8f65236f878e5eab2d52c5160319c657c93258 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 22 Nov 2023 13:45:15 +0100 Subject: [PATCH 10/50] func: remove the changes to shouldFilter, unknown status should not be handled there --- nomad/mock/job.go | 2 +- scheduler/reconcile_util.go | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/nomad/mock/job.go b/nomad/mock/job.go index 58c0ccb7534..a0b8c237313 100644 --- a/nomad/mock/job.go +++ b/nomad/mock/job.go @@ -33,7 +33,7 @@ func Job() *structs.Job { { Name: "web", Count: 10, - SingleInstanceOnLost: true, + SingleInstanceOnLost: false, Constraints: []*structs.Constraint{ { LTarget: "${attr.consul.version}", diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 3d934e398ef..5f0d55c53cc 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -410,6 +410,7 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time } isUntainted, ignore := shouldFilter(alloc, isBatch) + fmt.Println(isUntainted, ignore) if isUntainted && !isDisconnecting { untainted[alloc.ID] = alloc } @@ -458,14 +459,12 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo if alloc.RanSuccessfully() { return true, false } - + return false, true case structs.AllocDesiredStatusEvict: return false, true } switch alloc.ClientStatus { - case structs.AllocClientStatusComplete: - return false, true case structs.AllocClientStatusFailed: return false, false } @@ -475,17 +474,12 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo // Handle service jobs switch alloc.DesiredStatus { - case structs.AllocDesiredStatusEvict: + case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: return false, true } switch alloc.ClientStatus { - case structs.AllocClientStatusComplete: - return false, true - case structs.AllocClientStatusLost: - if alloc.DesiredStatus == structs.AllocDesiredStatusStop { - return true, false - } + case structs.AllocClientStatusComplete, structs.AllocClientStatusLost: return false, true } From 967f8401bc21bbc94bc7deb155300780ae6319fa Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 22 Nov 2023 14:39:52 +0100 Subject: [PATCH 11/50] func: remove the changes to shouldFilter, unknown status should not be handled there --- client/client.go | 2 +- nomad/plan_apply.go | 13 ++++------ scheduler/reconcile.go | 47 +++++++++++++++++-------------------- scheduler/reconcile_util.go | 22 ++++++++--------- 4 files changed, 37 insertions(+), 47 deletions(-) diff --git a/client/client.go b/client/client.go index b9095cc3db1..902700b72a0 100644 --- a/client/client.go +++ b/client/client.go @@ -2670,7 +2670,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) { // Reconnect unknown allocations if they were updated and are not terminal. 
reconnect := update.ClientStatus == structs.AllocClientStatusUnknown && update.AllocModifyIndex > alloc.AllocModifyIndex && - (!update.ServerTerminalStatus() || (!alloc.SingleInstanceOnLost() && update.ServerTerminalStatus())) + (!update.ServerTerminalStatus() || !alloc.SingleInstanceOnLost()) if reconnect { err = ar.Reconnect(update) if err != nil { diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 1c3dcd87cf2..f7f7db0c2ab 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -742,12 +742,6 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri return true, "", nil } return false, "node is lost and contains invalid updates", nil - - } else if node.Status == structs.NodeStatusDown { - if isValidForDisconnectedNode(plan, node.ID) { - return true, "", nil - } - return false, "node is lost and contains invalid updates", nil } else if node.Status != structs.NodeStatusReady { return false, "node is not ready for placements", nil } @@ -802,11 +796,12 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool { return true } -// The plan is only valid for disconnected nodes if it only contains -// updates to mark allocations as unknown. +// The plan is only valid for lost nodes if it only contains +// updates to mark allocations as unknown and those allocations are configured +// as non reschedulables when lost. func isValidForLostNode(plan *structs.Plan, nodeID string) bool { for _, alloc := range plan.NodeAllocation[nodeID] { - if alloc.ClientStatus != structs.AllocClientStatusUnknown && !alloc.SingleInstanceOnLost() { + if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.SingleInstanceOnLost()) { return false } } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 004b0e55ccf..9f1d366f5d1 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -472,7 +472,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // which ones later and which ones can't be rescheduled at all. 
timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { - if tg.SingleInstanceOnLost { + if !tg.SingleInstanceOnLost { untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) @@ -783,30 +783,25 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // Add replacements for disconnected and lost allocs up to group.Count existing := len(untainted) + len(migrate) + len(reschedule) - if group.SingleInstanceOnLost { - // Add replacements for lost - for _, alloc := range lost { - if existing >= group.Count { - // Reached desired count, do not replace remaining lost - // allocs - break - } - - existing++ - place = append(place, allocPlaceResult{ - name: alloc.Name, - taskGroup: group, - previousAlloc: alloc, - reschedule: false, - canary: alloc.DeploymentStatus.IsCanary(), - downgradeNonCanary: isCanarying && !alloc.DeploymentStatus.IsCanary(), - minJobVersion: alloc.Job.Version, - lost: true, - }) + // Add replacements for lost + for _, alloc := range lost { + if existing >= group.Count { + // Reached desired count, do not replace remaining lost + // allocs + break } - } else { - //Don't add placements for lost where SingleInstanceOnLost is not enabled - existing += len(lost) + + existing++ + place = append(place, allocPlaceResult{ + name: alloc.Name, + taskGroup: group, + previousAlloc: alloc, + reschedule: false, + canary: alloc.DeploymentStatus.IsCanary(), + downgradeNonCanary: isCanarying && !alloc.DeploymentStatus.IsCanary(), + minJobVersion: alloc.Job.Version, + lost: true, + }) } // Add remaining placement results @@ -863,7 +858,7 @@ func (a *allocReconciler) computeReplacements(tg *structs.TaskGroup, deploymentP // If allocs have been lost, determine the number of replacements that are needed // and add placements to the result for the lost allocs. - if len(lost) != 0 && tg.SingleInstanceOnLost { + if len(lost) != 0 { allowed := min(len(lost), len(place)) desiredChanges.Place += uint64(allowed) a.result.place = append(a.result.place, place[:allowed]...) 
@@ -1003,6 +998,8 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc var stop allocSet stop = stop.union(lost) + a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) + // If we are still deploying or creating canaries, don't stop them if isCanarying { untainted = untainted.difference(canaries) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 5f0d55c53cc..bf58f548ad9 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -296,17 +296,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS reconnecting[alloc.ID] = alloc continue } - - case structs.NodeStatusDown: - if !alloc.SingleInstanceOnLost() { - if alloc.ClientStatus == structs.AllocClientStatusLost { - untainted[alloc.ID] = alloc - continue - } else if alloc.ClientStatus == structs.AllocClientStatusRunning { - disconnecting[alloc.ID] = alloc - continue - } - } } } @@ -372,6 +361,16 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS // Allocs on GC'd (nil) or lost nodes are Lost if taintedNode == nil || taintedNode.TerminalStatus() { + if alloc.SingleInstanceOnLost() { + if alloc.ClientStatus == structs.AllocClientStatusUnknown { + untainted[alloc.ID] = alloc + continue + } else if alloc.ClientStatus == structs.AllocClientStatusRunning { + disconnecting[alloc.ID] = alloc + continue + } + } + lost[alloc.ID] = alloc continue } @@ -410,7 +409,6 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time } isUntainted, ignore := shouldFilter(alloc, isBatch) - fmt.Println(isUntainted, ignore) if isUntainted && !isDisconnecting { untainted[alloc.ID] = alloc } From 8d9a2ee001488ddec0a12f757f42b05f086cc560 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 22 Nov 2023 15:22:37 +0100 Subject: [PATCH 12/50] fix: add missing config to test for should filter --- scheduler/reconcile_util_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 36daf69e879..35785a9f158 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -926,12 +926,13 @@ func TestReconcile_shouldFilter(t *testing.T) { ignore: true, }, { - description: "service lost", + description: "service client complete", batch: false, - desiredStatus: structs.AllocDesiredStatusStop, - clientStatus: structs.AllocClientStatusLost, - untainted: true, - ignore: false, + failed: false, + desiredStatus: structs.AllocDesiredStatusRun, + clientStatus: structs.AllocClientStatusComplete, + untainted: false, + ignore: true, }, } From c399f1316a0468f48ee12472268ba79e78092053 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 22 Nov 2023 15:34:40 +0100 Subject: [PATCH 13/50] style: improve documentation of the new option --- nomad/structs/structs.go | 6 +++--- scheduler/reconcile.go | 9 +++------ website/content/docs/job-specification/group.mdx | 8 +++++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 5ec3e42929f..813e34498a5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6642,9 +6642,9 @@ type TaskGroup struct { // allocations for tasks in this group to attempt to resume running without a restart. MaxClientDisconnect *time.Duration - // SingleInstanceOnLost is used to control if a lost allocation will be replaced - // or not. 
When true, no new allocations will be scheduled to replace the lost - // a lost one. + // SingleInstanceOnLost is used to signal if multiple instances of the same + // task can be running at the same time, it controls if a replacement is triggered + // when the task state is unknown SingleInstanceOnLost bool } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 9f1d366f5d1..3935bfccd21 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -565,7 +565,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // placements can be made without any other consideration. deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !isCanarying - underProvisionedBy = a.computeReplacements(tg, deploymentPlaceReady, desiredChanges, place, rescheduleNow, lost, underProvisionedBy) + underProvisionedBy = a.computeReplacements(deploymentPlaceReady, desiredChanges, place, rescheduleNow, lost, underProvisionedBy) if deploymentPlaceReady { a.computeDestructiveUpdates(destructive, underProvisionedBy, desiredChanges, tg) @@ -823,7 +823,7 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // and if the placement is already rescheduling or part of a failed deployment. // The input deploymentPlaceReady is calculated as the deployment is not paused, failed, or canarying. // It returns the number of allocs still needed. -func (a *allocReconciler) computeReplacements(tg *structs.TaskGroup, deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, +func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desiredChanges *structs.DesiredUpdates, place []allocPlaceResult, rescheduleNow, lost allocSet, underProvisionedBy int) int { // Disconnecting allocs are not failing, but are included in rescheduleNow. @@ -1005,7 +1005,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc untainted = untainted.difference(canaries) } - // Remove disconnected and lost allocations so they won't be stopped + // Remove disconnected allocations so they won't be stopped knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown) // Hot path the nothing to do case @@ -1124,9 +1124,6 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc reconnect := make(allocSet) for _, reconnectingAlloc := range reconnecting { - if !reconnectingAlloc.SingleInstanceOnLost() { - continue - } // Stop allocations that failed to reconnect. reconnectFailed := !reconnectingAlloc.ServerTerminalStatus() && diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 1766ace11f6..3015618773a 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,9 +63,11 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". -- `single_instance_on_lost` `(bool: true)` - Specifies if a groups tasks can be - rescheduled when their allocations become lost. If set to false, jobs with - lost tasks will be left in a running state until an operator intervenes. +- `single_instance_on_lost` `(bool: true)` - Specifies if a groups tasks that can't + have multiple instances running at the same time. 
If the node this tasks are + running on becomes disconnected or goes down, this allocations wont be rescheduled + and will show up as `unknown` until the node comes back up or they are manually + restarted. - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks in this group. If omitted, a default policy exists for each job From 7319000427261e08f4b157e03a19384cd4626c9a Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 22 Nov 2023 15:54:47 +0100 Subject: [PATCH 14/50] fix: wrong logic in filterOutByClientStatus from multiple statuses change --- scheduler/reconcile_util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index bf58f548ad9..dc81f5d6517 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -597,7 +597,7 @@ func (a allocSet) delayByMaxClientDisconnect(now time.Time) ([]*delayedReschedul func (a allocSet) filterOutByClientStatus(clientStatuses ...string) allocSet { allocs := make(allocSet) for _, alloc := range a { - if slices.Contains(clientStatuses, alloc.ClientStatus) { + if !slices.Contains(clientStatuses, alloc.ClientStatus) { allocs[alloc.ID] = alloc } } From a2f7c4da64677c6908533be624472b602faf4f29 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 22 Nov 2023 17:31:01 +0100 Subject: [PATCH 15/50] func: update tests for new feature --- scheduler/reconcile_test.go | 111 +++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index f4df5b88198..cc68dfefb5d 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -888,52 +888,83 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { } // Tests the reconciler properly handles lost nodes with allocations -func TestReconciler_LostNode_RescheduleOff(t *testing.T) { +func TestReconciler_LostNode_SingleInstanceOnLost(t *testing.T) { ci.Parallel(t) - - job := mock.Job() - job.TaskGroups[0].SingleInstanceOnLost = false - // Create 10 existing allocations - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) + testCases := []struct { + name string + singleInstanceOnLost bool + place int + stop int + ignore int + disconnect int + allocStatus string + }{ + { + name: "SingleInstanceOnLost off", + singleInstanceOnLost: false, + place: 2, + stop: 2, + ignore: 8, + allocStatus: structs.AllocClientStatusLost, + }, + { + name: "SingleInstanceOnLost on", + singleInstanceOnLost: true, + place: 0, + stop: 0, + ignore: 10, + disconnect: 2, + allocStatus: structs.AllocClientStatusUnknown, + }, } - // Build a map of tainted nodes - tainted := make(map[string]*structs.Node, 2) - for i := 0; i < 2; i++ { - n := mock.Node() - n.ID = allocs[i].NodeID - n.Status = structs.NodeStatusDown - tainted[n.ID] = n - } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + job := mock.Job() + job.TaskGroups[0].SingleInstanceOnLost = tc.singleInstanceOnLost + // Create 10 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + 
alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.ClientStatus = structs.AllocClientStatusRunning + } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) - r := reconciler.Compute() + // Build a map of tainted nodes + tainted := make(map[string]*structs.Node, 2) + for i := 0; i < 2; i++ { + n := mock.Node() + n.ID = allocs[i].NodeID + n.Status = structs.NodeStatusDown + tainted[n.ID] = n + } - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 0, - inplace: 0, - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 0, - Stop: 2, - Ignore: 8, - }, - }, - }) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, tainted, "", 50, true) + r := reconciler.Compute() - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: tc.place, + stop: tc.stop, + disconnectUpdates: tc.disconnect, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: uint64(tc.place), + Stop: uint64(tc.stop), + Ignore: uint64(tc.ignore), + }, + }, + }) + }) + } } // Tests the reconciler properly handles lost nodes with allocations From d1c2d7cb975fa2b09e98ad5513f3324e5b2537a1 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Mon, 27 Nov 2023 10:42:03 +0100 Subject: [PATCH 16/50] func: update tests --- nomad/structs/structs.go | 2 +- nomad/worker.go | 1 + scheduler/reconcile.go | 20 +- scheduler/reconcile_test.go | 5087 +++++++++++++++--------------- scheduler/reconcile_util.go | 58 +- scheduler/reconcile_util_test.go | 577 +++- 6 files changed, 3143 insertions(+), 2602 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 813e34498a5..76ac71158c5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -11250,7 +11250,7 @@ func (a *Allocation) Expired(now time.Time) bool { return false } - if tg.MaxClientDisconnect == nil { + if tg.MaxClientDisconnect == nil && !tg.SingleInstanceOnLost { return false } diff --git a/nomad/worker.go b/nomad/worker.go index 69107cc48a8..83fb1ab671f 100644 --- a/nomad/worker.go +++ b/nomad/worker.go @@ -516,6 +516,7 @@ REQ: // Check if we got a response if resp.Eval != nil { + fmt.Println("\n **** eval triggered_by", resp.Eval.TriggeredBy) w.logger.Debug("dequeued evaluation", "eval_id", resp.Eval.ID, "type", resp.Eval.Type, "namespace", resp.Eval.Namespace, "job_id", resp.Eval.JobID, "node_id", resp.Eval.NodeID, "triggered_by", resp.Eval.TriggeredBy) return resp.Eval, resp.Token, resp.GetWaitIndex(), false } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 3935bfccd21..0ad9993353c 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -177,6 +177,7 @@ func (r *reconcileResults) GoString() string { for tg, u := range r.desiredTGUpdates { base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u) } + fmt.Println("\n ***", base) return base } @@ -369,13 +370,14 @@ func (a *allocReconciler) handleStop(m allocMatrix) { // filterAndStopAll stops all allocations in an allocSet. This is useful in when // stopping an entire job or task group. 
func (a *allocReconciler) filterAndStopAll(set allocSet) uint64 { - untainted, migrate, lost, disconnecting, reconnecting, ignore := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) + untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) a.markStop(untainted, "", allocNotNeeded) a.markStop(migrate, "", allocNotNeeded) a.markStop(lost, structs.AllocClientStatusLost, allocLost) a.markStop(disconnecting, "", allocNotNeeded) a.markStop(reconnecting, "", allocNotNeeded) a.markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", allocNotNeeded) + a.markStop(expiring.filterByClientStatus(structs.AllocClientStatusUnknown), "", allocNotNeeded) return uint64(len(set)) } @@ -433,7 +435,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { canaries, all := a.cancelUnneededCanaries(all, desiredChanges) // Determine what set of allocations are on tainted nodes - untainted, migrate, lost, disconnecting, reconnecting, ignore := all.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) + untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring := all.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) desiredChanges.Ignore += uint64(len(ignore)) // Determine what set of terminal allocations need to be rescheduled @@ -468,11 +470,19 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { } } + if len(expiring) > 0 { + if tg.SingleInstanceOnLost { + untainted = untainted.union(expiring) + } else { + lost = lost.union(expiring) + } + } // Determine what set of disconnecting allocations need to be rescheduled now, // which ones later and which ones can't be rescheduled at all. timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { - if !tg.SingleInstanceOnLost { + switch { + case tg.MaxClientDisconnect != nil: untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) @@ -483,7 +493,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // create followup evals, and update the ClientStatus to unknown. timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) - } else { + case tg.SingleInstanceOnLost: untainted = untainted.union(disconnecting) } @@ -698,7 +708,7 @@ func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChang } canaries = all.fromKeys(canaryIDs) - untainted, migrate, lost, _, _, _ := canaries.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) + untainted, migrate, lost, _, _, _, _ := canaries.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now) // We don't add these stops to desiredChanges because the deployment is // still active. DesiredChanges is used to report deployment progress/final // state. These transient failures aren't meaningful. diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index cc68dfefb5d..881b4ecd208 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -42,6 +42,481 @@ var ( } ) +// Tests that when a node disconnects/reconnects allocations for that node are +// reconciled according to the business rules. 
+func TestReconciler_Disconnected_Client(t *testing.T) { + disconnectAllocState := []*structs.AllocState{{ + Field: structs.AllocStateFieldClientStatus, + Value: structs.AllocClientStatusUnknown, + Time: time.Now(), + }} + + type testCase struct { + name string + allocCount int + disconnectedAllocCount int + jobVersionIncrement uint64 + nodeScoreIncrement float64 + disconnectedAllocStatus string + disconnectedAllocStates []*structs.AllocState + isBatch bool + nodeStatusDisconnected bool + replace bool + failReplacement bool + taintReplacement bool + disconnectReplacement bool + replaceFailedReplacement bool + shouldStopOnDisconnectedNode bool + maxDisconnect *time.Duration + expected *resultExpectation + } + + testCases := []testCase{ + { + name: "reconnect-original-no-replacement", + allocCount: 2, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + expected: &resultExpectation{ + reconnectUpdates: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + }, + }, + }, + }, + { + name: "resume-original-and-stop-replacement", + allocCount: 3, + replace: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + expected: &resultExpectation{ + stop: 1, + reconnectUpdates: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 3, + }, + }, + }, + }, + { + name: "stop-original-with-lower-node-score", + allocCount: 4, + replace: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + nodeScoreIncrement: 1, + expected: &resultExpectation{ + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 4, + }, + }, + }, + }, + { + name: "stop-original-failed-on-reconnect", + allocCount: 4, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusFailed, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 4, + }, + }, + }, + }, + { + name: "reschedule-original-failed-if-not-replaced", + allocCount: 4, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusFailed, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + expected: &resultExpectation{ + stop: 2, + place: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + Place: 2, + Stop: 2, + }, + }, + }, + }, + { + name: "ignore-reconnect-completed", + allocCount: 2, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusComplete, + + disconnectedAllocStates: disconnectAllocState, + isBatch: true, + expected: &resultExpectation{ + place: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + Place: 2, + }, + }, + }, + }, + { + name: "keep-original-alloc-and-stop-failed-replacement", + allocCount: 3, + replace: true, + failReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: 
disconnectAllocState, + expected: &resultExpectation{ + reconnectUpdates: 2, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 5, + }, + }, + }, + }, + { + name: "keep-original-and-stop-reconnecting-replacement", + allocCount: 2, + replace: true, + disconnectReplacement: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + expected: &resultExpectation{ + reconnectUpdates: 1, + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + Stop: 1, + }, + }, + }, + }, + { + name: "keep-original-and-stop-tainted-replacement", + allocCount: 3, + replace: true, + taintReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + expected: &resultExpectation{ + reconnectUpdates: 2, + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 3, + Stop: 2, + }, + }, + }, + }, + { + name: "stop-original-alloc-with-old-job-version", + allocCount: 5, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + jobVersionIncrement: 1, + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 5, + Stop: 2, + }, + }, + }, + }, + { + name: "stop-original-alloc-with-old-job-version-reconnect-eval", + allocCount: 5, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + jobVersionIncrement: 1, + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 5, + }, + }, + }, + }, + { + name: "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced", + allocCount: 5, + replace: true, + failReplacement: true, + replaceFailedReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + jobVersionIncrement: 1, + expected: &resultExpectation{ + stop: 2, + reconnectUpdates: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 7, + }, + }, + }, + }, + { + name: "stop-original-pending-alloc-for-disconnected-node", + allocCount: 2, + replace: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusPending, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + nodeStatusDisconnected: true, + expected: &resultExpectation{ + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 2, + }, + }, + }, + }, + { + name: "stop-failed-original-and-failed-replacements-and-place-new", + allocCount: 5, + replace: true, + failReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusFailed, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + expected: &resultExpectation{ + stop: 2, + place: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Place: 2, + Ignore: 5, + }, + }, + }, + }, + { + name: "stop-expired-allocs", + allocCount: 5, + 
replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusUnknown, + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + nodeStatusDisconnected: true, + maxDisconnect: pointer.Of(2 * time.Second), + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 5, + }, + }, + }, + }, + { + name: "replace-allocs-on-disconnected-node", + allocCount: 5, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + disconnectedAllocStates: []*structs.AllocState{}, + nodeStatusDisconnected: true, + expected: &resultExpectation{ + place: 2, + disconnectUpdates: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Place: 2, + Ignore: 3, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.NotEqual(t, 0, tc.allocCount, "invalid test case: alloc count must be greater than zero") + + testNode := mock.Node() + if tc.nodeStatusDisconnected == true { + testNode.Status = structs.NodeStatusDisconnected + } + + // Create resumable allocs + job, allocs := buildResumableAllocations(tc.allocCount, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) + + origAllocs := set.New[string](len(allocs)) + for _, alloc := range allocs { + origAllocs.Insert(alloc.ID) + } + + if tc.isBatch { + job.Type = structs.JobTypeBatch + } + + // Set alloc state + disconnectedAllocCount := tc.disconnectedAllocCount + for _, alloc := range allocs { + alloc.DesiredStatus = structs.AllocDesiredStatusRun + + if tc.maxDisconnect != nil { + alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect + } + + if disconnectedAllocCount > 0 { + alloc.ClientStatus = tc.disconnectedAllocStatus + alloc.AllocStates = tc.disconnectedAllocStates + // Set the node id on all the disconnected allocs to the node under test. + alloc.NodeID = testNode.ID + alloc.NodeName = "disconnected" + disconnectedAllocCount-- + } + } + + // Place the allocs on another node. + if tc.replace { + replacements := make([]*structs.Allocation, 0) + for _, alloc := range allocs { + if alloc.NodeID != testNode.ID { + continue + } + replacement := alloc.Copy() + replacement.ID = uuid.Generate() + replacement.NodeID = uuid.Generate() + replacement.ClientStatus = structs.AllocClientStatusRunning + replacement.PreviousAllocation = alloc.ID + replacement.AllocStates = nil + replacement.TaskStates = nil + replacement.CreateIndex += 1 + alloc.NextAllocation = replacement.ID + + if tc.jobVersionIncrement != 0 { + replacement.Job.Version = replacement.Job.Version + tc.jobVersionIncrement + } + if tc.nodeScoreIncrement != 0 { + replacement.Metrics.ScoreMetaData[0].NormScore = replacement.Metrics.ScoreMetaData[0].NormScore + tc.nodeScoreIncrement + } + if tc.taintReplacement { + replacement.DesiredTransition.Migrate = pointer.Of(true) + } + if tc.disconnectReplacement { + replacement.AllocStates = tc.disconnectedAllocStates + } + + // If we want to test intermediate replacement failures simulate that. 
+ if tc.failReplacement { + replacement.ClientStatus = structs.AllocClientStatusFailed + + if tc.replaceFailedReplacement { + nextReplacement := replacement.Copy() + nextReplacement.ID = uuid.Generate() + nextReplacement.ClientStatus = structs.AllocClientStatusRunning + nextReplacement.DesiredStatus = structs.AllocDesiredStatusRun + nextReplacement.PreviousAllocation = replacement.ID + nextReplacement.CreateIndex += 1 + + replacement.NextAllocation = nextReplacement.ID + replacement.DesiredStatus = structs.AllocDesiredStatusStop + + replacements = append(replacements, nextReplacement) + } + } + + replacements = append(replacements, replacement) + } + + allocs = append(allocs, replacements...) + } + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, + nil, allocs, map[string]*structs.Node{testNode.ID: testNode}, "", 50, true) + + reconciler.now = time.Now() + if tc.maxDisconnect != nil { + reconciler.now = time.Now().Add(*tc.maxDisconnect * 20) + } + + results := reconciler.Compute() + assertResults(t, results, tc.expected) + + for _, stopResult := range results.stop { + // Skip replacement allocs. + if !origAllocs.Contains(stopResult.alloc.ID) { + continue + } + + if tc.shouldStopOnDisconnectedNode { + require.Equal(t, testNode.ID, stopResult.alloc.NodeID) + } else { + require.NotEqual(t, testNode.ID, stopResult.alloc.NodeID) + } + + require.Equal(t, job.Version, stopResult.alloc.Job.Version) + } + }) + } +} + func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { return true, false, nil } @@ -2023,263 +2498,14 @@ func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { }) assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) - - // Should not have any follow up evals created - require := require.New(t) - require.Equal(0, len(r.desiredFollowupEvals)) -} - -// Tests rescheduling failed service allocations with desired state stop -func TestReconciler_RescheduleNow_Service(t *testing.T) { - ci.Parallel(t) - - require := require.New(t) - - // Set desired 5 - job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name - now := time.Now() - - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 1, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } - job.TaskGroups[0].Update = noCanaryUpdate - - // Create 5 existing allocations - var allocs []*structs.Allocation - for i := 0; i < 5; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning - } - - // Mark two as failed - allocs[0].ClientStatus = structs.AllocClientStatusFailed - - // Mark one of them as already rescheduled once - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} - allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} - allocs[1].ClientStatus = structs.AllocClientStatusFailed - - // Mark one as desired state stop - 
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() - - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) - - // Verify that one rescheduled alloc and one replacement for terminal alloc were placed - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 2, - inplace: 0, - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 2, - Ignore: 3, - Stop: 1, - }, - }, - }) - - // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) -} - -// Tests rescheduling failed service allocations when there's clock drift (upto a second) -func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { - ci.Parallel(t) - - require := require.New(t) - - // Set desired 5 - job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name - now := time.Now() - - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 1, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } - job.TaskGroups[0].Update = noCanaryUpdate - - // Create 5 existing allocations - var allocs []*structs.Allocation - for i := 0; i < 5; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning - } - - // Mark one as failed - allocs[0].ClientStatus = structs.AllocClientStatusFailed - - // Mark one of them as already rescheduled once - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} - // Set fail time to 4 seconds ago which falls within the reschedule window - allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-4 * time.Second)}} - allocs[1].ClientStatus = structs.AllocClientStatusFailed - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.now = now - r := reconciler.Compute() - - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) - - // Verify that one rescheduled alloc was placed - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 1, - inplace: 0, - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 1, - Stop: 1, - Ignore: 4, - }, - }, - }) - - // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) -} - -// Tests rescheduling failed service allocations when the eval ID matches and there's a large clock 
drift -func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { - ci.Parallel(t) - - require := require.New(t) - - // Set desired 5 - job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name - now := time.Now() - - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 1, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } - job.TaskGroups[0].Update = noCanaryUpdate - - // Create 5 existing allocations - var allocs []*structs.Allocation - for i := 0; i < 5; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning - } - - // Mark one as failed - allocs[0].ClientStatus = structs.AllocClientStatusFailed - - // Mark one of them as already rescheduled once - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} - // Set fail time to 5 seconds ago and eval ID - evalID := uuid.Generate() - allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-5 * time.Second)}} - allocs[1].ClientStatus = structs.AllocClientStatusFailed - allocs[1].FollowupEvalID = evalID - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, evalID, 50, true) - reconciler.now = now.Add(-30 * time.Second) - r := reconciler.Compute() - - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) - - // Verify that one rescheduled alloc was placed - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 1, - stop: 1, - inplace: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 1, - Stop: 1, - Ignore: 4, - }, - }, - }) - - // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + + // Should not have any follow up evals created + require := require.New(t) + require.Equal(0, len(r.desiredFollowupEvals)) } -// Tests rescheduling failed service allocations when there are canaries -func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { +// Tests rescheduling failed service allocations with desired state stop +func TestReconciler_RescheduleNow_Service(t *testing.T) { ci.Parallel(t) require := require.New(t) @@ -2299,18 +2525,7 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { MaxDelay: 1 * time.Hour, Unlimited: false, } - job.TaskGroups[0].Update = canaryUpdate - - job2 := job.Copy() - job2.Version++ - - d := structs.NewDeployment(job2, 50) - d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - s := &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 5, - } - d.TaskGroups[job.TaskGroups[0].Name] = s + job.TaskGroups[0].Update = noCanaryUpdate // Create 5 existing allocations var allocs []*structs.Allocation @@ -2324,7 +2539,7 @@ func 
TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { alloc.ClientStatus = structs.AllocClientStatusRunning } - // Mark three as failed + // Mark two as failed allocs[0].ClientStatus = structs.AllocClientStatusFailed // Mark one of them as already rescheduled once @@ -2340,27 +2555,10 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { allocs[1].ClientStatus = structs.AllocClientStatusFailed // Mark one as desired state stop - allocs[4].ClientStatus = structs.AllocClientStatusFailed - - // Create 2 canary allocations - for i := 0; i < 2; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Canary: true, - Healthy: pointer.Of(false), - } - s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) - allocs = append(allocs, alloc) - } + allocs[4].DesiredStatus = structs.AllocDesiredStatusStop - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, "", 50, true) r := reconciler.Compute() // Verify that no follow up evals were created @@ -2372,25 +2570,25 @@ func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { createDeployment: nil, deploymentUpdates: nil, place: 2, - stop: 2, inplace: 0, + stop: 1, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { Place: 2, - Stop: 2, - Ignore: 5, + Ignore: 3, + Stop: 1, }, }, }) // Rescheduled allocs should have previous allocs assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 2, r.place) - assertPlacementsAreRescheduled(t, 2, r.place) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertPlacementsAreRescheduled(t, 1, r.place) } -// Tests rescheduling failed canary service allocations -func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { +// Tests rescheduling failed service allocations when there's clock drift (upto a second) +func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { ci.Parallel(t) require := require.New(t) @@ -2403,23 +2601,14 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { // Set up reschedule policy and update block job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 24 * time.Hour, Delay: 5 * time.Second, - DelayFunction: "constant", + DelayFunction: "", MaxDelay: 1 * time.Hour, - Unlimited: true, - } - job.TaskGroups[0].Update = canaryUpdate - - job2 := job.Copy() - job2.Version++ - - d := structs.NewDeployment(job2, 50) - d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - s := &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 5, + Unlimited: false, } - d.TaskGroups[job.TaskGroups[0].Name] = s + job.TaskGroups[0].Update = noCanaryUpdate // Create 5 existing allocations var allocs []*structs.Allocation @@ -2433,60 +2622,24 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { alloc.ClientStatus = structs.AllocClientStatusRunning } - // Create 2 healthy canary allocations - for i := 0; i < 2; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = 
uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Canary: true, - Healthy: pointer.Of(false), - } - s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) - allocs = append(allocs, alloc) - } - - // Mark the canaries as failed - allocs[5].ClientStatus = structs.AllocClientStatusFailed - allocs[5].DesiredTransition.Reschedule = pointer.Of(true) + // Mark one as failed + allocs[0].ClientStatus = structs.AllocClientStatusFailed // Mark one of them as already rescheduled once - allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), PrevAllocID: uuid.Generate(), PrevNodeID: uuid.Generate(), }, }} - - allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + // Set fail time to 4 seconds ago which falls within the reschedule window + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} - allocs[6].ClientStatus = structs.AllocClientStatusFailed - allocs[6].DesiredTransition.Reschedule = pointer.Of(true) - - // Create 4 unhealthy canary allocations that have already been replaced - for i := 0; i < 4; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) - alloc.ClientStatus = structs.AllocClientStatusFailed - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Canary: true, - Healthy: pointer.Of(false), - } - s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) - allocs = append(allocs, alloc) - } + FinishedAt: now.Add(-4 * time.Second)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, "", 50, true) reconciler.now = now r := reconciler.Compute() @@ -2494,31 +2647,30 @@ func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { evals := r.desiredFollowupEvals[tgName] require.Nil(evals) - // Verify that one rescheduled alloc and one replacement for terminal alloc were placed + // Verify that one rescheduled alloc was placed assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: 2, - stop: 2, + place: 1, inplace: 0, + stop: 1, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 2, - Stop: 2, - Ignore: 9, + Place: 1, + Stop: 1, + Ignore: 4, }, }, }) // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 2, r.place) - assertPlacementsAreRescheduled(t, 2, r.place) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertPlacementsAreRescheduled(t, 1, r.place) } -// Tests rescheduling failed canary service allocations when one has reached its -// reschedule 
limit -func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { +// Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift +func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { ci.Parallel(t) require := require.New(t) @@ -2538,18 +2690,7 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { MaxDelay: 1 * time.Hour, Unlimited: false, } - job.TaskGroups[0].Update = canaryUpdate - - job2 := job.Copy() - job2.Version++ - - d := structs.NewDeployment(job2, 50) - d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - s := &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 5, - } - d.TaskGroups[job.TaskGroups[0].Name] = s + job.TaskGroups[0].Update = noCanaryUpdate // Create 5 existing allocations var allocs []*structs.Allocation @@ -2563,68 +2704,34 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { alloc.ClientStatus = structs.AllocClientStatusRunning } - // Create 2 healthy canary allocations - for i := 0; i < 2; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Canary: true, - Healthy: pointer.Of(false), - } - s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) - allocs = append(allocs, alloc) - } - - // Mark the canaries as failed - allocs[5].ClientStatus = structs.AllocClientStatusFailed - allocs[5].DesiredTransition.Reschedule = pointer.Of(true) + // Mark one as failed + allocs[0].ClientStatus = structs.AllocClientStatusFailed // Mark one of them as already rescheduled once - allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), PrevAllocID: uuid.Generate(), PrevNodeID: uuid.Generate(), }, }} - - allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + // Set fail time to 5 seconds ago and eval ID + evalID := uuid.Generate() + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} - allocs[6].ClientStatus = structs.AllocClientStatusFailed - allocs[6].DesiredTransition.Reschedule = pointer.Of(true) - - // Create 4 unhealthy canary allocations that have already been replaced - for i := 0; i < 4; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) - alloc.ClientStatus = structs.AllocClientStatusFailed - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Canary: true, - Healthy: pointer.Of(false), - } - s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) - allocs = append(allocs, alloc) - } + FinishedAt: now.Add(-5 * time.Second)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].FollowupEvalID = evalID - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, - d, allocs, nil, "", 50, true) - reconciler.now = now + reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, evalID, 50, true) + reconciler.now = now.Add(-30 * time.Second) r := reconciler.Compute() // Verify that no follow up evals were created evals := r.desiredFollowupEvals[tgName] require.Nil(evals) - // Verify that one rescheduled alloc and one replacement for terminal alloc were placed + // Verify that one rescheduled alloc was placed assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, @@ -2635,7 +2742,7 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { job.TaskGroups[0].Name: { Place: 1, Stop: 1, - Ignore: 10, + Ignore: 4, }, }, }) @@ -2646,20 +2753,43 @@ func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { assertPlacementsAreRescheduled(t, 1, r.place) } -// Tests failed service allocations that were already rescheduled won't be rescheduled again -func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { +// Tests rescheduling failed service allocations when there are canaries +func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { ci.Parallel(t) + require := require.New(t) + // Set desired 5 job := mock.Job() job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + now := time.Now() + + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = canaryUpdate + + job2 := job.Copy() + job2.Version++ - // Set up reschedule policy - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour} + d := structs.NewDeployment(job2, 50) + d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + s := &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 5, + } + d.TaskGroups[job.TaskGroups[0].Name] = s - // Create 7 existing allocations + // Create 5 existing allocations var allocs []*structs.Allocation - for i := 0; i < 7; i++ { + for i := 0; i < 5; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -2668,587 +2798,527 @@ func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { allocs = append(allocs, alloc) alloc.ClientStatus = structs.AllocClientStatusRunning } - // Mark two as failed and rescheduled + + // Mark three as failed allocs[0].ClientStatus = structs.AllocClientStatusFailed - allocs[0].ID = allocs[1].ID - allocs[1].ClientStatus = structs.AllocClientStatusFailed - allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + + // Mark one of them as already rescheduled once + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), PrevAllocID: uuid.Generate(), PrevNodeID: uuid.Generate(), }, }} - allocs[1].NextAllocation = allocs[2].ID + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed // Mark one as desired state stop - allocs[4].DesiredStatus = structs.AllocDesiredStatusStop + allocs[4].ClientStatus = structs.AllocClientStatusFailed - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, 
"", 50, true) + // Create 2 canary allocations + for i := 0; i < 2; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: true, + Healthy: pointer.Of(false), + } + s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, + d, allocs, nil, "", 50, true) r := reconciler.Compute() - // Should place 1 - one is a new placement to make up the desired count of 5 - // failing allocs are not rescheduled + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Verify that one rescheduled alloc and one replacement for terminal alloc were placed assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: 1, + place: 2, + stop: 2, inplace: 0, - stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 1, - Ignore: 4, + Place: 2, + Stop: 2, + Ignore: 5, }, }, }) - // name index 0 is used for the replacement because its - assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) + // Rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.place) + assertPlacementsAreRescheduled(t, 2, r.place) } -// Tests the reconciler cancels an old deployment when the job is being stopped -func TestReconciler_CancelDeployment_JobStop(t *testing.T) { +// Tests rescheduling failed canary service allocations +func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { ci.Parallel(t) - job := mock.Job() - job.Stop = true - - running := structs.NewDeployment(job, 50) - failed := structs.NewDeployment(job, 50) - failed.Status = structs.DeploymentStatusFailed - - cases := []struct { - name string - job *structs.Job - jobID, taskGroup string - deployment *structs.Deployment - cancel bool - }{ - { - name: "stopped job, running deployment", - job: job, - jobID: job.ID, - taskGroup: job.TaskGroups[0].Name, - deployment: running, - cancel: true, - }, - { - name: "nil job, running deployment", - job: nil, - jobID: "foo", - taskGroup: "bar", - deployment: running, - cancel: true, - }, - { - name: "stopped job, failed deployment", - job: job, - jobID: job.ID, - taskGroup: job.TaskGroups[0].Name, - deployment: failed, - cancel: false, - }, - { - name: "nil job, failed deployment", - job: nil, - jobID: "foo", - taskGroup: "bar", - deployment: failed, - cancel: false, - }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - // Create 10 allocations - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = c.job - alloc.JobID = c.jobID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) - alloc.TaskGroup = c.taskGroup - allocs = append(allocs, alloc) - } - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, - c.deployment, allocs, nil, "", 50, true) - r := reconciler.Compute() - - var updates []*structs.DeploymentStatusUpdate - if c.cancel { - updates = []*structs.DeploymentStatusUpdate{ - { - DeploymentID: 
c.deployment.ID, - Status: structs.DeploymentStatusCancelled, - StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, - }, - } - } - - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: updates, - place: 0, - inplace: 0, - stop: 10, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - c.taskGroup: { - Stop: 10, - }, - }, - }) - - assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) - }) - } -} - -// Tests the reconciler cancels an old deployment when the job is updated -func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { - ci.Parallel(t) + require := require.New(t) - // Create a base job + // Set desired 5 job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + now := time.Now() - // Create two deployments - running := structs.NewDeployment(job, 50) - failed := structs.NewDeployment(job, 50) - failed.Status = structs.DeploymentStatusFailed - - // Make the job newer than the deployment - job.Version += 10 - - cases := []struct { - name string - deployment *structs.Deployment - cancel bool - }{ - { - name: "running deployment", - deployment: running, - cancel: true, - }, - { - name: "failed deployment", - deployment: failed, - cancel: false, - }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - // Create 10 allocations - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name - allocs = append(allocs, alloc) - } - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - c.deployment, allocs, nil, "", 50, true) - r := reconciler.Compute() - - var updates []*structs.DeploymentStatusUpdate - if c.cancel { - updates = []*structs.DeploymentStatusUpdate{ - { - DeploymentID: c.deployment.ID, - Status: structs.DeploymentStatusCancelled, - StatusDescription: structs.DeploymentStatusDescriptionNewerJob, - }, - } - } - - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: updates, - place: 0, - inplace: 0, - stop: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Ignore: 10, - }, - }, - }) - }) + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Delay: 5 * time.Second, + DelayFunction: "constant", + MaxDelay: 1 * time.Hour, + Unlimited: true, } -} + job.TaskGroups[0].Update = canaryUpdate -// Tests the reconciler creates a deployment and does a rolling upgrade with -// destructive changes -func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { - ci.Parallel(t) + job2 := job.Copy() + job2.Version++ - job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + d := structs.NewDeployment(job2, 50) + d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + s := &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 5, + } + d.TaskGroups[job.TaskGroups[0].Name] = s - // Create 10 allocations from the old job + // Create 5 existing allocations var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 0; i < 5; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, 
job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() - - d := structs.NewDeployment(job, 50) - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredTotal: 10, + // Create 2 healthy canary allocations + for i := 0; i < 2; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: true, + Healthy: pointer.Of(false), + } + s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) + allocs = append(allocs, alloc) } - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: d, - deploymentUpdates: nil, - destructive: 4, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - DestructiveUpdate: 4, - Ignore: 6, - }, - }, - }) - - assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) -} + // Mark the canaries as failed + allocs[5].ClientStatus = structs.AllocClientStatusFailed + allocs[5].DesiredTransition.Reschedule = pointer.Of(true) -// Tests the reconciler creates a deployment for inplace updates -func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { - ci.Parallel(t) + // Mark one of them as already rescheduled once + allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} - jobOld := mock.Job() - job := jobOld.Copy() - job.Version++ - job.TaskGroups[0].Update = noCanaryUpdate + allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[6].ClientStatus = structs.AllocClientStatusFailed + allocs[6].DesiredTransition.Reschedule = pointer.Of(true) - // Create 10 allocations from the old job - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + // Create 4 unhealthy canary allocations that have already been replaced + for i := 0; i < 4; i++ { alloc := mock.Alloc() - alloc.Job = jobOld + alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) + alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: true, + Healthy: pointer.Of(false), + } + s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, + d, allocs, nil, "", 50, true) + reconciler.now = now r := reconciler.Compute() - d := structs.NewDeployment(job, 50) - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - 
DesiredTotal: 10, - } + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) - // Assert the correct results + // Verify that one rescheduled alloc and one replacement for terminal alloc were placed assertResults(t, r, &resultExpectation{ - createDeployment: d, + createDeployment: nil, deploymentUpdates: nil, - place: 0, - inplace: 10, - stop: 0, + place: 2, + stop: 2, + inplace: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - InPlaceUpdate: 10, + Place: 2, + Stop: 2, + Ignore: 9, }, }, }) + + // Rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 2, r.place) + assertPlacementsAreRescheduled(t, 2, r.place) } -// Tests the reconciler creates a deployment when the job has a newer create index -func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { +// Tests rescheduling failed canary service allocations when one has reached its +// reschedule limit +func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { ci.Parallel(t) - jobOld := mock.Job() - job := jobOld.Copy() - job.TaskGroups[0].Update = noCanaryUpdate - job.CreateIndex += 100 + require := require.New(t) - // Create 5 allocations from the old job + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + now := time.Now() + + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = canaryUpdate + + job2 := job.Copy() + job2.Version++ + + d := structs.NewDeployment(job2, 50) + d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + s := &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 5, + } + d.TaskGroups[job.TaskGroups[0].Name] = s + + // Create 5 existing allocations var allocs []*structs.Allocation for i := 0; i < 5; i++ { alloc := mock.Alloc() - alloc.Job = jobOld - alloc.JobID = jobOld.ID + alloc.Job = job + alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + // Create 2 healthy canary allocations + for i := 0; i < 2; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: true, + Healthy: pointer.Of(false), + } + s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) + allocs = append(allocs, alloc) + } - d := structs.NewDeployment(job, 50) - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredTotal: 5, + // Mark the canaries as failed + allocs[5].ClientStatus = structs.AllocClientStatusFailed + allocs[5].DesiredTransition.Reschedule = pointer.Of(true) + + // Mark one of them as already rescheduled once + allocs[5].RescheduleTracker = 
&structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + + allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[6].ClientStatus = structs.AllocClientStatusFailed + allocs[6].DesiredTransition.Reschedule = pointer.Of(true) + + // Create 4 unhealthy canary allocations that have already been replaced + for i := 0; i < 4; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) + alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: true, + Healthy: pointer.Of(false), + } + s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) + allocs = append(allocs, alloc) } - // Assert the correct results + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, + d, allocs, nil, "", 50, true) + reconciler.now = now + r := reconciler.Compute() + + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Verify that one rescheduled alloc and one replacement for terminal alloc were placed assertResults(t, r, &resultExpectation{ - createDeployment: d, + createDeployment: nil, deploymentUpdates: nil, - place: 5, - destructive: 0, + place: 1, + stop: 1, inplace: 0, - stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - InPlaceUpdate: 0, - Ignore: 5, - Place: 5, - DestructiveUpdate: 0, + Place: 1, + Stop: 1, + Ignore: 10, }, }, }) + + // Rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertPlacementsAreRescheduled(t, 1, r.place) } -// Tests the reconciler doesn't creates a deployment if there are no changes -func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { +// Tests failed service allocations that were already rescheduled won't be rescheduled again +func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { ci.Parallel(t) + // Set desired 5 job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Count = 5 - // Create 10 allocations from the job + // Set up reschedule policy + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour} + + // Create 7 existing allocations var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 0; i < 7; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning } + // Mark two as failed and rescheduled + allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs[0].ID = allocs[1].ID + allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + 
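	// A minimal sketch, not part of this change, of the state that keeps a failed
	// allocation from being rescheduled again in the test around this point: it is
	// marked failed, carries one RescheduleTracker event, and (as the next
	// statement shows) points at its replacement through NextAllocation. The
	// helper name markAlreadyRescheduled is hypothetical; the fields mirror ones
	// already used in this file.
	markAlreadyRescheduled := func(failed, replacement *structs.Allocation) {
		// Record the failure and one prior reschedule attempt an hour ago.
		failed.ClientStatus = structs.AllocClientStatusFailed
		failed.RescheduleTracker = &structs.RescheduleTracker{
			Events: []*structs.RescheduleEvent{{
				RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
				PrevAllocID:    uuid.Generate(),
				PrevNodeID:     uuid.Generate(),
			}},
		}
		// Link to the allocation that already replaced it, so the reconciler
		// treats the failure as handled rather than placing another replacement.
		failed.NextAllocation = replacement.ID
	}
	_ = markAlreadyRescheduled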
allocs[1].NextAllocation = allocs[2].ID + + // Mark one as desired state stop + allocs[4].DesiredStatus = structs.AllocDesiredStatusStop reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) r := reconciler.Compute() - // Assert the correct results + // Should place 1 - one is a new placement to make up the desired count of 5 + // failing allocs are not rescheduled assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: 0, + place: 1, inplace: 0, stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - DestructiveUpdate: 0, - Ignore: 10, + Place: 1, + Ignore: 4, }, }, }) + + // name index 0 is used for the replacement because its + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) } -// Tests the reconciler doesn't place any more canaries when the deployment is -// paused or failed -func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { +// Tests the reconciler cancels an old deployment when the job is being stopped +func TestReconciler_CancelDeployment_JobStop(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = canaryUpdate + job.Stop = true + + running := structs.NewDeployment(job, 50) + failed := structs.NewDeployment(job, 50) + failed.Status = structs.DeploymentStatusFailed cases := []struct { name string - deploymentStatus string - stop uint64 + job *structs.Job + jobID, taskGroup string + deployment *structs.Deployment + cancel bool }{ { - name: "paused deployment", - deploymentStatus: structs.DeploymentStatusPaused, - stop: 0, + name: "stopped job, running deployment", + job: job, + jobID: job.ID, + taskGroup: job.TaskGroups[0].Name, + deployment: running, + cancel: true, }, { - name: "failed deployment", - deploymentStatus: structs.DeploymentStatusFailed, - stop: 1, + name: "nil job, running deployment", + job: nil, + jobID: "foo", + taskGroup: "bar", + deployment: running, + cancel: true, }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - // Create a deployment that is paused/failed and has placed some canaries - d := structs.NewDeployment(job, 50) - d.Status = c.deploymentStatus - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: false, - DesiredCanaries: 2, - DesiredTotal: 10, - PlacedAllocs: 1, - } - - // Create 10 allocations for the original job - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name - allocs = append(allocs, alloc) - } - - // Create one canary - canary := mock.Alloc() - canary.Job = job - canary.JobID = job.ID - canary.NodeID = uuid.Generate() - canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) - canary.TaskGroup = job.TaskGroups[0].Name - canary.DeploymentID = d.ID - allocs = append(allocs, canary) - d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} - - mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - r := reconciler.Compute() - - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, 
- place: 0, - inplace: 0, - stop: int(c.stop), - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Ignore: 11 - c.stop, - Stop: c.stop, - }, - }, - }) - }) - } -} - -// Tests the reconciler doesn't place any more allocs when the deployment is -// paused or failed -func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { - ci.Parallel(t) - - job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate - job.TaskGroups[0].Count = 15 - - cases := []struct { - name string - deploymentStatus string - }{ { - name: "paused deployment", - deploymentStatus: structs.DeploymentStatusPaused, + name: "stopped job, failed deployment", + job: job, + jobID: job.ID, + taskGroup: job.TaskGroups[0].Name, + deployment: failed, + cancel: false, }, { - name: "failed deployment", - deploymentStatus: structs.DeploymentStatusFailed, + name: "nil job, failed deployment", + job: nil, + jobID: "foo", + taskGroup: "bar", + deployment: failed, + cancel: false, }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - // Create a deployment that is paused and has placed some canaries - d := structs.NewDeployment(job, 50) - d.Status = c.deploymentStatus - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 15, - PlacedAllocs: 10, - } - - // Create 10 allocations for the new job + // Create 10 allocations var allocs []*structs.Allocation for i := 0; i < 10; i++ { alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID + alloc.Job = c.job + alloc.JobID = c.jobID alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name + alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) + alloc.TaskGroup = c.taskGroup allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, + c.deployment, allocs, nil, "", 50, true) r := reconciler.Compute() + var updates []*structs.DeploymentStatusUpdate + if c.cancel { + updates = []*structs.DeploymentStatusUpdate{ + { + DeploymentID: c.deployment.ID, + Status: structs.DeploymentStatusCancelled, + StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, + }, + } + } + // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: nil, + deploymentUpdates: updates, place: 0, inplace: 0, - stop: 0, + stop: 10, desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Ignore: 10, + c.taskGroup: { + Stop: 10, }, }, }) + + assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) }) } } -// Tests the reconciler doesn't do any more destructive updates when the -// deployment is paused or failed -func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) { +// Tests the reconciler cancels an old deployment when the job is updated +func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { ci.Parallel(t) + // Create a base job job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + + // Create two deployments + running := structs.NewDeployment(job, 50) + failed := structs.NewDeployment(job, 50) + failed.Status = structs.DeploymentStatusFailed + + // Make the job newer than the deployment + job.Version += 10 cases := []struct { - name string - deploymentStatus 
string + name string + deployment *structs.Deployment + cancel bool }{ { - name: "paused deployment", - deploymentStatus: structs.DeploymentStatusPaused, + name: "running deployment", + deployment: running, + cancel: true, }, { - name: "failed deployment", - deploymentStatus: structs.DeploymentStatusFailed, + name: "failed deployment", + deployment: failed, + cancel: false, }, } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - // Create a deployment that is paused and has placed some canaries - d := structs.NewDeployment(job, 50) - d.Status = c.deploymentStatus - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 10, - PlacedAllocs: 1, - } - - // Create 9 allocations for the original job + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create 10 allocations var allocs []*structs.Allocation - for i := 1; i < 10; i++ { + for i := 0; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -3258,25 +3328,25 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing allocs = append(allocs, alloc) } - // Create one for the new job - newAlloc := mock.Alloc() - newAlloc.Job = job - newAlloc.JobID = job.ID - newAlloc.NodeID = uuid.Generate() - newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) - newAlloc.TaskGroup = job.TaskGroups[0].Name - newAlloc.DeploymentID = d.ID - allocs = append(allocs, newAlloc) - - mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + c.deployment, allocs, nil, "", 50, true) r := reconciler.Compute() + var updates []*structs.DeploymentStatusUpdate + if c.cancel { + updates = []*structs.DeploymentStatusUpdate{ + { + DeploymentID: c.deployment.ID, + Status: structs.DeploymentStatusCancelled, + StatusDescription: structs.DeploymentStatusDescriptionNewerJob, + }, + } + } + // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: nil, + deploymentUpdates: updates, place: 0, inplace: 0, stop: 0, @@ -3290,22 +3360,13 @@ func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing } } -// Tests the reconciler handles migrating a canary correctly on a draining node -func TestReconciler_DrainNode_Canary(t *testing.T) { +// Tests the reconciler creates a deployment and does a rolling upgrade with +// destructive changes +func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = canaryUpdate - - // Create a deployment that is paused and has placed some canaries - d := structs.NewDeployment(job, 50) - s := &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 10, - DesiredCanaries: 2, - PlacedAllocs: 2, - } - d.TaskGroups[job.TaskGroups[0].Name] = s + job.TaskGroups[0].Update = noCanaryUpdate // Create 10 allocations from the old job var allocs []*structs.Allocation @@ -3319,153 +3380,45 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { allocs = append(allocs, alloc) } - // Create two canaries for the new job - handled := make(map[string]allocUpdateType) - for i := 0; i < 2; i++ { - // Create one canary - canary := mock.Alloc() - canary.Job = job - canary.JobID = job.ID - 
canary.NodeID = uuid.Generate() - canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - canary.TaskGroup = job.TaskGroups[0].Name - canary.DeploymentID = d.ID - s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) - allocs = append(allocs, canary) - handled[canary.ID] = allocUpdateFnIgnore - } - - // Build a map of tainted nodes that contains the last canary - tainted := make(map[string]*structs.Node, 1) - n := mock.DrainNode() - n.ID = allocs[11].NodeID - allocs[11].DesiredTransition.Migrate = pointer.Of(true) - tainted[n.ID] = n - - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + nil, allocs, nil, "", 50, true) r := reconciler.Compute() - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 1, - inplace: 0, - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Canary: 1, - Ignore: 11, - }, - }, - }) - assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) -} - -// Tests the reconciler handles migrating a canary correctly on a lost node -func TestReconciler_LostNode_Canary(t *testing.T) { - ci.Parallel(t) - - job := mock.Job() - job.TaskGroups[0].Update = canaryUpdate - - // Create a deployment that is paused and has placed some canaries d := structs.NewDeployment(job, 50) - s := &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 10, - DesiredCanaries: 2, - PlacedAllocs: 2, - } - d.TaskGroups[job.TaskGroups[0].Name] = s - - // Create 10 allocations from the old job - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name - allocs = append(allocs, alloc) - } - - // Create two canaries for the new job - handled := make(map[string]allocUpdateType) - for i := 0; i < 2; i++ { - // Create one canary - canary := mock.Alloc() - canary.Job = job - canary.JobID = job.ID - canary.NodeID = uuid.Generate() - canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - canary.TaskGroup = job.TaskGroups[0].Name - s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) - canary.DeploymentID = d.ID - allocs = append(allocs, canary) - handled[canary.ID] = allocUpdateFnIgnore + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredTotal: 10, } - // Build a map of tainted nodes that contains the last canary - tainted := make(map[string]*structs.Node, 1) - n := mock.Node() - n.ID = allocs[11].NodeID - n.Status = structs.NodeStatusDown - tainted[n.ID] = n - - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) - r := reconciler.Compute() - // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: nil, + createDeployment: d, deploymentUpdates: nil, - place: 1, - inplace: 0, - stop: 1, + destructive: 4, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Canary: 1, - Ignore: 
11, + DestructiveUpdate: 4, + Ignore: 6, }, }, }) - assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) } -// Tests the reconciler handles stopping canaries from older deployments -func TestReconciler_StopOldCanaries(t *testing.T) { +// Tests the reconciler creates a deployment for inplace updates +func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { ci.Parallel(t) - job := mock.Job() - job.TaskGroups[0].Update = canaryUpdate - - // Create an old deployment that has placed some canaries - d := structs.NewDeployment(job, 50) - s := &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 10, - DesiredCanaries: 2, - PlacedAllocs: 2, - } - d.TaskGroups[job.TaskGroups[0].Name] = s - - // Update the job - job.Version += 10 + jobOld := mock.Job() + job := jobOld.Copy() + job.Version++ + job.TaskGroups[0].Update = noCanaryUpdate // Create 10 allocations from the old job var allocs []*structs.Allocation for i := 0; i < 10; i++ { alloc := mock.Alloc() - alloc.Job = job + alloc.Job = jobOld alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) @@ -3473,118 +3426,89 @@ func TestReconciler_StopOldCanaries(t *testing.T) { allocs = append(allocs, alloc) } - // Create canaries - for i := 0; i < 2; i++ { - // Create one canary - canary := mock.Alloc() - canary.Job = job - canary.JobID = job.ID - canary.NodeID = uuid.Generate() - canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - canary.TaskGroup = job.TaskGroups[0].Name - s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) - canary.DeploymentID = d.ID - allocs = append(allocs, canary) - } - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, - allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, + nil, allocs, nil, "", 50, true) r := reconciler.Compute() - newD := structs.NewDeployment(job, 50) - newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 10, + d := structs.NewDeployment(job, 50) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredTotal: 10, } // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: newD, - deploymentUpdates: []*structs.DeploymentStatusUpdate{ - { - DeploymentID: d.ID, - Status: structs.DeploymentStatusCancelled, - StatusDescription: structs.DeploymentStatusDescriptionNewerJob, - }, - }, - place: 2, - inplace: 0, - stop: 2, + createDeployment: d, + deploymentUpdates: nil, + place: 0, + inplace: 10, + stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Canary: 2, - Stop: 2, - Ignore: 10, + InPlaceUpdate: 10, }, }, }) - - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) } -// Tests the reconciler creates new canaries when the job changes -func TestReconciler_NewCanaries(t *testing.T) { +// Tests the reconciler creates a deployment when the job has a newer create index +func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { ci.Parallel(t) - job := mock.Job() - 
job.TaskGroups[0].Update = canaryUpdate + jobOld := mock.Job() + job := jobOld.Copy() + job.TaskGroups[0].Update = noCanaryUpdate + job.CreateIndex += 100 - // Create 10 allocations from the old job + // Create 5 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 0; i < 5; i++ { alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID + alloc.Job = jobOld + alloc.JobID = jobOld.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) r := reconciler.Compute() - newD := structs.NewDeployment(job, 50) - newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 10, + d := structs.NewDeployment(job, 50) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredTotal: 5, } // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: newD, + createDeployment: d, deploymentUpdates: nil, - place: 2, + place: 5, + destructive: 0, inplace: 0, stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Canary: 2, - Ignore: 10, + InPlaceUpdate: 0, + Ignore: 5, + Place: 5, + DestructiveUpdate: 0, }, }, }) - - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) } -// Tests the reconciler creates new canaries when the job changes and the -// canary count is greater than the task group count -func TestReconciler_NewCanaries_CountGreater(t *testing.T) { +// Tests the reconciler doesn't creates a deployment if there are no changes +func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Count = 3 - job.TaskGroups[0].Update = canaryUpdate.Copy() - job.TaskGroups[0].Update.Canary = 7 + job.TaskGroups[0].Update = noCanaryUpdate - // Create 3 allocations from the old job + // Create 10 allocations from the job var allocs []*structs.Allocation - for i := 0; i < 3; i++ { + for i := 0; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -3594,103 +3518,269 @@ func TestReconciler_NewCanaries_CountGreater(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "", 50, true) r := reconciler.Compute() - newD := structs.NewDeployment(job, 50) - newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - state := &structs.DeploymentState{ - DesiredCanaries: 7, - DesiredTotal: 3, - } - newD.TaskGroups[job.TaskGroups[0].Name] = state - // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: newD, + createDeployment: nil, deploymentUpdates: nil, - place: 7, + place: 0, inplace: 0, stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Canary: 7, - Ignore: 3, + DestructiveUpdate: 0, + Ignore: 10, }, }, }) - - assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place)) } -// Tests the reconciler 
creates new canaries when the job changes for multiple -// task groups -func TestReconciler_NewCanaries_MultiTG(t *testing.T) { +// Tests the reconciler doesn't place any more canaries when the deployment is +// paused or failed +func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = canaryUpdate - job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) - job.TaskGroups[0].Name = "tg2" - // Create 10 allocations from the old job for each tg - var allocs []*structs.Allocation - for j := 0; j < 2; j++ { - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[j].Name - allocs = append(allocs, alloc) - } + cases := []struct { + name string + deploymentStatus string + stop uint64 + }{ + { + name: "paused deployment", + deploymentStatus: structs.DeploymentStatusPaused, + stop: 0, + }, + { + name: "failed deployment", + deploymentStatus: structs.DeploymentStatusFailed, + stop: 1, + }, } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused/failed and has placed some canaries + d := structs.NewDeployment(job, 50) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredCanaries: 2, + DesiredTotal: 10, + PlacedAllocs: 1, + } + + // Create 10 allocations for the original job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = uuid.Generate() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) + canary.TaskGroup = job.TaskGroups[0].Name + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} + + mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: int(c.stop), + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 11 - c.stop, + Stop: c.stop, + }, + }, + }) + }) + } +} + +// Tests the reconciler doesn't place any more allocs when the deployment is +// paused or failed +func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { + ci.Parallel(t) + + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Count = 15 + + cases := []struct { + name string + deploymentStatus string + }{ + { + name: "paused deployment", + deploymentStatus: structs.DeploymentStatusPaused, + }, + { + name: "failed deployment", + deploymentStatus: 
structs.DeploymentStatusFailed, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job, 50) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 15, + PlacedAllocs: 10, + } + + // Create 10 allocations for the new job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() + + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 10, + }, + }, + }) + }) + } +} + +// Tests the reconciler doesn't do any more destructive updates when the +// deployment is paused or failed +func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) { + ci.Parallel(t) + + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + + cases := []struct { + name string + deploymentStatus string + }{ + { + name: "paused deployment", + deploymentStatus: structs.DeploymentStatusPaused, + }, + { + name: "failed deployment", + deploymentStatus: structs.DeploymentStatusFailed, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job, 50) + d.Status = c.deploymentStatus + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + PlacedAllocs: 1, + } + + // Create 9 allocations for the original job + var allocs []*structs.Allocation + for i := 1; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } - newD := structs.NewDeployment(job, 50) - newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - state := &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 10, - } - newD.TaskGroups[job.TaskGroups[0].Name] = state - newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy() + // Create one for the new job + newAlloc := mock.Alloc() + newAlloc.Job = job + newAlloc.JobID = job.ID + newAlloc.NodeID = uuid.Generate() + newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) + newAlloc.TaskGroup = job.TaskGroups[0].Name + newAlloc.DeploymentID = d.ID + allocs = append(allocs, newAlloc) - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: newD, - deploymentUpdates: nil, - place: 4, - inplace: 0, - stop: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Canary: 2, - Ignore: 10, - }, - job.TaskGroups[1].Name: { - Canary: 2, - Ignore: 10, - }, - }, - }) + mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) + reconciler := 
NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() - assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Ignore: 10, + }, + }, + }) + }) + } } -// Tests the reconciler creates new canaries when the job changes and scales up -func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { +// Tests the reconciler handles migrating a canary correctly on a draining node +func TestReconciler_DrainNode_Canary(t *testing.T) { ci.Parallel(t) - // Scale the job up to 15 job := mock.Job() job.TaskGroups[0].Update = canaryUpdate - job.TaskGroups[0].Count = 15 + + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job, 50) + s := &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 2, + } + d.TaskGroups[job.TaskGroups[0].Name] = s // Create 10 allocations from the old job var allocs []*structs.Allocation @@ -3704,44 +3794,68 @@ func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() - - newD := structs.NewDeployment(job, 50) - newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 15, + // Create two canaries for the new job + handled := make(map[string]allocUpdateType) + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = uuid.Generate() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + canary.DeploymentID = d.ID + s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) + allocs = append(allocs, canary) + handled[canary.ID] = allocUpdateFnIgnore } + // Build a map of tainted nodes that contains the last canary + tainted := make(map[string]*structs.Node, 1) + n := mock.DrainNode() + n.ID = allocs[11].NodeID + allocs[11].DesiredTransition.Migrate = pointer.Of(true) + tainted[n.ID] = n + + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, tainted, "", 50, true) + r := reconciler.Compute() + // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: newD, + createDeployment: nil, deploymentUpdates: nil, - place: 2, + place: 1, inplace: 0, - stop: 0, + stop: 1, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Canary: 2, - Ignore: 10, + Canary: 1, + Ignore: 11, }, }, }) - - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) } -// Tests the reconciler creates new canaries when the job changes and scales -// down -func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { +// Tests the reconciler handles migrating a canary correctly on a lost node 
+func TestReconciler_LostNode_Canary(t *testing.T) { ci.Parallel(t) - // Scale the job down to 5 job := mock.Job() job.TaskGroups[0].Update = canaryUpdate - job.TaskGroups[0].Count = 5 + + // Create a deployment that is paused and has placed some canaries + d := structs.NewDeployment(job, 50) + s := &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 2, + } + d.TaskGroups[job.TaskGroups[0].Name] = s // Create 10 allocations from the old job var allocs []*structs.Allocation @@ -3755,60 +3869,73 @@ func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() - - newD := structs.NewDeployment(job, 50) - newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion - newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredCanaries: 2, - DesiredTotal: 5, + // Create two canaries for the new job + handled := make(map[string]allocUpdateType) + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = uuid.Generate() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) + canary.DeploymentID = d.ID + allocs = append(allocs, canary) + handled[canary.ID] = allocUpdateFnIgnore } + // Build a map of tainted nodes that contains the last canary + tainted := make(map[string]*structs.Node, 1) + n := mock.Node() + n.ID = allocs[11].NodeID + n.Status = structs.NodeStatusDown + tainted[n.ID] = n + + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, tainted, "", 50, true) + r := reconciler.Compute() + // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: newD, + createDeployment: nil, deploymentUpdates: nil, - place: 2, + place: 1, inplace: 0, - stop: 5, + stop: 1, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Canary: 2, - Stop: 5, - Ignore: 5, + Canary: 1, + Ignore: 11, }, }, }) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) } -// Tests the reconciler handles filling the names of partially placed canaries -func TestReconciler_NewCanaries_FillNames(t *testing.T) { +// Tests the reconciler handles stopping canaries from older deployments +func TestReconciler_StopOldCanaries(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = &structs.UpdateStrategy{ - Canary: 4, - MaxParallel: 2, - HealthCheck: structs.UpdateStrategyHealthCheck_Checks, - MinHealthyTime: 10 * time.Second, - HealthyDeadline: 10 * time.Minute, - } + job.TaskGroups[0].Update = canaryUpdate - // Create an existing deployment that has placed some canaries + // Create an old deployment that has placed some canaries d := structs.NewDeployment(job, 50) s := &structs.DeploymentState{ Promoted: false, DesiredTotal: 10, - DesiredCanaries: 4, + DesiredCanaries: 2, PlacedAllocs: 2, } d.TaskGroups[job.TaskGroups[0].Name] = s + 
// Update the job + job.Version += 10 + // Create 10 allocations from the old job var allocs []*structs.Allocation for i := 0; i < 10; i++ { @@ -3821,8 +3948,8 @@ func TestReconciler_NewCanaries_FillNames(t *testing.T) { allocs = append(allocs, alloc) } - // Create canaries but pick names at the ends - for i := 0; i < 4; i += 3 { + // Create canaries + for i := 0; i < 2; i++ { // Create one canary canary := mock.Alloc() canary.Job = job @@ -3835,46 +3962,50 @@ func TestReconciler_NewCanaries_FillNames(t *testing.T) { allocs = append(allocs, canary) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - d, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, + allocs, nil, "", 50, true) r := reconciler.Compute() + newD := structs.NewDeployment(job, 50) + newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 10, + } + // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 2, - inplace: 0, - stop: 0, + createDeployment: newD, + deploymentUpdates: []*structs.DeploymentStatusUpdate{ + { + DeploymentID: d.ID, + Status: structs.DeploymentStatusCancelled, + StatusDescription: structs.DeploymentStatusDescriptionNewerJob, + }, + }, + place: 2, + inplace: 0, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { Canary: 2, - Ignore: 12, + Stop: 2, + Ignore: 10, }, }, }) - assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) } -// Tests the reconciler handles canary promotion by unblocking max_parallel -func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { +// Tests the reconciler creates new canaries when the job changes +func TestReconciler_NewCanaries(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = canaryUpdate - // Create an existing deployment that has placed some canaries and mark them - // promoted - d := structs.NewDeployment(job, 50) - s := &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 10, - DesiredCanaries: 2, - PlacedAllocs: 2, - } - d.TaskGroups[job.TaskGroups[0].Name] = s - // Create 10 allocations from the old job var allocs []*structs.Allocation for i := 0; i < 10; i++ { @@ -3887,74 +4018,48 @@ func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { allocs = append(allocs, alloc) } - // Create the canaries - handled := make(map[string]allocUpdateType) - for i := 0; i < 2; i++ { - // Create one canary - canary := mock.Alloc() - canary.Job = job - canary.JobID = job.ID - canary.NodeID = uuid.Generate() - canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - canary.TaskGroup = job.TaskGroups[0].Name - s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) - canary.DeploymentID = d.ID - canary.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } - allocs = append(allocs, canary) - handled[canary.ID] = allocUpdateFnIgnore - } - - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) + reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + nil, allocs, nil, "", 50, true) r := reconciler.Compute() + newD := structs.NewDeployment(job, 50) + newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 10, + } + // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: nil, + createDeployment: newD, deploymentUpdates: nil, - destructive: 2, - stop: 2, + place: 2, + inplace: 0, + stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Stop: 2, - DestructiveUpdate: 2, - Ignore: 8, + Canary: 2, + Ignore: 10, }, }, }) - assertNoCanariesStopped(t, d, r.stop) - assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) } -// Tests the reconciler handles canary promotion when the canary count equals -// the total correctly -func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { +// Tests the reconciler creates new canaries when the job changes and the +// canary count is greater than the task group count +func TestReconciler_NewCanaries_CountGreater(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = canaryUpdate - job.TaskGroups[0].Count = 2 - - // Create an existing deployment that has placed some canaries and mark them - // promoted - d := structs.NewDeployment(job, 50) - s := &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 2, - DesiredCanaries: 2, - PlacedAllocs: 2, - HealthyAllocs: 2, - } - d.TaskGroups[job.TaskGroups[0].Name] = s + job.TaskGroups[0].Count = 3 + job.TaskGroups[0].Update = canaryUpdate.Copy() + job.TaskGroups[0].Update.Canary = 7 - // Create 2 allocations from the old job + // Create 3 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 2; i++ { + for i := 0; i < 3; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -3964,171 +4069,107 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { allocs = append(allocs, alloc) } - // Create the canaries - handled := make(map[string]allocUpdateType) - for i := 0; i < 2; i++ { - // Create one canary - canary := mock.Alloc() - canary.Job = job - canary.JobID = job.ID - canary.NodeID = uuid.Generate() - canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - canary.TaskGroup = job.TaskGroups[0].Name - s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) - canary.DeploymentID = d.ID - canary.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } - allocs = append(allocs, canary) - handled[canary.ID] = allocUpdateFnIgnore - } - - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + nil, allocs, nil, "", 50, true) r := reconciler.Compute() - updates := []*structs.DeploymentStatusUpdate{ - { - DeploymentID: d.ID, - Status: structs.DeploymentStatusSuccessful, - StatusDescription: structs.DeploymentStatusDescriptionSuccessful, - }, + newD := structs.NewDeployment(job, 50) + newD.StatusDescription = 
structs.DeploymentStatusDescriptionRunningNeedsPromotion + state := &structs.DeploymentState{ + DesiredCanaries: 7, + DesiredTotal: 3, } + newD.TaskGroups[job.TaskGroups[0].Name] = state // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: updates, - place: 0, + createDeployment: newD, + deploymentUpdates: nil, + place: 7, inplace: 0, - stop: 2, + stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Stop: 2, - Ignore: 2, + Canary: 7, + Ignore: 3, }, }, }) - assertNoCanariesStopped(t, d, r.stop) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place)) } -// Tests the reconciler checks the health of placed allocs to determine the -// limit -func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { +// Tests the reconciler creates new canaries when the job changes for multiple +// task groups +func TestReconciler_NewCanaries_MultiTG(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) + job.TaskGroups[0].Name = "tg2" - cases := []struct { - healthy int - }{ - { - healthy: 0, - }, - { - healthy: 1, - }, - { - healthy: 2, - }, - { - healthy: 3, - }, - { - healthy: 4, - }, + // Create 10 allocations from the old job for each tg + var allocs []*structs.Allocation + for j := 0; j < 2; j++ { + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[j].Name + allocs = append(allocs, alloc) + } } - for _, c := range cases { - t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) { - // Create an existing deployment that has placed some canaries and mark them - // promoted - d := structs.NewDeployment(job, 50) - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 10, - PlacedAllocs: 4, - } - - // Create 6 allocations from the old job - var allocs []*structs.Allocation - for i := 4; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name - allocs = append(allocs, alloc) - } - - // Create the new allocs - handled := make(map[string]allocUpdateType) - for i := 0; i < 4; i++ { - new := mock.Alloc() - new.Job = job - new.JobID = job.ID - new.NodeID = uuid.Generate() - new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - new.TaskGroup = job.TaskGroups[0].Name - new.DeploymentID = d.ID - if i < c.healthy { - new.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } - } - allocs = append(allocs, new) - handled[new.ID] = allocUpdateFnIgnore - } + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + nil, allocs, nil, "", 50, true) + r := reconciler.Compute() - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - r := reconciler.Compute() + newD := structs.NewDeployment(job, 50) + newD.StatusDescription = 
structs.DeploymentStatusDescriptionRunningNeedsPromotion + state := &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 10, + } + newD.TaskGroups[job.TaskGroups[0].Name] = state + newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy() - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - destructive: c.healthy, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - DestructiveUpdate: uint64(c.healthy), - Ignore: uint64(10 - c.healthy), - }, - }, - }) + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: newD, + deploymentUpdates: nil, + place: 4, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 2, + Ignore: 10, + }, + job.TaskGroups[1].Name: { + Canary: 2, + Ignore: 10, + }, + }, + }) - if c.healthy != 0 { - assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) - } - }) - } + assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) } -// Tests the reconciler handles an alloc on a tainted node during a rolling -// update -func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { +// Tests the reconciler creates new canaries when the job changes and scales up +func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { ci.Parallel(t) + // Scale the job up to 15 job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate - - // Create an existing deployment that has some placed allocs - d := structs.NewDeployment(job, 50) - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 10, - PlacedAllocs: 7, - } + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups[0].Count = 15 - // Create 2 allocations from the old job + // Create 10 allocations from the old job var allocs []*structs.Allocation - for i := 8; i < 10; i++ { + for i := 0; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -4138,85 +4179,114 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { allocs = append(allocs, alloc) } - // Create the healthy replacements - handled := make(map[string]allocUpdateType) - for i := 0; i < 8; i++ { - new := mock.Alloc() - new.Job = job - new.JobID = job.ID - new.NodeID = uuid.Generate() - new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - new.TaskGroup = job.TaskGroups[0].Name - new.DeploymentID = d.ID - new.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } - allocs = append(allocs, new) - handled[new.ID] = allocUpdateFnIgnore + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + nil, allocs, nil, "", 50, true) + r := reconciler.Compute() + + newD := structs.NewDeployment(job, 50) + newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 15, } - // Build a map of tainted nodes - tainted := make(map[string]*structs.Node, 3) - for i := 0; i < 3; i++ { - n := mock.Node() - n.ID = allocs[2+i].NodeID - if i == 0 { - n.Status = structs.NodeStatusDown - } else { - n.DrainStrategy = mock.DrainNode().DrainStrategy - allocs[2+i].DesiredTransition.Migrate = pointer.Of(true) - } - tainted[n.ID] = n + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: newD, + 
deploymentUpdates: nil, + place: 2, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Canary: 2, + Ignore: 10, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) +} + +// Tests the reconciler creates new canaries when the job changes and scales +// down +func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { + ci.Parallel(t) + + // Scale the job down to 5 + job := mock.Job() + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups[0].Count = 5 + + // Create 10 allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) } - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + nil, allocs, nil, "", 50, true) r := reconciler.Compute() + newD := structs.NewDeployment(job, 50) + newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion + newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredCanaries: 2, + DesiredTotal: 5, + } + // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: nil, + createDeployment: newD, deploymentUpdates: nil, - place: 3, - destructive: 2, - stop: 3, + place: 2, + inplace: 0, + stop: 5, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 1, // Place the lost - Stop: 1, // Stop the lost - Migrate: 2, // Migrate the tainted - DestructiveUpdate: 2, - Ignore: 5, + Canary: 2, + Stop: 5, + Ignore: 5, }, }, }) - assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) - assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) } -// Tests the reconciler handles a failed deployment with allocs on tainted -// nodes -func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { +// Tests the reconciler handles filling the names of partially placed canaries +func TestReconciler_NewCanaries_FillNames(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Update = &structs.UpdateStrategy{ + Canary: 4, + MaxParallel: 2, + HealthCheck: structs.UpdateStrategyHealthCheck_Checks, + MinHealthyTime: 10 * time.Second, + HealthyDeadline: 10 * time.Minute, + } - // Create an existing failed deployment that has some placed allocs + // Create an existing deployment that has placed some canaries d := structs.NewDeployment(job, 50) - d.Status = structs.DeploymentStatusFailed - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 10, - PlacedAllocs: 4, + s := &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 4, + PlacedAllocs: 2, } + d.TaskGroups[job.TaskGroups[0].Name] = s - // Create 6 allocations from the old job + // Create 10 allocations from the old job 
var allocs []*structs.Allocation - for i := 4; i < 10; i++ { + for i := 0; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -4226,40 +4296,22 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { allocs = append(allocs, alloc) } - // Create the healthy replacements - handled := make(map[string]allocUpdateType) - for i := 0; i < 4; i++ { - new := mock.Alloc() - new.Job = job - new.JobID = job.ID - new.NodeID = uuid.Generate() - new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - new.TaskGroup = job.TaskGroups[0].Name - new.DeploymentID = d.ID - new.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } - allocs = append(allocs, new) - handled[new.ID] = allocUpdateFnIgnore - } - - // Build a map of tainted nodes - tainted := make(map[string]*structs.Node, 2) - for i := 0; i < 2; i++ { - n := mock.Node() - n.ID = allocs[6+i].NodeID - if i == 0 { - n.Status = structs.NodeStatusDown - } else { - n.DrainStrategy = mock.DrainNode().DrainStrategy - allocs[6+i].DesiredTransition.Migrate = pointer.Of(true) - } - tainted[n.ID] = n + // Create canaries but pick names at the ends + for i := 0; i < 4; i += 3 { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = uuid.Generate() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) + canary.DeploymentID = d.ID + allocs = append(allocs, canary) } - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, tainted, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + d, allocs, nil, "", 50, true) r := reconciler.Compute() // Assert the correct results @@ -4268,40 +4320,37 @@ func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { deploymentUpdates: nil, place: 2, inplace: 0, - stop: 2, + stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 1, - Migrate: 1, - Stop: 1, - Ignore: 8, + Canary: 2, + Ignore: 12, }, }, }) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) } -// Tests the reconciler handles a run after a deployment is complete -// successfully. 
-func TestReconciler_CompleteDeployment(t *testing.T) { +// Tests the reconciler handles canary promotion by unblocking max_parallel +func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = canaryUpdate + // Create an existing deployment that has placed some canaries and mark them + // promoted d := structs.NewDeployment(job, 50) - d.Status = structs.DeploymentStatusSuccessful - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + s := &structs.DeploymentState{ Promoted: true, DesiredTotal: 10, DesiredCanaries: 2, - PlacedAllocs: 10, - HealthyAllocs: 10, + PlacedAllocs: 2, } + d.TaskGroups[job.TaskGroups[0].Name] = s - // Create allocations from the old job + // Create 10 allocations from the old job var allocs []*structs.Allocation for i := 0; i < 10; i++ { alloc := mock.Alloc() @@ -4310,14 +4359,30 @@ func TestReconciler_CompleteDeployment(t *testing.T) { alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + allocs = append(allocs, alloc) + } + + // Create the canaries + handled := make(map[string]allocUpdateType) + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = uuid.Generate() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) + canary.DeploymentID = d.ID + canary.DeploymentStatus = &structs.AllocDeploymentStatus{ Healthy: pointer.Of(true), } - allocs = append(allocs, alloc) + allocs = append(allocs, canary) + handled[canary.ID] = allocUpdateFnIgnore } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) r := reconciler.Compute() @@ -4325,58 +4390,77 @@ func TestReconciler_CompleteDeployment(t *testing.T) { assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: 0, - inplace: 0, - stop: 0, + destructive: 2, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Ignore: 10, + Stop: 2, + DestructiveUpdate: 2, + Ignore: 8, }, }, }) + + assertNoCanariesStopped(t, d, r.stop) + assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) } -// Tests that the reconciler marks a deployment as complete once there is -// nothing left to place even if there are failed allocations that are part of -// the deployment. 
-func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { +// Tests the reconciler handles canary promotion when the canary count equals +// the total correctly +func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { ci.Parallel(t) job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups[0].Count = 2 + // Create an existing deployment that has placed some canaries and mark them + // promoted d := structs.NewDeployment(job, 50) - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredTotal: 10, - PlacedAllocs: 20, - HealthyAllocs: 10, + s := &structs.DeploymentState{ + Promoted: true, + DesiredTotal: 2, + DesiredCanaries: 2, + PlacedAllocs: 2, + HealthyAllocs: 2, } + d.TaskGroups[job.TaskGroups[0].Name] = s - // Create 10 healthy allocs and 10 allocs that are failed + // Create 2 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 20; i++ { + for i := 0; i < 2; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10)) + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{} - if i < 10 { - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DeploymentStatus.Healthy = pointer.Of(true) - } else { - alloc.DesiredStatus = structs.AllocDesiredStatusStop - alloc.ClientStatus = structs.AllocClientStatusFailed - alloc.DeploymentStatus.Healthy = pointer.Of(false) - } - allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, - job, d, allocs, nil, "", 50, true) + // Create the canaries + handled := make(map[string]allocUpdateType) + for i := 0; i < 2; i++ { + // Create one canary + canary := mock.Alloc() + canary.Job = job + canary.JobID = job.ID + canary.NodeID = uuid.Generate() + canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + canary.TaskGroup = job.TaskGroups[0].Name + s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) + canary.DeploymentID = d.ID + canary.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } + allocs = append(allocs, canary) + handled[canary.ID] = allocUpdateFnIgnore + } + + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, nil, "", 50, true) r := reconciler.Compute() updates := []*structs.DeploymentStatusUpdate{ @@ -4393,130 +4477,133 @@ func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { deploymentUpdates: updates, place: 0, inplace: 0, - stop: 0, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Ignore: 10, + Stop: 2, + Ignore: 2, }, }, }) + + assertNoCanariesStopped(t, d, r.stop) + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) } -// Test that a failed deployment cancels non-promoted canaries -func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { +// Tests the reconciler checks the health of placed allocs to determine the +// limit +func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { ci.Parallel(t) - // Create a job with two task groups job := mock.Job() - job.TaskGroups[0].Update = canaryUpdate - 
job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) - job.TaskGroups[1].Name = "two" + job.TaskGroups[0].Update = noCanaryUpdate - // Create an existing failed deployment that has promoted one task group - d := structs.NewDeployment(job, 50) - d.Status = structs.DeploymentStatusFailed - s0 := &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 10, - DesiredCanaries: 2, - PlacedAllocs: 4, - } - s1 := &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 10, - DesiredCanaries: 2, - PlacedAllocs: 2, + cases := []struct { + healthy int + }{ + { + healthy: 0, + }, + { + healthy: 1, + }, + { + healthy: 2, + }, + { + healthy: 3, + }, + { + healthy: 4, + }, } - d.TaskGroups[job.TaskGroups[0].Name] = s0 - d.TaskGroups[job.TaskGroups[1].Name] = s1 - // Create 6 allocations from the old job - var allocs []*structs.Allocation - handled := make(map[string]allocUpdateType) - for _, group := range []int{0, 1} { - replacements := 4 - state := s0 - if group == 1 { - replacements = 2 - state = s1 - } + for _, c := range cases { + t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) { + // Create an existing deployment that has placed some canaries and mark them + // promoted + d := structs.NewDeployment(job, 50) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: true, + DesiredTotal: 10, + PlacedAllocs: 4, + } - // Create the healthy replacements - for i := 0; i < replacements; i++ { - new := mock.Alloc() - new.Job = job - new.JobID = job.ID - new.NodeID = uuid.Generate() - new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) - new.TaskGroup = job.TaskGroups[group].Name - new.DeploymentID = d.ID - new.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), + // Create 6 allocations from the old job + var allocs []*structs.Allocation + for i := 4; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) } - allocs = append(allocs, new) - handled[new.ID] = allocUpdateFnIgnore - // Add the alloc to the canary list - if i < 2 { - state.PlacedCanaries = append(state.PlacedCanaries, new.ID) + // Create the new allocs + handled := make(map[string]allocUpdateType) + for i := 0; i < 4; i++ { + new := mock.Alloc() + new.Job = job + new.JobID = job.ID + new.NodeID = uuid.Generate() + new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + new.TaskGroup = job.TaskGroups[0].Name + new.DeploymentID = d.ID + if i < c.healthy { + new.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } + } + allocs = append(allocs, new) + handled[new.ID] = allocUpdateFnIgnore } - } - for i := replacements; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[group].Name - allocs = append(allocs, alloc) - } - } - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, - d, allocs, nil, "", 50, true) - r := reconciler.Compute() + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, nil, "", 50, 
true) + r := reconciler.Compute() - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 0, - inplace: 0, - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Ignore: 10, - }, - job.TaskGroups[1].Name: { - Stop: 2, - Ignore: 8, - }, - }, - }) + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + destructive: c.healthy, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + DestructiveUpdate: uint64(c.healthy), + Ignore: uint64(10 - c.healthy), + }, + }, + }) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + if c.healthy != 0 { + assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) + } + }) + } } -// Test that a failed deployment and updated job works -func TestReconciler_FailedDeployment_NewJob(t *testing.T) { +// Tests the reconciler handles an alloc on a tainted node during a rolling +// update +func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate - // Create an existing failed deployment that has some placed allocs + // Create an existing deployment that has some placed allocs d := structs.NewDeployment(job, 50) - d.Status = structs.DeploymentStatusFailed d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ Promoted: true, DesiredTotal: 10, - PlacedAllocs: 4, + PlacedAllocs: 7, } - // Create 6 allocations from the old job + // Create 2 allocations from the old job var allocs []*structs.Allocation - for i := 4; i < 10; i++ { + for i := 8; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -4527,7 +4614,8 @@ func TestReconciler_FailedDeployment_NewJob(t *testing.T) { } // Create the healthy replacements - for i := 0; i < 4; i++ { + handled := make(map[string]allocUpdateType) + for i := 0; i < 8; i++ { new := mock.Alloc() new.Job = job new.JobID = job.ID @@ -4539,114 +4627,156 @@ func TestReconciler_FailedDeployment_NewJob(t *testing.T) { Healthy: pointer.Of(true), } allocs = append(allocs, new) + handled[new.ID] = allocUpdateFnIgnore } - // Up the job version - jobNew := job.Copy() - jobNew.Version += 100 + // Build a map of tainted nodes + tainted := make(map[string]*structs.Node, 3) + for i := 0; i < 3; i++ { + n := mock.Node() + n.ID = allocs[2+i].NodeID + if i == 0 { + n.Status = structs.NodeStatusDown + } else { + n.DrainStrategy = mock.DrainNode().DrainStrategy + allocs[2+i].DesiredTransition.Migrate = pointer.Of(true) + } + tainted[n.ID] = n + } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, - d, allocs, nil, "", 50, true) + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, tainted, "", 50, true) r := reconciler.Compute() - dnew := structs.NewDeployment(jobNew, 50) - dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredTotal: 10, - } - // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: dnew, + createDeployment: nil, deploymentUpdates: nil, - destructive: 4, + place: 3, + destructive: 2, + stop: 3, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - DestructiveUpdate: 4, - Ignore: 6, + Place: 1, // 
Place the lost + Stop: 1, // Stop the lost + Migrate: 2, // Migrate the tainted + DestructiveUpdate: 2, + Ignore: 5, }, }, }) - assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) + assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) } -// Tests the reconciler marks a deployment as complete -func TestReconciler_MarkDeploymentComplete(t *testing.T) { +// Tests the reconciler handles a failed deployment with allocs on tainted +// nodes +func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate + // Create an existing failed deployment that has some placed allocs d := structs.NewDeployment(job, 50) + d.Status = structs.DeploymentStatusFailed d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 10, - PlacedAllocs: 10, - HealthyAllocs: 10, + Promoted: true, + DesiredTotal: 10, + PlacedAllocs: 4, } - // Create allocations from the old job + // Create 6 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 4; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DeploymentID = d.ID - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - d, allocs, nil, "", 50, true) - r := reconciler.Compute() + // Create the healthy replacements + handled := make(map[string]allocUpdateType) + for i := 0; i < 4; i++ { + new := mock.Alloc() + new.Job = job + new.JobID = job.ID + new.NodeID = uuid.Generate() + new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + new.TaskGroup = job.TaskGroups[0].Name + new.DeploymentID = d.ID + new.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } + allocs = append(allocs, new) + handled[new.ID] = allocUpdateFnIgnore + } - updates := []*structs.DeploymentStatusUpdate{ - { - DeploymentID: d.ID, - Status: structs.DeploymentStatusSuccessful, - StatusDescription: structs.DeploymentStatusDescriptionSuccessful, - }, + // Build a map of tainted nodes + tainted := make(map[string]*structs.Node, 2) + for i := 0; i < 2; i++ { + n := mock.Node() + n.ID = allocs[6+i].NodeID + if i == 0 { + n.Status = structs.NodeStatusDown + } else { + n.DrainStrategy = mock.DrainNode().DrainStrategy + allocs[6+i].DesiredTransition.Migrate = pointer.Of(true) + } + tainted[n.ID] = n } + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + d, allocs, tainted, "", 50, true) + r := reconciler.Compute() + // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: updates, - place: 0, + deploymentUpdates: nil, + place: 2, inplace: 0, - stop: 0, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Ignore: 10, + Place: 1, + Migrate: 1, + Stop: 1, + Ignore: 8, }, }, }) + + assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) + 
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) } -// Tests the reconciler handles changing a job such that a deployment is created -// while doing a scale up but as the second eval. -func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { +// Tests the reconciler handles a run after a deployment is complete +// successfully. +func TestReconciler_CompleteDeployment(t *testing.T) { ci.Parallel(t) - // Scale the job up to 15 job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate - job.TaskGroups[0].Count = 30 + job.TaskGroups[0].Update = canaryUpdate - // Create a deployment that is paused and has placed some canaries d := structs.NewDeployment(job, 50) + d.Status = structs.DeploymentStatusSuccessful d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: false, - DesiredTotal: 30, - PlacedAllocs: 20, + Promoted: true, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 10, + HealthyAllocs: 10, } - // Create 10 allocations from the old job + // Create allocations from the old job var allocs []*structs.Allocation for i := 0; i < 10; i++ { alloc := mock.Alloc() @@ -4655,25 +4785,14 @@ func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name - allocs = append(allocs, alloc) - } - - // Create 20 from new job - handled := make(map[string]allocUpdateType) - for i := 10; i < 30; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID alloc.DeploymentID = d.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } allocs = append(allocs, alloc) - handled[alloc.ID] = allocUpdateFnIgnore } - mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) - reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) r := reconciler.Compute() @@ -4681,302 +4800,283 @@ func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - // All should be ignored because nothing has been marked as - // healthy. - Ignore: 30, + Ignore: 10, }, }, }) } -// Tests the reconciler doesn't stop allocations when doing a rolling upgrade -// where the count of the old job allocs is < desired count. -func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { +// Tests that the reconciler marks a deployment as complete once there is +// nothing left to place even if there are failed allocations that are part of +// the deployment. 
+func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate - // Create 7 allocations from the old job - var allocs []*structs.Allocation - for i := 0; i < 7; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name - allocs = append(allocs, alloc) - } - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() - d := structs.NewDeployment(job, 50) d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - DesiredTotal: 10, + DesiredTotal: 10, + PlacedAllocs: 20, + HealthyAllocs: 10, } - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: d, - deploymentUpdates: nil, - place: 3, - destructive: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 3, - DestructiveUpdate: 1, - Ignore: 6, - }, - }, - }) - - assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) -} - -// Tests that the reconciler handles rerunning a batch job in the case that the -// allocations are from an older instance of the job. -func TestReconciler_Batch_Rerun(t *testing.T) { - ci.Parallel(t) - - job := mock.Job() - job.Type = structs.JobTypeBatch - job.TaskGroups[0].Update = nil - - // Create 10 allocations from the old job and have them be complete + // Create 10 healthy allocs and 10 allocs that are failed var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 0; i < 20; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10)) alloc.TaskGroup = job.TaskGroups[0].Name - alloc.ClientStatus = structs.AllocClientStatusComplete - alloc.DesiredStatus = structs.AllocDesiredStatusStop + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{} + if i < 10 { + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DeploymentStatus.Healthy = pointer.Of(true) + } else { + alloc.DesiredStatus = structs.AllocDesiredStatusStop + alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.DeploymentStatus.Healthy = pointer.Of(false) + } + allocs = append(allocs, alloc) } - // Create a copy of the job that is "new" - job2 := job.Copy() - job2.CreateIndex++ - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, - nil, allocs, nil, "", 50, true) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, + job, d, allocs, nil, "", 50, true) r := reconciler.Compute() + updates := []*structs.DeploymentStatusUpdate{ + { + DeploymentID: d.ID, + Status: structs.DeploymentStatusSuccessful, + StatusDescription: structs.DeploymentStatusDescriptionSuccessful, + }, + } + // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, - deploymentUpdates: nil, - place: 10, - destructive: 0, + deploymentUpdates: updates, + place: 0, + inplace: 0, + stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 10, - 
DestructiveUpdate: 0, - Ignore: 10, + Ignore: 10, }, }, }) - - assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) } -// Test that a failed deployment will not result in rescheduling failed allocations -func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { +// Test that a failed deployment cancels non-promoted canaries +func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { ci.Parallel(t) + // Create a job with two task groups job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate + job.TaskGroups[0].Update = canaryUpdate + job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) + job.TaskGroups[1].Name = "two" - tgName := job.TaskGroups[0].Name - now := time.Now() - // Create an existing failed deployment that has some placed allocs + // Create an existing failed deployment that has promoted one task group d := structs.NewDeployment(job, 50) d.Status = structs.DeploymentStatusFailed - d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 5, - PlacedAllocs: 4, + s0 := &structs.DeploymentState{ + Promoted: true, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 4, + } + s1 := &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + DesiredCanaries: 2, + PlacedAllocs: 2, } + d.TaskGroups[job.TaskGroups[0].Name] = s0 + d.TaskGroups[job.TaskGroups[1].Name] = s1 - // Create 4 allocations and mark two as failed + // Create 6 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 4; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DeploymentID = d.ID - allocs = append(allocs, alloc) - } + handled := make(map[string]allocUpdateType) + for _, group := range []int{0, 1} { + replacements := 4 + state := s0 + if group == 1 { + replacements = 2 + state = s1 + } - //create some allocations that are reschedulable now - allocs[2].ClientStatus = structs.AllocClientStatusFailed - allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} + // Create the healthy replacements + for i := 0; i < replacements; i++ { + new := mock.Alloc() + new.Job = job + new.JobID = job.ID + new.NodeID = uuid.Generate() + new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) + new.TaskGroup = job.TaskGroups[group].Name + new.DeploymentID = d.ID + new.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } + allocs = append(allocs, new) + handled[new.ID] = allocUpdateFnIgnore - allocs[3].ClientStatus = structs.AllocClientStatusFailed - allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} + // Add the alloc to the canary list + if i < 2 { + state.PlacedCanaries = append(state.PlacedCanaries, new.ID) + } + } + for i := replacements; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[group].Name + allocs = append(allocs, alloc) + } + } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + mockUpdateFn := allocUpdateFnMock(handled, 
allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) r := reconciler.Compute() - // Assert that no rescheduled placements were created + // Assert the correct results assertResults(t, r, &resultExpectation{ - place: 0, createDeployment: nil, deploymentUpdates: nil, + place: 0, + inplace: 0, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Ignore: 2, + Ignore: 10, + }, + job.TaskGroups[1].Name: { + Stop: 2, + Ignore: 8, }, }, }) + + assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) } -// Test that a running deployment with failed allocs will not result in -// rescheduling failed allocations unless they are marked as reschedulable. -func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { +// Test that a failed deployment and updated job works +func TestReconciler_FailedDeployment_NewJob(t *testing.T) { ci.Parallel(t) job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate - tgName := job.TaskGroups[0].Name - now := time.Now() - // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet + // Create an existing failed deployment that has some placed allocs d := structs.NewDeployment(job, 50) - d.Status = structs.DeploymentStatusRunning + d.Status = structs.DeploymentStatusFailed d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ - Promoted: false, + Promoted: true, DesiredTotal: 10, - PlacedAllocs: 10, + PlacedAllocs: 4, } - // Create 10 allocations + // Create 6 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 4; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DeploymentID = d.ID - alloc.ClientStatus = structs.AllocClientStatusFailed - alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} allocs = append(allocs, alloc) } - // Mark half of them as reschedulable - for i := 0; i < 5; i++ { - allocs[i].DesiredTransition.Reschedule = pointer.Of(true) + // Create the healthy replacements + for i := 0; i < 4; i++ { + new := mock.Alloc() + new.Job = job + new.JobID = job.ID + new.NodeID = uuid.Generate() + new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + new.TaskGroup = job.TaskGroups[0].Name + new.DeploymentID = d.ID + new.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } + allocs = append(allocs, new) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + // Up the job version + jobNew := job.Copy() + jobNew.Version += 100 + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "", 50, true) r := reconciler.Compute() - // Assert that no rescheduled placements were created + dnew := structs.NewDeployment(jobNew, 50) + dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredTotal: 10, + } + + // Assert the correct results assertResults(t, r, &resultExpectation{ - place: 5, - stop: 5, - createDeployment: nil, + createDeployment: dnew, deploymentUpdates: nil, + destructive: 4, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - 
Place: 5, - Stop: 5, - Ignore: 5, + DestructiveUpdate: 4, + Ignore: 6, }, }, }) + + assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) } -// Test that a failed deployment cancels non-promoted canaries -func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { +// Tests the reconciler marks a deployment as complete +func TestReconciler_MarkDeploymentComplete(t *testing.T) { ci.Parallel(t) - // Create a job - job := mock.Job() - job.TaskGroups[0].Count = 3 - job.TaskGroups[0].Update = &structs.UpdateStrategy{ - Canary: 3, - MaxParallel: 2, - HealthCheck: structs.UpdateStrategyHealthCheck_Checks, - MinHealthyTime: 10 * time.Second, - HealthyDeadline: 10 * time.Minute, - Stagger: 31 * time.Second, - } - - // Create v1 of the job - jobv1 := job.Copy() - jobv1.Version = 1 - jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"} - - // Create v2 of the job - jobv2 := job.Copy() - jobv2.Version = 2 - jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"} - - d := structs.NewDeployment(jobv2, 50) - state := &structs.DeploymentState{ - Promoted: true, - DesiredTotal: 3, - PlacedAllocs: 3, - HealthyAllocs: 3, - } - d.TaskGroups[job.TaskGroups[0].Name] = state - - // Create the original - var allocs []*structs.Allocation - for i := 0; i < 3; i++ { - new := mock.Alloc() - new.Job = jobv2 - new.JobID = job.ID - new.NodeID = uuid.Generate() - new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - new.TaskGroup = job.TaskGroups[0].Name - new.DeploymentID = d.ID - new.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(true), - } - new.ClientStatus = structs.AllocClientStatusRunning - allocs = append(allocs, new) - + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + + d := structs.NewDeployment(job, 50) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: true, + DesiredTotal: 10, + PlacedAllocs: 10, + HealthyAllocs: 10, } - for i := 0; i < 3; i++ { - new := mock.Alloc() - new.Job = jobv1 - new.JobID = jobv1.ID - new.NodeID = uuid.Generate() - new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i)) - new.TaskGroup = job.TaskGroups[0].Name - new.DeploymentID = uuid.Generate() - new.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: pointer.Of(false), + + // Create allocations from the old job + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DeploymentID = d.ID + alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), } - new.DesiredStatus = structs.AllocDesiredStatusStop - new.ClientStatus = structs.AllocClientStatusFailed - allocs = append(allocs, new) + allocs = append(allocs, alloc) } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "", 50, true) r := reconciler.Compute() @@ -4997,34 +5097,31 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { stop: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Stop: 0, - InPlaceUpdate: 0, - Ignore: 3, + Ignore: 10, }, }, }) } -// Test that a successful deployment with failed allocs will result in -// 
rescheduling failed allocations -func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) { +// Tests the reconciler handles changing a job such that a deployment is created +// while doing a scale up but as the second eval. +func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { ci.Parallel(t) + // Scale the job up to 15 job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate - tgName := job.TaskGroups[0].Name - now := time.Now() + job.TaskGroups[0].Count = 30 - // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet + // Create a deployment that is paused and has placed some canaries d := structs.NewDeployment(job, 50) - d.Status = structs.DeploymentStatusSuccessful d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ Promoted: false, - DesiredTotal: 10, - PlacedAllocs: 10, + DesiredTotal: 30, + PlacedAllocs: 20, } - // Create 10 allocations + // Create 10 allocations from the old job var allocs []*structs.Allocation for i := 0; i < 10; i++ { alloc := mock.Alloc() @@ -5033,837 +5130,739 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) alloc.TaskGroup = job.TaskGroups[0].Name + allocs = append(allocs, alloc) + } + + // Create 20 from new job + handled := make(map[string]allocUpdateType) + for i := 10; i < 30; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID alloc.DeploymentID = d.ID - alloc.ClientStatus = structs.AllocClientStatusFailed - alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) + handled[alloc.ID] = allocUpdateFnIgnore } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) + reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "", 50, true) r := reconciler.Compute() - // Assert that rescheduled placements were created + // Assert the correct results assertResults(t, r, &resultExpectation{ - place: 10, - stop: 10, createDeployment: nil, deploymentUpdates: nil, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 10, - Stop: 10, - Ignore: 0, + // All should be ignored because nothing has been marked as + // healthy. + Ignore: 30, }, }, }) - assertPlaceResultsHavePreviousAllocs(t, 10, r.place) } -// Tests force rescheduling a failed alloc that is past its reschedule limit -func TestReconciler_ForceReschedule_Service(t *testing.T) { +// Tests the reconciler doesn't stop allocations when doing a rolling upgrade +// where the count of the old job allocs is < desired count. 
+func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { ci.Parallel(t) - require := require.New(t) - - // Set desired 5 job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name - - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 1, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } job.TaskGroups[0].Update = noCanaryUpdate - // Create 5 existing allocations + // Create 7 allocations from the old job var allocs []*structs.Allocation - for i := 0; i < 5; i++ { + for i := 0; i < 7; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning } - // Mark one as failed and past its reschedule limit so not eligible to reschedule - allocs[0].ClientStatus = structs.AllocClientStatusFailed - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} - - // Mark DesiredTransition ForceReschedule - allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)} - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "", 50, true) r := reconciler.Compute() - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) + d := structs.NewDeployment(job, 50) + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + DesiredTotal: 10, + } - // Verify that one rescheduled alloc was created because of the forced reschedule + // Assert the correct results assertResults(t, r, &resultExpectation{ - createDeployment: nil, + createDeployment: d, deploymentUpdates: nil, - place: 1, - stop: 1, - inplace: 0, + place: 3, + destructive: 1, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 1, - Stop: 1, - Ignore: 4, + Place: 3, + DestructiveUpdate: 1, + Ignore: 6, }, }, }) - // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) + assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) } -// Tests behavior of service failure with rescheduling policy preventing rescheduling: -// new allocs should be placed to satisfy the job count, and current allocations are -// left unmodified -func TestReconciler_RescheduleNot_Service(t *testing.T) { +// Tests that the reconciler handles rerunning a batch job in the case that the +// allocations are from an older instance of the job. 
+func TestReconciler_Batch_Rerun(t *testing.T) { ci.Parallel(t) - require := require.New(t) - - // Set desired 5 job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name - now := time.Now() - - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 0, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } - job.TaskGroups[0].Update = noCanaryUpdate + job.Type = structs.JobTypeBatch + job.TaskGroups[0].Update = nil - // Create 5 existing allocations + // Create 10 allocations from the old job and have them be complete var allocs []*structs.Allocation - for i := 0; i < 5; i++ { + for i := 0; i < 10; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.ClientStatus = structs.AllocClientStatusComplete + alloc.DesiredStatus = structs.AllocDesiredStatusStop allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning } - // Mark two as failed - allocs[0].ClientStatus = structs.AllocClientStatusFailed - - // Mark one of them as already rescheduled once - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} - allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} - allocs[1].ClientStatus = structs.AllocClientStatusFailed - - // Mark one as desired state stop - allocs[4].DesiredStatus = structs.AllocDesiredStatusStop + // Create a copy of the job that is "new" + job2 := job.Copy() + job2.CreateIndex++ - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "", 50, true) r := reconciler.Compute() - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) - - // no rescheduling, ignore all 4 allocs - // but place one to substitute allocs[4] that was stopped explicitly + // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: 1, - inplace: 0, - stop: 0, + place: 10, + destructive: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 1, - Ignore: 4, - Stop: 0, + Place: 10, + DestructiveUpdate: 0, + Ignore: 10, }, }, }) - // none of the placement should have preallocs or rescheduled - assertPlaceResultsHavePreviousAllocs(t, 0, r.place) - assertPlacementsAreRescheduled(t, 0, r.place) + assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) } -// Tests behavior of batch failure with rescheduling policy preventing rescheduling: -// current allocations are left unmodified and no follow up -func TestReconciler_RescheduleNot_Batch(t *testing.T) { +// Test that a failed deployment will not result in rescheduling failed allocations +func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { ci.Parallel(t) - require := require.New(t) - // Set desired 4 job := mock.Job() - job.TaskGroups[0].Count = 4 - now := time.Now() - // Set up reschedule 
policy - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 0, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "constant", - } + job.TaskGroups[0].Update = noCanaryUpdate + tgName := job.TaskGroups[0].Name - // Create 6 existing allocations - 2 running, 1 complete and 3 failed + now := time.Now() + // Create an existing failed deployment that has some placed allocs + d := structs.NewDeployment(job, 50) + d.Status = structs.DeploymentStatusFailed + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: true, + DesiredTotal: 5, + PlacedAllocs: 4, + } + + // Create 4 allocations and mark two as failed var allocs []*structs.Allocation - for i := 0; i < 6; i++ { + for i := 0; i < 4; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DeploymentID = d.ID allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning } - // Mark 3 as failed with restart tracking info - allocs[0].ClientStatus = structs.AllocClientStatusFailed - allocs[0].NextAllocation = allocs[1].ID - allocs[1].ClientStatus = structs.AllocClientStatusFailed - allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: allocs[0].ID, - PrevNodeID: uuid.Generate(), - }, - }} - allocs[1].NextAllocation = allocs[2].ID + + //create some allocations that are reschedulable now allocs[2].ClientStatus = structs.AllocClientStatusFailed allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-5 * time.Second)}} - allocs[2].FollowupEvalID = uuid.Generate() - allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), - PrevAllocID: allocs[0].ID, - PrevNodeID: uuid.Generate(), - }, - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: allocs[1].ID, - PrevNodeID: uuid.Generate(), - }, - }} - // Mark one as complete - allocs[5].ClientStatus = structs.AllocClientStatusComplete + FinishedAt: now.Add(-10 * time.Second)}} - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, - nil, allocs, nil, "", 50, true) - reconciler.now = now - r := reconciler.Compute() + allocs[3].ClientStatus = structs.AllocClientStatusFailed + allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() - // No reschedule attempts were made and all allocs are untouched + // Assert that no rescheduled placements were created assertResults(t, r, &resultExpectation{ + place: 0, createDeployment: nil, deploymentUpdates: nil, - place: 0, - stop: 0, - inplace: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 0, - Stop: 0, - Ignore: 4, + Ignore: 2, }, }, }) } -// Tests that when a node disconnects running allocations are queued to transition to unknown. 
-func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) { - job, allocs := buildResumableAllocations(3, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) - // Build a map of disconnected nodes - nodes := buildDisconnectedNodes(allocs, 2) +// Test that a running deployment with failed allocs will not result in +// rescheduling failed allocations unless they are marked as reschedulable. +func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { + ci.Parallel(t) - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nodes, "", 50, true) - reconciler.now = time.Now().UTC() - results := reconciler.Compute() + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + tgName := job.TaskGroups[0].Name + now := time.Now() - // Verify that 1 follow up eval was created with the values we expect. - evals := results.desiredFollowupEvals[job.TaskGroups[0].Name] - require.Len(t, evals, 1) - expectedTime := reconciler.now.Add(5 * time.Minute) + // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet + d := structs.NewDeployment(job, 50) + d.Status = structs.DeploymentStatusRunning + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + PlacedAllocs: 10, + } - eval := evals[0] - require.NotNil(t, eval.WaitUntil) - require.Equal(t, expectedTime, eval.WaitUntil) + // Create 10 allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DeploymentID = d.ID + alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs = append(allocs, alloc) + } - // Validate that the queued disconnectUpdates have the right client status, - // and that they have a valid FollowUpdEvalID. 
- for _, disconnectUpdate := range results.disconnectUpdates { - require.Equal(t, structs.AllocClientStatusUnknown, disconnectUpdate.ClientStatus) - require.NotEmpty(t, disconnectUpdate.FollowupEvalID) - require.Equal(t, eval.ID, disconnectUpdate.FollowupEvalID) + // Mark half of them as reschedulable + for i := 0; i < 5; i++ { + allocs[i].DesiredTransition.Reschedule = pointer.Of(true) } - // 2 to place, 2 to update, 1 to ignore - assertResults(t, results, &resultExpectation{ + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() + + // Assert that no rescheduled placements were created + assertResults(t, r, &resultExpectation{ + place: 5, + stop: 5, createDeployment: nil, deploymentUpdates: nil, - place: 2, - stop: 0, - inplace: 0, - disconnectUpdates: 2, - - // 2 to place and 1 to ignore desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { - Place: 2, - Stop: 0, - Ignore: 1, - InPlaceUpdate: 0, + Place: 5, + Stop: 5, + Ignore: 5, }, }, }) } -func TestReconciler_Disconnect_UpdateJobAfterReconnect(t *testing.T) { +// Test that a failed deployment cancels non-promoted canaries +func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { ci.Parallel(t) - // Create 2 allocs and simulate one have being previously disconnected and - // then reconnected. - job, allocs := buildResumableAllocations(2, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) - allocs[0].AllocStates = []*structs.AllocState{ - { - Field: structs.AllocStateFieldClientStatus, - Value: structs.AllocClientStatusUnknown, - Time: time.Now().Add(-5 * time.Minute), - }, - { - Field: structs.AllocStateFieldClientStatus, - Value: structs.AllocClientStatusRunning, - Time: time.Now(), - }, + // Create a job + job := mock.Job() + job.TaskGroups[0].Count = 3 + job.TaskGroups[0].Update = &structs.UpdateStrategy{ + Canary: 3, + MaxParallel: 2, + HealthCheck: structs.UpdateStrategyHealthCheck_Checks, + MinHealthyTime: 10 * time.Second, + HealthyDeadline: 10 * time.Minute, + Stagger: 31 * time.Second, } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, - nil, allocs, nil, "", 50, true) - results := reconciler.Compute() + // Create v1 of the job + jobv1 := job.Copy() + jobv1.Version = 1 + jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"} - // Assert both allocations will be updated. - assertResults(t, results, &resultExpectation{ - inplace: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - InPlaceUpdate: 2, - }, - }, - }) -} + // Create v2 of the job + jobv2 := job.Copy() + jobv2.Version = 2 + jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"} -// Tests that when a node disconnects/reconnects allocations for that node are -// reconciled according to the business rules. 
-func TestReconciler_Disconnected_Client(t *testing.T) { - disconnectAllocState := []*structs.AllocState{{ - Field: structs.AllocStateFieldClientStatus, - Value: structs.AllocClientStatusUnknown, - Time: time.Now(), - }} + d := structs.NewDeployment(jobv2, 50) + state := &structs.DeploymentState{ + Promoted: true, + DesiredTotal: 3, + PlacedAllocs: 3, + HealthyAllocs: 3, + } + d.TaskGroups[job.TaskGroups[0].Name] = state - type testCase struct { - name string - allocCount int - disconnectedAllocCount int - jobVersionIncrement uint64 - nodeScoreIncrement float64 - disconnectedAllocStatus string - disconnectedAllocStates []*structs.AllocState - isBatch bool - nodeStatusDisconnected bool - replace bool - failReplacement bool - taintReplacement bool - disconnectReplacement bool - replaceFailedReplacement bool - shouldStopOnDisconnectedNode bool - maxDisconnect *time.Duration - expected *resultExpectation + // Create the original + var allocs []*structs.Allocation + for i := 0; i < 3; i++ { + new := mock.Alloc() + new.Job = jobv2 + new.JobID = job.ID + new.NodeID = uuid.Generate() + new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + new.TaskGroup = job.TaskGroups[0].Name + new.DeploymentID = d.ID + new.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(true), + } + new.ClientStatus = structs.AllocClientStatusRunning + allocs = append(allocs, new) + + } + for i := 0; i < 3; i++ { + new := mock.Alloc() + new.Job = jobv1 + new.JobID = jobv1.ID + new.NodeID = uuid.Generate() + new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i)) + new.TaskGroup = job.TaskGroups[0].Name + new.DeploymentID = uuid.Generate() + new.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: pointer.Of(false), + } + new.DesiredStatus = structs.AllocDesiredStatusStop + new.ClientStatus = structs.AllocClientStatusFailed + allocs = append(allocs, new) } - testCases := []testCase{ - { - name: "reconnect-original-no-replacement", - allocCount: 2, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, - - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: false, - expected: &resultExpectation{ - reconnectUpdates: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - }, - }, - }, - }, - { - name: "resume-original-and-stop-replacement", - allocCount: 3, - replace: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: false, - expected: &resultExpectation{ - stop: 1, - reconnectUpdates: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 1, - Ignore: 3, - }, - }, - }, - }, + updates := []*structs.DeploymentStatusUpdate{ { - name: "stop-original-with-lower-node-score", - allocCount: 4, - replace: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusRunning, - - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - nodeScoreIncrement: 1, - expected: &resultExpectation{ - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 1, - Ignore: 4, - }, - }, - }, + DeploymentID: d.ID, + Status: structs.DeploymentStatusSuccessful, + StatusDescription: 
structs.DeploymentStatusDescriptionSuccessful, }, - { - name: "stop-original-failed-on-reconnect", - allocCount: 4, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusFailed, + } - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 4, - }, - }, + // Assert the correct results + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: updates, + place: 0, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Stop: 0, + InPlaceUpdate: 0, + Ignore: 3, }, }, - { - name: "reschedule-original-failed-if-not-replaced", - allocCount: 4, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusFailed, + }) +} - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - expected: &resultExpectation{ - stop: 2, - place: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - Place: 2, - Stop: 2, - }, - }, - }, - }, - { - name: "ignore-reconnect-completed", - allocCount: 2, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusComplete, +// Test that a successful deployment with failed allocs will result in +// rescheduling failed allocations +func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) { + ci.Parallel(t) - disconnectedAllocStates: disconnectAllocState, - isBatch: true, - expected: &resultExpectation{ - place: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - Place: 2, - }, - }, - }, - }, - { - name: "keep-original-alloc-and-stop-failed-replacement", - allocCount: 3, - replace: true, - failReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + job := mock.Job() + job.TaskGroups[0].Update = noCanaryUpdate + tgName := job.TaskGroups[0].Name + now := time.Now() - disconnectedAllocStates: disconnectAllocState, - expected: &resultExpectation{ - reconnectUpdates: 2, - stop: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 5, - }, - }, - }, - }, - { - name: "keep-original-and-stop-reconnecting-replacement", - allocCount: 2, - replace: true, - disconnectReplacement: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet + d := structs.NewDeployment(job, 50) + d.Status = structs.DeploymentStatusSuccessful + d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ + Promoted: false, + DesiredTotal: 10, + PlacedAllocs: 10, + } - disconnectedAllocStates: disconnectAllocState, - expected: &resultExpectation{ - reconnectUpdates: 1, - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - Stop: 1, - }, - }, - }, - }, - { - name: "keep-original-and-stop-tainted-replacement", - allocCount: 3, - replace: true, - taintReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + // Create 10 allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, 
job.TaskGroups[0].Name, uint(i)) + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DeploymentID = d.ID + alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs = append(allocs, alloc) + } - disconnectedAllocStates: disconnectAllocState, - expected: &resultExpectation{ - reconnectUpdates: 2, - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 3, - Stop: 2, - }, - }, - }, - }, - { - name: "stop-original-alloc-with-old-job-version", - allocCount: 5, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, + d, allocs, nil, "", 50, true) + r := reconciler.Compute() - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - jobVersionIncrement: 1, - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 5, - Stop: 2, - }, - }, + // Assert that rescheduled placements were created + assertResults(t, r, &resultExpectation{ + place: 10, + stop: 10, + createDeployment: nil, + deploymentUpdates: nil, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 10, + Stop: 10, + Ignore: 0, }, }, - { - name: "stop-original-alloc-with-old-job-version-reconnect-eval", - allocCount: 5, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + }) + assertPlaceResultsHavePreviousAllocs(t, 10, r.place) +} - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - jobVersionIncrement: 1, - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 5, - }, - }, - }, - }, - { - name: "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced", - allocCount: 5, - replace: true, - failReplacement: true, - replaceFailedReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, +// Tests force rescheduling a failed alloc that is past its reschedule limit +func TestReconciler_ForceReschedule_Service(t *testing.T) { + ci.Parallel(t) - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: false, - jobVersionIncrement: 1, - expected: &resultExpectation{ - stop: 2, - reconnectUpdates: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 7, - }, - }, - }, - }, - { - name: "stop-original-pending-alloc-for-disconnected-node", - allocCount: 2, - replace: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusPending, + require := require.New(t) - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - nodeStatusDisconnected: true, - expected: &resultExpectation{ - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 1, - Ignore: 2, - }, - }, - }, - }, - { - name: "stop-failed-original-and-failed-replacements-and-place-new", - allocCount: 5, - replace: true, - failReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusFailed, + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name - 
disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - expected: &resultExpectation{ - stop: 2, - place: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Place: 2, - Ignore: 5, - }, - }, - }, - }, - { - name: "stop-expired-allocs", - allocCount: 5, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusUnknown, - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - nodeStatusDisconnected: true, - maxDisconnect: pointer.Of(2 * time.Second), - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 5, - }, - }, - }, + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = noCanaryUpdate + + // Create 5 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + + // Mark one as failed and past its reschedule limit so not eligible to reschedule + allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), }, - { - name: "replace-allocs-on-disconnected-node", - allocCount: 5, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, - disconnectedAllocStates: []*structs.AllocState{}, - nodeStatusDisconnected: true, - expected: &resultExpectation{ - place: 2, - disconnectUpdates: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Place: 2, - Ignore: 3, - }, - }, + }} + + // Mark DesiredTransition ForceReschedule + allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)} + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, "", 50, true) + r := reconciler.Compute() + + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Verify that one rescheduled alloc was created because of the forced reschedule + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + stop: 1, + inplace: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + Stop: 1, + Ignore: 4, }, }, - } + }) - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - require.NotEqual(t, 0, tc.allocCount, "invalid test case: alloc count must be greater than zero") + // Rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertPlacementsAreRescheduled(t, 1, r.place) +} - testNode := mock.Node() - if tc.nodeStatusDisconnected == true { - testNode.Status = structs.NodeStatusDisconnected - } +// Tests behavior of service failure with rescheduling 
policy preventing rescheduling: +// new allocs should be placed to satisfy the job count, and current allocations are +// left unmodified +func TestReconciler_RescheduleNot_Service(t *testing.T) { + ci.Parallel(t) - // Create resumable allocs - job, allocs := buildResumableAllocations(tc.allocCount, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) + require := require.New(t) - origAllocs := set.New[string](len(allocs)) - for _, alloc := range allocs { - origAllocs.Insert(alloc.ID) - } + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + now := time.Now() - if tc.isBatch { - job.Type = structs.JobTypeBatch - } + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 0, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = noCanaryUpdate - // Set alloc state - disconnectedAllocCount := tc.disconnectedAllocCount - for _, alloc := range allocs { - alloc.DesiredStatus = structs.AllocDesiredStatusRun + // Create 5 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + + // Mark two as failed + allocs[0].ClientStatus = structs.AllocClientStatusFailed + + // Mark one of them as already rescheduled once + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed + + // Mark one as desired state stop + allocs[4].DesiredStatus = structs.AllocDesiredStatusStop + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, "", 50, true) + r := reconciler.Compute() - if tc.maxDisconnect != nil { - alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect - } + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) - if disconnectedAllocCount > 0 { - alloc.ClientStatus = tc.disconnectedAllocStatus - alloc.AllocStates = tc.disconnectedAllocStates - // Set the node id on all the disconnected allocs to the node under test. - alloc.NodeID = testNode.ID - alloc.NodeName = "disconnected" - disconnectedAllocCount-- - } - } + // no rescheduling, ignore all 4 allocs + // but place one to substitute allocs[4] that was stopped explicitly + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + Ignore: 4, + Stop: 0, + }, + }, + }) - // Place the allocs on another node. 
- if tc.replace { - replacements := make([]*structs.Allocation, 0) - for _, alloc := range allocs { - if alloc.NodeID != testNode.ID { - continue - } - replacement := alloc.Copy() - replacement.ID = uuid.Generate() - replacement.NodeID = uuid.Generate() - replacement.ClientStatus = structs.AllocClientStatusRunning - replacement.PreviousAllocation = alloc.ID - replacement.AllocStates = nil - replacement.TaskStates = nil - replacement.CreateIndex += 1 - alloc.NextAllocation = replacement.ID + // none of the placement should have preallocs or rescheduled + assertPlaceResultsHavePreviousAllocs(t, 0, r.place) + assertPlacementsAreRescheduled(t, 0, r.place) +} - if tc.jobVersionIncrement != 0 { - replacement.Job.Version = replacement.Job.Version + tc.jobVersionIncrement - } - if tc.nodeScoreIncrement != 0 { - replacement.Metrics.ScoreMetaData[0].NormScore = replacement.Metrics.ScoreMetaData[0].NormScore + tc.nodeScoreIncrement - } - if tc.taintReplacement { - replacement.DesiredTransition.Migrate = pointer.Of(true) - } - if tc.disconnectReplacement { - replacement.AllocStates = tc.disconnectedAllocStates - } +// Tests behavior of batch failure with rescheduling policy preventing rescheduling: +// current allocations are left unmodified and no follow up +func TestReconciler_RescheduleNot_Batch(t *testing.T) { + ci.Parallel(t) - // If we want to test intermediate replacement failures simulate that. - if tc.failReplacement { - replacement.ClientStatus = structs.AllocClientStatusFailed + require := require.New(t) + // Set desired 4 + job := mock.Job() + job.TaskGroups[0].Count = 4 + now := time.Now() + // Set up reschedule policy + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 0, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "constant", + } + tgName := job.TaskGroups[0].Name + // Create 6 existing allocations - 2 running, 1 complete and 3 failed + var allocs []*structs.Allocation + for i := 0; i < 6; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + // Mark 3 as failed with restart tracking info + allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs[0].NextAllocation = allocs[1].ID + allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + }, + }} + allocs[1].NextAllocation = allocs[2].ID + allocs[2].ClientStatus = structs.AllocClientStatusFailed + allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-5 * time.Second)}} + allocs[2].FollowupEvalID = uuid.Generate() + allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + }, + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: allocs[1].ID, + PrevNodeID: uuid.Generate(), + }, + }} + // Mark one as complete + allocs[5].ClientStatus = structs.AllocClientStatusComplete - if tc.replaceFailedReplacement { - nextReplacement := replacement.Copy() - nextReplacement.ID = 
uuid.Generate() - nextReplacement.ClientStatus = structs.AllocClientStatusRunning - nextReplacement.DesiredStatus = structs.AllocDesiredStatusRun - nextReplacement.PreviousAllocation = replacement.ID - nextReplacement.CreateIndex += 1 + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, + nil, allocs, nil, "", 50, true) + reconciler.now = now + r := reconciler.Compute() - replacement.NextAllocation = nextReplacement.ID - replacement.DesiredStatus = structs.AllocDesiredStatusStop + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) - replacements = append(replacements, nextReplacement) - } - } + // No reschedule attempts were made and all allocs are untouched + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + stop: 0, + inplace: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 0, + Stop: 0, + Ignore: 4, + }, + }, + }) +} - replacements = append(replacements, replacement) - } +// Tests that when a node disconnects running allocations are queued to transition to unknown. +func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) { + job, allocs := buildResumableAllocations(3, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) + // Build a map of disconnected nodes + nodes := buildDisconnectedNodes(allocs, 2) - allocs = append(allocs, replacements...) - } + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nodes, "", 50, true) + reconciler.now = time.Now().UTC() + results := reconciler.Compute() - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, - nil, allocs, map[string]*structs.Node{testNode.ID: testNode}, "", 50, true) + // Verify that 1 follow up eval was created with the values we expect. + evals := results.desiredFollowupEvals[job.TaskGroups[0].Name] + require.Len(t, evals, 1) + expectedTime := reconciler.now.Add(5 * time.Minute) - reconciler.now = time.Now() - if tc.maxDisconnect != nil { - reconciler.now = time.Now().Add(*tc.maxDisconnect * 20) - } + eval := evals[0] + require.NotNil(t, eval.WaitUntil) + require.Equal(t, expectedTime, eval.WaitUntil) - results := reconciler.Compute() - assertResults(t, results, tc.expected) + // Validate that the queued disconnectUpdates have the right client status, + // and that they have a valid FollowUpdEvalID. + for _, disconnectUpdate := range results.disconnectUpdates { + require.Equal(t, structs.AllocClientStatusUnknown, disconnectUpdate.ClientStatus) + require.NotEmpty(t, disconnectUpdate.FollowupEvalID) + require.Equal(t, eval.ID, disconnectUpdate.FollowupEvalID) + } - for _, stopResult := range results.stop { - // Skip replacement allocs. 
- if !origAllocs.Contains(stopResult.alloc.ID) { - continue - } + // 2 to place, 2 to update, 1 to ignore + assertResults(t, results, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 2, + stop: 0, + inplace: 0, + disconnectUpdates: 2, - if tc.shouldStopOnDisconnectedNode { - require.Equal(t, testNode.ID, stopResult.alloc.NodeID) - } else { - require.NotEqual(t, testNode.ID, stopResult.alloc.NodeID) - } + // 2 to place and 1 to ignore + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 2, + Stop: 0, + Ignore: 1, + InPlaceUpdate: 0, + }, + }, + }) +} - require.Equal(t, job.Version, stopResult.alloc.Job.Version) - } - }) +func TestReconciler_Disconnect_UpdateJobAfterReconnect(t *testing.T) { + ci.Parallel(t) + + // Create 2 allocs and simulate one have being previously disconnected and + // then reconnected. + job, allocs := buildResumableAllocations(2, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) + allocs[0].AllocStates = []*structs.AllocState{ + { + Field: structs.AllocStateFieldClientStatus, + Value: structs.AllocClientStatusUnknown, + Time: time.Now().Add(-5 * time.Minute), + }, + { + Field: structs.AllocStateFieldClientStatus, + Value: structs.AllocClientStatusRunning, + Time: time.Now(), + }, } + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, + nil, allocs, nil, "", 50, true) + results := reconciler.Compute() + + // Assert both allocations will be updated. + assertResults(t, results, &resultExpectation{ + inplace: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + InPlaceUpdate: 2, + }, + }, + }) } // Tests that a client disconnect while a canary is in progress generates the result. diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index dc81f5d6517..4083ccf8f12 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -226,13 +226,14 @@ func (a allocSet) fromKeys(keys ...[]string) allocSet { // 4. Those that are on nodes that are disconnected, but have not had their ClientState set to unknown // 5. Those that are on a node that has reconnected. // 6. Those that are in a state that results in a noop. -func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverSupportsDisconnectedClients bool, now time.Time) (untainted, migrate, lost, disconnecting, reconnecting, ignore allocSet) { +func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverSupportsDisconnectedClients bool, now time.Time) (untainted, migrate, lost, disconnecting, reconnecting, ignore, expiring allocSet) { untainted = make(map[string]*structs.Allocation) migrate = make(map[string]*structs.Allocation) lost = make(map[string]*structs.Allocation) disconnecting = make(map[string]*structs.Allocation) reconnecting = make(map[string]*structs.Allocation) ignore = make(map[string]*structs.Allocation) + expiring = make(map[string]*structs.Allocation) for _, alloc := range a { // make sure we don't apply any reconnect logic to task groups @@ -240,7 +241,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS supportsDisconnectedClients := alloc.SupportsDisconnectedClients(serverSupportsDisconnectedClients) reconnect := false - expired := false + //expired := false // Only compute reconnect for unknown, running, and failed since they // need to go through the reconnect logic. 
@@ -249,9 +250,9 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS alloc.ClientStatus == structs.AllocClientStatusRunning || alloc.ClientStatus == structs.AllocClientStatusFailed) { reconnect = alloc.NeedsToReconnect() - if reconnect { - expired = alloc.Expired(now) - } + //if reconnect { + // expired = alloc.Expired(now) + //} } // Failed allocs that need to be reconnected must be added to @@ -270,7 +271,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS switch taintedNode.Status { case structs.NodeStatusDisconnected: if supportsDisconnectedClients { - // Filter running allocs on a node that is disconnected to be marked as unknown. if alloc.ClientStatus == structs.AllocClientStatusRunning { disconnecting[alloc.ID] = alloc @@ -281,21 +281,36 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS lost[alloc.ID] = alloc continue } + } else { + if alloc.SingleInstanceOnLost() { + if alloc.ClientStatus == structs.AllocClientStatusRunning { + disconnecting[alloc.ID] = alloc + continue + } + + untainted[alloc.ID] = alloc + continue + } + lost[alloc.ID] = alloc continue } - case structs.NodeStatusReady: + /* case structs.NodeStatusReady: // Filter reconnecting allocs on a node that is now connected. if reconnect { - if expired { + if alloc.Expired(now) { + if alloc.SingleInstanceOnLost() { + expiring[alloc.ID] = alloc + continue + } lost[alloc.ID] = alloc continue } reconnecting[alloc.ID] = alloc continue - } + }*/ } } @@ -319,9 +334,8 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS continue } - // Expired unknown allocs are lost if supportsDisconnectedClients && alloc.Expired(now) { - lost[alloc.ID] = alloc + expiring[alloc.ID] = alloc continue } @@ -343,12 +357,19 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS continue } - if !nodeIsTainted { + if !nodeIsTainted || taintedNode.Status == structs.NodeStatusReady { // Filter allocs on a node that is now re-connected to be resumed. if reconnect { - if expired { - lost[alloc.ID] = alloc + // Expired unknown allocs should be processed depending on the max client disconnect + // and single instance on lost configurations, they are both treated as + // expiring. + if alloc.Expired(now) { + // if alloc.SingleInstanceOnLost() { + expiring[alloc.ID] = alloc continue + // } + // lost[alloc.ID] = alloc + // continue } reconnecting[alloc.ID] = alloc continue @@ -360,7 +381,14 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } // Allocs on GC'd (nil) or lost nodes are Lost - if taintedNode == nil || taintedNode.TerminalStatus() { + if taintedNode == nil { + lost[alloc.ID] = alloc + continue + } + + // Allocs on terminal nodes that can't be rescheduled need to be treated + // differently than those that can. 
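+	// Groups that keep a single instance on lost nodes leave their unknown
+	// allocs untainted here, so no replacement is scheduled for them while
+	// the node is down.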
+ if taintedNode.TerminalStatus() { if alloc.SingleInstanceOnLost() { if alloc.ClientStatus == structs.AllocClientStatusUnknown { untainted[alloc.ID] = alloc diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 35785a9f158..f5bdcda392e 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -4,6 +4,7 @@ package scheduler import ( + "fmt" "testing" "time" @@ -41,9 +42,17 @@ func TestAllocSet_filterByTainted(t *testing.T) { testJob.TaskGroups[0].MaxClientDisconnect = pointer.Of(5 * time.Second) now := time.Now() + testJobSingle := mock.Job() + testJobSingle.TaskGroups[0].MaxClientDisconnect = pointer.Of(5 * time.Second) + testJobSingle.TaskGroups[0].SingleInstanceOnLost = true + testJobNoMaxDisconnect := mock.Job() testJobNoMaxDisconnect.TaskGroups[0].MaxClientDisconnect = nil + testJobNoMaxDisconnectSingle := mock.Job() + testJobNoMaxDisconnectSingle.TaskGroups[0].MaxClientDisconnect = nil + testJobNoMaxDisconnectSingle.TaskGroups[0].SingleInstanceOnLost = true + unknownAllocState := []*structs.AllocState{{ Field: structs.AllocStateFieldClientStatus, Value: structs.AllocClientStatusUnknown, @@ -68,7 +77,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { Time: now, }, } - + fmt.Println(expiredAllocState, reconnectedAllocState, unknownAllocState) type testCase struct { name string all allocSet @@ -76,6 +85,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { supportsDisconnectedClients bool skipNilNodeTest bool now time.Time + singleInstanceOnLost bool // expected results untainted allocSet migrate allocSet @@ -83,6 +93,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { disconnecting allocSet reconnecting allocSet ignore allocSet + expiring allocSet } testCases := []testCase{ @@ -189,6 +200,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { reconnecting: allocSet{}, ignore: allocSet{}, lost: allocSet{}, + expiring: allocSet{}, }, { name: "lost-client-only-tainted-nodes", @@ -236,6 +248,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { NodeID: "lost", }, }, + expiring: allocSet{}, }, { name: "disco-client-disconnect-unset-max-disconnect", @@ -271,6 +284,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { TaskGroup: "web", }, }, + expiring: allocSet{}, }, // Everything below this line tests the disconnected client mode. 
{ @@ -330,8 +344,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { AllocStates: unknownAllocState, }, }, - ignore: allocSet{}, - lost: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, }, { name: "disco-client-reconnecting-running-no-replacement", @@ -369,8 +384,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { AllocStates: unknownAllocState, }, }, - ignore: allocSet{}, - lost: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, }, { name: "disco-client-terminal", @@ -465,7 +481,6 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, }, ignore: allocSet{ - "ignored-reconnect-complete": { ID: "ignored-reconnect-complete", Name: "ignored-reconnect-complete", @@ -519,7 +534,8 @@ func TestAllocSet_filterByTainted(t *testing.T) { PreviousAllocation: "untainted-reconnect-lost", }, }, - lost: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, }, { name: "disco-client-disconnect", @@ -550,9 +566,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { AllocStates: unknownAllocState, }, // Unknown allocs on disconnected nodes are lost when expired - "lost-unknown": { - ID: "lost-unknown", - Name: "lost-unknown", + "expiring-unknown": { + ID: "expiring-unknown", + Name: "expiring-unknown", ClientStatus: structs.AllocClientStatusUnknown, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -571,10 +587,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { TaskGroup: "web", }, // Expired allocs on reconnected clients are lost - // Pending allocs on disconnected nodes are lost - "lost-expired": { - ID: "lost-expired", - Name: "lost-expired", + "expiring-expired": { + ID: "expiring-expired", + Name: "expiring-expired", ClientStatus: structs.AllocClientStatusUnknown, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -633,28 +648,30 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, }, lost: allocSet{ - "lost-unknown": { - ID: "lost-unknown", - Name: "lost-unknown", - ClientStatus: structs.AllocClientStatusUnknown, + "lost-pending": { + ID: "lost-pending", + Name: "lost-pending", + ClientStatus: structs.AllocClientStatusPending, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, NodeID: "disconnected", TaskGroup: "web", - AllocStates: expiredAllocState, }, - "lost-pending": { - ID: "lost-pending", - Name: "lost-pending", - ClientStatus: structs.AllocClientStatusPending, + }, + expiring: allocSet{ + "expiring-unknown": { + ID: "expiring-unknown", + Name: "expiring-unknown", + ClientStatus: structs.AllocClientStatusUnknown, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, NodeID: "disconnected", TaskGroup: "web", + AllocStates: expiredAllocState, }, - "lost-expired": { - ID: "lost-expired", - Name: "lost-expired", + "expiring-expired": { + ID: "expiring-expired", + Name: "expiring-expired", ClientStatus: structs.AllocClientStatusUnknown, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -672,9 +689,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { skipNilNodeTest: false, all: allocSet{ // Expired allocs on reconnected clients are lost - "lost-expired-reconnect": { - ID: "lost-expired-reconnect", - Name: "lost-expired-reconnect", + "expired-reconnect": { + ID: "expired-reconnect", + Name: "expired-reconnect", ClientStatus: structs.AllocClientStatusUnknown, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -688,10 +705,11 @@ func TestAllocSet_filterByTainted(t *testing.T) { disconnecting: allocSet{}, reconnecting: allocSet{}, ignore: allocSet{}, - lost: allocSet{ 
- "lost-expired-reconnect": { - ID: "lost-expired-reconnect", - Name: "lost-expired-reconnect", + lost: allocSet{}, + expiring: allocSet{ + "expired-reconnect": { + ID: "expired-reconnect", + Name: "expired-reconnect", ClientStatus: structs.AllocClientStatusUnknown, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -756,8 +774,9 @@ func TestAllocSet_filterByTainted(t *testing.T) { AllocStates: unknownAllocState, }, }, - ignore: allocSet{}, - lost: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, }, { // After an alloc is reconnected, it should be considered @@ -797,32 +816,516 @@ func TestAllocSet_filterByTainted(t *testing.T) { reconnecting: allocSet{}, ignore: allocSet{}, lost: allocSet{}, + expiring: allocSet{}, + }, + // Everything below this line tests the single instance on lost mode. + { + name: "lost-client-single-instance-on", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: false, + all: allocSet{ + "untainted1": { + ID: "untainted1", + ClientStatus: structs.AllocClientStatusRunning, + Job: testJobSingle, + NodeID: "normal", + }, + // Terminal allocs are always untainted + "untainted2": { + ID: "untainted2", + ClientStatus: structs.AllocClientStatusComplete, + Job: testJobSingle, + NodeID: "normal", + }, + // Terminal allocs are always untainted, even on draining nodes + "untainted3": { + ID: "untainted3", + ClientStatus: structs.AllocClientStatusComplete, + Job: testJobSingle, + NodeID: "draining", + }, + // Terminal allocs are always untainted, even on lost nodes + "untainted4": { + ID: "untainted4", + ClientStatus: structs.AllocClientStatusComplete, + Job: testJobSingle, + NodeID: "lost", + }, + // Non-terminal alloc with migrate=true should migrate on a draining node + "migrating1": { + ID: "migrating1", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)}, + Job: testJobSingle, + NodeID: "draining", + }, + // Non-terminal alloc with migrate=true should migrate on an unknown node + "migrating2": { + ID: "migrating2", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)}, + Job: testJobSingle, + NodeID: "nil", + }, + }, + untainted: allocSet{ + "untainted1": { + ID: "untainted1", + ClientStatus: structs.AllocClientStatusRunning, + Job: testJobSingle, + NodeID: "normal", + }, + // Terminal allocs are always untainted + "untainted2": { + ID: "untainted2", + ClientStatus: structs.AllocClientStatusComplete, + Job: testJobSingle, + NodeID: "normal", + }, + // Terminal allocs are always untainted, even on draining nodes + "untainted3": { + ID: "untainted3", + ClientStatus: structs.AllocClientStatusComplete, + Job: testJobSingle, + NodeID: "draining", + }, + // Terminal allocs are always untainted, even on lost nodes + "untainted4": { + ID: "untainted4", + ClientStatus: structs.AllocClientStatusComplete, + Job: testJobSingle, + NodeID: "lost", + }, + }, + migrate: allocSet{ + // Non-terminal alloc with migrate=true should migrate on a draining node + "migrating1": { + ID: "migrating1", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)}, + Job: testJobSingle, + NodeID: "draining", + }, + // Non-terminal alloc with migrate=true should migrate on an unknown node + "migrating2": { + ID: "migrating2", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: 
structs.DesiredTransition{Migrate: pointer.Of(true)}, + Job: testJobSingle, + NodeID: "nil", + }, + }, + disconnecting: allocSet{}, + reconnecting: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, + }, + { + name: "lost-client-only-tainted-nodes-single-instance-on", + supportsDisconnectedClients: false, + now: time.Now(), + taintedNodes: nodes, + // The logic associated with this test case can only trigger if there + // is a tainted node. Therefore, testing with a nil node set produces + // false failures, so don't perform that test if in this case. + skipNilNodeTest: true, + all: allocSet{ + // Non-terminal allocs on lost nodes are lost + "lost1": { + ID: "lost1", + ClientStatus: structs.AllocClientStatusPending, + Job: testJobSingle, + NodeID: "lost", + }, + // Non-terminal allocs on lost nodes are lost + "lost2": { + ID: "lost2", + ClientStatus: structs.AllocClientStatusRunning, + Job: testJobSingle, + NodeID: "lost", + }, + }, + untainted: allocSet{}, + migrate: allocSet{}, + disconnecting: allocSet{}, + reconnecting: allocSet{}, + ignore: allocSet{}, + lost: allocSet{ + // Non-terminal allocs on lost nodes are lost + "lost1": { + ID: "lost1", + ClientStatus: structs.AllocClientStatusPending, + Job: testJobSingle, + NodeID: "lost", + }, + // Non-terminal allocs on lost nodes are lost + "lost2": { + ID: "lost2", + ClientStatus: structs.AllocClientStatusRunning, + Job: testJobSingle, + NodeID: "lost", + }, + }, + expiring: allocSet{}, + }, + { + name: "disco-client-disconnect-unset-max-disconnect-single-instance-on", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: true, + all: allocSet{ + // Non-terminal allocs on disconnected nodes w/o max-disconnect are lost + "disconnecting-running": { + ID: "disconnecting-running", + Name: "disconnecting-running", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "disconnected", + TaskGroup: "web", + }, + }, + untainted: allocSet{}, + migrate: allocSet{}, + disconnecting: allocSet{"disconnecting-running": { + ID: "disconnecting-running", + Name: "disconnecting-running", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "disconnected", + TaskGroup: "web", + }}, + reconnecting: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, + }, + { + name: "disco-client-untainted-reconnect-failed-and-replaced-single-instance-on", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: false, + all: allocSet{ + "running-replacement": { + ID: "running-replacement", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + PreviousAllocation: "failed-original", + }, + // Failed and replaced allocs on reconnected nodes + // that are still desired-running are reconnected so + // we can stop them + "failed-original": { + ID: "failed-original", + Name: "web", + ClientStatus: structs.AllocClientStatusFailed, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + }, + untainted: allocSet{ + "running-replacement": { + ID: "running-replacement", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: 
structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + PreviousAllocation: "failed-original", + }, + }, + migrate: allocSet{}, + disconnecting: allocSet{}, + reconnecting: allocSet{ + "failed-original": { + ID: "failed-original", + Name: "web", + ClientStatus: structs.AllocClientStatusFailed, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + }, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, + }, + { + name: "disco-client-reconnect-single-instance-on", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: false, + all: allocSet{ + // Expired allocs on reconnected clients are lost + "expired-reconnect": { + ID: "expired-reconnect", + Name: "expired-reconnect", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: expiredAllocState, + }, + }, + untainted: allocSet{}, + migrate: allocSet{}, + disconnecting: allocSet{}, + reconnecting: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{ + "expired-reconnect": { + ID: "expired-reconnect", + Name: "expired-reconnect", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: expiredAllocState, + }, + }, + }, + { + name: "disco-client-running-reconnecting-and-replacement-untainted-single-instance-on", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: false, + all: allocSet{ + "running-replacement": { + ID: "running-replacement", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + PreviousAllocation: "running-original", + }, + // Running and replaced allocs on reconnected nodes are reconnecting + "running-original": { + ID: "running-original", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + }, + untainted: allocSet{ + "running-replacement": { + ID: "running-replacement", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + PreviousAllocation: "running-original", + }, + }, + migrate: allocSet{}, + disconnecting: allocSet{}, + reconnecting: allocSet{ + "running-original": { + ID: "running-original", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + }, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, + }, + { + // After an alloc is reconnected, it should be considered + // "untainted" instead of "reconnecting" to allow changes such as + // job updates to be applied properly. 
+ name: "disco-client-reconnected-alloc-untainted", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: false, + all: allocSet{ + "running-reconnected": { + ID: "running-reconnected", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: reconnectedAllocState, + }, + }, + untainted: allocSet{ + "running-reconnected": { + ID: "running-reconnected", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobSingle, + NodeID: "normal", + TaskGroup: "web", + AllocStates: reconnectedAllocState, + }, + }, + migrate: allocSet{}, + disconnecting: allocSet{}, + reconnecting: allocSet{}, + ignore: allocSet{}, + lost: allocSet{}, + expiring: allocSet{}, + }, + { + name: "disco-client-reconnected-alloc-untainted-single-instance-on", + supportsDisconnectedClients: true, + now: time.Now(), + taintedNodes: nodes, + skipNilNodeTest: true, + all: allocSet{ + "untainted-unknown": { + ID: "untainted-unknown", + Name: "web", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "disconnected", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + "disconnecting-running": { + ID: "disconnecting-running", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "disconnected", + TaskGroup: "web", + }, + "lost-running": { + ID: "lost-running", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnect, + NodeID: "disconnected", + TaskGroup: "web", + }, + "untainted-unknown-on-down-node": { + ID: "untainted-unknown-on-down-node", + Name: "web", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "down", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + }, + untainted: allocSet{ + "untainted-unknown": { + ID: "untainted-unknown", + Name: "web", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "disconnected", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + "untainted-unknown-on-down-node": { + ID: "untainted-unknown-on-down-node", + Name: "web", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "down", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + }, + migrate: allocSet{}, + disconnecting: allocSet{ + "disconnecting-running": { + ID: "disconnecting-running", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnectSingle, + NodeID: "disconnected", + TaskGroup: "web", + }, + }, + reconnecting: allocSet{}, + ignore: allocSet{}, + lost: allocSet{ + "lost-running": { + ID: "lost-running", + Name: "web", + ClientStatus: structs.AllocClientStatusRunning, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJobNoMaxDisconnect, + NodeID: "disconnected", + TaskGroup: "web", + }, + }, + expiring: allocSet{}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { // With tainted 
nodes
-			untainted, migrate, lost, disconnecting, reconnecting, ignore := tc.all.filterByTainted(tc.taintedNodes, tc.supportsDisconnectedClients, tc.now)
+			untainted, migrate, lost, disconnecting, reconnecting, ignore, expired := tc.all.filterByTainted(tc.taintedNodes, tc.supportsDisconnectedClients, tc.now)
 			must.Eq(t, tc.untainted, untainted, must.Sprintf("with-nodes: untainted"))
 			must.Eq(t, tc.migrate, migrate, must.Sprintf("with-nodes: migrate"))
 			must.Eq(t, tc.lost, lost, must.Sprintf("with-nodes: lost"))
 			must.Eq(t, tc.disconnecting, disconnecting, must.Sprintf("with-nodes: disconnecting"))
 			must.Eq(t, tc.reconnecting, reconnecting, must.Sprintf("with-nodes: reconnecting"))
 			must.Eq(t, tc.ignore, ignore, must.Sprintf("with-nodes: ignore"))
+			must.Eq(t, tc.expiring, expired, must.Sprintf("with-nodes: expiring"))

 			if tc.skipNilNodeTest {
 				return
 			}

 			// Now again with nodes nil
-			untainted, migrate, lost, disconnecting, reconnecting, ignore = tc.all.filterByTainted(nil, tc.supportsDisconnectedClients, tc.now)
+			untainted, migrate, lost, disconnecting, reconnecting, ignore, expired = tc.all.filterByTainted(nil, tc.supportsDisconnectedClients, tc.now)
 			must.Eq(t, tc.untainted, untainted, must.Sprintf("with-nodes: untainted"))
 			must.Eq(t, tc.migrate, migrate, must.Sprintf("with-nodes: migrate"))
 			must.Eq(t, tc.lost, lost, must.Sprintf("with-nodes: lost"))
 			must.Eq(t, tc.disconnecting, disconnecting, must.Sprintf("with-nodes: disconnecting"))
 			must.Eq(t, tc.reconnecting, reconnecting, must.Sprintf("with-nodes: reconnecting"))
 			must.Eq(t, tc.ignore, ignore, must.Sprintf("with-nodes: ignore"))
+			must.Eq(t, tc.expiring, expired, must.Sprintf("with-nodes: expiring"))
 		})
 	}
 }

From 2f7d431f1e907a6eaa780fd3f1ce2475f3ad2d20 Mon Sep 17 00:00:00 2001
From: Juanadelacuesta Date: Mon, 27 Nov 2023 13:33:19 +0100
Subject: [PATCH 17/50] fix: verify the tainted node is not nil before checking the status

---
 scheduler/reconcile_util.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index 4083ccf8f12..a8dd8eeb0d8 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -357,7 +357,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			continue
 		}

-		if !nodeIsTainted || taintedNode.Status == structs.NodeStatusReady {
+		if !nodeIsTainted || (taintedNode != nil && taintedNode.Status == structs.NodeStatusReady) {
 			// Filter allocs on a node that is now re-connected to be resumed.
 			if reconnect {
 				// Expired unknown allocs should be processed depending on the max client disconnect

From 85c9e7845adc79b290869a1fea0ab048de67c3de Mon Sep 17 00:00:00 2001
From: Juanadelacuesta Date: Mon, 27 Nov 2023 13:54:16 +0100
Subject: [PATCH 18/50] fix: update test after inverting the logic of the new option

---
 scheduler/util_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scheduler/util_test.go b/scheduler/util_test.go
index a25a1c964a7..8217464f4fb 100644
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -550,7 +550,7 @@ func TestTasksUpdated(t *testing.T) {

 	// Change SingleInstanceOnLost mode
 	j31 := mock.Job()
-	j31.TaskGroups[0].SingleInstanceOnLost = false
+	j31.TaskGroups[0].SingleInstanceOnLost = true
 	require.True(t, tasksUpdated(j1, j31, name).modified)
 }

From 749ca4ca78123fa501e55699074cfe243b5f0daf Mon Sep 17 00:00:00 2001
From: Juanadelacuesta Date: Mon, 27 Nov 2023 14:52:34 +0100
Subject: [PATCH 19/50] func: add warning for single instance on lost combined with rescheduling

---
 nomad/structs/structs.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 76ac71158c5..1ac8b7da95e 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -4736,6 +4736,13 @@ func (j *Job) Warnings() error {
 		// Having no canaries implies auto-promotion since there are no canaries to promote.
 		allAutoPromote = allAutoPromote && (u.Canary == 0 || u.AutoPromote)
 	}
+
+	if tg.MaxClientDisconnect != nil &&
+		tg.ReschedulePolicy.Attempts > 0 &&
+		tg.SingleInstanceOnLost == true {
+		err := fmt.Errorf("having max_client_disconnect enable along with a reschedule policy can lead to having multiple instances of a task running at the same time")
+		mErr.Errors = append(mErr.Errors, err)
+	}
 	}

 	// Check AutoPromote, should be all or none

From 6cf3501244263fab4dafe56d7592cfe37d7e0bfa Mon Sep 17 00:00:00 2001
From: Juanadelacuesta Date: Mon, 27 Nov 2023 14:58:29 +0100
Subject: [PATCH 20/50] style: refactor filter by tainted

---
 scheduler/reconcile.go      |  2 +-
 scheduler/reconcile_util.go | 63 +++++++++++--------------------------
 2 files changed, 20 insertions(+), 45 deletions(-)

diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index 0ad9993353c..64c6d800864 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -177,7 +177,7 @@ func (r *reconcileResults) GoString() string {
 	for tg, u := range r.desiredTGUpdates {
 		base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u)
 	}
-	fmt.Println("\n ***", base)
+
 	return base
 }

diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index a8dd8eeb0d8..f1ddce6cf5d 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -241,7 +241,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 		supportsDisconnectedClients := alloc.SupportsDisconnectedClients(serverSupportsDisconnectedClients)

 		reconnect := false
-		//expired := false

 		// Only compute reconnect for unknown, running, and failed since they
 		// need to go through the reconnect logic.
@@ -250,9 +249,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS alloc.ClientStatus == structs.AllocClientStatusRunning || alloc.ClientStatus == structs.AllocClientStatusFailed) { reconnect = alloc.NeedsToReconnect() - //if reconnect { - // expired = alloc.Expired(now) - //} } // Failed allocs that need to be reconnected must be added to @@ -266,51 +262,33 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID] - if taintedNode != nil { + if taintedNode != nil && taintedNode.Status == structs.NodeStatusDisconnected { // Group disconnecting - switch taintedNode.Status { - case structs.NodeStatusDisconnected: - if supportsDisconnectedClients { - // Filter running allocs on a node that is disconnected to be marked as unknown. + if supportsDisconnectedClients { + // Filter running allocs on a node that is disconnected to be marked as unknown. + if alloc.ClientStatus == structs.AllocClientStatusRunning { + disconnecting[alloc.ID] = alloc + continue + } + // Filter pending allocs on a node that is disconnected to be marked as lost. + if alloc.ClientStatus == structs.AllocClientStatusPending { + lost[alloc.ID] = alloc + continue + } + + } else { + if alloc.SingleInstanceOnLost() { if alloc.ClientStatus == structs.AllocClientStatusRunning { disconnecting[alloc.ID] = alloc continue } - // Filter pending allocs on a node that is disconnected to be marked as lost. - if alloc.ClientStatus == structs.AllocClientStatusPending { - lost[alloc.ID] = alloc - continue - } - - } else { - if alloc.SingleInstanceOnLost() { - if alloc.ClientStatus == structs.AllocClientStatusRunning { - disconnecting[alloc.ID] = alloc - continue - } - untainted[alloc.ID] = alloc - continue - } - - lost[alloc.ID] = alloc + untainted[alloc.ID] = alloc continue } - /* case structs.NodeStatusReady: - // Filter reconnecting allocs on a node that is now connected. - if reconnect { - if alloc.Expired(now) { - if alloc.SingleInstanceOnLost() { - expiring[alloc.ID] = alloc - continue - } - lost[alloc.ID] = alloc - continue - } - reconnecting[alloc.ID] = alloc - continue - }*/ + lost[alloc.ID] = alloc + continue } } @@ -364,13 +342,10 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS // and single instance on lost configurations, they are both treated as // expiring. 
 			if alloc.Expired(now) {
-				// if alloc.SingleInstanceOnLost() {
 				expiring[alloc.ID] = alloc
 				continue
-				// }
-				// lost[alloc.ID] = alloc
-				// continue
 			}
+
 			reconnecting[alloc.ID] = alloc
 			continue
 		}

From 072e5fdc5ebcfa94e29ee75f17f57186a8086f99 Mon Sep 17 00:00:00 2001
From: Juanadelacuesta Date: Mon, 27 Nov 2023 17:07:58 +0100
Subject: [PATCH 21/50] style: linter fix

---
 nomad/structs/structs.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 1ac8b7da95e..e2723a99c41 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -4739,7 +4739,7 @@ func (j *Job) Warnings() error {

 	if tg.MaxClientDisconnect != nil &&
 		tg.ReschedulePolicy.Attempts > 0 &&
-		tg.SingleInstanceOnLost == true {
+		tg.SingleInstanceOnLost {
 		err := fmt.Errorf("having max_client_disconnect enable along with a reschedule policy can lead to having multiple instances of a task running at the same time")
 		mErr.Errors = append(mErr.Errors, err)
 	}

From b1bf7772250690ce3061d7b6daad4a93a610a729 Mon Sep 17 00:00:00 2001
From: Juanadelacuesta Date: Mon, 27 Nov 2023 17:59:41 +0100
Subject: [PATCH 22/50] func: raise an error when singleInstance and reschedule policy are both enabled on max client disconnect

---
 nomad/structs/structs.go | 14 +++++++-------
 nomad/worker.go          |  1 -
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index e2723a99c41..98392de3ad9 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -4672,6 +4672,13 @@ func (j *Job) Validate() error {
 				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
 					tg.Name, tg.Count))
 		}
+
+		if tg.MaxClientDisconnect != nil &&
+			tg.ReschedulePolicy.Attempts > 0 &&
+			tg.SingleInstanceOnLost {
+			err := fmt.Errorf("having max_client_disconnect enable along with a reschedule policy can lead to having multiple instances of a task running at the same time")
+			mErr.Errors = append(mErr.Errors, err)
+		}
 	}

 	// Validate the task group
@@ -4736,13 +4743,6 @@ func (j *Job) Warnings() error {
 		// Having no canaries implies auto-promotion since there are no canaries to promote.
allAutoPromote = allAutoPromote && (u.Canary == 0 || u.AutoPromote) } - - if tg.MaxClientDisconnect != nil && - tg.ReschedulePolicy.Attempts > 0 && - tg.SingleInstanceOnLost { - err := fmt.Errorf("having max_client_disconnect enable along with a reschedule policy can lead to having multiple instances of a task running at the same time") - mErr.Errors = append(mErr.Errors, err) - } } // Check AutoPromote, should be all or none diff --git a/nomad/worker.go b/nomad/worker.go index 83fb1ab671f..69107cc48a8 100644 --- a/nomad/worker.go +++ b/nomad/worker.go @@ -516,7 +516,6 @@ REQ: // Check if we got a response if resp.Eval != nil { - fmt.Println("\n **** eval triggered_by", resp.Eval.TriggeredBy) w.logger.Debug("dequeued evaluation", "eval_id", resp.Eval.ID, "type", resp.Eval.Type, "namespace", resp.Eval.Namespace, "job_id", resp.Eval.JobID, "node_id", resp.Eval.NodeID, "triggered_by", resp.Eval.TriggeredBy) return resp.Eval, resp.Token, resp.GetWaitIndex(), false } From f82a2b6c8cbce6954da5a14786dfc975854ec2f9 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Tue, 28 Nov 2023 12:55:53 +0100 Subject: [PATCH 23/50] style: remove added new lines --- client/allocrunner/alloc_runner.go | 5 ++--- nomad/core_sched.go | 1 - nomad/job_endpoint_test.go | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 1c1d6abd2ae..97b0f6ea942 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -725,10 +725,8 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { wg.Add(1) go func(name string, tr *taskrunner.TaskRunner) { defer wg.Done() - taskEvent := structs.NewTaskEvent(structs.TaskKilling) taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) - err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping task", "error", err, "task_name", name) @@ -1001,7 +999,6 @@ func (ar *allocRunner) handleAllocUpdates() { // the latest update. 
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { // Detect Stop updates - stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus() // Update ar.alloc @@ -1012,6 +1009,7 @@ func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { if err := ar.update(update); err != nil { ar.logger.Error("error running update hooks", "error", err) } + } // Update task runners @@ -1023,6 +1021,7 @@ func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { if stopping { ar.killTasks() } + } func (ar *allocRunner) Listener() *cstructs.AllocListener { diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 2b115fcecb4..dbd90a6b4b9 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -653,7 +653,6 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, if tg != nil { reschedulePolicy = tg.ReschedulePolicy } - // No reschedule policy or rescheduling is disabled if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { return true diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index ea0536e7bbb..1d6c5109935 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -172,7 +172,6 @@ func TestJobEndpoint_Register_NonOverlapping(t *testing.T) { Namespace: structs.DefaultNamespace, }, } - var alloc *structs.AllocListStub testutil.Wait(t, func() (bool, error) { resp := structs.JobAllocationsResponse{} @@ -184,7 +183,6 @@ func TestJobEndpoint_Register_NonOverlapping(t *testing.T) { alloc = resp.Allocations[0] return true, nil }) - must.Eq(t, alloc.NodeID, node.ID) must.Eq(t, alloc.DesiredStatus, structs.AllocDesiredStatusRun) must.Eq(t, alloc.ClientStatus, structs.AllocClientStatusPending) From 3ec11a16042db13ba26eb1dce3b83298c1148489 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Tue, 28 Nov 2023 13:25:55 +0100 Subject: [PATCH 24/50] style: rename new option --- api/jobs_test.go | 66 +++++++++---------- api/tasks.go | 6 +- client/client.go | 2 +- command/agent/job_endpoint.go | 6 +- command/agent/job_endpoint_test.go | 8 +-- nomad/core_sched_test.go | 34 +++++----- nomad/mock/job.go | 6 +- nomad/plan_apply.go | 2 +- nomad/structs/diff_test.go | 54 +++++++-------- nomad/structs/structs.go | 18 ++--- scheduler/reconcile.go | 4 +- scheduler/reconcile_test.go | 44 ++++++------- scheduler/reconcile_util.go | 4 +- scheduler/reconcile_util_test.go | 6 +- scheduler/util.go | 6 +- scheduler/util_test.go | 4 +- .../content/docs/job-specification/group.mdx | 2 +- 17 files changed, 136 insertions(+), 136 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index 0c9c6e829fa..05b822d1df2 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -312,9 +312,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), - SingleInstanceOnLost: pointerOf(false), + Name: pointerOf(""), + Count: pointerOf(1), + AvoidRescheduleOnLost: pointerOf(false), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -400,9 +400,9 @@ func TestJobs_Canonicalize(t *testing.T) { JobModifyIndex: pointerOf(uint64(0)), TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), - SingleInstanceOnLost: pointerOf(false), + Name: pointerOf(""), + Count: pointerOf(1), + AvoidRescheduleOnLost: pointerOf(false), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -493,9 +493,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, 
TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - SingleInstanceOnLost: pointerOf(false), - Count: pointerOf(1), + Name: pointerOf("bar"), + AvoidRescheduleOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -558,9 +558,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), - SingleInstanceOnLost: pointerOf(true), + Name: pointerOf("cache"), + Count: pointerOf(1), + AvoidRescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -670,9 +670,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), - SingleInstanceOnLost: pointerOf(true), + Name: pointerOf("cache"), + Count: pointerOf(1), + AvoidRescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -869,8 +869,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - SingleInstanceOnLost: pointerOf(true), + Name: pointerOf("bar"), + AvoidRescheduleOnLost: pointerOf(true), Consul: &Consul{ Namespace: "", }, @@ -890,8 +890,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - SingleInstanceOnLost: pointerOf(false), + Name: pointerOf("baz"), + AvoidRescheduleOnLost: pointerOf(false), Tasks: []*Task{ { Name: "task1", @@ -936,9 +936,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - Count: pointerOf(1), - SingleInstanceOnLost: pointerOf(true), + Name: pointerOf("bar"), + Count: pointerOf(1), + AvoidRescheduleOnLost: pointerOf(true), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -986,9 +986,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - SingleInstanceOnLost: pointerOf(false), - Count: pointerOf(1), + Name: pointerOf("baz"), + AvoidRescheduleOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1046,8 +1046,8 @@ func TestJobs_Canonicalize(t *testing.T) { ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - SingleInstanceOnLost: pointerOf(true), + Name: pointerOf("bar"), + AvoidRescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Delay: pointerOf(15 * time.Second), Attempts: pointerOf(2), @@ -1120,9 +1120,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - SingleInstanceOnLost: pointerOf(true), - Count: pointerOf(1), + Name: pointerOf("bar"), + AvoidRescheduleOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1176,9 +1176,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - SingleInstanceOnLost: pointerOf(false), - Count: pointerOf(1), + Name: pointerOf("baz"), + AvoidRescheduleOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), diff --git a/api/tasks.go b/api/tasks.go index 89091854f6d..68e55cb12eb 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -459,7 +459,7 @@ type TaskGroup struct { MaxClientDisconnect *time.Duration `mapstructure:"max_client_disconnect" 
hcl:"max_client_disconnect,optional"` Scaling *ScalingPolicy `hcl:"scaling,block"` Consul *Consul `hcl:"consul,block"` - SingleInstanceOnLost *bool `hcl:"single_instance_on_lost,optional"` + AvoidRescheduleOnLost *bool `hcl:"avoid_reschedule_on_lost,optional"` } // NewTaskGroup creates a new TaskGroup. @@ -578,8 +578,8 @@ func (g *TaskGroup) Canonicalize(job *Job) { for _, s := range g.Services { s.Canonicalize(nil, g, job) } - if g.SingleInstanceOnLost == nil { - g.SingleInstanceOnLost = pointerOf(false) + if g.AvoidRescheduleOnLost == nil { + g.AvoidRescheduleOnLost = pointerOf(false) } } diff --git a/client/client.go b/client/client.go index 902700b72a0..6a8c5c0570c 100644 --- a/client/client.go +++ b/client/client.go @@ -2670,7 +2670,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) { // Reconnect unknown allocations if they were updated and are not terminal. reconnect := update.ClientStatus == structs.AllocClientStatusUnknown && update.AllocModifyIndex > alloc.AllocModifyIndex && - (!update.ServerTerminalStatus() || !alloc.SingleInstanceOnLost()) + (!update.ServerTerminalStatus() || !alloc.AvoidRescheduleOnLost()) if reconnect { err = ar.Reconnect(update) if err != nil { diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 7a5f8c2dae1..c689ef09216 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1135,10 +1135,10 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta RenderTemplates: *taskGroup.RestartPolicy.RenderTemplates, } - if taskGroup.SingleInstanceOnLost == nil { - tg.SingleInstanceOnLost = false + if taskGroup.AvoidRescheduleOnLost == nil { + tg.AvoidRescheduleOnLost = false } else { - tg.SingleInstanceOnLost = *taskGroup.SingleInstanceOnLost + tg.AvoidRescheduleOnLost = *taskGroup.AvoidRescheduleOnLost } if taskGroup.ShutdownDelay != nil { diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 8611c385376..0e95908ebe2 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -3058,7 +3058,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Operand: "z", }, }, - SingleInstanceOnLost: false, + AvoidRescheduleOnLost: false, Affinities: []*structs.Affinity{ { LTarget: "x", @@ -3553,9 +3553,9 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { }, TaskGroups: []*structs.TaskGroup{ { - Name: "group1", - Count: 5, - SingleInstanceOnLost: false, + Name: "group1", + Count: 5, + AvoidRescheduleOnLost: false, Constraints: []*structs.Constraint{ { LTarget: "x", diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 745ed8c9ef0..d4e826b7f64 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -1940,21 +1940,21 @@ func TestCoreScheduler_PartitionJobReap(t *testing.T) { // Tests various scenarios when allocations are eligible to be GCed func TestAllocation_GCEligible(t *testing.T) { type testCase struct { - Desc string - GCTime time.Time - ClientStatus string - DesiredStatus string - JobStatus string - JobStop bool - SingleInstanceOnLost *bool - AllocJobModifyIndex uint64 - JobModifyIndex uint64 - ModifyIndex uint64 - NextAllocID string - ReschedulePolicy *structs.ReschedulePolicy - RescheduleTrackers []*structs.RescheduleEvent - ThresholdIndex uint64 - ShouldGC bool + Desc string + GCTime time.Time + ClientStatus string + DesiredStatus string + JobStatus string + JobStop bool + AvoidRescheduleOnLost *bool + AllocJobModifyIndex uint64 + JobModifyIndex uint64 + ModifyIndex uint64 + 
NextAllocID string + ReschedulePolicy *structs.ReschedulePolicy + RescheduleTrackers []*structs.RescheduleEvent + ThresholdIndex uint64 + ShouldGC bool } fail := time.Now() @@ -2175,8 +2175,8 @@ func TestAllocation_GCEligible(t *testing.T) { alloc.NextAllocation = tc.NextAllocID job := mock.Job() alloc.TaskGroup = job.TaskGroups[0].Name - if tc.SingleInstanceOnLost != nil { - job.TaskGroups[0].SingleInstanceOnLost = *tc.SingleInstanceOnLost + if tc.AvoidRescheduleOnLost != nil { + job.TaskGroups[0].AvoidRescheduleOnLost = *tc.AvoidRescheduleOnLost } job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy if tc.JobStatus != "" { diff --git a/nomad/mock/job.go b/nomad/mock/job.go index a0b8c237313..15c53707c15 100644 --- a/nomad/mock/job.go +++ b/nomad/mock/job.go @@ -31,9 +31,9 @@ func Job() *structs.Job { }, TaskGroups: []*structs.TaskGroup{ { - Name: "web", - Count: 10, - SingleInstanceOnLost: false, + Name: "web", + Count: 10, + AvoidRescheduleOnLost: false, Constraints: []*structs.Constraint{ { LTarget: "${attr.consul.version}", diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index f7f7db0c2ab..9c894b3084f 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -801,7 +801,7 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool { // as non reschedulables when lost. func isValidForLostNode(plan *structs.Plan, nodeID string) bool { for _, alloc := range plan.NodeAllocation[nodeID] { - if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.SingleInstanceOnLost()) { + if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.AvoidRescheduleOnLost()) { return false } } diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 3f80bdfcfa8..a058a9f4b6f 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1253,38 +1253,38 @@ func TestJobDiff(t *testing.T) { Old: &Job{ TaskGroups: []*TaskGroup{ { - Name: "foo", - Count: 1, - SingleInstanceOnLost: true, + Name: "foo", + Count: 1, + AvoidRescheduleOnLost: true, }, { - Name: "bar", - Count: 1, - SingleInstanceOnLost: false, + Name: "bar", + Count: 1, + AvoidRescheduleOnLost: false, }, { - Name: "baz", - Count: 1, - SingleInstanceOnLost: true, + Name: "baz", + Count: 1, + AvoidRescheduleOnLost: true, }, }, }, New: &Job{ TaskGroups: []*TaskGroup{ { - Name: "bar", - Count: 1, - SingleInstanceOnLost: false, + Name: "bar", + Count: 1, + AvoidRescheduleOnLost: false, }, { - Name: "baz", - Count: 2, - SingleInstanceOnLost: true, + Name: "baz", + Count: 2, + AvoidRescheduleOnLost: true, }, { - Name: "bam", - Count: 1, - SingleInstanceOnLost: true, + Name: "bam", + Count: 1, + AvoidRescheduleOnLost: true, }, }, }, @@ -1303,7 +1303,7 @@ func TestJobDiff(t *testing.T) { }, { Type: DiffTypeAdded, - Name: "SingleInstanceOnLost", + Name: "AvoidRescheduleOnLost", Old: "", New: "true", }, @@ -1337,7 +1337,7 @@ func TestJobDiff(t *testing.T) { }, { Type: DiffTypeDeleted, - Name: "SingleInstanceOnLost", + Name: "AvoidRescheduleOnLost", Old: "true", New: "", }, @@ -1862,14 +1862,14 @@ func TestTaskGroupDiff(t *testing.T) { { TestCase: "Reschedule on lost diff", Old: &TaskGroup{ - Name: "foo", - Count: 100, - SingleInstanceOnLost: true, + Name: "foo", + Count: 100, + AvoidRescheduleOnLost: true, }, New: &TaskGroup{ - Name: "foo", - Count: 100, - SingleInstanceOnLost: false, + Name: "foo", + Count: 100, + AvoidRescheduleOnLost: false, }, Expected: &TaskGroupDiff{ Type: DiffTypeEdited, @@ -1877,7 +1877,7 @@ func TestTaskGroupDiff(t *testing.T) { Fields: []*FieldDiff{ { 
Type: DiffTypeEdited, - Name: "SingleInstanceOnLost", + Name: "AvoidRescheduleOnLost", Old: "true", New: "false", }, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 98392de3ad9..64de3deddaf 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4675,7 +4675,7 @@ func (j *Job) Validate() error { if tg.MaxClientDisconnect != nil && tg.ReschedulePolicy.Attempts > 0 && - tg.SingleInstanceOnLost { + tg.AvoidRescheduleOnLost { err := fmt.Errorf("having max_client_disconnect enable along with a reschedule policy can lead to having multiple instances of a task running at the same time") mErr.Errors = append(mErr.Errors, err) } @@ -6649,10 +6649,10 @@ type TaskGroup struct { // allocations for tasks in this group to attempt to resume running without a restart. MaxClientDisconnect *time.Duration - // SingleInstanceOnLost is used to signal if multiple instances of the same - // task can be running at the same time, it controls if a replacement is triggered - // when the task state is unknown - SingleInstanceOnLost bool + // AvoidRescheduleOnLost is used to signal that an allocation should not + // be rescheduled if its node becomes lost. If the node is disconnected, it will + // be also considered as lost and wont be rescheduled. + AvoidRescheduleOnLost bool } func (tg *TaskGroup) Copy() *TaskGroup { @@ -11031,13 +11031,13 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien return false } -// SingleInstanceOnLost determines if an alloc allows to have a replacement +// AvoidRescheduleOnLost determines if an alloc allows to have a replacement // when lost. -func (a *Allocation) SingleInstanceOnLost() bool { +func (a *Allocation) AvoidRescheduleOnLost() bool { if a.Job != nil { tg := a.Job.LookupTaskGroup(a.TaskGroup) if tg != nil { - return tg.SingleInstanceOnLost + return tg.AvoidRescheduleOnLost } } @@ -11257,7 +11257,7 @@ func (a *Allocation) Expired(now time.Time) bool { return false } - if tg.MaxClientDisconnect == nil && !tg.SingleInstanceOnLost { + if tg.MaxClientDisconnect == nil && !tg.AvoidRescheduleOnLost { return false } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 64c6d800864..b8cdd882a0f 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -471,7 +471,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { } if len(expiring) > 0 { - if tg.SingleInstanceOnLost { + if tg.AvoidRescheduleOnLost { untainted = untainted.union(expiring) } else { lost = lost.union(expiring) @@ -493,7 +493,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // create followup evals, and update the ClientStatus to unknown. 
timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) - case tg.SingleInstanceOnLost: + case tg.AvoidRescheduleOnLost: untainted = untainted.union(disconnecting) } diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 881b4ecd208..341a8d0d8f1 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -1363,40 +1363,40 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { } // Tests the reconciler properly handles lost nodes with allocations -func TestReconciler_LostNode_SingleInstanceOnLost(t *testing.T) { +func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { ci.Parallel(t) testCases := []struct { - name string - singleInstanceOnLost bool - place int - stop int - ignore int - disconnect int - allocStatus string + name string + AvoidRescheduleOnLost bool + place int + stop int + ignore int + disconnect int + allocStatus string }{ { - name: "SingleInstanceOnLost off", - singleInstanceOnLost: false, - place: 2, - stop: 2, - ignore: 8, - allocStatus: structs.AllocClientStatusLost, + name: "AvoidRescheduleOnLost off", + AvoidRescheduleOnLost: false, + place: 2, + stop: 2, + ignore: 8, + allocStatus: structs.AllocClientStatusLost, }, { - name: "SingleInstanceOnLost on", - singleInstanceOnLost: true, - place: 0, - stop: 0, - ignore: 10, - disconnect: 2, - allocStatus: structs.AllocClientStatusUnknown, + name: "AvoidRescheduleOnLost on", + AvoidRescheduleOnLost: true, + place: 0, + stop: 0, + ignore: 10, + disconnect: 2, + allocStatus: structs.AllocClientStatusUnknown, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { job := mock.Job() - job.TaskGroups[0].SingleInstanceOnLost = tc.singleInstanceOnLost + job.TaskGroups[0].AvoidRescheduleOnLost = tc.AvoidRescheduleOnLost // Create 10 existing allocations var allocs []*structs.Allocation for i := 0; i < 10; i++ { diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index f1ddce6cf5d..15cbdafe444 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -277,7 +277,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } } else { - if alloc.SingleInstanceOnLost() { + if alloc.AvoidRescheduleOnLost() { if alloc.ClientStatus == structs.AllocClientStatusRunning { disconnecting[alloc.ID] = alloc continue @@ -364,7 +364,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS // Allocs on terminal nodes that can't be rescheduled need to be treated // differently than those that can. 
if taintedNode.TerminalStatus() { - if alloc.SingleInstanceOnLost() { + if alloc.AvoidRescheduleOnLost() { if alloc.ClientStatus == structs.AllocClientStatusUnknown { untainted[alloc.ID] = alloc continue diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index f5bdcda392e..11861a6a665 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -44,14 +44,14 @@ func TestAllocSet_filterByTainted(t *testing.T) { testJobSingle := mock.Job() testJobSingle.TaskGroups[0].MaxClientDisconnect = pointer.Of(5 * time.Second) - testJobSingle.TaskGroups[0].SingleInstanceOnLost = true + testJobSingle.TaskGroups[0].AvoidRescheduleOnLost = true testJobNoMaxDisconnect := mock.Job() testJobNoMaxDisconnect.TaskGroups[0].MaxClientDisconnect = nil testJobNoMaxDisconnectSingle := mock.Job() testJobNoMaxDisconnectSingle.TaskGroups[0].MaxClientDisconnect = nil - testJobNoMaxDisconnectSingle.TaskGroups[0].SingleInstanceOnLost = true + testJobNoMaxDisconnectSingle.TaskGroups[0].AvoidRescheduleOnLost = true unknownAllocState := []*structs.AllocState{{ Field: structs.AllocStateFieldClientStatus, @@ -85,7 +85,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { supportsDisconnectedClients bool skipNilNodeTest bool now time.Time - singleInstanceOnLost bool + AvoidRescheduleOnLost bool // expected results untainted allocSet migrate allocSet diff --git a/scheduler/util.go b/scheduler/util.go index 2156fd15e4b..56296ebc200 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -222,9 +222,9 @@ func tasksUpdated(jobA, jobB *structs.Job, taskGroup string) comparison { return difference("number of tasks", lenA, lenB) } - // Check for SingleInstanceOnLost changes - if a.SingleInstanceOnLost != b.SingleInstanceOnLost { - return difference("reschedule on lost", a.SingleInstanceOnLost, b.SingleInstanceOnLost) + // Check for AvoidRescheduleOnLost changes + if a.AvoidRescheduleOnLost != b.AvoidRescheduleOnLost { + return difference("reschedule on lost", a.AvoidRescheduleOnLost, b.AvoidRescheduleOnLost) } // Check ephemeral disk diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 8217464f4fb..f04523e11c6 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -548,9 +548,9 @@ func TestTasksUpdated(t *testing.T) { j30.TaskGroups[0].Tasks[0].Templates[0].ErrMissingKey = true require.True(t, tasksUpdated(j29, j30, name).modified) - // Change SingleInstanceOnLost mode + // Change AvoidRescheduleOnLost mode j31 := mock.Job() - j31.TaskGroups[0].SingleInstanceOnLost = true + j31.TaskGroups[0].AvoidRescheduleOnLost = true require.True(t, tasksUpdated(j1, j31, name).modified) } diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 3015618773a..4b4d777b7c1 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,7 +63,7 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". -- `single_instance_on_lost` `(bool: true)` - Specifies if a groups tasks that can't +- `avoid_reschedule_on_lost` `(bool: true)` - Specifies if a groups tasks that can't have multiple instances running at the same time. 
If the node this tasks are running on becomes disconnected or goes down, this allocations wont be rescheduled and will show up as `unknown` until the node comes back up or they are manually From e4ab2f5bdecf07931ce0d45f6044a99b9bb2c83e Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Tue, 28 Nov 2023 14:58:36 +0100 Subject: [PATCH 25/50] Update scheduler/reconcile_test.go Co-authored-by: Tim Gross --- scheduler/reconcile_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 341a8d0d8f1..250dd6e1012 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -506,7 +506,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) { } if tc.shouldStopOnDisconnectedNode { - require.Equal(t, testNode.ID, stopResult.alloc.NodeID) + must.Eq(t, testNode.ID, stopResult.alloc.NodeID) } else { require.NotEqual(t, testNode.ID, stopResult.alloc.NodeID) } From 08cdc2960d2ff107840596cf111ab802e68af11e Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Tue, 28 Nov 2023 15:01:01 +0100 Subject: [PATCH 26/50] style: update documentation --- .../content/docs/job-specification/group.mdx | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 4b4d777b7c1..805db0fb38f 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,11 +63,16 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". -- `avoid_reschedule_on_lost` `(bool: true)` - Specifies if a groups tasks that can't - have multiple instances running at the same time. If the node this tasks are - running on becomes disconnected or goes down, this allocations wont be rescheduled - and will show up as `unknown` until the node comes back up or they are manually - restarted. +- `avoid_reschedule_on_lost` `(bool: true)` - Defines the reschedule behaviour + of an allocation when the node it is running on becomes disconnect or lost. + When enabled, if the node it is running on becomes disconnected + or goes down, this allocations wont be rescheduled and will show up as `unknown` + until the node comes back up or it is manually restarted. + This behaviour will only modify the reschedule process on the server. + To modify the allocation behaviour on the client, see `stop_after_client_disconnect`. + + In case of `max_client_disconnect_disconnect` also being enabled, + `reschedule_policy` must be disabled. Setting both up will return an error. - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks in this group. If omitted, a default policy exists for each job @@ -329,11 +334,13 @@ group "second" { } ``` -~> **Note:** The `max_client_disconnect` feature is only supported on Nomad -version 1.3.0 and above. If you run a job with `max_client_disconnect` on servers -where some servers are not upgraded to 1.3.0, the `max_client_disconnect` -flag will be _ignored_. Deploying a job with `max_client_disconnect` to a -`datacenter` of Nomad clients where all clients are not 1.3.0 or above is unsupported. +~> **Note:** The `max_client_disconnect` and `avoid_reschedule_on_lost` +feature is only supported on Nomad version 1.3.0 and above. 
If you run a job +with `max_client_disconnect` or `avoid_reschedule_on_lost` on servers where +some servers are not upgraded to 1.3.0, the `max_client_disconnect` and +`avoid_reschedule_on_lost` flags will be _ignored_. Deploying a job with +`max_client_disconnect` or `avoid_reschedule_on_lost` to a`datacenter` of +Nomad clients where all clients are not 1.3.0 or above is unsupported. [task]: /nomad/docs/job-specification/task 'Nomad task Job Specification' [job]: /nomad/docs/job-specification/job 'Nomad job Job Specification' From 53d40e01ee2aa600b607207398103891739cacf4 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Tue, 28 Nov 2023 16:01:47 +0100 Subject: [PATCH 27/50] style: improve documentation, add test for dont GC when desired status is run --- nomad/core_sched_test.go | 8 ++++++++ nomad/plan_apply.go | 3 ++- nomad/structs/structs.go | 1 + scheduler/reconcile.go | 5 +++++ scheduler/reconcile_test.go | 2 +- 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index d4e826b7f64..7b2adc3cf19 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -2164,6 +2164,14 @@ func TestAllocation_GCEligible(t *testing.T) { }, ShouldGC: true, }, + { + Desc: "GC when alloc is unknown and but desired state is running", + ClientStatus: structs.AllocClientStatusUnknown, + DesiredStatus: structs.AllocDesiredStatusRun, + GCTime: fail, + JobStatus: structs.JobStatusRunning, + ShouldGC: false, + }, } for _, tc := range harness { diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 9c894b3084f..3ba9bf5c3f1 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -801,7 +801,8 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool { // as non reschedulables when lost. func isValidForLostNode(plan *structs.Plan, nodeID string) bool { for _, alloc := range plan.NodeAllocation[nodeID] { - if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.AvoidRescheduleOnLost()) { + if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.AvoidRescheduleOnLost()) && + (alloc.ClientStatus != structs.AllocClientStatusLost) { return false } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 64de3deddaf..062eeb78287 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -10824,6 +10824,7 @@ func (a *Allocation) copyImpl(job bool) *Allocation { func (a *Allocation) TerminalStatus() bool { // First check the desired state and if that isn't terminal, check client // state. + return a.ServerTerminalStatus() || a.ClientTerminalStatus() } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index b8cdd882a0f..8eacff30393 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -481,6 +481,11 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // which ones later and which ones can't be rescheduled at all. timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { + // If MaxClientDisconnect is enabled as well as tg.AvoidRescheduleOnLost, + // the reschedule policy won't be enable and the lost allocations + // wont be rescheduled, there is no need to specifically do any extra processing. + // If MaxClientDisconnect is not enabled, then AvoidRescheduleOnLost + // requires handling. 
switch { case tg.MaxClientDisconnect != nil: untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 250dd6e1012..215c4a0c697 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -395,7 +396,6 @@ func TestReconciler_Disconnected_Client(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - require.NotEqual(t, 0, tc.allocCount, "invalid test case: alloc count must be greater than zero") testNode := mock.Node() if tc.nodeStatusDisconnected == true { From 2ef92634f94d78e688366f17f46f2b11aad286e3 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 29 Nov 2023 13:15:17 +0100 Subject: [PATCH 28/50] func: expand tests for LostNode_AvoidRescheduleOnLost, add functional params to the scheduler for testing --- scheduler/reconcile.go | 18 +++- scheduler/reconcile_test.go | 166 ++++++++++++++++++++++++++++-------- 2 files changed, 146 insertions(+), 38 deletions(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 8eacff30393..5ce4274ed26 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -40,6 +40,14 @@ const ( type allocUpdateType func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (ignore, destructive bool, updated *structs.Allocation) +type AllocReconcilerOption func(*allocReconciler) + +func AllocRenconcilerWithNow(now time.Time) AllocReconcilerOption { + return func(ar *allocReconciler) { + ar.now = now + } +} + // allocReconciler is used to determine the set of allocations that require // placement, inplace updating or stopping given the job specification and // existing cluster state. 
The reconciler should only be used for batch and @@ -186,8 +194,8 @@ func (r *reconcileResults) GoString() string { func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch bool, jobID string, job *structs.Job, deployment *structs.Deployment, existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node, evalID string, - evalPriority int, supportsDisconnectedClients bool) *allocReconciler { - return &allocReconciler{ + evalPriority int, supportsDisconnectedClients bool, opts ...AllocReconcilerOption) *allocReconciler { + ar := &allocReconciler{ logger: logger.Named("reconciler"), allocUpdateFn: allocUpdateFn, batch: batch, @@ -209,6 +217,12 @@ func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch taskGroupAllocNameIndexes: make(map[string]*allocNameIndex), }, } + + for _, op := range opts { + op(ar) + } + + return ar } // Compute reconciles the existing cluster state and returns the set of changes diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 215c4a0c697..4aa5364c54f 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -1362,79 +1362,173 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate)) } -// Tests the reconciler properly handles lost nodes with allocations +// Tests the reconciler properly handles allocations when a node +// goes down or disconnects, using all possible combinations of +// AvoidRescheduleOnLost, MaxClientDisconnect and ReschedulePolicy. +// Having the 3 configurations enabled is not a valid option and is not +// included in the test. func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { + disabledReschedulePolicy := &structs.ReschedulePolicy{ + Attempts: 0, + Unlimited: false, + } + fmt.Println(disabledReschedulePolicy) ci.Parallel(t) + now := time.Now() + testCases := []struct { name string - AvoidRescheduleOnLost bool - place int - stop int - ignore int - disconnect int + avoidRescheduleOnLost bool + maxClientDisconnect *time.Duration + reschedulePolicy *structs.ReschedulePolicy + expectPlace int + expectStop int + expectIgnore int + expectDisconnect int allocStatus string }{ { - name: "AvoidRescheduleOnLost off", - AvoidRescheduleOnLost: false, - place: 2, - stop: 2, - ignore: 8, + name: "AvoidRescheduleOnLost off, MaxClientDisconnect off, Reschedule off", + maxClientDisconnect: nil, + avoidRescheduleOnLost: false, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 2, + expectStop: 2, + expectIgnore: 3, + expectDisconnect: 0, + allocStatus: structs.AllocClientStatusLost, + }, + { + name: "AvoidRescheduleOnLost on, MaxClientDisconnect off, Reschedule off", + maxClientDisconnect: nil, + avoidRescheduleOnLost: true, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 0, + expectStop: 0, + expectIgnore: 5, + expectDisconnect: 2, + allocStatus: structs.AllocClientStatusUnknown, + }, + { + name: "AvoidRescheduleOnLost off, MaxClientDisconnect on, Reschedule off", + maxClientDisconnect: pointer.Of(10 * time.Second), + avoidRescheduleOnLost: false, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 2, + expectStop: 1, + expectIgnore: 4, + expectDisconnect: 1, allocStatus: structs.AllocClientStatusLost, }, { - name: "AvoidRescheduleOnLost on", - AvoidRescheduleOnLost: true, - place: 0, - stop: 0, - ignore: 10, - disconnect: 2, + name: "AvoidRescheduleOnLost on, MaxClientDisconnect on, Reschedule off", + 
maxClientDisconnect: pointer.Of(10 * time.Second), + avoidRescheduleOnLost: true, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 1, // This behaviour needs to be verified + expectStop: 0, + expectIgnore: 5, + expectDisconnect: 2, allocStatus: structs.AllocClientStatusUnknown, }, + + { + name: "AvoidRescheduleOnLost off, MaxClientDisconnect off, Reschedule on", + maxClientDisconnect: nil, + avoidRescheduleOnLost: false, + reschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 1, + }, + expectPlace: 3, + expectStop: 3, + expectIgnore: 2, + allocStatus: structs.AllocClientStatusLost, + }, + { + name: "AvoidRescheduleOnLost on, MaxClientDisconnect off, Reschedule on", + maxClientDisconnect: nil, + avoidRescheduleOnLost: true, + reschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 1, + }, + expectPlace: 1, + expectStop: 1, + expectIgnore: 4, + expectDisconnect: 2, + allocStatus: structs.AllocClientStatusUnknown, + }, + { + name: "AvoidRescheduleOnLost off, MaxClientDisconnect on, Reschedule on", + maxClientDisconnect: pointer.Of(10 * time.Second), + avoidRescheduleOnLost: false, + reschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 1, + }, + expectPlace: 3, + expectStop: 1, + expectIgnore: 3, + expectDisconnect: 1, + allocStatus: structs.AllocClientStatusLost, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { job := mock.Job() - job.TaskGroups[0].AvoidRescheduleOnLost = tc.AvoidRescheduleOnLost - // Create 10 existing allocations + job.TaskGroups[0].Count = 5 + job.TaskGroups[0].AvoidRescheduleOnLost = tc.avoidRescheduleOnLost + job.TaskGroups[0].MaxClientDisconnect = tc.maxClientDisconnect + job.TaskGroups[0].ReschedulePolicy = tc.reschedulePolicy + + // Create 9 existing running allocations and a failed one var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 0; i < 5; i++ { alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.ClientStatus = structs.AllocClientStatusRunning + + // Set one of the allocations to failed + if i == 4 { + alloc.ClientStatus = structs.AllocClientStatusFailed + } else { + alloc.ClientStatus = structs.AllocClientStatusRunning + } + + allocs = append(allocs, alloc) } - // Build a map of tainted nodes + // Build a map of tainted nodes, one down one disconnected tainted := make(map[string]*structs.Node, 2) - for i := 0; i < 2; i++ { - n := mock.Node() - n.ID = allocs[i].NodeID - n.Status = structs.NodeStatusDown - tainted[n.ID] = n - } + downNode := mock.Node() + downNode.ID = allocs[0].NodeID + downNode.Status = structs.NodeStatusDown + tainted[downNode.ID] = downNode + + disconnected := mock.Node() + disconnected.ID = allocs[1].NodeID + disconnected.Status = structs.NodeStatusDisconnected + tainted[disconnected.ID] = disconnected reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, tainted, "", 50, true) + nil, allocs, tainted, "", 50, true, AllocRenconcilerWithNow(now)) r := reconciler.Compute() // Assert the correct results assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: tc.place, - stop: tc.stop, - disconnectUpdates: tc.disconnect, + place: tc.expectPlace, + stop: tc.expectStop, + disconnectUpdates: tc.expectDisconnect, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: 
{ - Place: uint64(tc.place), - Stop: uint64(tc.stop), - Ignore: uint64(tc.ignore), + Place: uint64(tc.expectPlace), + Stop: uint64(tc.expectStop), + Ignore: uint64(tc.expectIgnore), }, }, }) From cc5c95c4a84c89cffd90102078f825bac6eebd4d Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 29 Nov 2023 13:33:58 +0100 Subject: [PATCH 29/50] fix: update error message --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 062eeb78287..fd505250f0b 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4676,7 +4676,7 @@ func (j *Job) Validate() error { if tg.MaxClientDisconnect != nil && tg.ReschedulePolicy.Attempts > 0 && tg.AvoidRescheduleOnLost { - err := fmt.Errorf("having max_client_disconnect enable along with a reschedule policy can lead to having multiple instances of a task running at the same time") + err := fmt.Errorf("max_client_disconnect and single_instance_on_lost cannot be enabled when rechedule.attempts > 0") mErr.Errors = append(mErr.Errors, err) } } From 1111c35a7820756fb5886d489816e23a34322117 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 29 Nov 2023 13:34:50 +0100 Subject: [PATCH 30/50] style: replace switch for else/if block --- scheduler/reconcile.go | 9 +- scheduler/reconcile_test.go | 1468 +++++++++++++++++------------------ 2 files changed, 739 insertions(+), 738 deletions(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 5ce4274ed26..99d42c8386a 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -500,8 +500,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // wont be rescheduled, there is no need to specifically do any extra processing. // If MaxClientDisconnect is not enabled, then AvoidRescheduleOnLost // requires handling. - switch { - case tg.MaxClientDisconnect != nil: + if tg.MaxClientDisconnect != nil { untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) rescheduleNow = rescheduleNow.union(rescheduleDisconnecting) @@ -512,8 +511,10 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // create followup evals, and update the ClientStatus to unknown. timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) - case tg.AvoidRescheduleOnLost: - untainted = untainted.union(disconnecting) + } else { + if tg.AvoidRescheduleOnLost { + untainted = untainted.union(disconnecting) + } } a.appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 4aa5364c54f..b0d1cfd3b9f 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -43,624 +43,150 @@ var ( } ) -// Tests that when a node disconnects/reconnects allocations for that node are -// reconciled according to the business rules. 
-func TestReconciler_Disconnected_Client(t *testing.T) { - disconnectAllocState := []*structs.AllocState{{ - Field: structs.AllocStateFieldClientStatus, - Value: structs.AllocClientStatusUnknown, - Time: time.Now(), - }} - - type testCase struct { - name string - allocCount int - disconnectedAllocCount int - jobVersionIncrement uint64 - nodeScoreIncrement float64 - disconnectedAllocStatus string - disconnectedAllocStates []*structs.AllocState - isBatch bool - nodeStatusDisconnected bool - replace bool - failReplacement bool - taintReplacement bool - disconnectReplacement bool - replaceFailedReplacement bool - shouldStopOnDisconnectedNode bool - maxDisconnect *time.Duration - expected *resultExpectation - } +func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { + return true, false, nil +} - testCases := []testCase{ - { - name: "reconnect-original-no-replacement", - allocCount: 2, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, +func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { + return false, true, nil +} - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: false, - expected: &resultExpectation{ - reconnectUpdates: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - }, - }, - }, +func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { + // Create a shallow copy + newAlloc := existing.CopySkipJob() + newAlloc.AllocatedResources = &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{}, + Shared: structs.AllocatedSharedResources{ + DiskMB: int64(newTG.EphemeralDisk.SizeMB), }, - { - name: "resume-original-and-stop-replacement", - allocCount: 3, - replace: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + } - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: false, - expected: &resultExpectation{ - stop: 1, - reconnectUpdates: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 1, - Ignore: 3, - }, - }, + // Use the new task resources but keep the network from the old + for _, task := range newTG.Tasks { + networks := existing.AllocatedResources.Tasks[task.Name].Copy().Networks + newAlloc.AllocatedResources.Tasks[task.Name] = &structs.AllocatedTaskResources{ + Cpu: structs.AllocatedCpuResources{ + CpuShares: int64(task.Resources.CPU), }, - }, - { - name: "stop-original-with-lower-node-score", - allocCount: 4, - replace: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusRunning, - - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - nodeScoreIncrement: 1, - expected: &resultExpectation{ - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 1, - Ignore: 4, - }, - }, + Memory: structs.AllocatedMemoryResources{ + MemoryMB: int64(task.Resources.MemoryMB), }, - }, - { - name: "stop-original-failed-on-reconnect", - allocCount: 4, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusFailed, + Networks: networks, + } + } - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: 
map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 4, - }, - }, - }, - }, - { - name: "reschedule-original-failed-if-not-replaced", - allocCount: 4, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusFailed, + return false, false, newAlloc +} - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - expected: &resultExpectation{ - stop: 2, - place: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - Place: 2, - Stop: 2, - }, - }, - }, - }, - { - name: "ignore-reconnect-completed", - allocCount: 2, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusComplete, +func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType { + return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { + if fn, ok := handled[existing.ID]; ok { + return fn(existing, newJob, newTG) + } - disconnectedAllocStates: disconnectAllocState, - isBatch: true, - expected: &resultExpectation{ - place: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - Place: 2, - }, - }, - }, - }, - { - name: "keep-original-alloc-and-stop-failed-replacement", - allocCount: 3, - replace: true, - failReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + return unhandled(existing, newJob, newTG) + } +} - disconnectedAllocStates: disconnectAllocState, - expected: &resultExpectation{ - reconnectUpdates: 2, - stop: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 5, - }, - }, - }, - }, - { - name: "keep-original-and-stop-reconnecting-replacement", - allocCount: 2, - replace: true, - disconnectReplacement: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusRunning, +var ( + // AllocationIndexRegex is a regular expression to find the allocation index. + allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") +) - disconnectedAllocStates: disconnectAllocState, - expected: &resultExpectation{ - reconnectUpdates: 1, - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 2, - Stop: 1, - }, - }, - }, - }, - { - name: "keep-original-and-stop-tainted-replacement", - allocCount: 3, - replace: true, - taintReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, +// allocNameToIndex returns the index of the allocation. 
+func allocNameToIndex(name string) uint { + matches := allocationIndexRegex.FindStringSubmatch(name) + if len(matches) != 2 { + return 0 + } - disconnectedAllocStates: disconnectAllocState, - expected: &resultExpectation{ - reconnectUpdates: 2, - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 3, - Stop: 2, - }, - }, - }, - }, - { - name: "stop-original-alloc-with-old-job-version", - allocCount: 5, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + index, err := strconv.Atoi(matches[1]) + if err != nil { + return 0 + } - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - jobVersionIncrement: 1, - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Ignore: 5, - Stop: 2, - }, - }, - }, - }, - { - name: "stop-original-alloc-with-old-job-version-reconnect-eval", - allocCount: 5, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, + return uint(index) +} - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - jobVersionIncrement: 1, - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 5, - }, - }, - }, - }, - { - name: "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced", - allocCount: 5, - replace: true, - failReplacement: true, - replaceFailedReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: false, - jobVersionIncrement: 1, - expected: &resultExpectation{ - stop: 2, - reconnectUpdates: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 7, - }, - }, - }, - }, - { - name: "stop-original-pending-alloc-for-disconnected-node", - allocCount: 2, - replace: true, - disconnectedAllocCount: 1, - disconnectedAllocStatus: structs.AllocClientStatusPending, +func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) { + t.Helper() + m := make(map[uint]int) + for _, i := range indexes { + m[uint(i)] += 1 + } - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - nodeStatusDisconnected: true, - expected: &resultExpectation{ - stop: 1, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 1, - Ignore: 2, - }, - }, - }, - }, - { - name: "stop-failed-original-and-failed-replacements-and-place-new", - allocCount: 5, - replace: true, - failReplacement: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusFailed, + for _, n := range names { + index := allocNameToIndex(n) + val, contained := m[index] + if !contained { + t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names) + } - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - expected: &resultExpectation{ - stop: 2, - place: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Place: 2, - Ignore: 5, - }, - }, - }, - }, - { - name: "stop-expired-allocs", - allocCount: 5, - replace: true, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusUnknown, - disconnectedAllocStates: disconnectAllocState, - shouldStopOnDisconnectedNode: true, - nodeStatusDisconnected: true, - maxDisconnect: pointer.Of(2 * 
time.Second), - expected: &resultExpectation{ - stop: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Stop: 2, - Ignore: 5, - }, - }, - }, - }, - { - name: "replace-allocs-on-disconnected-node", - allocCount: 5, - replace: false, - disconnectedAllocCount: 2, - disconnectedAllocStatus: structs.AllocClientStatusRunning, - disconnectedAllocStates: []*structs.AllocState{}, - nodeStatusDisconnected: true, - expected: &resultExpectation{ - place: 2, - disconnectUpdates: 2, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - "web": { - Place: 2, - Ignore: 3, - }, - }, - }, - }, + val-- + if val < 0 { + t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names) + } + m[index] = val } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - - testNode := mock.Node() - if tc.nodeStatusDisconnected == true { - testNode.Status = structs.NodeStatusDisconnected - } + for k, remainder := range m { + if remainder != 0 { + t.Fatalf("Index %d has %d remaining uses expected\nAll names: %v", k, remainder, names) + } + } +} - // Create resumable allocs - job, allocs := buildResumableAllocations(tc.allocCount, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) +func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) { + t.Helper() + canaryIndex := make(map[string]struct{}) + for _, state := range d.TaskGroups { + for _, c := range state.PlacedCanaries { + canaryIndex[c] = struct{}{} + } + } - origAllocs := set.New[string](len(allocs)) - for _, alloc := range allocs { - origAllocs.Insert(alloc.ID) - } + for _, s := range stop { + if _, ok := canaryIndex[s.alloc.ID]; ok { + t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name) + } + } +} - if tc.isBatch { - job.Type = structs.JobTypeBatch - } +func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) { + t.Helper() + names := make(map[string]struct{}, numPrevious) - // Set alloc state - disconnectedAllocCount := tc.disconnectedAllocCount - for _, alloc := range allocs { - alloc.DesiredStatus = structs.AllocDesiredStatusRun + found := 0 + for _, p := range place { + if _, ok := names[p.name]; ok { + t.Fatalf("Name %q already placed", p.name) + } + names[p.name] = struct{}{} - if tc.maxDisconnect != nil { - alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect - } + if p.previousAlloc == nil { + continue + } - if disconnectedAllocCount > 0 { - alloc.ClientStatus = tc.disconnectedAllocStatus - alloc.AllocStates = tc.disconnectedAllocStates - // Set the node id on all the disconnected allocs to the node under test. - alloc.NodeID = testNode.ID - alloc.NodeName = "disconnected" - disconnectedAllocCount-- - } - } + if act := p.previousAlloc.Name; p.name != act { + t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name) + } + found++ + } + if numPrevious != found { + t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found) + } +} - // Place the allocs on another node. 
- if tc.replace { - replacements := make([]*structs.Allocation, 0) - for _, alloc := range allocs { - if alloc.NodeID != testNode.ID { - continue - } - replacement := alloc.Copy() - replacement.ID = uuid.Generate() - replacement.NodeID = uuid.Generate() - replacement.ClientStatus = structs.AllocClientStatusRunning - replacement.PreviousAllocation = alloc.ID - replacement.AllocStates = nil - replacement.TaskStates = nil - replacement.CreateIndex += 1 - alloc.NextAllocation = replacement.ID +func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) { + t.Helper() + names := make(map[string]struct{}, numRescheduled) - if tc.jobVersionIncrement != 0 { - replacement.Job.Version = replacement.Job.Version + tc.jobVersionIncrement - } - if tc.nodeScoreIncrement != 0 { - replacement.Metrics.ScoreMetaData[0].NormScore = replacement.Metrics.ScoreMetaData[0].NormScore + tc.nodeScoreIncrement - } - if tc.taintReplacement { - replacement.DesiredTransition.Migrate = pointer.Of(true) - } - if tc.disconnectReplacement { - replacement.AllocStates = tc.disconnectedAllocStates - } - - // If we want to test intermediate replacement failures simulate that. - if tc.failReplacement { - replacement.ClientStatus = structs.AllocClientStatusFailed - - if tc.replaceFailedReplacement { - nextReplacement := replacement.Copy() - nextReplacement.ID = uuid.Generate() - nextReplacement.ClientStatus = structs.AllocClientStatusRunning - nextReplacement.DesiredStatus = structs.AllocDesiredStatusRun - nextReplacement.PreviousAllocation = replacement.ID - nextReplacement.CreateIndex += 1 - - replacement.NextAllocation = nextReplacement.ID - replacement.DesiredStatus = structs.AllocDesiredStatusStop - - replacements = append(replacements, nextReplacement) - } - } - - replacements = append(replacements, replacement) - } - - allocs = append(allocs, replacements...) - } - - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, - nil, allocs, map[string]*structs.Node{testNode.ID: testNode}, "", 50, true) - - reconciler.now = time.Now() - if tc.maxDisconnect != nil { - reconciler.now = time.Now().Add(*tc.maxDisconnect * 20) - } - - results := reconciler.Compute() - assertResults(t, results, tc.expected) - - for _, stopResult := range results.stop { - // Skip replacement allocs. 
- if !origAllocs.Contains(stopResult.alloc.ID) { - continue - } - - if tc.shouldStopOnDisconnectedNode { - must.Eq(t, testNode.ID, stopResult.alloc.NodeID) - } else { - require.NotEqual(t, testNode.ID, stopResult.alloc.NodeID) - } - - require.Equal(t, job.Version, stopResult.alloc.Job.Version) - } - }) - } -} - -func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { - return true, false, nil -} - -func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { - return false, true, nil -} - -func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { - // Create a shallow copy - newAlloc := existing.CopySkipJob() - newAlloc.AllocatedResources = &structs.AllocatedResources{ - Tasks: map[string]*structs.AllocatedTaskResources{}, - Shared: structs.AllocatedSharedResources{ - DiskMB: int64(newTG.EphemeralDisk.SizeMB), - }, - } - - // Use the new task resources but keep the network from the old - for _, task := range newTG.Tasks { - networks := existing.AllocatedResources.Tasks[task.Name].Copy().Networks - newAlloc.AllocatedResources.Tasks[task.Name] = &structs.AllocatedTaskResources{ - Cpu: structs.AllocatedCpuResources{ - CpuShares: int64(task.Resources.CPU), - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: int64(task.Resources.MemoryMB), - }, - Networks: networks, - } - } - - return false, false, newAlloc -} - -func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType { - return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { - if fn, ok := handled[existing.ID]; ok { - return fn(existing, newJob, newTG) - } - - return unhandled(existing, newJob, newTG) - } -} - -var ( - // AllocationIndexRegex is a regular expression to find the allocation index. - allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") -) - -// allocNameToIndex returns the index of the allocation. 
-func allocNameToIndex(name string) uint { - matches := allocationIndexRegex.FindStringSubmatch(name) - if len(matches) != 2 { - return 0 - } - - index, err := strconv.Atoi(matches[1]) - if err != nil { - return 0 - } - - return uint(index) -} - -func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) { - t.Helper() - m := make(map[uint]int) - for _, i := range indexes { - m[uint(i)] += 1 - } - - for _, n := range names { - index := allocNameToIndex(n) - val, contained := m[index] - if !contained { - t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names) - } - - val-- - if val < 0 { - t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names) - } - m[index] = val - } - - for k, remainder := range m { - if remainder != 0 { - t.Fatalf("Index %d has %d remaining uses expected\nAll names: %v", k, remainder, names) - } - } -} - -func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) { - t.Helper() - canaryIndex := make(map[string]struct{}) - for _, state := range d.TaskGroups { - for _, c := range state.PlacedCanaries { - canaryIndex[c] = struct{}{} - } - } - - for _, s := range stop { - if _, ok := canaryIndex[s.alloc.ID]; ok { - t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name) - } - } -} - -func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) { - t.Helper() - names := make(map[string]struct{}, numPrevious) - - found := 0 - for _, p := range place { - if _, ok := names[p.name]; ok { - t.Fatalf("Name %q already placed", p.name) - } - names[p.name] = struct{}{} - - if p.previousAlloc == nil { - continue - } - - if act := p.previousAlloc.Name; p.name != act { - t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name) - } - found++ - } - if numPrevious != found { - t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found) - } -} - -func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) { - t.Helper() - names := make(map[string]struct{}, numRescheduled) - - found := 0 - for _, p := range place { - if _, ok := names[p.name]; ok { - t.Fatalf("Name %q already placed", p.name) - } - names[p.name] = struct{}{} + found := 0 + for _, p := range place { + if _, ok := names[p.name]; ok { + t.Fatalf("Name %q already placed", p.name) + } + names[p.name] = struct{}{} if p.previousAlloc == nil { continue @@ -1372,7 +898,7 @@ func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { Attempts: 0, Unlimited: false, } - fmt.Println(disabledReschedulePolicy) + ci.Parallel(t) now := time.Now() @@ -5627,169 +5153,643 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T Ignore: 0, }, }, - }) - assertPlaceResultsHavePreviousAllocs(t, 10, r.place) -} - -// Tests force rescheduling a failed alloc that is past its reschedule limit -func TestReconciler_ForceReschedule_Service(t *testing.T) { - ci.Parallel(t) + }) + assertPlaceResultsHavePreviousAllocs(t, 10, r.place) +} + +// Tests force rescheduling a failed alloc that is past its reschedule limit +func TestReconciler_ForceReschedule_Service(t *testing.T) { + ci.Parallel(t) + + require := require.New(t) + + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: 
"", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = noCanaryUpdate + + // Create 5 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + + // Mark one as failed and past its reschedule limit so not eligible to reschedule + allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + + // Mark DesiredTransition ForceReschedule + allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)} + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, "", 50, true) + r := reconciler.Compute() + + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Verify that one rescheduled alloc was created because of the forced reschedule + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + stop: 1, + inplace: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + Stop: 1, + Ignore: 4, + }, + }, + }) + + // Rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertPlacementsAreRescheduled(t, 1, r.place) +} + +// Tests behavior of service failure with rescheduling policy preventing rescheduling: +// new allocs should be placed to satisfy the job count, and current allocations are +// left unmodified +func TestReconciler_RescheduleNot_Service(t *testing.T) { + ci.Parallel(t) + + require := require.New(t) + + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + now := time.Now() + + // Set up reschedule policy and update block + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 0, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = noCanaryUpdate + + // Create 5 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + + // Mark two as failed + allocs[0].ClientStatus = structs.AllocClientStatusFailed + + // Mark one of them as already rescheduled once + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed + + // Mark one as desired 
state stop + allocs[4].DesiredStatus = structs.AllocDesiredStatusStop + + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, + nil, allocs, nil, "", 50, true) + r := reconciler.Compute() + + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // no rescheduling, ignore all 4 allocs + // but place one to substitute allocs[4] that was stopped explicitly + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + Ignore: 4, + Stop: 0, + }, + }, + }) + + // none of the placement should have preallocs or rescheduled + assertPlaceResultsHavePreviousAllocs(t, 0, r.place) + assertPlacementsAreRescheduled(t, 0, r.place) +} + +// Tests that when a node disconnects/reconnects allocations for that node are +// reconciled according to the business rules. +func TestReconciler_Disconnected_Client(t *testing.T) { + disconnectAllocState := []*structs.AllocState{{ + Field: structs.AllocStateFieldClientStatus, + Value: structs.AllocClientStatusUnknown, + Time: time.Now(), + }} + + type testCase struct { + name string + allocCount int + disconnectedAllocCount int + jobVersionIncrement uint64 + nodeScoreIncrement float64 + disconnectedAllocStatus string + disconnectedAllocStates []*structs.AllocState + isBatch bool + nodeStatusDisconnected bool + replace bool + failReplacement bool + taintReplacement bool + disconnectReplacement bool + replaceFailedReplacement bool + shouldStopOnDisconnectedNode bool + maxDisconnect *time.Duration + expected *resultExpectation + } + + testCases := []testCase{ + { + name: "reconnect-original-no-replacement", + allocCount: 2, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + expected: &resultExpectation{ + reconnectUpdates: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + }, + }, + }, + }, + { + name: "resume-original-and-stop-replacement", + allocCount: 3, + replace: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + expected: &resultExpectation{ + stop: 1, + reconnectUpdates: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 3, + }, + }, + }, + }, + { + name: "stop-original-with-lower-node-score", + allocCount: 4, + replace: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + nodeScoreIncrement: 1, + expected: &resultExpectation{ + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 4, + }, + }, + }, + }, + { + name: "stop-original-failed-on-reconnect", + allocCount: 4, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusFailed, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 4, + }, + }, + }, + }, + { + name: "reschedule-original-failed-if-not-replaced", + 
allocCount: 4, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusFailed, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + expected: &resultExpectation{ + stop: 2, + place: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + Place: 2, + Stop: 2, + }, + }, + }, + }, + { + name: "ignore-reconnect-completed", + allocCount: 2, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusComplete, + + disconnectedAllocStates: disconnectAllocState, + isBatch: true, + expected: &resultExpectation{ + place: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + Place: 2, + }, + }, + }, + }, + { + name: "keep-original-alloc-and-stop-failed-replacement", + allocCount: 3, + replace: true, + failReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + expected: &resultExpectation{ + reconnectUpdates: 2, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 5, + }, + }, + }, + }, + { + name: "keep-original-and-stop-reconnecting-replacement", + allocCount: 2, + replace: true, + disconnectReplacement: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + expected: &resultExpectation{ + reconnectUpdates: 1, + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 2, + Stop: 1, + }, + }, + }, + }, + { + name: "keep-original-and-stop-tainted-replacement", + allocCount: 3, + replace: true, + taintReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + expected: &resultExpectation{ + reconnectUpdates: 2, + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 3, + Stop: 2, + }, + }, + }, + }, + { + name: "stop-original-alloc-with-old-job-version", + allocCount: 5, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + jobVersionIncrement: 1, + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Ignore: 5, + Stop: 2, + }, + }, + }, + }, + { + name: "stop-original-alloc-with-old-job-version-reconnect-eval", + allocCount: 5, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + jobVersionIncrement: 1, + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 5, + }, + }, + }, + }, + { + name: "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced", + allocCount: 5, + replace: true, + failReplacement: true, + replaceFailedReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + jobVersionIncrement: 1, + expected: &resultExpectation{ + stop: 2, + reconnectUpdates: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 7, + }, + 
}, + }, + }, + { + name: "stop-original-pending-alloc-for-disconnected-node", + allocCount: 2, + replace: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusPending, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + nodeStatusDisconnected: true, + expected: &resultExpectation{ + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 2, + }, + }, + }, + }, + { + name: "stop-failed-original-and-failed-replacements-and-place-new", + allocCount: 5, + replace: true, + failReplacement: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusFailed, + + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + expected: &resultExpectation{ + stop: 2, + place: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Place: 2, + Ignore: 5, + }, + }, + }, + }, + { + name: "stop-expired-allocs", + allocCount: 5, + replace: true, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusUnknown, + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: true, + nodeStatusDisconnected: true, + maxDisconnect: pointer.Of(2 * time.Second), + expected: &resultExpectation{ + stop: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 2, + Ignore: 5, + }, + }, + }, + }, + { + name: "replace-allocs-on-disconnected-node", + allocCount: 5, + replace: false, + disconnectedAllocCount: 2, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + disconnectedAllocStates: []*structs.AllocState{}, + nodeStatusDisconnected: true, + expected: &resultExpectation{ + place: 2, + disconnectUpdates: 2, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Place: 2, + Ignore: 3, + }, + }, + }, + }, + } - require := require.New(t) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { - // Set desired 5 - job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name + testNode := mock.Node() + if tc.nodeStatusDisconnected == true { + testNode.Status = structs.NodeStatusDisconnected + } - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 1, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } - job.TaskGroups[0].Update = noCanaryUpdate + // Create resumable allocs + job, allocs := buildResumableAllocations(tc.allocCount, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2) - // Create 5 existing allocations - var allocs []*structs.Allocation - for i := 0; i < 5; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning - } + origAllocs := set.New[string](len(allocs)) + for _, alloc := range allocs { + origAllocs.Insert(alloc.ID) + } - // Mark one as failed and past its reschedule limit so not eligible to reschedule - allocs[0].ClientStatus = structs.AllocClientStatusFailed - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} + if tc.isBatch { + job.Type = 
structs.JobTypeBatch + } - // Mark DesiredTransition ForceReschedule - allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)} + // Set alloc state + disconnectedAllocCount := tc.disconnectedAllocCount + for _, alloc := range allocs { + alloc.DesiredStatus = structs.AllocDesiredStatusRun - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + if tc.maxDisconnect != nil { + alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect + } - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) + if disconnectedAllocCount > 0 { + alloc.ClientStatus = tc.disconnectedAllocStatus + alloc.AllocStates = tc.disconnectedAllocStates + // Set the node id on all the disconnected allocs to the node under test. + alloc.NodeID = testNode.ID + alloc.NodeName = "disconnected" + disconnectedAllocCount-- + } + } - // Verify that one rescheduled alloc was created because of the forced reschedule - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 1, - stop: 1, - inplace: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 1, - Stop: 1, - Ignore: 4, - }, - }, - }) + // Place the allocs on another node. + if tc.replace { + replacements := make([]*structs.Allocation, 0) + for _, alloc := range allocs { + if alloc.NodeID != testNode.ID { + continue + } + replacement := alloc.Copy() + replacement.ID = uuid.Generate() + replacement.NodeID = uuid.Generate() + replacement.ClientStatus = structs.AllocClientStatusRunning + replacement.PreviousAllocation = alloc.ID + replacement.AllocStates = nil + replacement.TaskStates = nil + replacement.CreateIndex += 1 + alloc.NextAllocation = replacement.ID - // Rescheduled allocs should have previous allocs - assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) - assertPlaceResultsHavePreviousAllocs(t, 1, r.place) - assertPlacementsAreRescheduled(t, 1, r.place) -} + if tc.jobVersionIncrement != 0 { + replacement.Job.Version = replacement.Job.Version + tc.jobVersionIncrement + } + if tc.nodeScoreIncrement != 0 { + replacement.Metrics.ScoreMetaData[0].NormScore = replacement.Metrics.ScoreMetaData[0].NormScore + tc.nodeScoreIncrement + } + if tc.taintReplacement { + replacement.DesiredTransition.Migrate = pointer.Of(true) + } + if tc.disconnectReplacement { + replacement.AllocStates = tc.disconnectedAllocStates + } -// Tests behavior of service failure with rescheduling policy preventing rescheduling: -// new allocs should be placed to satisfy the job count, and current allocations are -// left unmodified -func TestReconciler_RescheduleNot_Service(t *testing.T) { - ci.Parallel(t) + // If we want to test intermediate replacement failures simulate that. 
+ if tc.failReplacement { + replacement.ClientStatus = structs.AllocClientStatusFailed - require := require.New(t) + if tc.replaceFailedReplacement { + nextReplacement := replacement.Copy() + nextReplacement.ID = uuid.Generate() + nextReplacement.ClientStatus = structs.AllocClientStatusRunning + nextReplacement.DesiredStatus = structs.AllocDesiredStatusRun + nextReplacement.PreviousAllocation = replacement.ID + nextReplacement.CreateIndex += 1 - // Set desired 5 - job := mock.Job() - job.TaskGroups[0].Count = 5 - tgName := job.TaskGroups[0].Name - now := time.Now() + replacement.NextAllocation = nextReplacement.ID + replacement.DesiredStatus = structs.AllocDesiredStatusStop - // Set up reschedule policy and update block - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 0, - Interval: 24 * time.Hour, - Delay: 5 * time.Second, - DelayFunction: "", - MaxDelay: 1 * time.Hour, - Unlimited: false, - } - job.TaskGroups[0].Update = noCanaryUpdate + replacements = append(replacements, nextReplacement) + } + } - // Create 5 existing allocations - var allocs []*structs.Allocation - for i := 0; i < 5; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) - allocs = append(allocs, alloc) - alloc.ClientStatus = structs.AllocClientStatusRunning - } + replacements = append(replacements, replacement) + } - // Mark two as failed - allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs = append(allocs, replacements...) + } - // Mark one of them as already rescheduled once - allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ - {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), - PrevAllocID: uuid.Generate(), - PrevNodeID: uuid.Generate(), - }, - }} - allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", - StartedAt: now.Add(-1 * time.Hour), - FinishedAt: now.Add(-10 * time.Second)}} - allocs[1].ClientStatus = structs.AllocClientStatusFailed + reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job, + nil, allocs, map[string]*structs.Node{testNode.ID: testNode}, "", 50, true) - // Mark one as desired state stop - allocs[4].DesiredStatus = structs.AllocDesiredStatusStop + reconciler.now = time.Now() + if tc.maxDisconnect != nil { + reconciler.now = time.Now().Add(*tc.maxDisconnect * 20) + } - reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, - nil, allocs, nil, "", 50, true) - r := reconciler.Compute() + results := reconciler.Compute() + assertResults(t, results, tc.expected) - // Verify that no follow up evals were created - evals := r.desiredFollowupEvals[tgName] - require.Nil(evals) + for _, stopResult := range results.stop { + // Skip replacement allocs. 
+ if !origAllocs.Contains(stopResult.alloc.ID) { + continue + } - // no rescheduling, ignore all 4 allocs - // but place one to substitute allocs[4] that was stopped explicitly - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 1, - inplace: 0, - stop: 0, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 1, - Ignore: 4, - Stop: 0, - }, - }, - }) + if tc.shouldStopOnDisconnectedNode { + must.Eq(t, testNode.ID, stopResult.alloc.NodeID) + } else { + require.NotEqual(t, testNode.ID, stopResult.alloc.NodeID) + } - // none of the placement should have preallocs or rescheduled - assertPlaceResultsHavePreviousAllocs(t, 0, r.place) - assertPlacementsAreRescheduled(t, 0, r.place) + require.Equal(t, job.Version, stopResult.alloc.Job.Version) + } + }) + } } // Tests behavior of batch failure with rescheduling policy preventing rescheduling: From 46038d3fbff8cf5496df2dc289314e55d5f98b20 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 29 Nov 2023 13:40:57 +0100 Subject: [PATCH 31/50] style: linter fix --- scheduler/reconcile.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 99d42c8386a..94fa984aa44 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -511,10 +511,8 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // create followup evals, and update the ClientStatus to unknown. timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) - } else { - if tg.AvoidRescheduleOnLost { - untainted = untainted.union(disconnecting) - } + } else if tg.AvoidRescheduleOnLost { + untainted = untainted.union(disconnecting) } a.appendUnknownDisconnectingUpdates(disconnecting, timeoutLaterEvals) From 18efe4c2519bdbeb46dd9150194c15f3cdcce962 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 29 Nov 2023 14:20:25 +0100 Subject: [PATCH 32/50] fix: update tests results --- nomad/structs/diff_test.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index a058a9f4b6f..ae6b4f19c93 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -441,7 +441,6 @@ func TestJobDiff(t *testing.T) { }, }, }, - { // NodePool added Old: &Job{}, @@ -510,7 +509,6 @@ func TestJobDiff(t *testing.T) { Type: DiffTypeNone, }, }, - { // Periodic added Old: &Job{}, @@ -1297,15 +1295,15 @@ func TestJobDiff(t *testing.T) { Fields: []*FieldDiff{ { Type: DiffTypeAdded, - Name: "Count", + Name: "AvoidRescheduleOnLost", Old: "", - New: "1", + New: "true", }, { Type: DiffTypeAdded, - Name: "AvoidRescheduleOnLost", + Name: "Count", Old: "", - New: "true", + New: "1", }, }, }, @@ -1329,16 +1327,17 @@ func TestJobDiff(t *testing.T) { Type: DiffTypeDeleted, Name: "foo", Fields: []*FieldDiff{ + { Type: DiffTypeDeleted, - Name: "Count", - Old: "1", + Name: "AvoidRescheduleOnLost", + Old: "true", New: "", }, { Type: DiffTypeDeleted, - Name: "AvoidRescheduleOnLost", - Old: "true", + Name: "Count", + Old: "1", New: "", }, }, From 4e7a7f21eb9523abca1097d3003e3525a009b2cb Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Wed, 29 Nov 2023 17:31:29 +0100 Subject: [PATCH 33/50] style: fix documentation --- scheduler/reconcile_util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 15cbdafe444..d792226bed8 100644 --- 
a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -339,7 +339,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS // Filter allocs on a node that is now re-connected to be resumed. if reconnect { // Expired unknown allocs should be processed depending on the max client disconnect - // and single instance on lost configurations, they are both treated as + // and/or avoid reschedule on lost configurations, they are both treated as // expiring. if alloc.Expired(now) { expiring[alloc.ID] = alloc From 9d09e7f527e0b21f4950aeea3b3d6e0a52390aad Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Wed, 29 Nov 2023 23:08:00 +0100 Subject: [PATCH 34/50] Update website/content/docs/job-specification/group.mdx Co-authored-by: Tim Gross --- website/content/docs/job-specification/group.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 805db0fb38f..a0f6b7da628 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,7 +63,7 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". -- `avoid_reschedule_on_lost` `(bool: true)` - Defines the reschedule behaviour +- `avoid_reschedule_on_lost` `(bool: false)` - Defines the reschedule behaviour of an allocation when the node it is running on becomes disconnect or lost. When enabled, if the node it is running on becomes disconnected or goes down, this allocations wont be rescheduled and will show up as `unknown` From e48c6c6a8fa16aa3977398c72671a2dd7f87fc7e Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Thu, 30 Nov 2023 11:52:12 +0100 Subject: [PATCH 35/50] Update website/content/docs/job-specification/group.mdx Co-authored-by: Tim Gross --- website/content/docs/job-specification/group.mdx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index a0f6b7da628..57149002cd5 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -71,8 +71,11 @@ job "docs" { This behaviour will only modify the reschedule process on the server. To modify the allocation behaviour on the client, see `stop_after_client_disconnect`. - In case of `max_client_disconnect_disconnect` also being enabled, - `reschedule_policy` must be disabled. Setting both up will return an error. + Setting `max_client_disconnect` and `avoid_reschedule_on_list=true` at the same time + requires that [rescheduling is disabled entirely][] If [`max_client_disconnect`](#max_client_disconnect) + is set and `avoid_reschedule_on_lost=true`, allocations on disconnected nodes will be + `unknown` until the `max_client_disconnect` window expires, at which point they + become `lost` but will not be rescheduled. - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks in this group. 
If omitted, a default policy exists for each job From 90e5a6bc90a1f1917418dc651e1401c9e0def96e Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Thu, 30 Nov 2023 11:56:12 +0100 Subject: [PATCH 36/50] style: improve documentation --- scheduler/reconcile_util_test.go | 3 +-- website/content/docs/job-specification/group.mdx | 8 -------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 11861a6a665..88b899f5a60 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -4,7 +4,6 @@ package scheduler import ( - "fmt" "testing" "time" @@ -77,7 +76,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { Time: now, }, } - fmt.Println(expiredAllocState, reconnectedAllocState, unknownAllocState) + type testCase struct { name string all allocSet diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 57149002cd5..427da5858f7 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -337,14 +337,6 @@ group "second" { } ``` -~> **Note:** The `max_client_disconnect` and `avoid_reschedule_on_lost` -feature is only supported on Nomad version 1.3.0 and above. If you run a job -with `max_client_disconnect` or `avoid_reschedule_on_lost` on servers where -some servers are not upgraded to 1.3.0, the `max_client_disconnect` and -`avoid_reschedule_on_lost` flags will be _ignored_. Deploying a job with -`max_client_disconnect` or `avoid_reschedule_on_lost` to a`datacenter` of -Nomad clients where all clients are not 1.3.0 or above is unsupported. - [task]: /nomad/docs/job-specification/task 'Nomad task Job Specification' [job]: /nomad/docs/job-specification/job 'Nomad job Job Specification' [constraint]: /nomad/docs/job-specification/constraint 'Nomad constraint Job Specification' From d5998eb3b288398d0b8b0180bce54f2cf9fc49a2 Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Thu, 30 Nov 2023 11:56:45 +0100 Subject: [PATCH 37/50] Update scheduler/reconcile.go Co-authored-by: Tim Gross --- scheduler/reconcile.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 94fa984aa44..9f8fd9b5780 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -496,10 +496,8 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { // If MaxClientDisconnect is enabled as well as tg.AvoidRescheduleOnLost, - // the reschedule policy won't be enable and the lost allocations - // wont be rescheduled, there is no need to specifically do any extra processing. - // If MaxClientDisconnect is not enabled, then AvoidRescheduleOnLost - // requires handling. + // the reschedule policy won't be enabled and the lost allocations + // wont be rescheduled, and AvoidRescheduleOnLost is ignored. 
if tg.MaxClientDisconnect != nil { untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) From 7b46d070aae2df8ffc23ed37399ad7843129d91e Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Thu, 30 Nov 2023 12:13:05 +0100 Subject: [PATCH 38/50] func: remove changes of the avoidreschedule policy from destructive updates --- scheduler/util.go | 5 ----- scheduler/util_test.go | 4 ---- 2 files changed, 9 deletions(-) diff --git a/scheduler/util.go b/scheduler/util.go index 56296ebc200..0dade6571ef 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -222,11 +222,6 @@ func tasksUpdated(jobA, jobB *structs.Job, taskGroup string) comparison { return difference("number of tasks", lenA, lenB) } - // Check for AvoidRescheduleOnLost changes - if a.AvoidRescheduleOnLost != b.AvoidRescheduleOnLost { - return difference("reschedule on lost", a.AvoidRescheduleOnLost, b.AvoidRescheduleOnLost) - } - // Check ephemeral disk if !a.EphemeralDisk.Equal(b.EphemeralDisk) { return difference("ephemeral disk", a.EphemeralDisk, b.EphemeralDisk) diff --git a/scheduler/util_test.go b/scheduler/util_test.go index f04523e11c6..8d4b135dcf2 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -548,10 +548,6 @@ func TestTasksUpdated(t *testing.T) { j30.TaskGroups[0].Tasks[0].Templates[0].ErrMissingKey = true require.True(t, tasksUpdated(j29, j30, name).modified) - // Change AvoidRescheduleOnLost mode - j31 := mock.Job() - j31.TaskGroups[0].AvoidRescheduleOnLost = true - require.True(t, tasksUpdated(j1, j31, name).modified) } func TestTasksUpdated_connectServiceUpdated(t *testing.T) { From 8fda4374910f5da60fe2724295c6739dac2c2a83 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Thu, 30 Nov 2023 17:47:57 +0100 Subject: [PATCH 39/50] style: improve documentation --- .../content/docs/job-specification/group.mdx | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 427da5858f7..4bd915a8a6e 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -68,14 +68,25 @@ job "docs" { When enabled, if the node it is running on becomes disconnected or goes down, this allocations wont be rescheduled and will show up as `unknown` until the node comes back up or it is manually restarted. + This behaviour will only modify the reschedule process on the server. - To modify the allocation behaviour on the client, see `stop_after_client_disconnect`. - - Setting `max_client_disconnect` and `avoid_reschedule_on_list=true` at the same time - requires that [rescheduling is disabled entirely][] If [`max_client_disconnect`](#max_client_disconnect) - is set and `avoid_reschedule_on_lost=true`, allocations on disconnected nodes will be - `unknown` until the `max_client_disconnect` window expires, at which point they - become `lost` but will not be rescheduled. + To modify the allocation behaviour on the client, see + [`stop_after_client_disconnect`](#stop_after_client_disconnect) . + + Setting `max_client_disconnect` and `avoid_reschedule_on_lost=true` at the same + time requires that [rescheduling is disabled entirely][]. 
+ If [`max_client_disconnect`](#max_client_disconnect) is set and + `avoid_reschedule_on_lost=true`, allocations on disconnected nodes will be + `unknown` until the `max_client_disconnect` window expires, at which point + the node will be transition from `disconnected` to `down`. The allocation + will remain as `unknown` and won't be rescheduled. + + To get the job up and running again it has to be manually stoped and run again + or [restarted][restart] using the `reschedule` option: + + ```plaintext + `nomad job restart -reschedule ` + ``` - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks in this group. If omitted, a default policy exists for each job From 9a3f1a662033ea942dcea7d8d6d5741350669bb7 Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Fri, 1 Dec 2023 13:28:58 +0100 Subject: [PATCH 40/50] Update website/content/docs/job-specification/group.mdx Co-authored-by: Tim Gross --- website/content/docs/job-specification/group.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 4bd915a8a6e..3d8a397700e 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -81,7 +81,7 @@ job "docs" { the node will be transition from `disconnected` to `down`. The allocation will remain as `unknown` and won't be rescheduled. - To get the job up and running again it has to be manually stoped and run again + To get the job up and running again it has to be manually stopped and run again or [restarted][restart] using the `reschedule` option: ```plaintext From d0b01be19b9ad304b81a80a9a1ac5ac48bd7e815 Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Fri, 1 Dec 2023 13:29:37 +0100 Subject: [PATCH 41/50] Update nomad/plan_apply.go Co-authored-by: Luiz Aoqui --- nomad/plan_apply.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 3ba9bf5c3f1..34f351de4a8 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -738,10 +738,10 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri } return false, "node is disconnected and contains invalid updates", nil } else if node.Status == structs.NodeStatusDown { - if isValidForLostNode(plan, node.ID) { + if isValidForDownNode(plan, node.ID) { return true, "", nil } - return false, "node is lost and contains invalid updates", nil + return false, "node is down and contains invalid updates", nil } else if node.Status != structs.NodeStatusReady { return false, "node is not ready for placements", nil } From 2a3f8d1e24982898fd3a49f2426c197d3d37c80c Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Fri, 1 Dec 2023 13:29:50 +0100 Subject: [PATCH 42/50] Update nomad/structs/structs.go Co-authored-by: Luiz Aoqui --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index fd505250f0b..9a33ec15971 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4676,7 +4676,7 @@ func (j *Job) Validate() error { if tg.MaxClientDisconnect != nil && tg.ReschedulePolicy.Attempts > 0 && tg.AvoidRescheduleOnLost { - err := fmt.Errorf("max_client_disconnect and single_instance_on_lost cannot be enabled when rechedule.attempts > 0") + err := fmt.Errorf("max_client_disconnect and avoid_reschedule_on_lost cannot be enabled when rechedule.attempts > 0") mErr.Errors = 
append(mErr.Errors, err) } } From 47e267b86aa3e50d0ae3652c6dde97212f1e74d4 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Fri, 1 Dec 2023 18:42:47 +0100 Subject: [PATCH 43/50] style: rename option to prevent_reschedule_on_lost --- api/jobs_test.go | 66 +++++----- api/tasks.go | 6 +- client/client.go | 2 +- command/agent/job_endpoint.go | 6 +- command/agent/job_endpoint_test.go | 8 +- nomad/core_sched_test.go | 34 +++--- nomad/mock/job.go | 6 +- nomad/plan_apply.go | 8 +- nomad/structs/diff_test.go | 54 ++++----- nomad/structs/structs.go | 16 +-- scheduler/reconcile.go | 8 +- scheduler/reconcile_test.go | 114 +++++++++--------- scheduler/reconcile_util.go | 4 +- scheduler/reconcile_util_test.go | 6 +- scheduler/util_test.go | 2 +- .../content/docs/job-specification/group.mdx | 6 +- 16 files changed, 173 insertions(+), 173 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index 05b822d1df2..2ab57479d9e 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -312,9 +312,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), - AvoidRescheduleOnLost: pointerOf(false), + Name: pointerOf(""), + Count: pointerOf(1), + PreventRescheduleOnLost: pointerOf(false), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -400,9 +400,9 @@ func TestJobs_Canonicalize(t *testing.T) { JobModifyIndex: pointerOf(uint64(0)), TaskGroups: []*TaskGroup{ { - Name: pointerOf(""), - Count: pointerOf(1), - AvoidRescheduleOnLost: pointerOf(false), + Name: pointerOf(""), + Count: pointerOf(1), + PreventRescheduleOnLost: pointerOf(false), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -493,9 +493,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - AvoidRescheduleOnLost: pointerOf(false), - Count: pointerOf(1), + Name: pointerOf("bar"), + PreventRescheduleOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -558,9 +558,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), - AvoidRescheduleOnLost: pointerOf(true), + Name: pointerOf("cache"), + Count: pointerOf(1), + PreventRescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -670,9 +670,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("cache"), - Count: pointerOf(1), - AvoidRescheduleOnLost: pointerOf(true), + Name: pointerOf("cache"), + Count: pointerOf(1), + PreventRescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Interval: pointerOf(5 * time.Minute), Attempts: pointerOf(10), @@ -869,8 +869,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - AvoidRescheduleOnLost: pointerOf(true), + Name: pointerOf("bar"), + PreventRescheduleOnLost: pointerOf(true), Consul: &Consul{ Namespace: "", }, @@ -890,8 +890,8 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - AvoidRescheduleOnLost: pointerOf(false), + Name: pointerOf("baz"), + PreventRescheduleOnLost: pointerOf(false), Tasks: []*Task{ { Name: "task1", @@ -936,9 +936,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - Count: pointerOf(1), - AvoidRescheduleOnLost: pointerOf(true), + 
Name: pointerOf("bar"), + Count: pointerOf(1), + PreventRescheduleOnLost: pointerOf(true), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -986,9 +986,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - AvoidRescheduleOnLost: pointerOf(false), - Count: pointerOf(1), + Name: pointerOf("baz"), + PreventRescheduleOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1046,8 +1046,8 @@ func TestJobs_Canonicalize(t *testing.T) { ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - AvoidRescheduleOnLost: pointerOf(true), + Name: pointerOf("bar"), + PreventRescheduleOnLost: pointerOf(true), RestartPolicy: &RestartPolicy{ Delay: pointerOf(15 * time.Second), Attempts: pointerOf(2), @@ -1120,9 +1120,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: pointerOf("bar"), - AvoidRescheduleOnLost: pointerOf(true), - Count: pointerOf(1), + Name: pointerOf("bar"), + PreventRescheduleOnLost: pointerOf(true), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), @@ -1176,9 +1176,9 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: pointerOf("baz"), - AvoidRescheduleOnLost: pointerOf(false), - Count: pointerOf(1), + Name: pointerOf("baz"), + PreventRescheduleOnLost: pointerOf(false), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ Sticky: pointerOf(false), Migrate: pointerOf(false), diff --git a/api/tasks.go b/api/tasks.go index 68e55cb12eb..ea448d8c2f1 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -459,7 +459,7 @@ type TaskGroup struct { MaxClientDisconnect *time.Duration `mapstructure:"max_client_disconnect" hcl:"max_client_disconnect,optional"` Scaling *ScalingPolicy `hcl:"scaling,block"` Consul *Consul `hcl:"consul,block"` - AvoidRescheduleOnLost *bool `hcl:"avoid_reschedule_on_lost,optional"` + PreventRescheduleOnLost *bool `hcl:"prevent_reschedule_on_lost,optional"` } // NewTaskGroup creates a new TaskGroup. @@ -578,8 +578,8 @@ func (g *TaskGroup) Canonicalize(job *Job) { for _, s := range g.Services { s.Canonicalize(nil, g, job) } - if g.AvoidRescheduleOnLost == nil { - g.AvoidRescheduleOnLost = pointerOf(false) + if g.PreventRescheduleOnLost == nil { + g.PreventRescheduleOnLost = pointerOf(false) } } diff --git a/client/client.go b/client/client.go index 6a8c5c0570c..9d423fa4a5e 100644 --- a/client/client.go +++ b/client/client.go @@ -2670,7 +2670,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) { // Reconnect unknown allocations if they were updated and are not terminal. 
reconnect := update.ClientStatus == structs.AllocClientStatusUnknown && update.AllocModifyIndex > alloc.AllocModifyIndex && - (!update.ServerTerminalStatus() || !alloc.AvoidRescheduleOnLost()) + (!update.ServerTerminalStatus() || !alloc.PreventRescheduleOnLost()) if reconnect { err = ar.Reconnect(update) if err != nil { diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index c689ef09216..1ec32e645ff 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1135,10 +1135,10 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta RenderTemplates: *taskGroup.RestartPolicy.RenderTemplates, } - if taskGroup.AvoidRescheduleOnLost == nil { - tg.AvoidRescheduleOnLost = false + if taskGroup.PreventRescheduleOnLost == nil { + tg.PreventRescheduleOnLost = false } else { - tg.AvoidRescheduleOnLost = *taskGroup.AvoidRescheduleOnLost + tg.PreventRescheduleOnLost = *taskGroup.PreventRescheduleOnLost } if taskGroup.ShutdownDelay != nil { diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 0e95908ebe2..15bc4f2538e 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -3058,7 +3058,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Operand: "z", }, }, - AvoidRescheduleOnLost: false, + PreventRescheduleOnLost: false, Affinities: []*structs.Affinity{ { LTarget: "x", @@ -3553,9 +3553,9 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { }, TaskGroups: []*structs.TaskGroup{ { - Name: "group1", - Count: 5, - AvoidRescheduleOnLost: false, + Name: "group1", + Count: 5, + PreventRescheduleOnLost: false, Constraints: []*structs.Constraint{ { LTarget: "x", diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 7b2adc3cf19..113be84aa2b 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -1940,21 +1940,21 @@ func TestCoreScheduler_PartitionJobReap(t *testing.T) { // Tests various scenarios when allocations are eligible to be GCed func TestAllocation_GCEligible(t *testing.T) { type testCase struct { - Desc string - GCTime time.Time - ClientStatus string - DesiredStatus string - JobStatus string - JobStop bool - AvoidRescheduleOnLost *bool - AllocJobModifyIndex uint64 - JobModifyIndex uint64 - ModifyIndex uint64 - NextAllocID string - ReschedulePolicy *structs.ReschedulePolicy - RescheduleTrackers []*structs.RescheduleEvent - ThresholdIndex uint64 - ShouldGC bool + Desc string + GCTime time.Time + ClientStatus string + DesiredStatus string + JobStatus string + JobStop bool + PreventRescheduleOnLost *bool + AllocJobModifyIndex uint64 + JobModifyIndex uint64 + ModifyIndex uint64 + NextAllocID string + ReschedulePolicy *structs.ReschedulePolicy + RescheduleTrackers []*structs.RescheduleEvent + ThresholdIndex uint64 + ShouldGC bool } fail := time.Now() @@ -2183,8 +2183,8 @@ func TestAllocation_GCEligible(t *testing.T) { alloc.NextAllocation = tc.NextAllocID job := mock.Job() alloc.TaskGroup = job.TaskGroups[0].Name - if tc.AvoidRescheduleOnLost != nil { - job.TaskGroups[0].AvoidRescheduleOnLost = *tc.AvoidRescheduleOnLost + if tc.PreventRescheduleOnLost != nil { + job.TaskGroups[0].PreventRescheduleOnLost = *tc.PreventRescheduleOnLost } job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy if tc.JobStatus != "" { diff --git a/nomad/mock/job.go b/nomad/mock/job.go index 15c53707c15..3b4f4e05c90 100644 --- a/nomad/mock/job.go +++ b/nomad/mock/job.go @@ -31,9 +31,9 @@ func Job() *structs.Job { }, TaskGroups: []*structs.TaskGroup{ { - 
Name: "web", - Count: 10, - AvoidRescheduleOnLost: false, + Name: "web", + Count: 10, + PreventRescheduleOnLost: false, Constraints: []*structs.Constraint{ { LTarget: "${attr.consul.version}", diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 34f351de4a8..e29c00ea83c 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -796,12 +796,12 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool { return true } -// The plan is only valid for lost nodes if it only contains +// The plan is only valid for a node down if it only contains // updates to mark allocations as unknown and those allocations are configured -// as non reschedulables when lost. -func isValidForLostNode(plan *structs.Plan, nodeID string) bool { +// as non reschedulables when lost or if the allocs are being updated to lost. +func isValidForDownNode(plan *structs.Plan, nodeID string) bool { for _, alloc := range plan.NodeAllocation[nodeID] { - if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.AvoidRescheduleOnLost()) && + if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.PreventRescheduleOnLost()) && (alloc.ClientStatus != structs.AllocClientStatusLost) { return false } diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index ae6b4f19c93..8089eb0dd96 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1251,38 +1251,38 @@ func TestJobDiff(t *testing.T) { Old: &Job{ TaskGroups: []*TaskGroup{ { - Name: "foo", - Count: 1, - AvoidRescheduleOnLost: true, + Name: "foo", + Count: 1, + PreventRescheduleOnLost: true, }, { - Name: "bar", - Count: 1, - AvoidRescheduleOnLost: false, + Name: "bar", + Count: 1, + PreventRescheduleOnLost: false, }, { - Name: "baz", - Count: 1, - AvoidRescheduleOnLost: true, + Name: "baz", + Count: 1, + PreventRescheduleOnLost: true, }, }, }, New: &Job{ TaskGroups: []*TaskGroup{ { - Name: "bar", - Count: 1, - AvoidRescheduleOnLost: false, + Name: "bar", + Count: 1, + PreventRescheduleOnLost: false, }, { - Name: "baz", - Count: 2, - AvoidRescheduleOnLost: true, + Name: "baz", + Count: 2, + PreventRescheduleOnLost: true, }, { - Name: "bam", - Count: 1, - AvoidRescheduleOnLost: true, + Name: "bam", + Count: 1, + PreventRescheduleOnLost: true, }, }, }, @@ -1295,7 +1295,7 @@ func TestJobDiff(t *testing.T) { Fields: []*FieldDiff{ { Type: DiffTypeAdded, - Name: "AvoidRescheduleOnLost", + Name: "PreventRescheduleOnLost", Old: "", New: "true", }, @@ -1330,7 +1330,7 @@ func TestJobDiff(t *testing.T) { { Type: DiffTypeDeleted, - Name: "AvoidRescheduleOnLost", + Name: "PreventRescheduleOnLost", Old: "true", New: "", }, @@ -1861,14 +1861,14 @@ func TestTaskGroupDiff(t *testing.T) { { TestCase: "Reschedule on lost diff", Old: &TaskGroup{ - Name: "foo", - Count: 100, - AvoidRescheduleOnLost: true, + Name: "foo", + Count: 100, + PreventRescheduleOnLost: true, }, New: &TaskGroup{ - Name: "foo", - Count: 100, - AvoidRescheduleOnLost: false, + Name: "foo", + Count: 100, + PreventRescheduleOnLost: false, }, Expected: &TaskGroupDiff{ Type: DiffTypeEdited, @@ -1876,7 +1876,7 @@ func TestTaskGroupDiff(t *testing.T) { Fields: []*FieldDiff{ { Type: DiffTypeEdited, - Name: "AvoidRescheduleOnLost", + Name: "PreventRescheduleOnLost", Old: "true", New: "false", }, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 9a33ec15971..5e08805a136 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4675,8 +4675,8 @@ func (j *Job) Validate() error { if tg.MaxClientDisconnect != nil && 
tg.ReschedulePolicy.Attempts > 0 && - tg.AvoidRescheduleOnLost { - err := fmt.Errorf("max_client_disconnect and avoid_reschedule_on_lost cannot be enabled when rechedule.attempts > 0") + tg.PreventRescheduleOnLost { + err := fmt.Errorf("max_client_disconnect and prevent_reschedule_on_lost cannot be enabled when rechedule.attempts > 0") mErr.Errors = append(mErr.Errors, err) } } @@ -6649,10 +6649,10 @@ type TaskGroup struct { // allocations for tasks in this group to attempt to resume running without a restart. MaxClientDisconnect *time.Duration - // AvoidRescheduleOnLost is used to signal that an allocation should not + // PreventRescheduleOnLost is used to signal that an allocation should not // be rescheduled if its node becomes lost. If the node is disconnected, it will // be also considered as lost and wont be rescheduled. - AvoidRescheduleOnLost bool + PreventRescheduleOnLost bool } func (tg *TaskGroup) Copy() *TaskGroup { @@ -11032,13 +11032,13 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien return false } -// AvoidRescheduleOnLost determines if an alloc allows to have a replacement +// PreventRescheduleOnLost determines if an alloc allows to have a replacement // when lost. -func (a *Allocation) AvoidRescheduleOnLost() bool { +func (a *Allocation) PreventRescheduleOnLost() bool { if a.Job != nil { tg := a.Job.LookupTaskGroup(a.TaskGroup) if tg != nil { - return tg.AvoidRescheduleOnLost + return tg.PreventRescheduleOnLost } } @@ -11258,7 +11258,7 @@ func (a *Allocation) Expired(now time.Time) bool { return false } - if tg.MaxClientDisconnect == nil && !tg.AvoidRescheduleOnLost { + if tg.MaxClientDisconnect == nil && !tg.PreventRescheduleOnLost { return false } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 9f8fd9b5780..02c485b51fa 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -485,7 +485,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { } if len(expiring) > 0 { - if tg.AvoidRescheduleOnLost { + if tg.PreventRescheduleOnLost { untainted = untainted.union(expiring) } else { lost = lost.union(expiring) @@ -495,9 +495,9 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // which ones later and which ones can't be rescheduled at all. timeoutLaterEvals := map[string]string{} if len(disconnecting) > 0 { - // If MaxClientDisconnect is enabled as well as tg.AvoidRescheduleOnLost, + // If MaxClientDisconnect is enabled as well as tg.PreventRescheduleOnLost, // the reschedule policy won't be enabled and the lost allocations - // wont be rescheduled, and AvoidRescheduleOnLost is ignored. + // wont be rescheduled, and PreventRescheduleOnLost is ignored. if tg.MaxClientDisconnect != nil { untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment) @@ -509,7 +509,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool { // create followup evals, and update the ClientStatus to unknown. 
timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name) - } else if tg.AvoidRescheduleOnLost { + } else if tg.PreventRescheduleOnLost { untainted = untainted.union(disconnecting) } diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index b0d1cfd3b9f..3e13276a8db 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -890,10 +890,10 @@ func TestReconciler_Destructive_ScaleDown(t *testing.T) { // Tests the reconciler properly handles allocations when a node // goes down or disconnects, using all possible combinations of -// AvoidRescheduleOnLost, MaxClientDisconnect and ReschedulePolicy. +// PreventRescheduleOnLost, MaxClientDisconnect and ReschedulePolicy. // Having the 3 configurations enabled is not a valid option and is not // included in the test. -func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { +func TestReconciler_LostNode_PreventRescheduleOnLost(t *testing.T) { disabledReschedulePolicy := &structs.ReschedulePolicy{ Attempts: 0, Unlimited: false, @@ -903,65 +903,65 @@ func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { now := time.Now() testCases := []struct { - name string - avoidRescheduleOnLost bool - maxClientDisconnect *time.Duration - reschedulePolicy *structs.ReschedulePolicy - expectPlace int - expectStop int - expectIgnore int - expectDisconnect int - allocStatus string + name string + PreventRescheduleOnLost bool + maxClientDisconnect *time.Duration + reschedulePolicy *structs.ReschedulePolicy + expectPlace int + expectStop int + expectIgnore int + expectDisconnect int + allocStatus string }{ { - name: "AvoidRescheduleOnLost off, MaxClientDisconnect off, Reschedule off", - maxClientDisconnect: nil, - avoidRescheduleOnLost: false, - reschedulePolicy: disabledReschedulePolicy, - expectPlace: 2, - expectStop: 2, - expectIgnore: 3, - expectDisconnect: 0, - allocStatus: structs.AllocClientStatusLost, + name: "PreventRescheduleOnLost off, MaxClientDisconnect off, Reschedule off", + maxClientDisconnect: nil, + PreventRescheduleOnLost: false, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 2, + expectStop: 2, + expectIgnore: 3, + expectDisconnect: 0, + allocStatus: structs.AllocClientStatusLost, }, { - name: "AvoidRescheduleOnLost on, MaxClientDisconnect off, Reschedule off", - maxClientDisconnect: nil, - avoidRescheduleOnLost: true, - reschedulePolicy: disabledReschedulePolicy, - expectPlace: 0, - expectStop: 0, - expectIgnore: 5, - expectDisconnect: 2, - allocStatus: structs.AllocClientStatusUnknown, + name: "PreventRescheduleOnLost on, MaxClientDisconnect off, Reschedule off", + maxClientDisconnect: nil, + PreventRescheduleOnLost: true, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 0, + expectStop: 0, + expectIgnore: 5, + expectDisconnect: 2, + allocStatus: structs.AllocClientStatusUnknown, }, { - name: "AvoidRescheduleOnLost off, MaxClientDisconnect on, Reschedule off", - maxClientDisconnect: pointer.Of(10 * time.Second), - avoidRescheduleOnLost: false, - reschedulePolicy: disabledReschedulePolicy, - expectPlace: 2, - expectStop: 1, - expectIgnore: 4, - expectDisconnect: 1, - allocStatus: structs.AllocClientStatusLost, + name: "PreventRescheduleOnLost off, MaxClientDisconnect on, Reschedule off", + maxClientDisconnect: pointer.Of(10 * time.Second), + PreventRescheduleOnLost: false, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 2, + expectStop: 1, + expectIgnore: 4, + expectDisconnect: 1, + allocStatus: structs.AllocClientStatusLost, }, { - name: 
"AvoidRescheduleOnLost on, MaxClientDisconnect on, Reschedule off", - maxClientDisconnect: pointer.Of(10 * time.Second), - avoidRescheduleOnLost: true, - reschedulePolicy: disabledReschedulePolicy, - expectPlace: 1, // This behaviour needs to be verified - expectStop: 0, - expectIgnore: 5, - expectDisconnect: 2, - allocStatus: structs.AllocClientStatusUnknown, + name: "PreventRescheduleOnLost on, MaxClientDisconnect on, Reschedule off", + maxClientDisconnect: pointer.Of(10 * time.Second), + PreventRescheduleOnLost: true, + reschedulePolicy: disabledReschedulePolicy, + expectPlace: 1, // This behaviour needs to be verified + expectStop: 0, + expectIgnore: 5, + expectDisconnect: 2, + allocStatus: structs.AllocClientStatusUnknown, }, { - name: "AvoidRescheduleOnLost off, MaxClientDisconnect off, Reschedule on", - maxClientDisconnect: nil, - avoidRescheduleOnLost: false, + name: "PreventRescheduleOnLost off, MaxClientDisconnect off, Reschedule on", + maxClientDisconnect: nil, + PreventRescheduleOnLost: false, reschedulePolicy: &structs.ReschedulePolicy{ Attempts: 1, }, @@ -971,9 +971,9 @@ func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { allocStatus: structs.AllocClientStatusLost, }, { - name: "AvoidRescheduleOnLost on, MaxClientDisconnect off, Reschedule on", - maxClientDisconnect: nil, - avoidRescheduleOnLost: true, + name: "PreventRescheduleOnLost on, MaxClientDisconnect off, Reschedule on", + maxClientDisconnect: nil, + PreventRescheduleOnLost: true, reschedulePolicy: &structs.ReschedulePolicy{ Attempts: 1, }, @@ -984,9 +984,9 @@ func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { allocStatus: structs.AllocClientStatusUnknown, }, { - name: "AvoidRescheduleOnLost off, MaxClientDisconnect on, Reschedule on", - maxClientDisconnect: pointer.Of(10 * time.Second), - avoidRescheduleOnLost: false, + name: "PreventRescheduleOnLost off, MaxClientDisconnect on, Reschedule on", + maxClientDisconnect: pointer.Of(10 * time.Second), + PreventRescheduleOnLost: false, reschedulePolicy: &structs.ReschedulePolicy{ Attempts: 1, }, @@ -1002,7 +1002,7 @@ func TestReconciler_LostNode_AvoidRescheduleOnLost(t *testing.T) { t.Run(tc.name, func(t *testing.T) { job := mock.Job() job.TaskGroups[0].Count = 5 - job.TaskGroups[0].AvoidRescheduleOnLost = tc.avoidRescheduleOnLost + job.TaskGroups[0].PreventRescheduleOnLost = tc.PreventRescheduleOnLost job.TaskGroups[0].MaxClientDisconnect = tc.maxClientDisconnect job.TaskGroups[0].ReschedulePolicy = tc.reschedulePolicy diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index d792226bed8..18d8af5c4e5 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -277,7 +277,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } } else { - if alloc.AvoidRescheduleOnLost() { + if alloc.PreventRescheduleOnLost() { if alloc.ClientStatus == structs.AllocClientStatusRunning { disconnecting[alloc.ID] = alloc continue @@ -364,7 +364,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS // Allocs on terminal nodes that can't be rescheduled need to be treated // differently than those that can. 
if taintedNode.TerminalStatus() { - if alloc.AvoidRescheduleOnLost() { + if alloc.PreventRescheduleOnLost() { if alloc.ClientStatus == structs.AllocClientStatusUnknown { untainted[alloc.ID] = alloc continue diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 88b899f5a60..268c963d52c 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -43,14 +43,14 @@ func TestAllocSet_filterByTainted(t *testing.T) { testJobSingle := mock.Job() testJobSingle.TaskGroups[0].MaxClientDisconnect = pointer.Of(5 * time.Second) - testJobSingle.TaskGroups[0].AvoidRescheduleOnLost = true + testJobSingle.TaskGroups[0].PreventRescheduleOnLost = true testJobNoMaxDisconnect := mock.Job() testJobNoMaxDisconnect.TaskGroups[0].MaxClientDisconnect = nil testJobNoMaxDisconnectSingle := mock.Job() testJobNoMaxDisconnectSingle.TaskGroups[0].MaxClientDisconnect = nil - testJobNoMaxDisconnectSingle.TaskGroups[0].AvoidRescheduleOnLost = true + testJobNoMaxDisconnectSingle.TaskGroups[0].PreventRescheduleOnLost = true unknownAllocState := []*structs.AllocState{{ Field: structs.AllocStateFieldClientStatus, @@ -84,7 +84,7 @@ func TestAllocSet_filterByTainted(t *testing.T) { supportsDisconnectedClients bool skipNilNodeTest bool now time.Time - AvoidRescheduleOnLost bool + PreventRescheduleOnLost bool // expected results untainted allocSet migrate allocSet diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 8d4b135dcf2..8490b2e000d 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -546,7 +546,7 @@ func TestTasksUpdated(t *testing.T) { // Compare changed Template ErrMissingKey j30.TaskGroups[0].Tasks[0].Templates[0].ErrMissingKey = true - require.True(t, tasksUpdated(j29, j30, name).modified) + must.True(t, tasksUpdated(j29, j30, name).modified) } diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index 3d8a397700e..f1b2f23b943 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -63,7 +63,7 @@ job "docs" { rescheduling strategy. Nomad will then attempt to schedule the task on another node if any of the group allocation statuses become "failed". -- `avoid_reschedule_on_lost` `(bool: false)` - Defines the reschedule behaviour +- `prevent_reschedule_on_lost` `(bool: false)` - Defines the reschedule behaviour of an allocation when the node it is running on becomes disconnect or lost. When enabled, if the node it is running on becomes disconnected or goes down, this allocations wont be rescheduled and will show up as `unknown` @@ -73,10 +73,10 @@ job "docs" { To modify the allocation behaviour on the client, see [`stop_after_client_disconnect`](#stop_after_client_disconnect) . - Setting `max_client_disconnect` and `avoid_reschedule_on_lost=true` at the same + Setting `max_client_disconnect` and `prevent_reschedule_on_lost=true` at the same time requires that [rescheduling is disabled entirely][]. If [`max_client_disconnect`](#max_client_disconnect) is set and - `avoid_reschedule_on_lost=true`, allocations on disconnected nodes will be + `prevent_reschedule_on_lost=true`, allocations on disconnected nodes will be `unknown` until the `max_client_disconnect` window expires, at which point the node will be transition from `disconnected` to `down`. The allocation will remain as `unknown` and won't be rescheduled. 
From ab7ebf257640cdb90aae7c88f374980298221634 Mon Sep 17 00:00:00 2001 From: Juanadelacuesta Date: Mon, 4 Dec 2023 14:00:22 +0100 Subject: [PATCH 44/50] fix: update test --- nomad/structs/diff_test.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 8089eb0dd96..5a9d9c6e576 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1295,15 +1295,15 @@ func TestJobDiff(t *testing.T) { Fields: []*FieldDiff{ { Type: DiffTypeAdded, - Name: "PreventRescheduleOnLost", + Name: "Count", Old: "", - New: "true", + New: "1", }, { Type: DiffTypeAdded, - Name: "Count", + Name: "PreventRescheduleOnLost", Old: "", - New: "1", + New: "true", }, }, }, @@ -1327,17 +1327,16 @@ func TestJobDiff(t *testing.T) { Type: DiffTypeDeleted, Name: "foo", Fields: []*FieldDiff{ - { Type: DiffTypeDeleted, - Name: "PreventRescheduleOnLost", - Old: "true", + Name: "Count", + Old: "1", New: "", }, { Type: DiffTypeDeleted, - Name: "Count", - Old: "1", + Name: "PreventRescheduleOnLost", + Old: "true", New: "", }, }, From 17cb0bd390d31e56c9125b8b9664029aa16237cc Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Wed, 6 Dec 2023 11:00:24 +0100 Subject: [PATCH 45/50] Update nomad/structs/structs.go Co-authored-by: Luiz Aoqui --- nomad/structs/structs.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 5e08805a136..9036962ecd5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6650,8 +6650,7 @@ type TaskGroup struct { MaxClientDisconnect *time.Duration // PreventRescheduleOnLost is used to signal that an allocation should not - // be rescheduled if its node becomes lost. If the node is disconnected, it will - // be also considered as lost and wont be rescheduled. + // be rescheduled if its node goes down or is disconnected. PreventRescheduleOnLost bool } From a6d3ada82a5d2011b2c35a1f1f9a692dbadf1d68 Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Wed, 6 Dec 2023 11:00:40 +0100 Subject: [PATCH 46/50] Update nomad/structs/structs.go Co-authored-by: Luiz Aoqui --- nomad/structs/structs.go | 1 - 1 file changed, 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 9036962ecd5..6a4e79b97a8 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -10823,7 +10823,6 @@ func (a *Allocation) copyImpl(job bool) *Allocation { func (a *Allocation) TerminalStatus() bool { // First check the desired state and if that isn't terminal, check client // state. 
- return a.ServerTerminalStatus() || a.ClientTerminalStatus() } From 926774852231602280094c178199a74a7bac2d3f Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Wed, 6 Dec 2023 11:02:55 +0100 Subject: [PATCH 47/50] Update scheduler/util_test.go Co-authored-by: Luiz Aoqui --- scheduler/util_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 8490b2e000d..a7049333845 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -547,7 +547,6 @@ func TestTasksUpdated(t *testing.T) { // Compare changed Template ErrMissingKey j30.TaskGroups[0].Tasks[0].Templates[0].ErrMissingKey = true must.True(t, tasksUpdated(j29, j30, name).modified) - } func TestTasksUpdated_connectServiceUpdated(t *testing.T) { From ee08f4dc463162147b2e4ce0922a1ec9680eead8 Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Wed, 6 Dec 2023 11:03:04 +0100 Subject: [PATCH 48/50] Update website/content/docs/job-specification/group.mdx Co-authored-by: Luiz Aoqui --- website/content/docs/job-specification/group.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index f1b2f23b943..dd5858dc64f 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -64,7 +64,7 @@ job "docs" { node if any of the group allocation statuses become "failed". - `prevent_reschedule_on_lost` `(bool: false)` - Defines the reschedule behaviour - of an allocation when the node it is running on becomes disconnect or lost. + of an allocation when the node it is running on misses heartbeats. When enabled, if the node it is running on becomes disconnected or goes down, this allocations wont be rescheduled and will show up as `unknown` until the node comes back up or it is manually restarted. From 85d20b3c5c06a5a53420d47663c2523c4dd3d2ad Mon Sep 17 00:00:00 2001 From: Juana De La Cuesta Date: Wed, 6 Dec 2023 11:16:20 +0100 Subject: [PATCH 49/50] Update website/content/docs/job-specification/group.mdx Co-authored-by: Luiz Aoqui --- website/content/docs/job-specification/group.mdx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx index dd5858dc64f..0259d8bdba7 100644 --- a/website/content/docs/job-specification/group.mdx +++ b/website/content/docs/job-specification/group.mdx @@ -81,12 +81,10 @@ job "docs" { the node will be transition from `disconnected` to `down`. The allocation will remain as `unknown` and won't be rescheduled. - To get the job up and running again it has to be manually stopped and run again - or [restarted][restart] using the `reschedule` option: + The `unknown` allocation has to be manually stopped to run it again. ```plaintext - `nomad job restart -reschedule ` - ``` + `nomad alloc stop ` - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks in this group. 
If omitted, a default policy exists for each job

From f97e2801efab712047b1e2d2488eadee8c183301 Mon Sep 17 00:00:00 2001
From: Juanadelacuesta
Date: Wed, 6 Dec 2023 11:37:00 +0100
Subject: [PATCH 50/50] func: add new section to documentation

---
 .../content/docs/job-specification/group.mdx | 45 +++++++++++++++----
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/website/content/docs/job-specification/group.mdx b/website/content/docs/job-specification/group.mdx
index 0259d8bdba7..98024164e2f 100644
--- a/website/content/docs/job-specification/group.mdx
+++ b/website/content/docs/job-specification/group.mdx
@@ -71,20 +71,16 @@ job "docs" {
 
   This behaviour will only modify the reschedule process on the server.
   To modify the allocation behaviour on the client, see
-  [`stop_after_client_disconnect`](#stop_after_client_disconnect) .
-
-  Setting `max_client_disconnect` and `prevent_reschedule_on_lost=true` at the same
-  time requires that [rescheduling is disabled entirely][].
-  If [`max_client_disconnect`](#max_client_disconnect) is set and
-  `prevent_reschedule_on_lost=true`, allocations on disconnected nodes will be
-  `unknown` until the `max_client_disconnect` window expires, at which point
-  the node will be transition from `disconnected` to `down`. The allocation
-  will remain as `unknown` and won't be rescheduled.
+  [`stop_after_client_disconnect`](#stop_after_client_disconnect).
 
   The `unknown` allocation has to be manually stopped to run it again.
 
   ```plaintext
   `nomad alloc stop <alloc_id>`
+  ```
+
+  Setting `max_client_disconnect` and `prevent_reschedule_on_lost = true` at the
+  same time requires that [rescheduling is disabled entirely][`disable_rescheduling`].
 
 - `restart` ([Restart][]: nil) - Specifies the restart policy for all tasks
   in this group. If omitted, a default policy exists for each job
@@ -346,6 +342,36 @@ group "second" {
 }
 ```
 
+#### Max Client Disconnect and Prevent Reschedule On Lost
+
+Setting `max_client_disconnect` and `prevent_reschedule_on_lost = true` at the
+same time requires that [rescheduling is disabled entirely][`disable_rescheduling`].
+
+```hcl
+# jobspec.nomad
+
+group "first" {
+  max_client_disconnect      = "12h"
+  prevent_reschedule_on_lost = true
+
+  reschedule {
+    attempts  = 0
+    unlimited = false
+  }
+
+  task "first-task" {
+    ...
+  }
+}
+```
+
+If [`max_client_disconnect`](#max_client_disconnect) is set and
+`prevent_reschedule_on_lost = true`, allocations on disconnected nodes will be
+`unknown` until the `max_client_disconnect` window expires, at which point
+the node will transition from `disconnected` to `down`. The allocation
+will remain as `unknown` and won't be rescheduled.
+ + [task]: /nomad/docs/job-specification/task 'Nomad task Job Specification' [job]: /nomad/docs/job-specification/job 'Nomad job Job Specification' [constraint]: /nomad/docs/job-specification/constraint 'Nomad constraint Job Specification' @@ -356,6 +382,7 @@ group "second" { [ephemeraldisk]: /nomad/docs/job-specification/ephemeral_disk 'Nomad ephemeral_disk Job Specification' [`heartbeat_grace`]: /nomad/docs/configuration/server#heartbeat_grace [`max_client_disconnect`]: /nomad/docs/job-specification/group#max_client_disconnect +[`disable_rescheduling`]: /nomad/docs/job-specification/reschedule#disabling-rescheduling [max-client-disconnect]: /nomad/docs/job-specification/group#max-client-disconnect 'the example code below' [`stop_after_client_disconnect`]: /nomad/docs/job-specification/group#stop_after_client_disconnect [meta]: /nomad/docs/job-specification/meta 'Nomad meta Job Specification'
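
A complete jobspec sketch of the behaviour this series adds, for trying the feature locally. The job and task names, datacenter, Docker driver, and image are illustrative assumptions and are not taken from the patches, and `prevent_reschedule_on_lost` is only understood by a Nomad build that includes this series.

```hcl
job "cache" {
  datacenters = ["dc1"]

  group "cache" {
    count = 1

    # Keep the allocation on its original node if the node misses heartbeats;
    # it is reported as "unknown" instead of being replaced on another node.
    prevent_reschedule_on_lost = true

    # Give the client 12 hours to reconnect before the node is marked "down".
    max_client_disconnect = "12h"

    # Combining the two settings above requires rescheduling to be disabled.
    reschedule {
      attempts  = 0
      unlimited = false
    }

    task "redis" {
      driver = "docker"

      config {
        image = "redis:7"
      }
    }
  }
}
```

With this configuration, an allocation on a node that misses heartbeats stays `unknown` rather than being replaced; once the `max_client_disconnect` window expires the node transitions to `down` and the allocation remains `unknown` until an operator stops it with `nomad alloc stop <alloc_id>`.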