From 3dd8cd1bfad8ec731b26dea5be77dcfca002bdc3 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 22 Feb 2018 17:43:07 -0600 Subject: [PATCH 01/25] New delayed rescheduling options, validation function and unit tests --- nomad/mock/mock.go | 6 +- nomad/structs/diff_test.go | 122 ++++++++++++++++++++-- nomad/structs/structs.go | 190 +++++++++++++++++++++++++++++++--- nomad/structs/structs_test.go | 187 ++++++++++++++++++++++++++++----- 4 files changed, 452 insertions(+), 53 deletions(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 7de4987a25b..3a8588b9cba 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -92,8 +92,10 @@ func Job() *structs.Job { Mode: structs.RestartPolicyModeDelay, }, ReschedulePolicy: &structs.ReschedulePolicy{ - Attempts: 2, - Interval: 10 * time.Minute, + Attempts: 2, + Interval: 10 * time.Minute, + Delay: 5 * time.Second, + DelayFunction: "linear", }, Tasks: []*structs.Task{ { diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 10bce23b5c2..52f28938bdb 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1499,8 +1499,12 @@ func TestTaskGroupDiff(t *testing.T) { Old: &TaskGroup{}, New: &TaskGroup{ ReschedulePolicy: &ReschedulePolicy{ - Attempts: 1, - Interval: 15 * time.Second, + Attempts: 1, + Interval: 15 * time.Second, + Delay: 5 * time.Second, + DelayCeiling: 20 * time.Second, + DelayFunction: "exponential", + Unlimited: false, }, }, Expected: &TaskGroupDiff{ @@ -1516,12 +1520,36 @@ func TestTaskGroupDiff(t *testing.T) { Old: "", New: "1", }, + { + Type: DiffTypeAdded, + Name: "Delay", + Old: "", + New: "5000000000", + }, + { + Type: DiffTypeAdded, + Name: "DelayCeiling", + Old: "", + New: "20000000000", + }, + { + Type: DiffTypeAdded, + Name: "DelayFunction", + Old: "", + New: "exponential", + }, { Type: DiffTypeAdded, Name: "Interval", Old: "", New: "15000000000", }, + { + Type: DiffTypeAdded, + Name: "Unlimited", + Old: "", + New: "false", + }, }, }, }, @@ -1531,8 +1559,12 @@ func TestTaskGroupDiff(t *testing.T) { // ReschedulePolicy deleted Old: &TaskGroup{ ReschedulePolicy: &ReschedulePolicy{ - Attempts: 1, - Interval: 15 * time.Second, + Attempts: 1, + Interval: 15 * time.Second, + Delay: 5 * time.Second, + DelayCeiling: 20 * time.Second, + DelayFunction: "exponential", + Unlimited: false, }, }, New: &TaskGroup{}, @@ -1549,12 +1581,36 @@ func TestTaskGroupDiff(t *testing.T) { Old: "1", New: "", }, + { + Type: DiffTypeDeleted, + Name: "Delay", + Old: "5000000000", + New: "", + }, + { + Type: DiffTypeDeleted, + Name: "DelayCeiling", + Old: "20000000000", + New: "", + }, + { + Type: DiffTypeDeleted, + Name: "DelayFunction", + Old: "exponential", + New: "", + }, { Type: DiffTypeDeleted, Name: "Interval", Old: "15000000000", New: "", }, + { + Type: DiffTypeDeleted, + Name: "Unlimited", + Old: "false", + New: "", + }, }, }, }, @@ -1564,14 +1620,22 @@ func TestTaskGroupDiff(t *testing.T) { // ReschedulePolicy edited Old: &TaskGroup{ ReschedulePolicy: &ReschedulePolicy{ - Attempts: 1, - Interval: 1 * time.Second, + Attempts: 1, + Interval: 1 * time.Second, + DelayFunction: "exponential", + Delay: 20 * time.Second, + DelayCeiling: 1 * time.Minute, + Unlimited: false, }, }, New: &TaskGroup{ ReschedulePolicy: &ReschedulePolicy{ - Attempts: 2, - Interval: 2 * time.Second, + Attempts: 2, + Interval: 2 * time.Second, + DelayFunction: "linear", + Delay: 30 * time.Second, + DelayCeiling: 1 * time.Minute, + Unlimited: true, }, }, Expected: &TaskGroupDiff{ @@ -1587,12 +1651,30 @@ func TestTaskGroupDiff(t 
*testing.T) { Old: "1", New: "2", }, + { + Type: DiffTypeEdited, + Name: "Delay", + Old: "20000000000", + New: "30000000000", + }, + { + Type: DiffTypeEdited, + Name: "DelayFunction", + Old: "exponential", + New: "linear", + }, { Type: DiffTypeEdited, Name: "Interval", Old: "1000000000", New: "2000000000", }, + { + Type: DiffTypeEdited, + Name: "Unlimited", + Old: "false", + New: "true", + }, }, }, }, @@ -1625,12 +1707,36 @@ func TestTaskGroupDiff(t *testing.T) { Old: "1", New: "1", }, + { + Type: DiffTypeNone, + Name: "Delay", + Old: "0", + New: "0", + }, + { + Type: DiffTypeNone, + Name: "DelayCeiling", + Old: "0", + New: "0", + }, + { + Type: DiffTypeNone, + Name: "DelayFunction", + Old: "", + New: "", + }, { Type: DiffTypeEdited, Name: "Interval", Old: "1000000000", New: "2000000000", }, + { + Type: DiffTypeNone, + Name: "Unlimited", + Old: "false", + New: "false", + }, }, }, }, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 402b5701fb9..ea961271444 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -35,6 +35,8 @@ import ( "github.com/mitchellh/copystructure" "github.com/ugorji/go/codec" + "math" + hcodec "github.com/hashicorp/go-msgpack/codec" ) @@ -2656,12 +2658,16 @@ var ( var ( DefaultServiceJobReschedulePolicy = ReschedulePolicy{ - Attempts: 2, - Interval: 1 * time.Hour, + Delay: 30 * time.Second, + DelayFunction: "exponential", + DelayCeiling: 1 * time.Hour, + Unlimited: true, } DefaultBatchJobReschedulePolicy = ReschedulePolicy{ - Attempts: 1, - Interval: 24 * time.Hour, + Attempts: 1, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "linear", } ) @@ -2744,6 +2750,9 @@ func NewRestartPolicy(jobType string) *RestartPolicy { } const ReschedulePolicyMinInterval = 15 * time.Second +const ReschedulePolicyMinDelay = 5 * time.Second + +var RescheduleDelayFunctions = [...]string{"linear", "exponential", "fibonacci"} // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. type ReschedulePolicy struct { @@ -2753,7 +2762,20 @@ type ReschedulePolicy struct { // Interval is a duration in which we can limit the number of reschedule attempts. Interval time.Duration - //TODO delay + // Delay is a minimum duration to wait between reschedule attempts. + // The delay function determines how much subsequent reschedule attempts are delayed by. + Delay time.Duration + + // DelayFunction determines how the delay progressively changes on subsequent reschedule + // attempts. Valid values are "exponential", "linear", and "fibonacci". + DelayFunction string + + // DelayCeiling is an upper bound on the delay. + DelayCeiling time.Duration + + // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set between reschedule + // attempts. 
+ Unlimited bool } func (r *ReschedulePolicy) Copy() *ReschedulePolicy { @@ -2765,17 +2787,151 @@ func (r *ReschedulePolicy) Copy() *ReschedulePolicy { return nrp } +// Validate uses different criteria to validate the reschedule policy +// Delay must be a minimum of 5 seconds +// Delay Ceiling is ignored if Delay Function is "linear" +// Number of possible attempts is validated, given the interval, delay and delay function func (r *ReschedulePolicy) Validate() error { - if r != nil && r.Attempts > 0 { - var mErr multierror.Error - // Check for ambiguous/confusing settings + enabled := r != nil && (r.Attempts > 0 || r.Unlimited) + if !enabled { + return nil + } + var mErr multierror.Error + // Check for ambiguous/confusing settings + + delayPreCheck := true + // Delay should be bigger than the default + if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { + multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) + delayPreCheck = false + } + + // Must use a valid delay function + if !isValidDelayFunction(r.DelayFunction) { + multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions)) + delayPreCheck = false + } + + // Validate DelayCeiling if not using linear delay progression + if r.DelayFunction != "linear" { + if r.DelayCeiling.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { + multierror.Append(&mErr, fmt.Errorf("Delay Ceiling cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) + delayPreCheck = false + } + if r.DelayCeiling < r.Delay { + multierror.Append(&mErr, fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", r.Delay, r.DelayCeiling)) + delayPreCheck = false + } + + } + + //Validate Interval and other delay parameters if attempts are limited + if !r.Unlimited { if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { - multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) + multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) } + if !delayPreCheck { + // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early + return mErr.ErrorOrNil() + } + crossValidationErr := r.validateDelayParams() + if crossValidationErr != nil { + multierror.Append(&mErr, crossValidationErr) + } + } + return mErr.ErrorOrNil() +} - return mErr.ErrorOrNil() +func isValidDelayFunction(delayFunc string) bool { + for _, value := range RescheduleDelayFunctions { + if value == delayFunc { + return true + } } - return nil + return false +} + +func (r *ReschedulePolicy) validateDelayParams() error { + ok, possibleAttempts, recommendedInterval := r.viableAttempts() + if ok { + return nil + } + var mErr multierror.Error + if r.DelayFunction == "linear" { + multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+ + "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction)) + } else { + multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ + "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.DelayCeiling)) + } + multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Minute), r.Attempts)) + return mErr.ErrorOrNil() +} + +func (r 
*ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { + var possibleAttempts int + var recommendedInterval time.Duration + valid := true + switch r.DelayFunction { + case "linear": + recommendedInterval = time.Duration(r.Attempts) * r.Delay + if r.Interval < recommendedInterval { + possibleAttempts = int(r.Interval / r.Delay) + valid = false + } + case "exponential": + for i := 0; i < r.Attempts; i++ { + nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay + if nextDelay > r.DelayCeiling { + nextDelay = r.DelayCeiling + recommendedInterval += nextDelay + } else { + recommendedInterval = nextDelay + } + if recommendedInterval < r.Interval { + possibleAttempts++ + } + } + if possibleAttempts < r.Attempts { + valid = false + } + case "fibonacci": + var slots []time.Duration + slots = append(slots, r.Delay) + slots = append(slots, r.Delay) + reachedCeiling := false + for i := 2; i < r.Attempts; i++ { + var nextDelay time.Duration + if reachedCeiling { + //switch to linear + nextDelay = slots[i-1] + r.DelayCeiling + } else { + nextDelay = slots[i-1] + slots[i-2] + if nextDelay > r.DelayCeiling { + nextDelay = r.DelayCeiling + reachedCeiling = true + } + } + slots = append(slots, nextDelay) + } + recommendedInterval = slots[len(slots)-1] + if r.Interval < recommendedInterval { + valid = false + // calculate possible attempts + for i := 0; i < len(slots); i++ { + if slots[i] > r.Interval { + possibleAttempts = i + break + } + } + } + default: + return false, 0, 0 + } + if possibleAttempts < 0 { // can happen if delay is bigger than interval + possibleAttempts = 0 + } + return valid, possibleAttempts, recommendedInterval } func NewReschedulePolicy(jobType string) *ReschedulePolicy { @@ -2920,12 +3076,14 @@ func (tg *TaskGroup) Validate(j *Job) error { mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) } - if tg.ReschedulePolicy != nil { - if err := tg.ReschedulePolicy.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) + if j.Type != JobTypeSystem { + if tg.ReschedulePolicy != nil { + if err := tg.ReschedulePolicy.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + } else { + mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) } - } else { - mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) } if tg.EphemeralDisk != nil { diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 3a54b2f4282..f3dc8bda035 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -570,8 +570,10 @@ func testJob() *Job { Delay: 1 * time.Minute, }, ReschedulePolicy: &ReschedulePolicy{ - Interval: 5 * time.Minute, - Attempts: 10, + Interval: 5 * time.Minute, + Attempts: 10, + Delay: 5 * time.Second, + DelayFunction: "linear", }, Tasks: []*Task{ { @@ -930,6 +932,7 @@ func TestTaskGroup_Validate(t *testing.T) { ReschedulePolicy: &ReschedulePolicy{ Interval: 5 * time.Minute, Attempts: 5, + Delay: 5 * time.Second, }, } err := tg.Validate(j) @@ -1012,8 +1015,10 @@ func TestTaskGroup_Validate(t *testing.T) { Mode: RestartPolicyModeDelay, }, ReschedulePolicy: &ReschedulePolicy{ - Interval: 5 * time.Minute, - Attempts: 10, + Interval: 5 * time.Minute, + Attempts: 10, + Delay: 5 * time.Second, + DelayFunction: "linear", }, } @@ -2424,45 +2429,173 @@ func TestRestartPolicy_Validate(t *testing.T) { func TestReschedulePolicy_Validate(t *testing.T) { type testCase struct { + desc 
string ReschedulePolicy *ReschedulePolicy - err error + errors []error } testCases := []testCase{ { + desc: "Nil", + }, + { + desc: "Disabled", ReschedulePolicy: &ReschedulePolicy{ Attempts: 0, Interval: 0 * time.Second}, - err: nil, }, { + desc: "Disabled", ReschedulePolicy: &ReschedulePolicy{ - Attempts: 1, + Attempts: -1, Interval: 5 * time.Minute}, - err: nil, }, { + desc: "Valid Linear Delay", ReschedulePolicy: &ReschedulePolicy{ - Attempts: -1, - Interval: 5 * time.Minute}, - err: nil, + Attempts: 1, + Interval: 5 * time.Minute, + Delay: 10 * time.Second, + DelayFunction: "linear"}, + }, + { + desc: "Valid Exponential Delay", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 5, + Interval: 1 * time.Hour, + Delay: 30 * time.Second, + DelayCeiling: 5 * time.Minute, + DelayFunction: "exponential"}, + }, + { + desc: "Valid Fibonacci Delay", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 5, + Interval: 15 * time.Minute, + Delay: 10 * time.Second, + DelayCeiling: 5 * time.Minute, + DelayFunction: "fibonacci"}, + }, + { + desc: "Invalid delay function", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 1, + Interval: 1 * time.Second, + DelayFunction: "blah"}, + errors: []error{ + fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, time.Second), + fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, 0*time.Second), + fmt.Errorf("Invalid delay function %q, must be one of %q", "blah", RescheduleDelayFunctions), + }, + }, + { + desc: "Invalid delay ceiling", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 1, + Interval: 8 * time.Second, + DelayFunction: "exponential", + Delay: 15 * time.Second, + DelayCeiling: 5 * time.Second}, + errors: []error{ + fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", 15*time.Second, 5*time.Second), + }, + }, + { + desc: "Invalid delay and interval", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 1, + Interval: 1 * time.Second, + DelayFunction: "linear"}, + errors: []error{ + fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, time.Second), + fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, 0*time.Second), + }, + }, { + // Should suggest 2h40m as the interval + desc: "Invalid Attempts - linear delay", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 10, + Interval: 1 * time.Hour, + Delay: 20 * time.Minute, + DelayFunction: "linear", + }, + errors: []error{ + fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and delay function %q", 3, time.Hour, 20*time.Minute, "linear"), + fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", 200*time.Minute, 10), + }, }, { + // Should suggest 4h40m as the interval + // Delay progression in minutes {5, 10, 20, 40, 40, 40, 40, 40, 40, 40} + desc: "Invalid Attempts - exponential delay", ReschedulePolicy: &ReschedulePolicy{ - Attempts: 1, - Interval: 1 * time.Second}, - err: fmt.Errorf("Interval cannot be less than %v (got %v)", RestartPolicyMinInterval, time.Second), + Attempts: 10, + Interval: 30 * time.Minute, + Delay: 5 * time.Minute, + DelayCeiling: 40 * time.Minute, + DelayFunction: "exponential", + }, + errors: []error{ + fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ + "delay function %q, and delay ceiling %v", 3, 30*time.Minute, 5*time.Minute, "exponential", 40*time.Minute), + fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", 280*time.Minute, 10), + }, + }, + { + // Should suggest 8h as 
the interval + // Delay progression in minutes {20, 20, 40, 60, 80, 80, 80, 80, 80, 80} + desc: "Invalid Attempts - fibonacci delay", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 10, + Interval: 1 * time.Hour, + Delay: 20 * time.Minute, + DelayCeiling: 80 * time.Minute, + DelayFunction: "fibonacci", + }, + errors: []error{ + fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ + "delay function %q, and delay ceiling %v", 4, 1*time.Hour, 20*time.Minute, "fibonacci", 80*time.Minute), + fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", 480*time.Minute, 10), + }, + }, + { + desc: "Valid Unlimited config", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 1, + Unlimited: true, + DelayFunction: "exponential", + Delay: 5 * time.Minute, + DelayCeiling: 1 * time.Hour, + }, + }, + { + desc: "Invalid Unlimited config", + ReschedulePolicy: &ReschedulePolicy{ + Attempts: 1, + Interval: 1 * time.Second, + Unlimited: true, + DelayFunction: "exponential", + }, + errors: []error{ + fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, 0*time.Second), + fmt.Errorf("Delay Ceiling cannot be less than %v (got %v)", ReschedulePolicyMinDelay, 0*time.Second), + }, }, } - assert := assert.New(t) - for _, tc := range testCases { - if tc.err != nil { - assert.Contains(tc.ReschedulePolicy.Validate().Error(), tc.err.Error()) - } else { - assert.Nil(tc.err) - } + t.Run(tc.desc, func(t *testing.T) { + require := require.New(t) + gotErr := tc.ReschedulePolicy.Validate() + if tc.errors != nil { + // Validate all errors + for _, err := range tc.errors { + require.Contains(gotErr.Error(), err.Error()) + } + } else { + require.Nil(gotErr) + } + }) } } @@ -2719,7 +2852,7 @@ func TestAllocation_ShouldReschedule(t *testing.T) { ClientStatus: AllocClientStatusFailed, DesiredStatus: AllocDesiredStatusRun, FailTime: fail, - ReschedulePolicy: &ReschedulePolicy{0, 1 * time.Minute}, + ReschedulePolicy: &ReschedulePolicy{Attempts: 0, Interval: 1 * time.Minute}, ShouldReschedule: false, }, { @@ -2751,7 +2884,7 @@ func TestAllocation_ShouldReschedule(t *testing.T) { ClientStatus: AllocClientStatusComplete, DesiredStatus: AllocDesiredStatusRun, FailTime: fail, - ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute}, + ReschedulePolicy: &ReschedulePolicy{Attempts: 1, Interval: 1 * time.Minute}, ShouldReschedule: false, }, { @@ -2759,14 +2892,14 @@ func TestAllocation_ShouldReschedule(t *testing.T) { ClientStatus: AllocClientStatusFailed, DesiredStatus: AllocDesiredStatusRun, FailTime: fail, - ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute}, + ReschedulePolicy: &ReschedulePolicy{Attempts: 1, Interval: 1 * time.Minute}, ShouldReschedule: true, }, { Desc: "Reschedule with leftover attempts", ClientStatus: AllocClientStatusFailed, DesiredStatus: AllocDesiredStatusRun, - ReschedulePolicy: &ReschedulePolicy{2, 5 * time.Minute}, + ReschedulePolicy: &ReschedulePolicy{Attempts: 2, Interval: 5 * time.Minute}, FailTime: fail, RescheduleTrackers: []*RescheduleEvent{ { @@ -2780,7 +2913,7 @@ func TestAllocation_ShouldReschedule(t *testing.T) { ClientStatus: AllocClientStatusFailed, DesiredStatus: AllocDesiredStatusRun, FailTime: fail, - ReschedulePolicy: &ReschedulePolicy{1, 5 * time.Minute}, + ReschedulePolicy: &ReschedulePolicy{Attempts: 1, Interval: 5 * time.Minute}, RescheduleTrackers: []*RescheduleEvent{ { RescheduleTime: fail.Add(-6 * time.Minute).UTC().UnixNano(), @@ -2793,7 +2926,7 @@ func TestAllocation_ShouldReschedule(t *testing.T) { ClientStatus: 
AllocClientStatusFailed, DesiredStatus: AllocDesiredStatusRun, FailTime: fail, - ReschedulePolicy: &ReschedulePolicy{2, 5 * time.Minute}, + ReschedulePolicy: &ReschedulePolicy{Attempts: 2, Interval: 5 * time.Minute}, RescheduleTrackers: []*RescheduleEvent{ { RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(), From e65d870fe95c2a53565aa666a56f06b34bdbc806 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Fri, 23 Feb 2018 10:23:32 -0600 Subject: [PATCH 02/25] Add new reschedule options to API layer and unit tests --- api/jobs_test.go | 40 +++++++++--- api/search_test.go | 15 ++--- api/tasks.go | 42 +++++++++++-- api/tasks_test.go | 97 ++++++++++++++++++++++-------- command/agent/job_endpoint.go | 8 ++- command/agent/job_endpoint_test.go | 16 +++-- nomad/structs/structs.go | 3 +- 7 files changed, 168 insertions(+), 53 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index dc2dd351ffd..41fb411402a 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -136,8 +136,12 @@ func TestJobs_Canonicalize(t *testing.T) { Mode: helper.StringToPtr("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(2), - Interval: helper.TimeToPtr(1 * time.Hour), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr("exponential"), + Delay: helper.TimeToPtr(30 * time.Second), + DelayCeiling: helper.TimeToPtr(1 * time.Hour), + Unlimited: helper.BoolToPtr(true), }, Tasks: []*Task{ { @@ -202,8 +206,12 @@ func TestJobs_Canonicalize(t *testing.T) { Mode: helper.StringToPtr("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(2), - Interval: helper.TimeToPtr(1 * time.Hour), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr("exponential"), + Delay: helper.TimeToPtr(30 * time.Second), + DelayCeiling: helper.TimeToPtr(1 * time.Hour), + Unlimited: helper.BoolToPtr(true), }, Tasks: []*Task{ { @@ -335,8 +343,12 @@ func TestJobs_Canonicalize(t *testing.T) { Mode: helper.StringToPtr("delay"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(2), - Interval: helper.TimeToPtr(1 * time.Hour), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr("exponential"), + Delay: helper.TimeToPtr(30 * time.Second), + DelayCeiling: helper.TimeToPtr(1 * time.Hour), + Unlimited: helper.BoolToPtr(true), }, EphemeralDisk: &EphemeralDisk{ Sticky: helper.BoolToPtr(false), @@ -550,8 +562,12 @@ func TestJobs_Canonicalize(t *testing.T) { Mode: helper.StringToPtr("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(2), - Interval: helper.TimeToPtr(1 * time.Hour), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr("exponential"), + Delay: helper.TimeToPtr(30 * time.Second), + DelayCeiling: helper.TimeToPtr(1 * time.Hour), + Unlimited: helper.BoolToPtr(true), }, Update: &UpdateStrategy{ Stagger: helper.TimeToPtr(2 * time.Second), @@ -586,8 +602,12 @@ func TestJobs_Canonicalize(t *testing.T) { Mode: helper.StringToPtr("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(2), - Interval: helper.TimeToPtr(1 * time.Hour), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr("exponential"), + Delay: helper.TimeToPtr(30 * time.Second), + DelayCeiling: helper.TimeToPtr(1 * time.Hour), + Unlimited: helper.BoolToPtr(true), }, Update: &UpdateStrategy{ Stagger: helper.TimeToPtr(1 * time.Second), 
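Aside (not part of the patch): the canonicalized service-job defaults asserted in the test expectations above (Unlimited: true, DelayFunction: "exponential", Delay: 30s, DelayCeiling: 1h) are easier to read as the delay sequence they generate. The standalone sketch below only assumes the doubling-with-a-ceiling behavior documented in the ReschedulePolicy field comments; exponentialDelays is a hypothetical helper name, not a function from this patch.

package main

import (
	"fmt"
	"time"
)

// exponentialDelays returns the first n reschedule delays for an exponential
// policy: each delay doubles the previous one and is capped at ceiling.
func exponentialDelays(initial, ceiling time.Duration, n int) []time.Duration {
	out := make([]time.Duration, 0, n)
	d := initial
	for i := 0; i < n; i++ {
		out = append(out, d)
		d *= 2
		if d > ceiling {
			d = ceiling
		}
	}
	return out
}

func main() {
	// Service-job defaults asserted above: 30s initial delay, 1h ceiling.
	fmt.Println(exponentialDelays(30*time.Second, time.Hour, 10))
	// [30s 1m0s 2m0s 4m0s 8m0s 16m0s 32m0s 1h0m0s 1h0m0s 1h0m0s]
}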
diff --git a/api/search_test.go b/api/search_test.go index e62d9966745..892394c39eb 100644 --- a/api/search_test.go +++ b/api/search_test.go @@ -4,11 +4,11 @@ import ( "testing" "github.com/hashicorp/nomad/api/contexts" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestSearch_List(t *testing.T) { - assert := assert.New(t) + require := require.New(t) t.Parallel() c, s := makeClient(t, nil, nil) @@ -16,16 +16,17 @@ func TestSearch_List(t *testing.T) { job := testJob() _, _, err := c.Jobs().Register(job, nil) - assert.Nil(err) + require.Nil(err) id := *job.ID prefix := id[:len(id)-2] resp, qm, err := c.Search().PrefixSearch(prefix, contexts.Jobs, nil) - assert.Nil(err) - assert.NotNil(qm) + require.Nil(err) + require.NotNil(qm) + require.NotNil(qm) jobMatches := resp.Matches[contexts.Jobs] - assert.Equal(1, len(jobMatches)) - assert.Equal(id, jobMatches[0]) + require.Equal(1, len(jobMatches)) + require.Equal(id, jobMatches[0]) } diff --git a/api/tasks.go b/api/tasks.go index 69a8022f935..ef123fa06d5 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -86,6 +86,20 @@ type ReschedulePolicy struct { // Interval is a duration in which we can limit the number of reschedule attempts. Interval *time.Duration `mapstructure:"interval"` + + // Delay is a minimum duration to wait between reschedule attempts. + // The delay function determines how much subsequent reschedule attempts are delayed by. + Delay *time.Duration `mapstructure:"delay"` + + // DelayFunction determines how the delay progressively changes on subsequent reschedule + // attempts. Valid values are "exponential", "linear", and "fibonacci". + DelayFunction *string `mapstructure:"delay_function"` + + // DelayCeiling is an upper bound on the delay. 
+ DelayCeiling *time.Duration `mapstructure:"delay_ceiling"` + + // Unlimited allows rescheduling attempts until they succeed + Unlimited *bool `mapstructure:"unlimited"` } func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { @@ -95,6 +109,18 @@ func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { if rp.Attempts != nil { r.Attempts = rp.Attempts } + if rp.Delay != nil { + r.Delay = rp.Delay + } + if rp.DelayFunction != nil { + r.DelayFunction = rp.DelayFunction + } + if rp.DelayCeiling != nil { + r.DelayCeiling = rp.DelayCeiling + } + if rp.Unlimited != nil { + r.Unlimited = rp.Unlimited + } } func (r *ReschedulePolicy) Copy() *ReschedulePolicy { @@ -316,13 +342,21 @@ func (g *TaskGroup) Canonicalize(job *Job) { switch *job.Type { case "service": defaultReschedulePolicy = &ReschedulePolicy{ - Attempts: helper.IntToPtr(structs.DefaultServiceJobReschedulePolicy.Attempts), - Interval: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Interval), + Attempts: helper.IntToPtr(structs.DefaultServiceJobReschedulePolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Interval), + Delay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Delay), + DelayFunction: helper.StringToPtr(structs.DefaultServiceJobReschedulePolicy.DelayFunction), + DelayCeiling: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.DelayCeiling), + Unlimited: helper.BoolToPtr(structs.DefaultServiceJobReschedulePolicy.Unlimited), } case "batch": defaultReschedulePolicy = &ReschedulePolicy{ - Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts), - Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), + DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), + DelayCeiling: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.DelayCeiling), + Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), } default: defaultReschedulePolicy = &ReschedulePolicy{ diff --git a/api/tasks_test.go b/api/tasks_test.go index 37c47d51429..284b6e8a46e 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -284,70 +284,115 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { jobReschedulePolicy: nil, taskReschedulePolicy: nil, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts), - Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Attempts: helper.IntToPtr(structs.DefaultBatchJobReschedulePolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), + DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), + DelayCeiling: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.DelayCeiling), + Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), }, }, { desc: "Empty job reschedule policy", jobReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(0), - Interval: helper.TimeToPtr(0), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + Delay: helper.TimeToPtr(0), + DelayCeiling: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr(""), + Unlimited: 
helper.BoolToPtr(false), }, taskReschedulePolicy: nil, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(0), - Interval: helper.TimeToPtr(0), + Attempts: helper.IntToPtr(0), + Interval: helper.TimeToPtr(0), + Delay: helper.TimeToPtr(0), + DelayCeiling: helper.TimeToPtr(0), + DelayFunction: helper.StringToPtr(""), + Unlimited: helper.BoolToPtr(false), }, }, { desc: "Inherit from job", jobReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), - Interval: helper.TimeToPtr(20 * time.Second), + Attempts: helper.IntToPtr(1), + Interval: helper.TimeToPtr(20 * time.Second), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(10 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, taskReschedulePolicy: nil, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), - Interval: helper.TimeToPtr(20 * time.Second), + Attempts: helper.IntToPtr(1), + Interval: helper.TimeToPtr(20 * time.Second), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(10 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, }, { desc: "Set in task", jobReschedulePolicy: nil, taskReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(5), - Interval: helper.TimeToPtr(2 * time.Minute), + Attempts: helper.IntToPtr(5), + Interval: helper.TimeToPtr(2 * time.Minute), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(10 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(5), - Interval: helper.TimeToPtr(2 * time.Minute), + Attempts: helper.IntToPtr(5), + Interval: helper.TimeToPtr(2 * time.Minute), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(10 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, }, { desc: "Merge from job", jobReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), + Attempts: helper.IntToPtr(1), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(10 * time.Minute), }, taskReschedulePolicy: &ReschedulePolicy{ - Interval: helper.TimeToPtr(5 * time.Minute), + Interval: helper.TimeToPtr(5 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), - Interval: helper.TimeToPtr(5 * time.Minute), + Attempts: helper.IntToPtr(1), + Interval: helper.TimeToPtr(5 * time.Minute), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(10 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, }, { desc: "Override from group", jobReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), + Attempts: helper.IntToPtr(1), + DelayCeiling: helper.TimeToPtr(10 * time.Second), }, taskReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(5), + Attempts: helper.IntToPtr(5), + Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(20 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(5), - Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Attempts: helper.IntToPtr(5), + Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + 
Delay: helper.TimeToPtr(20 * time.Second), + DelayCeiling: helper.TimeToPtr(20 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Unlimited: helper.BoolToPtr(false), }, }, { @@ -357,8 +402,12 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { }, taskReschedulePolicy: nil, expected: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), - Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Attempts: helper.IntToPtr(1), + Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), + Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), + DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), + DelayCeiling: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.DelayCeiling), + Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), }, }, } diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index c661e4b0bdd..a059343ab33 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -639,8 +639,12 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) { } tg.ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: *taskGroup.ReschedulePolicy.Attempts, - Interval: *taskGroup.ReschedulePolicy.Interval, + Attempts: *taskGroup.ReschedulePolicy.Attempts, + Interval: *taskGroup.ReschedulePolicy.Interval, + Delay: *taskGroup.ReschedulePolicy.Delay, + DelayFunction: *taskGroup.ReschedulePolicy.DelayFunction, + DelayCeiling: *taskGroup.ReschedulePolicy.DelayCeiling, + Unlimited: *taskGroup.ReschedulePolicy.Unlimited, } tg.EphemeralDisk = &structs.EphemeralDisk{ diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 2cf2e6fa8a7..ccc58276a3b 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -1172,8 +1172,12 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Mode: helper.StringToPtr("delay"), }, ReschedulePolicy: &api.ReschedulePolicy{ - Interval: helper.TimeToPtr(12 * time.Hour), - Attempts: helper.IntToPtr(5), + Interval: helper.TimeToPtr(12 * time.Hour), + Attempts: helper.IntToPtr(5), + DelayFunction: helper.StringToPtr("linear"), + Delay: helper.TimeToPtr(30 * time.Second), + Unlimited: helper.BoolToPtr(true), + DelayCeiling: helper.TimeToPtr(20 * time.Minute), }, EphemeralDisk: &api.EphemeralDisk{ SizeMB: helper.IntToPtr(100), @@ -1384,8 +1388,12 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Mode: "delay", }, ReschedulePolicy: &structs.ReschedulePolicy{ - Interval: 12 * time.Hour, - Attempts: 5, + Interval: 12 * time.Hour, + Attempts: 5, + DelayFunction: "linear", + Delay: 30 * time.Second, + Unlimited: true, + DelayCeiling: 20 * time.Minute, }, EphemeralDisk: &structs.EphemeralDisk{ SizeMB: 100, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index ea961271444..41deedf205b 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2773,8 +2773,7 @@ type ReschedulePolicy struct { // DelayCeiling is an upper bound on the delay. DelayCeiling time.Duration - // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set between reschedule - // attempts. 
+ // Unlimited allows rescheduling attempts until they succeed Unlimited bool } From 02741177e23dc38d250d90a039bfe0f89baa4d62 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Fri, 23 Feb 2018 12:57:29 -0600 Subject: [PATCH 03/25] Formatting and linting fixes --- nomad/structs/structs_test.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index f3dc8bda035..3614a135182 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -2497,7 +2497,8 @@ func TestReschedulePolicy_Validate(t *testing.T) { Delay: 15 * time.Second, DelayCeiling: 5 * time.Second}, errors: []error{ - fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", 15*time.Second, 5*time.Second), + fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", + 15*time.Second, 5*time.Second), }, }, { @@ -2520,8 +2521,10 @@ func TestReschedulePolicy_Validate(t *testing.T) { DelayFunction: "linear", }, errors: []error{ - fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and delay function %q", 3, time.Hour, 20*time.Minute, "linear"), - fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", 200*time.Minute, 10), + fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and"+ + " delay function %q", 3, time.Hour, 20*time.Minute, "linear"), + fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", + 200*time.Minute, 10), }, }, { @@ -2537,8 +2540,10 @@ func TestReschedulePolicy_Validate(t *testing.T) { }, errors: []error{ fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ - "delay function %q, and delay ceiling %v", 3, 30*time.Minute, 5*time.Minute, "exponential", 40*time.Minute), - fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", 280*time.Minute, 10), + "delay function %q, and delay ceiling %v", 3, 30*time.Minute, 5*time.Minute, + "exponential", 40*time.Minute), + fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", + 280*time.Minute, 10), }, }, { @@ -2554,8 +2559,10 @@ func TestReschedulePolicy_Validate(t *testing.T) { }, errors: []error{ fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ - "delay function %q, and delay ceiling %v", 4, 1*time.Hour, 20*time.Minute, "fibonacci", 80*time.Minute), - fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", 480*time.Minute, 10), + "delay function %q, and delay ceiling %v", 4, 1*time.Hour, 20*time.Minute, + "fibonacci", 80*time.Minute), + fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", + 480*time.Minute, 10), }, }, { From aecf3f751a3d13d26c7378f0c8478362b8a83d82 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 26 Feb 2018 16:57:44 -0600 Subject: [PATCH 04/25] Fix formatting --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 41deedf205b..e5d19c4927b 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2824,7 +2824,7 @@ func (r *ReschedulePolicy) Validate() error { } - //Validate Interval and other delay parameters if attempts are limited + // Validate Interval and other delay parameters if attempts are limited if !r.Unlimited { if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 
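Aside (not part of the patch): the "Nomad can only make N attempts" expectations reformatted in the test above follow directly from the arithmetic in viableAttempts. For a linear delay, only Interval/Delay attempts fit inside one interval, and Attempts*Delay is the recommended interval. The standalone sketch below reproduces the "Invalid Attempts - linear delay" test case with plain arithmetic; it is an illustration, not the patch's implementation.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Mirrors the linear-delay test case: Attempts 10, Interval 1h, Delay 20m.
	attempts := 10
	interval := time.Hour
	delay := 20 * time.Minute

	possible := int(interval / delay)              // 3 attempts fit in a 1h interval
	recommended := time.Duration(attempts) * delay // 200 minutes needed for 10 attempts

	fmt.Printf("possible=%d recommended=%v\n", possible, recommended)
	// possible=3 recommended=3h20m0s
}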
From 9b609c73c5dbae5c6b305cd3fee89588ac07b176 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 1 Mar 2018 18:16:01 -0600 Subject: [PATCH 05/25] Add new reschedule stanza fields to list of valid fields --- jobspec/parse.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/jobspec/parse.go b/jobspec/parse.go index 53dc9c5fca6..20e06016cb3 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -446,6 +446,10 @@ func parseReschedulePolicy(final **api.ReschedulePolicy, list *ast.ObjectList) e valid := []string{ "attempts", "interval", + "unlimited", + "delay", + "delay_ceiling", + "delay_function", } if err := helper.CheckHCLKeys(obj.Val, valid); err != nil { return err From 447b5a872d9f173e0a6d428a815faa4f3489eb31 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 1 Mar 2018 18:20:09 -0600 Subject: [PATCH 06/25] Added FollowupEvalID field and helper methods to calculate reschedule eligibility based on delay --- api/allocations.go | 2 + nomad/structs/structs.go | 124 +++++++- nomad/structs/structs_test.go | 533 +++++++++++++++++++++++++++++++++- 3 files changed, 642 insertions(+), 17 deletions(-) diff --git a/api/allocations.go b/api/allocations.go index 42b5c8636b3..68047ee5b46 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -86,6 +86,7 @@ type Allocation struct { TaskStates map[string]*TaskState DeploymentID string DeploymentStatus *AllocDeploymentStatus + FollowupEvalID string PreviousAllocation string NextAllocation string RescheduleTracker *RescheduleTracker @@ -129,6 +130,7 @@ type AllocationListStub struct { TaskStates map[string]*TaskState DeploymentStatus *AllocDeploymentStatus RescheduleTracker *RescheduleTracker + FollowupEvalID string CreateIndex uint64 ModifyIndex uint64 CreateTime int64 diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e5d19c4927b..b51428e3fd1 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2773,7 +2773,8 @@ type ReschedulePolicy struct { // DelayCeiling is an upper bound on the delay. DelayCeiling time.Duration - // Unlimited allows rescheduling attempts until they succeed + // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set + // between reschedule attempts. 
Unlimited bool } @@ -2824,7 +2825,7 @@ func (r *ReschedulePolicy) Validate() error { } - // Validate Interval and other delay parameters if attempts are limited + //Validate Interval and other delay parameters if attempts are limited if !r.Unlimited { if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) @@ -5237,12 +5238,16 @@ type RescheduleEvent struct { // PrevNodeID is the node ID of the previous allocation PrevNodeID string + + // Delay is the reschedule delay associated with the attempt + Delay time.Duration } -func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string) *RescheduleEvent { +func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { return &RescheduleEvent{RescheduleTime: rescheduleTime, PrevAllocID: prevAllocID, - PrevNodeID: prevNodeID} + PrevNodeID: prevNodeID, + Delay: delay} } func (re *RescheduleEvent) Copy() *RescheduleEvent { @@ -5338,6 +5343,13 @@ type Allocation struct { // given deployment DeploymentStatus *AllocDeploymentStatus + // RescheduleTrackers captures details of previous reschedule attempts of the allocation + RescheduleTracker *RescheduleTracker + + // FollowupEvalID captures a follow up evaluation created to handle a failed allocation + // that can be rescheduled in the future + FollowupEvalID string + // Raft Indexes CreateIndex uint64 ModifyIndex uint64 @@ -5352,9 +5364,6 @@ type Allocation struct { // ModifyTime is the time the allocation was last updated. ModifyTime int64 - - // RescheduleTrackers captures details of previous reschedule attempts of the allocation - RescheduleTracker *RescheduleTracker } // Index returns the index of the allocation. If the allocation is from a task @@ -5461,11 +5470,11 @@ func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, fail } attempts := reschedulePolicy.Attempts interval := reschedulePolicy.Interval - - if attempts == 0 { + enabled := attempts > 0 || reschedulePolicy.Unlimited + if !enabled { return false } - if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 { + if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 || reschedulePolicy.Unlimited { return true } attempted := 0 @@ -5479,6 +5488,84 @@ func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, fail return attempted < attempts } +// LastEventTime is the time of the last task event in the allocation. +// It is used to determine allocation failure time. 
+func (a *Allocation) LastEventTime() time.Time { + var lastEventTime time.Time + if a.TaskStates != nil { + for _, e := range a.TaskStates { + if lastEventTime.IsZero() || e.FinishedAt.After(lastEventTime) { + lastEventTime = e.FinishedAt + } + } + } + return lastEventTime +} + +// NextRescheduleTime returns a time delta after which the allocation is eligible to be rescheduled +// and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules +func (a *Allocation) NextRescheduleTime(reschedulePolicy *ReschedulePolicy) (time.Time, bool) { + failTime := a.LastEventTime() + if a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() { + return time.Time{}, false + } + nextDelay := a.NextDelay(reschedulePolicy) + nextRescheduleTime := failTime.Add(nextDelay) + rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil) + if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil { + // Check for eligibility based on the interval if max attempts is set + attempted := 0 + for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { + lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime + timeDiff := failTime.UTC().UnixNano() - lastAttempt + if timeDiff < reschedulePolicy.Interval.Nanoseconds() { + attempted += 1 + } + } + rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval + } + return nextRescheduleTime, rescheduleEligible +} + +func (a *Allocation) NextDelay(policy *ReschedulePolicy) time.Duration { + delayDur := policy.Delay + if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { + return delayDur + } + events := a.RescheduleTracker.Events + switch policy.DelayFunction { + case "exponential": + delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2 + case "fibonacci": + if len(events) >= 2 { + fibN1Delay := events[len(events)-1].Delay + fibN2Delay := events[len(events)-2].Delay + // Handle reset of delay ceiling which should cause + // a new series to start + if fibN2Delay == policy.DelayCeiling && fibN1Delay == policy.Delay { + delayDur = fibN1Delay + } else { + delayDur = fibN1Delay + fibN2Delay + } + } + default: + return delayDur + } + if policy.DelayCeiling > 0 && delayDur > policy.DelayCeiling { + delayDur = policy.DelayCeiling + // check if delay needs to be reset + if len(a.RescheduleTracker.Events) > 0 { + lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] + timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime + if timeDiff > delayDur.Nanoseconds() { + delayDur = policy.Delay + } + } + } + + return delayDur +} + // Terminated returns if the allocation is in a terminal state on a client. func (a *Allocation) Terminated() bool { if a.ClientStatus == AllocClientStatusFailed || @@ -5583,10 +5670,12 @@ type AllocListStub struct { ClientDescription string TaskStates map[string]*TaskState DeploymentStatus *AllocDeploymentStatus - CreateIndex uint64 - ModifyIndex uint64 - CreateTime int64 - ModifyTime int64 + + FollowupEvalID string + CreateIndex uint64 + ModifyIndex uint64 + CreateTime int64 + ModifyTime int64 } // SetEventDisplayMessage populates the display message if its not already set, @@ -5868,9 +5957,14 @@ type Evaluation struct { StatusDescription string // Wait is a minimum wait time for running the eval. 
This is used to - // support a rolling upgrade. + // support a rolling upgrade in versions prior to 0.7.0 + // Deprecated Wait time.Duration + // WaitUntil is the time when this eval should be run. This is used to + // supported delayed rescheduling of failed allocations + WaitUntil time.Time + // NextEval is the evaluation ID for the eval created to do a followup. // This is used to support rolling upgrades, where we need a chain of evaluations. NextEval string diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 3614a135182..50c0ea0ade3 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -2961,6 +2961,529 @@ func TestAllocation_ShouldReschedule(t *testing.T) { } } +func TestAllocation_LastEventTime(t *testing.T) { + type testCase struct { + desc string + taskState map[string]*TaskState + expectedLastEventTime time.Time + } + var timeZero time.Time + + t1 := time.Now() + + testCases := []testCase{ + { + desc: "nil task state", + expectedLastEventTime: timeZero, + }, + { + desc: "empty task state", + taskState: make(map[string]*TaskState), + expectedLastEventTime: timeZero, + }, + { + desc: "Finished At not set", + taskState: map[string]*TaskState{"foo": {State: "start", + StartedAt: t1.Add(-2 * time.Hour)}}, + expectedLastEventTime: timeZero, + }, + { + desc: "One finished event", + taskState: map[string]*TaskState{"foo": {State: "start", + StartedAt: t1.Add(-2 * time.Hour), + FinishedAt: t1.Add(-1 * time.Hour)}}, + expectedLastEventTime: t1.Add(-1 * time.Hour), + }, + { + desc: "Multiple events", + taskState: map[string]*TaskState{"foo": {State: "start", + StartedAt: t1.Add(-2 * time.Hour), + FinishedAt: t1.Add(-1 * time.Hour)}, + "bar": {State: "start", + StartedAt: t1.Add(-2 * time.Hour), + FinishedAt: t1.Add(-40 * time.Minute)}}, + expectedLastEventTime: t1.Add(-40 * time.Minute), + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + alloc := &Allocation{} + alloc.TaskStates = tc.taskState + require.Equal(t, tc.expectedLastEventTime, alloc.LastEventTime()) + }) + } +} + +func TestAllocation_NextDelay(t *testing.T) { + type testCase struct { + desc string + reschedulePolicy *ReschedulePolicy + alloc *Allocation + expectedRescheduleTime time.Time + expectedRescheduleEligible bool + } + now := time.Now() + testCases := []testCase{ + { + desc: "Allocation hasn't failed yet", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "linear", + Delay: 5 * time.Second, + }, + alloc: &Allocation{}, + expectedRescheduleTime: time.Time{}, + expectedRescheduleEligible: false, + }, + { + desc: "Allocation lacks task state", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "linear", + Delay: 5 * time.Second, + }, + alloc: &Allocation{ClientStatus: AllocClientStatusFailed}, + expectedRescheduleTime: time.Time{}, + expectedRescheduleEligible: false, + }, + { + desc: "linear delay, unlimited restarts, no reschedule tracker", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "linear", + Delay: 5 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + }, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "linear delay with reschedule tracker", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "linear", + Delay: 5 * time.Second, + Interval: 
10 * time.Minute, + Attempts: 2, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{{ + RescheduleTime: now.Add(-2 * time.Minute).UTC().UnixNano(), + Delay: 5 * time.Second, + }}, + }}, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "linear delay with reschedule tracker, attempts exhausted", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "linear", + Delay: 5 * time.Second, + Interval: 10 * time.Minute, + Attempts: 2, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-3 * time.Minute).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-2 * time.Minute).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(5 * time.Second), + expectedRescheduleEligible: false, + }, + { + desc: "exponential delay - no reschedule tracker", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "exponential", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + }, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "exponential delay with reschedule tracker", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "exponential", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 20 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(40 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "exponential delay with delay ceiling reached", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "exponential", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-15 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 20 * time.Second, + }, 
+ { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 40 * time.Second, + }, + { + RescheduleTime: now.Add(-40 * time.Second).UTC().UnixNano(), + Delay: 80 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-15 * time.Second).Add(90 * time.Second), + expectedRescheduleEligible: true, + }, + { + // Test case where most recent reschedule ran longer than delay ceiling + desc: "exponential delay, delay ceiling reset condition met", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "exponential", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-15 * time.Minute)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 20 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 40 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 80 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 90 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 90 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-15 * time.Minute).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "fibonacci delay - no reschedule tracker", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "fibonacci", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}}, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "fibonacci delay with reschedule tracker", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "fibonacci", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-5 * time.Second).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(10 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "fibonacci delay with more events", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "fibonacci", + Delay: 5 * time.Second, + DelayCeiling: 90 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-2 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * 
time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 15 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 25 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-2 * time.Second).Add(40 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "fibonacci delay with delay ceiling reached", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "fibonacci", + Delay: 5 * time.Second, + DelayCeiling: 50 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-15 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 15 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 25 * time.Second, + }, + { + RescheduleTime: now.Add(-40 * time.Second).UTC().UnixNano(), + Delay: 40 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-15 * time.Second).Add(50 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "fibonacci delay with delay reset condition met", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "fibonacci", + Delay: 5 * time.Second, + DelayCeiling: 50 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-5 * time.Minute)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 15 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 25 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 40 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-5 * time.Minute).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + { + desc: "fibonacci delay with the most recent event that reset delay value", + reschedulePolicy: &ReschedulePolicy{ + DelayFunction: "fibonacci", + Delay: 5 * time.Second, + DelayCeiling: 50 * time.Second, + Unlimited: true, + }, + alloc: &Allocation{ + ClientStatus: AllocClientStatusFailed, + TaskStates: map[string]*TaskState{"foo": {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-5 * time.Second)}}, + RescheduleTracker: &RescheduleTracker{ + Events: []*RescheduleEvent{ + { + RescheduleTime: now.Add(-2 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + 
}, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 10 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 15 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 25 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 40 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + Delay: 50 * time.Second, + }, + { + RescheduleTime: now.Add(-1 * time.Minute).UTC().UnixNano(), + Delay: 5 * time.Second, + }, + }, + }}, + expectedRescheduleTime: now.Add(-5 * time.Second).Add(5 * time.Second), + expectedRescheduleEligible: true, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + require := require.New(t) + reschedTime, allowed := tc.alloc.NextRescheduleTime(tc.reschedulePolicy) + require.Equal(tc.expectedRescheduleEligible, allowed) + require.Equal(tc.expectedRescheduleTime, reschedTime) + }) + } + +} + func TestRescheduleTracker_Copy(t *testing.T) { type testCase struct { original *RescheduleTracker @@ -2970,9 +3493,15 @@ func TestRescheduleTracker_Copy(t *testing.T) { cases := []testCase{ {nil, nil}, {&RescheduleTracker{Events: []*RescheduleEvent{ - {2, "12", "12"}, + {RescheduleTime: 2, + PrevAllocID: "12", + PrevNodeID: "12", + Delay: 30 * time.Second}, }}, &RescheduleTracker{Events: []*RescheduleEvent{ - {2, "12", "12"}, + {RescheduleTime: 2, + PrevAllocID: "12", + PrevNodeID: "12", + Delay: 30 * time.Second}, }}}, } From 9628454d7a29c6e17ba0669ae51eef55369b320b Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 1 Mar 2018 18:23:44 -0600 Subject: [PATCH 07/25] Scheduler and Reconciler changes to support delayed rescheduling --- scheduler/generic_sched.go | 58 +++- scheduler/generic_sched_test.go | 492 +++++++++++++++++++++++++++++++- scheduler/reconcile.go | 117 +++++++- scheduler/reconcile_test.go | 302 +++++++++++++++++++- scheduler/reconcile_util.go | 57 ++-- 5 files changed, 979 insertions(+), 47 deletions(-) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 5830c5d11cd..994c782c62b 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -72,8 +72,10 @@ type GenericScheduler struct { ctx *EvalContext stack *GenericStack + // Deprecated, was used in pre Nomad 0.7 rolling update stanza followupEvalWait time.Duration nextEval *structs.Evaluation + followUpEvals []*structs.Evaluation deployment *structs.Deployment @@ -261,6 +263,19 @@ func (s *GenericScheduler) process() (bool, error) { s.logger.Printf("[DEBUG] sched: %#v: rolling migration limit reached, next eval '%s' created", s.eval, s.nextEval.ID) } + // Create follow up evals for any delayed reschedule eligible allocations + if len(s.followUpEvals) > 0 { + for _, eval := range s.followUpEvals { + eval.PreviousEval = s.eval.ID + // TODO(preetha) this should be batching evals before inserting them + if err := s.planner.CreateEval(eval); err != nil { + s.logger.Printf("[ERR] sched: %#v failed to make next eval for rescheduling: %v", s.eval, err) + return false, err + } + s.logger.Printf("[DEBUG] sched: %#v: found reschedulable allocs, next eval '%s' created", s.eval, eval.ID) + } + } + // Submit the plan and store the results. 
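Note on the NextRescheduleTime cases earlier in this patch series: the expected values pin down how each DelayFunction grows the delay between reschedule attempts — "linear" repeats the base Delay, "exponential" doubles the previous delay, and "fibonacci" sums the previous two, capped at DelayCeiling. The standalone sketch below reproduces that progression; it uses a local policy type rather than structs.ReschedulePolicy and is only an illustration of the expectations encoded in the tests, not the structs.Allocation implementation (which also resets the delay to the base value when the previous run outlived the ceiling).

```go
package main

import (
	"fmt"
	"time"
)

// reschedulePolicy mirrors only the fields this sketch needs; it is a stand-in,
// not the real structs.ReschedulePolicy.
type reschedulePolicy struct {
	Delay         time.Duration
	DelayCeiling  time.Duration
	DelayFunction string
}

// nextDelay computes the next reschedule delay implied by the test expectations
// above, given the delays recorded in prior reschedule events. The "ran long
// enough to reset the delay" behaviour exercised by some cases is intentionally
// not modeled here.
func nextDelay(p reschedulePolicy, prev []time.Duration) time.Duration {
	if len(prev) == 0 {
		return p.Delay
	}
	last := prev[len(prev)-1]
	var d time.Duration
	switch p.DelayFunction {
	case "exponential":
		d = 2 * last
	case "fibonacci":
		if len(prev) == 1 {
			d = last // the second attempt repeats the base delay
		} else {
			d = last + prev[len(prev)-2]
		}
	default: // "linear"
		d = p.Delay
	}
	if p.DelayCeiling > 0 && d > p.DelayCeiling {
		d = p.DelayCeiling
	}
	return d
}

func main() {
	p := reschedulePolicy{Delay: 5 * time.Second, DelayCeiling: 90 * time.Second, DelayFunction: "fibonacci"}
	history := []time.Duration{5 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second, 25 * time.Second}
	// Matches the "fibonacci delay with more events" case: 15s + 25s = 40s.
	fmt.Println(nextDelay(p, history))
}
```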
result, newState, err := s.planner.SubmitPlan(s.plan) s.planResult = result @@ -336,6 +351,12 @@ func (s *GenericScheduler) computeJobAllocs() error { // follow up eval to handle node draining. s.followupEvalWait = results.followupEvalWait + // Store all the follow up evaluations from rescheduled allocations + if len(results.desiredFollowupEvals) > 0 { + for _, evals := range results.desiredFollowupEvals { + s.followUpEvals = append(s.followUpEvals, evals...) + } + } // Update the stored deployment if results.deployment != nil { s.deployment = results.deployment @@ -467,7 +488,7 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul if prevAllocation != nil { alloc.PreviousAllocation = prevAllocation.ID if missing.IsRescheduling() { - updateRescheduleTracker(alloc, prevAllocation) + updateRescheduleTracker(alloc, prevAllocation, tg.ReschedulePolicy, time.Now()) } } @@ -523,15 +544,42 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs return selectOptions } +const max_past_reschedule_events = 5 + // updateRescheduleTracker carries over previous restart attempts and adds the most recent restart -func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation) { +func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation, reschedPolicy *structs.ReschedulePolicy, now time.Time) { var rescheduleEvents []*structs.RescheduleEvent if prev.RescheduleTracker != nil { - for _, reschedEvent := range prev.RescheduleTracker.Events { - rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy()) + var interval time.Duration + if reschedPolicy != nil { + interval = reschedPolicy.Interval + } + // if attempts is set copy all events in the interval range + if reschedPolicy.Attempts > 0 { + for _, reschedEvent := range prev.RescheduleTracker.Events { + timeDiff := time.Now().UTC().UnixNano() - reschedEvent.RescheduleTime + // Only copy over events that are within restart interval + // This keeps the list of events small in cases where there's a long chain of old restart events + if interval > 0 && timeDiff <= interval.Nanoseconds() { + rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy()) + } + } + } else { + // for unlimited restarts, only copy the last n + copied := 0 + for i := len(prev.RescheduleTracker.Events) - 1; i >= 0 && copied < max_past_reschedule_events; i-- { + reschedEvent := prev.RescheduleTracker.Events[i] + rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy()) + copied++ + } + // reverse it to get the correct order + for left, right := 0, len(rescheduleEvents)-1; left < right; left, right = left+1, right-1 { + rescheduleEvents[left], rescheduleEvents[right] = rescheduleEvents[right], rescheduleEvents[left] + } } } - rescheduleEvent := structs.NewRescheduleEvent(time.Now().UTC().UnixNano(), prev.ID, prev.NodeID) + nextDelay := prev.NextDelay(reschedPolicy) + rescheduleEvent := structs.NewRescheduleEvent(now.UnixNano(), prev.ID, prev.NodeID, nextDelay) rescheduleEvents = append(rescheduleEvents, rescheduleEvent) alloc.RescheduleTracker = &structs.RescheduleTracker{Events: rescheduleEvents} } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 1443a231415..ee7a775bcfd 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -2715,7 +2715,7 @@ func TestServiceSched_RetryLimit(t *testing.T) { h.AssertEvalStatus(t, structs.EvalStatusFailed) } -func TestServiceSched_Reschedule_Once(t *testing.T) { +func 
TestServiceSched_Reschedule_OnceNow(t *testing.T) { h := NewHarness(t) // Create some nodes @@ -2730,9 +2730,15 @@ func TestServiceSched_Reschedule_Once(t *testing.T) { job := mock.Job() job.TaskGroups[0].Count = 2 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: 1, - Interval: 15 * time.Minute, + Attempts: 1, + Interval: 15 * time.Minute, + Delay: 5 * time.Second, + DelayCeiling: 1 * time.Minute, + DelayFunction: "linear", } + tgName := job.TaskGroups[0].Name + now := time.Now() + noErr(t, h.State.UpsertJob(h.NextIndex(), job)) var allocs []*structs.Allocation @@ -2746,6 +2752,9 @@ func TestServiceSched_Reschedule_Once(t *testing.T) { } // Mark one of the allocations as failed allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} failedAllocID := allocs[1].ID successAllocID := allocs[0].ID @@ -2817,7 +2826,96 @@ func TestServiceSched_Reschedule_Once(t *testing.T) { } -func TestServiceSched_Reschedule_Multiple(t *testing.T) { +// Tests that alloc reschedulable at a future time creates a follow up eval +func TestServiceSched_Reschedule_Later(t *testing.T) { + h := NewHarness(t) + require := require.New(t) + // Create some nodes + var nodes []*structs.Node + for i := 0; i < 10; i++ { + node := mock.Node() + nodes = append(nodes, node) + noErr(t, h.State.UpsertNode(h.NextIndex(), node)) + } + + // Generate a fake job with allocations and an update policy. + job := mock.Job() + job.TaskGroups[0].Count = 2 + delayDuration := 15 * time.Second + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 15 * time.Minute, + Delay: delayDuration, + DelayCeiling: 1 * time.Minute, + DelayFunction: "linear", + } + tgName := job.TaskGroups[0].Name + now := time.Now() + + noErr(t, h.State.UpsertJob(h.NextIndex(), job)) + + var allocs []*structs.Allocation + for i := 0; i < 2; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = nodes[i].ID + alloc.Name = fmt.Sprintf("my-job.web[%d]", i) + allocs = append(allocs, alloc) + } + // Mark one of the allocations as failed + allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now}} + failedAllocID := allocs[1].ID + + noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) + + // Create a mock evaluation + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewServiceScheduler, eval) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Ensure multiple plans + if len(h.Plans) == 0 { + t.Fatalf("bad: %#v", h.Plans) + } + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + noErr(t, err) + + // Verify no new allocs were created + require.Equal(2, len(out)) + + // Verify follow up eval was created for the failed alloc + alloc, err := h.State.AllocByID(ws, failedAllocID) + require.Nil(err) + require.NotEmpty(alloc.FollowupEvalID) + + // Ensure there is a follow up eval. 
+ if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusPending { + t.Fatalf("bad: %#v", h.CreateEvals) + } + followupEval := h.CreateEvals[0] + require.Equal(now.Add(delayDuration), followupEval.WaitUntil) +} + +func TestServiceSched_Reschedule_MultipleNow(t *testing.T) { h := NewHarness(t) // Create some nodes @@ -2833,9 +2931,14 @@ func TestServiceSched_Reschedule_Multiple(t *testing.T) { job := mock.Job() job.TaskGroups[0].Count = 2 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ - Attempts: maxRestartAttempts, - Interval: 30 * time.Minute, + Attempts: maxRestartAttempts, + Interval: 30 * time.Minute, + Delay: 5 * time.Second, + DelayFunction: "linear", } + tgName := job.TaskGroups[0].Name + now := time.Now() + noErr(t, h.State.UpsertJob(h.NextIndex(), job)) var allocs []*structs.Allocation @@ -2850,6 +2953,9 @@ func TestServiceSched_Reschedule_Multiple(t *testing.T) { } // Mark one of the allocations as failed allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) @@ -2915,6 +3021,9 @@ func TestServiceSched_Reschedule_Multiple(t *testing.T) { // Mark this alloc as failed again newAlloc.ClientStatus = structs.AllocClientStatusFailed + newAlloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", + StartedAt: now.Add(-12 * time.Second), + FinishedAt: now.Add(-10 * time.Second)}} failedAllocId = newAlloc.ID failedNodeID = newAlloc.NodeID @@ -2946,6 +3055,136 @@ func TestServiceSched_Reschedule_Multiple(t *testing.T) { assert.Equal(5, len(out)) // 2 original, plus 3 reschedule attempts } +// Tests that old reschedule attempts are pruned +func TestServiceSched_Reschedule_PruneEvents(t *testing.T) { + h := NewHarness(t) + + // Create some nodes + var nodes []*structs.Node + for i := 0; i < 10; i++ { + node := mock.Node() + nodes = append(nodes, node) + noErr(t, h.State.UpsertNode(h.NextIndex(), node)) + } + + // Generate a fake job with allocations and an update policy. 
+ job := mock.Job() + job.TaskGroups[0].Count = 2 + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + DelayFunction: "exponential", + DelayCeiling: 1 * time.Hour, + Delay: 5 * time.Second, + Unlimited: true, + } + noErr(t, h.State.UpsertJob(h.NextIndex(), job)) + + var allocs []*structs.Allocation + for i := 0; i < 2; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = nodes[i].ID + alloc.Name = fmt.Sprintf("my-job.web[%d]", i) + allocs = append(allocs, alloc) + } + now := time.Now() + // Mark allocations as failed with restart info + allocs[1].TaskStates = map[string]*structs.TaskState{job.TaskGroups[0].Name: {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-15 * time.Minute)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed + + allocs[1].RescheduleTracker = &structs.RescheduleTracker{ + Events: []*structs.RescheduleEvent{ + {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + Delay: 5 * time.Second, + }, + {RescheduleTime: now.Add(-40 * time.Minute).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + Delay: 10 * time.Second, + }, + {RescheduleTime: now.Add(-30 * time.Minute).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + Delay: 20 * time.Second, + }, + {RescheduleTime: now.Add(-20 * time.Minute).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + Delay: 40 * time.Second, + }, + {RescheduleTime: now.Add(-10 * time.Minute).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + Delay: 80 * time.Second, + }, + {RescheduleTime: now.Add(-3 * time.Minute).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + Delay: 160 * time.Second, + }, + }, + } + expectedFirstRescheduleEvent := allocs[1].RescheduleTracker.Events[1] + expectedDelay := 320 * time.Second + failedAllocID := allocs[1].ID + successAllocID := allocs[0].ID + + noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) + + // Create a mock evaluation + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewServiceScheduler, eval) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Ensure multiple plans + if len(h.Plans) == 0 { + t.Fatalf("bad: %#v", h.Plans) + } + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + noErr(t, err) + + // Verify that one new allocation got created with its restart tracker info + assert := assert.New(t) + assert.Equal(3, len(out)) + var newAlloc *structs.Allocation + for _, alloc := range out { + if alloc.ID != successAllocID && alloc.ID != failedAllocID { + newAlloc = alloc + } + } + + assert.Equal(failedAllocID, newAlloc.PreviousAllocation) + // Verify that the new alloc copied the last 5 reschedule attempts + assert.Equal(6, len(newAlloc.RescheduleTracker.Events)) + assert.Equal(expectedFirstRescheduleEvent, newAlloc.RescheduleTracker.Events[0]) + + mostRecentRescheduleEvent := newAlloc.RescheduleTracker.Events[5] + // Verify that the failed alloc ID is in the most recent reschedule event + assert.Equal(failedAllocID, 
mostRecentRescheduleEvent.PrevAllocID) + // Verify that the delay value was captured correctly + assert.Equal(expectedDelay, mostRecentRescheduleEvent.Delay) + +} + // Tests that deployments with failed allocs don't result in placements func TestDeployment_FailedAllocs_NoReschedule(t *testing.T) { h := NewHarness(t) @@ -3079,6 +3318,9 @@ func TestBatchSched_Run_FailedAlloc(t *testing.T) { job.TaskGroups[0].Count = 1 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) + tgName := job.TaskGroups[0].Name + now := time.Now() + // Create a failed alloc alloc := mock.Alloc() alloc.Job = job @@ -3086,6 +3328,9 @@ func TestBatchSched_Run_FailedAlloc(t *testing.T) { alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) // Create a mock evaluation to register the job @@ -3231,6 +3476,9 @@ func TestBatchSched_Run_FailedAllocQueuedAllocations(t *testing.T) { job.TaskGroups[0].Count = 1 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) + tgName := job.TaskGroups[0].Name + now := time.Now() + // Create a failed alloc alloc := mock.Alloc() alloc.Job = job @@ -3238,6 +3486,9 @@ func TestBatchSched_Run_FailedAllocQueuedAllocations(t *testing.T) { alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" alloc.ClientStatus = structs.AllocClientStatusFailed + alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) // Create a mock evaluation to register the job @@ -3963,3 +4214,232 @@ func TestServiceSched_CancelDeployment_NewerJob(t *testing.T) { h.AssertEvalStatus(t, structs.EvalStatusComplete) } + +// Various table driven tests for carry forward +// of past reschedule events +func Test_updateRescheduleTracker(t *testing.T) { + + t1 := time.Now().UTC() + alloc := mock.Alloc() + prevAlloc := mock.Alloc() + + type testCase struct { + desc string + prevAllocEvents []*structs.RescheduleEvent + reschedPolicy *structs.ReschedulePolicy + expectedRescheduleEvents []*structs.RescheduleEvent + reschedTime time.Time + } + + testCases := []testCase{ + { + desc: "No past events", + prevAllocEvents: nil, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second}, + reschedTime: t1, + expectedRescheduleEvents: []*structs.RescheduleEvent{{t1.UnixNano(), prevAlloc.ID, prevAlloc.NodeID, 5 * time.Second}}, + }, + { + desc: "one past event, linear delay", + prevAllocEvents: []*structs.RescheduleEvent{ + {RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second}}, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second}, + reschedTime: t1, + expectedRescheduleEvents: []*structs.RescheduleEvent{ + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + { + RescheduleTime: t1.UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + }, + }, + { + desc: "one past event, fibonacci delay", + prevAllocEvents: []*structs.RescheduleEvent{ + 
{RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second}}, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", DelayCeiling: 60 * time.Second}, + reschedTime: t1, + expectedRescheduleEvents: []*structs.RescheduleEvent{ + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + { + RescheduleTime: t1.UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + }, + }, + { + desc: "eight past events, fibonacci delay, unlimited", + prevAllocEvents: []*structs.RescheduleEvent{ + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 10 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 15 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 25 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 40 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 65 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 105 * time.Second, + }, + }, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", DelayCeiling: 240 * time.Second}, + reschedTime: t1, + expectedRescheduleEvents: []*structs.RescheduleEvent{ + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 15 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 25 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 40 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 65 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 105 * time.Second, + }, + { + RescheduleTime: t1.UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 170 * time.Second, + }, + }, + }, + { + desc: " old attempts past interval, exponential delay, limited", + prevAllocEvents: []*structs.RescheduleEvent{ + { + RescheduleTime: t1.Add(-2 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 5 * time.Second, + }, + { + RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 10 * time.Second, + }, + { + RescheduleTime: 
t1.Add(-30 * time.Minute).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 20 * time.Second, + }, + { + RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 40 * time.Second, + }, + }, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", DelayCeiling: 240 * time.Second}, + reschedTime: t1, + expectedRescheduleEvents: []*structs.RescheduleEvent{ + { + RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 20 * time.Second, + }, + { + RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 40 * time.Second, + }, + { + RescheduleTime: t1.UnixNano(), + PrevAllocID: prevAlloc.ID, + PrevNodeID: prevAlloc.NodeID, + Delay: 80 * time.Second, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + require := require.New(t) + prevAlloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.prevAllocEvents} + updateRescheduleTracker(alloc, prevAlloc, tc.reschedPolicy, tc.reschedTime) + require.Equal(tc.expectedRescheduleEvents, alloc.RescheduleTracker.Events) + }) + } + +} diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 35eebdcb12a..5eedc91be82 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -5,7 +5,10 @@ import ( "log" "time" + "sort" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" ) @@ -92,7 +95,23 @@ type reconcileResults struct { // followupEvalWait is set if there should be a followup eval run after the // given duration + // Deprecated, the delay strategy that sets this is not available after nomad 0.7.0 followupEvalWait time.Duration + + // desiredFollowupEvals is the map of follow up evaluations to create per task group + // This is used to create a delayed evaluation for rescheduling failed allocations. + desiredFollowupEvals map[string][]*structs.Evaluation +} + +// delayedRescheduleInfo contains the allocation id and a time when its eligible to be rescheduled. +// this is used to create follow up evaluations +type delayedRescheduleInfo struct { + + // allocID is the ID of the allocation eligible to be rescheduled + allocID string + + // rescheduleTime is the time to use in the delayed evaluation + rescheduleTime time.Time } func (r *reconcileResults) GoString() string { @@ -136,7 +155,8 @@ func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch existingAllocs: existingAllocs, taintedNodes: taintedNodes, result: &reconcileResults{ - desiredTGUpdates: make(map[string]*structs.DesiredUpdates), + desiredTGUpdates: make(map[string]*structs.DesiredUpdates), + desiredFollowupEvals: make(map[string][]*structs.Evaluation), }, } } @@ -270,6 +290,8 @@ func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescript } } +const batchedFailedAllocWindowSize = 5 * time.Second + // computeGroup reconciles state for a particular task group. It returns whether // the deployment it is for is complete with regards to the task group. 
func (a *allocReconciler) computeGroup(group string, all allocSet) bool { @@ -318,11 +340,18 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { untainted, migrate, lost := all.filterByTainted(a.taintedNodes) // Determine what set of terminal allocations need to be rescheduled - untainted, reschedule := untainted.filterByReschedulable(a.batch, tg.ReschedulePolicy) + untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy) + + // Create batched follow up evaluations for allocations that are reschedulable later + rescheduleLaterAllocs := make(map[string]*structs.Allocation) + if len(rescheduleLater) > 0 { + rescheduleLaterAllocs = a.handleDelayedReschedules(rescheduleLater, all, tg.Name) + } + // Create a structure for choosing names. Seed with the taken names which is // the union of untainted and migrating nodes (includes canaries) - nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, reschedule)) + nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, rescheduleNow)) // Stop any unneeded allocations and update the untainted set to not // included stopped allocations. @@ -341,7 +370,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // Do inplace upgrades where possible and capture the set of upgrades that // need to be done destructively. - ignore, inplace, destructive := a.computeUpdates(tg, untainted) + ignore, inplace, destructive := a.computeUpdates(tg, untainted, rescheduleLaterAllocs) desiredChanges.Ignore += uint64(len(ignore)) desiredChanges.InPlaceUpdate += uint64(len(inplace)) if !existingDeployment { @@ -379,7 +408,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // * The deployment is not paused or failed // * Not placing any canaries // * If there are any canaries that they have been promoted - place := a.computePlacements(tg, nameIndex, untainted, migrate, reschedule) + place := a.computePlacements(tg, nameIndex, untainted, migrate, rescheduleNow) if !existingDeployment { dstate.DesiredTotal += len(place) } @@ -774,7 +803,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc // 2. Those that can be upgraded in-place. These are added to the results // automatically since the function contains the correct state to do so, // 3. Those that require destructive updates -func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) { +func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted, rescheduleLaterAllocs allocSet) (ignore, inplace, destructive allocSet) { // Determine the set of allocations that need to be updated ignore = make(map[string]*structs.Allocation) inplace = make(map[string]*structs.Allocation) @@ -782,7 +811,13 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted all for _, alloc := range untainted { ignoreChange, destructiveChange, inplaceAlloc := a.allocUpdateFn(alloc, a.job, group) - if ignoreChange { + // Also check if the alloc is marked for later rescheduling. 
+ // If so it should be in the inplace list + reschedLaterAlloc, isRescheduleLater := rescheduleLaterAllocs[alloc.ID] + if isRescheduleLater { + inplace[alloc.ID] = alloc + a.result.inplaceUpdate = append(a.result.inplaceUpdate, reschedLaterAlloc) + } else if ignoreChange { ignore[alloc.ID] = alloc } else if destructiveChange { destructive[alloc.ID] = alloc @@ -796,3 +831,71 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted all return } +func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) allocSet { + // Sort by time + sort.Slice(rescheduleLater, func(i, j int) bool { + return rescheduleLater[i].rescheduleTime.Before(rescheduleLater[j].rescheduleTime) + }) + + var evals []*structs.Evaluation + var allocIDs []string + nextReschedTime := rescheduleLater[0].rescheduleTime + allocIDToFollowupEvalID := make(map[string]string, len(rescheduleLater)) + for _, allocReschedInfo := range rescheduleLater { + if allocReschedInfo.rescheduleTime.UTC().UnixNano()-nextReschedTime.UTC().UnixNano() < batchedFailedAllocWindowSize.Nanoseconds() { + // add to batch + allocIDs = append(allocIDs, allocReschedInfo.allocID) + } else { + // create a new eval for the previous batch + eval := &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: a.job.Namespace, + Priority: a.job.Priority, + Type: a.job.Type, + TriggeredBy: structs.EvalTriggerRetryFailedAlloc, + JobID: a.job.ID, + JobModifyIndex: a.job.ModifyIndex, + Status: structs.EvalStatusPending, + WaitUntil: nextReschedTime, + } + evals = append(evals, eval) + for _, allocID := range allocIDs { + allocIDToFollowupEvalID[allocID] = eval.ID + } + nextReschedTime = allocReschedInfo.rescheduleTime + // clear out this batch and start it again + allocIDs = nil + allocIDs = append(allocIDs, allocReschedInfo.allocID) + } + } + // Deal with the last batch + if len(allocIDs) > 0 { + eval := &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: a.job.Namespace, + Priority: a.job.Priority, + Type: a.job.Type, + TriggeredBy: structs.EvalTriggerRetryFailedAlloc, + JobID: a.job.ID, + JobModifyIndex: a.job.ModifyIndex, + Status: structs.EvalStatusPending, + WaitUntil: nextReschedTime, + } + evals = append(evals, eval) + for _, allocID := range allocIDs { + allocIDToFollowupEvalID[allocID] = eval.ID + } + } + + a.result.desiredFollowupEvals[tgName] = evals + + // create inplace updates for every alloc ID that needs to be updated with its follow up eval ID + rescheduleLaterAllocs := make(map[string]*structs.Allocation) + for allocID, evalID := range allocIDToFollowupEvalID { + existingAlloc := all[allocID] + updatedAlloc := existingAlloc.Copy() + updatedAlloc.FollowupEvalID = evalID + rescheduleLaterAllocs[allocID] = updatedAlloc + } + return rescheduleLaterAllocs +} diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 910039d038d..7866a20964e 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/stretchr/testify/require" ) /* @@ -38,9 +39,12 @@ Basic Tests: √ Handle task group being removed √ Handle job being stopped both as .Stopped and nil √ Place more that one group -√ Handle rescheduling failed allocs for batch jobs -√ Handle rescheduling failed allocs for service jobs +√ Handle delayed rescheduling failed allocs for batch jobs +√ Handle delayed 
rescheduling failed allocs for service jobs +√ Handle eligible now rescheduling failed allocs for batch jobs +√ Handle eligible now rescheduling failed allocs for service jobs √ Previously rescheduled allocs should not be rescheduled again +√ Aggregated evaluations for allocations that fail close together Update stanza Tests: √ Stopped job cancels any active deployment @@ -1203,15 +1207,177 @@ func TestReconciler_MultiTG(t *testing.T) { assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place)) } -// Tests rescheduling failed batch allocations -func TestReconciler_Reschedule_Batch(t *testing.T) { +// Tests delayed rescheduling of failed batch allocations +func TestReconciler_RescheduleLater_Batch(t *testing.T) { + require := require.New(t) // Set desired 4 job := mock.Job() job.TaskGroups[0].Count = 4 + now := time.Now() + // Set up reschedule policy + delayDur := 15 * time.Second + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "linear"} + tgName := job.TaskGroups[0].Name + // Create 6 existing allocations - 2 running, 1 complete and 3 failed + var allocs []*structs.Allocation + for i := 0; i < 6; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + // Mark 3 as failed with restart tracking info + allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs[0].NextAllocation = allocs[1].ID + allocs[1].ClientStatus = structs.AllocClientStatusFailed + allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + }, + }} + allocs[1].NextAllocation = allocs[2].ID + allocs[2].ClientStatus = structs.AllocClientStatusFailed + allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now}} + allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), + PrevAllocID: allocs[0].ID, + PrevNodeID: uuid.Generate(), + }, + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: allocs[1].ID, + PrevNodeID: uuid.Generate(), + }, + }} + // Mark one as complete + allocs[5].ClientStatus = structs.AllocClientStatusComplete + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + // Two reschedule attempts were already made, one more can be made at a future time + + // Verify that the follow up eval has the expected waitUntil time + evals := r.desiredFollowupEvals[tgName] + require.NotNil(evals) + require.Equal(1, len(evals)) + require.Equal(now.Add(delayDur), evals[0].WaitUntil) + + // Alloc 5 should not be replaced because it is terminal + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 1, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 0, + InPlaceUpdate: 1, + Ignore: 3, + }, + }, + }) + assertNamesHaveIndexes(t, intRange(2, 2), allocsToNames(r.inplaceUpdate)) + // verify that the followup evalID field is set correctly + 
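The batching rule exercised by these reconciler tests comes from handleDelayedReschedules earlier in this patch: reschedule times are sorted, and any allocation whose eligible time falls within batchedFailedAllocWindowSize (5 seconds) of the current batch's start shares that batch's follow-up evaluation and WaitUntil. Below is a compact sketch of just the grouping step, using plain times instead of allocations; it is an illustration under that 5-second-window assumption, not the reconciler code itself.

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// batchWindow mirrors batchedFailedAllocWindowSize from the reconciler.
const batchWindow = 5 * time.Second

// groupRescheduleTimes sketches the grouping step of handleDelayedReschedules:
// sorted reschedule times that fall within batchWindow of the first time in the
// current batch share one follow-up evaluation.
func groupRescheduleTimes(times []time.Time) [][]time.Time {
	if len(times) == 0 {
		return nil
	}
	sort.Slice(times, func(i, j int) bool { return times[i].Before(times[j]) })

	var batches [][]time.Time
	batchStart := times[0]
	current := []time.Time{times[0]}
	for _, t := range times[1:] {
		if t.Sub(batchStart) < batchWindow {
			current = append(current, t)
			continue
		}
		batches = append(batches, current)
		batchStart = t
		current = []time.Time{t}
	}
	return append(batches, current)
}

func main() {
	now := time.Now()
	// Five failures within milliseconds of each other and two more ten seconds
	// later, as in the batched-evals test that follows: expect two batches,
	// matching the two follow-up evaluations it verifies.
	times := []time.Time{
		now, now.Add(50 * time.Millisecond), now.Add(100 * time.Millisecond),
		now.Add(150 * time.Millisecond), now.Add(200 * time.Millisecond),
		now.Add(10 * time.Second), now.Add(10*time.Second + 50*time.Millisecond),
	}
	fmt.Println(len(groupRescheduleTimes(times))) // 2
}
```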
r.inplaceUpdate[0].EvalID = evals[0].ID +} + +// Tests delayed rescheduling of failed batch allocations and batching of allocs +// with fail times that are close together +func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { + require := require.New(t) + // Set desired 4 + job := mock.Job() + job.TaskGroups[0].Count = 10 + now := time.Now() // Set up reschedule policy - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour} + delayDur := 15 * time.Second + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "linear"} + tgName := job.TaskGroups[0].Name + // Create 10 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + // Mark 5 as failed with fail times very close together + for i := 0; i < 5; i++ { + allocs[i].ClientStatus = structs.AllocClientStatusFailed + allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}} + } + + // Mark two more as failed several seconds later + for i := 5; i < 7; i++ { + allocs[i].ClientStatus = structs.AllocClientStatusFailed + allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(10 * time.Second)}} + } + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + // Verify that two follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.NotNil(evals) + require.Equal(2, len(evals)) + + // Verify expected WaitUntil values for both batched evals + require.Equal(now.Add(delayDur), evals[0].WaitUntil) + secondBatchDuration := delayDur + 10*time.Second + require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil) + // Alloc 5 should not be replaced because it is terminal + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 0, + inplace: 7, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 0, + InPlaceUpdate: 7, + Ignore: 3, + }, + }, + }) + assertNamesHaveIndexes(t, intRange(0, 6), allocsToNames(r.inplaceUpdate)) + // verify that the followup evalID field is set correctly + for _, alloc := range r.inplaceUpdate { + if allocNameToIndex(alloc.Name) < 5 { + require.Equal(evals[0].ID, alloc.FollowupEvalID) + } else if allocNameToIndex(alloc.Name) < 7 { + require.Equal(evals[1].ID, alloc.FollowupEvalID) + } else { + t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name) + } + } +} + +// Tests rescheduling failed batch allocations +func TestReconciler_RescheduleNow_Batch(t *testing.T) { + require := require.New(t) + // Set desired 4 + job := mock.Job() + job.TaskGroups[0].Count = 4 + now := time.Now() + // Set up reschedule policy + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "linear"} + tgName := job.TaskGroups[0].Name // Create 6 existing allocations - 2 running, 1 complete and 3 failed var allocs 
[]*structs.Allocation for i := 0; i < 6; i++ { @@ -1235,6 +1401,9 @@ func TestReconciler_Reschedule_Batch(t *testing.T) { }} allocs[1].NextAllocation = allocs[2].ID allocs[2].ClientStatus = structs.AllocClientStatusFailed + allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), PrevAllocID: allocs[0].ID, @@ -1251,7 +1420,11 @@ func TestReconciler_Reschedule_Batch(t *testing.T) { reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil) r := reconciler.Compute() - // Two reschedule attempts were made, one more can be made + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Two reschedule attempts were made, one more can be made now // Alloc 5 should not be replaced because it is terminal assertResults(t, r, &resultExpectation{ createDeployment: nil, @@ -1266,19 +1439,24 @@ func TestReconciler_Reschedule_Batch(t *testing.T) { }, }, }) + assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place)) assertPlaceResultsHavePreviousAllocs(t, 1, r.place) assertPlacementsAreRescheduled(t, 1, r.place) + } // Tests rescheduling failed service allocations with desired state stop -func TestReconciler_Reschedule_Service(t *testing.T) { +func TestReconciler_RescheduleLater_Service(t *testing.T) { + require := require.New(t) // Set desired 5 job := mock.Job() job.TaskGroups[0].Count = 5 - + tgName := job.TaskGroups[0].Name + now := time.Now() // Set up reschedule policy - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour} + delayDur := 15 * time.Second + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, DelayCeiling: 1 * time.Hour} // Create 5 existing allocations var allocs []*structs.Allocation @@ -1293,15 +1471,87 @@ func TestReconciler_Reschedule_Service(t *testing.T) { } // Mark two as failed allocs[0].ClientStatus = structs.AllocClientStatusFailed + // Mark one of them as already rescheduled once + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now}} allocs[1].ClientStatus = structs.AllocClientStatusFailed + // Mark one as desired state stop + allocs[4].DesiredStatus = structs.AllocDesiredStatusStop + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) + r := reconciler.Compute() + + // Should place a new placement and create a follow up eval for the delayed reschedule + // Verify that the follow up eval has the expected waitUntil time + evals := r.desiredFollowupEvals[tgName] + require.NotNil(evals) + require.Equal(1, len(evals)) + require.Equal(now.Add(delayDur), evals[0].WaitUntil) + + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 1, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + InPlaceUpdate: 1, + 
Ignore: 3, + }, + }, + }) + + assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(1, 1), allocsToNames(r.inplaceUpdate)) + // verify that the followup evalID field is set correctly + r.inplaceUpdate[0].EvalID = evals[0].ID +} + +// Tests rescheduling failed service allocations with desired state stop +func TestReconciler_RescheduleNow_Service(t *testing.T) { + require := require.New(t) + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + now := time.Now() + // Set up reschedule policy + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayCeiling: 1 * time.Hour} + + // Create 5 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + // Mark two as failed + allocs[0].ClientStatus = structs.AllocClientStatusFailed // Mark one of them as already rescheduled once - allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), PrevAllocID: uuid.Generate(), PrevNodeID: uuid.Generate(), }, }} + allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[1].ClientStatus = structs.AllocClientStatusFailed // Mark one as desired state stop allocs[4].DesiredStatus = structs.AllocDesiredStatusStop @@ -1309,7 +1559,11 @@ func TestReconciler_Reschedule_Service(t *testing.T) { reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) r := reconciler.Compute() - // Should place 2, one is rescheduled, one is past its reschedule limit and one is a new placement + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Verify that one rescheduled alloc and one replacement for terminal alloc were placed assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, @@ -1324,8 +1578,8 @@ func TestReconciler_Reschedule_Service(t *testing.T) { }, }) - assertNamesHaveIndexes(t, intRange(0, 0, 4, 4), placeResultsToNames(r.place)) - // 2 rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) + // Rescheduled allocs should have previous allocs assertPlaceResultsHavePreviousAllocs(t, 1, r.place) assertPlacementsAreRescheduled(t, 1, r.place) } @@ -3374,6 +3628,8 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate + tgName := job.TaskGroups[0].Name + now := time.Now() // Create an existing failed deployment that has some placed allocs d := structs.NewDeployment(job) d.Status = structs.DeploymentStatusFailed @@ -3394,8 +3650,17 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { alloc.TaskGroup = job.TaskGroups[0].Name allocs = append(allocs, alloc) } + + //create some allocations that are reschedulable now allocs[2].ClientStatus = 
structs.AllocClientStatusFailed + allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[3].ClientStatus = structs.AllocClientStatusFailed + allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) r := reconciler.Compute() @@ -3417,6 +3682,8 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { job := mock.Job() job.TaskGroups[0].Update = noCanaryUpdate + tgName := job.TaskGroups[0].Name + now := time.Now() // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet d := structs.NewDeployment(job) @@ -3439,8 +3706,17 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { alloc.DeploymentID = d.ID allocs = append(allocs, alloc) } + + // Create allocs that are reschedulable now allocs[2].ClientStatus = structs.AllocClientStatusFailed + allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} + allocs[3].ClientStatus = structs.AllocClientStatusFailed + allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", + StartedAt: now.Add(-1 * time.Hour), + FinishedAt: now.Add(-10 * time.Second)}} reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) r := reconciler.Compute() diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index cff5c264812..99a7a07cc1b 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -227,14 +227,17 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi return } -// filterByReschedulable filters the allocation set to return the set of allocations that are either -// terminal or running, and a set of allocations that must be rescheduled -func (a allocSet) filterByReschedulable(isBatch bool, reschedulePolicy *structs.ReschedulePolicy) (untainted, reschedule allocSet) { +// filterByRescheduleable filters the allocation set to return the set of allocations that are either +// terminal or running, and a set of allocations that must be rescheduled now. Allocations that can be rescheduled +// at a future time are also returned so that we can create follow up evaluations for them +func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs.ReschedulePolicy) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) { untainted = make(map[string]*structs.Allocation) - reschedule = make(map[string]*structs.Allocation) + rescheduleNow = make(map[string]*structs.Allocation) now := time.Now() for _, alloc := range a { + var isUntainted, eligibleNow, eligibleLater bool + var rescheduleTime time.Time if isBatch { // Allocs from batch jobs should be filtered when the desired status // is terminal and the client did not finish or when the client @@ -249,26 +252,48 @@ func (a allocSet) filterByReschedulable(isBatch bool, reschedulePolicy *structs. 
default: } if alloc.NextAllocation == "" { - if alloc.ShouldReschedule(reschedulePolicy, now) { - reschedule[alloc.ID] = alloc - } else { - untainted[alloc.ID] = alloc - } + //ignore allocs that have already been rescheduled + isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, reschedulePolicy, now, true) } } else { //ignore allocs that have already been rescheduled if alloc.NextAllocation == "" { - // ignore allocs whose desired state is stop/evict - // everything else is either reschedulable or untainted - if alloc.ShouldReschedule(reschedulePolicy, now) { - reschedule[alloc.ID] = alloc - } else if alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict { - untainted[alloc.ID] = alloc - } + isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, reschedulePolicy, now, false) } } + if isUntainted { + untainted[alloc.ID] = alloc + } + if eligibleNow { + rescheduleNow[alloc.ID] = alloc + } else if eligibleLater { + rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, rescheduleTime}) + } } + return +} +// updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation +// should be rescheduled now, later or left in the untainted set +func updateByReschedulable(alloc *structs.Allocation, reschedulePolicy *structs.ReschedulePolicy, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { + shouldAllow := true + if !batch { + // for service type jobs we ignore allocs whose desired state is stop/evict + // everything else is either rescheduleable or untainted + shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict + } + rescheduleTime, eligible := alloc.NextRescheduleTime(reschedulePolicy) + timeDiff := rescheduleTime.UTC().UnixNano() - now.UTC().UnixNano() + // we consider a time difference of less than 5 seconds to be eligible + // because we collapse allocations that failed within 5 seconds into a single evaluation + if eligible && timeDiff < batchedFailedAllocWindowSize.Nanoseconds() { + rescheduleNow = true + } else if shouldAllow { + untainted = true + if eligible && alloc.FollowupEvalID == "" { + rescheduleLater = true + } + } return } From f83ddd44bad39ac7429ab336e8c754b04666baee Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 1 Mar 2018 18:28:02 -0600 Subject: [PATCH 08/25] Added a delay heap to track evals with WaitUntil set, and use in eval broker --- nomad/delayheap/delay_heap.go | 166 +++++++++++++++++++++++++++++ nomad/delayheap/delay_heap_test.go | 115 ++++++++++++++++++++ nomad/eval_broker.go | 126 +++++++++++++++++++--- nomad/eval_broker_test.go | 47 +++++++- 4 files changed, 439 insertions(+), 15 deletions(-) create mode 100644 nomad/delayheap/delay_heap.go create mode 100644 nomad/delayheap/delay_heap_test.go diff --git a/nomad/delayheap/delay_heap.go b/nomad/delayheap/delay_heap.go new file mode 100644 index 00000000000..50ca56674d7 --- /dev/null +++ b/nomad/delayheap/delay_heap.go @@ -0,0 +1,166 @@ +package delayheap + +import ( + "container/heap" + "fmt" + "time" + + "github.com/hashicorp/nomad/nomad/structs" +) + +// DelayHeap wraps a heap and gives operations other than Push/Pop. 
+// The inner heap is sorted by the time in the WaitUntil field of DelayHeapNode +type DelayHeap struct { + index map[structs.NamespacedID]*DelayHeapNode + heap delayedHeapImp +} + +type HeapNode interface { + Data() interface{} + ID() string + Namespace() string +} + +// DelayHeapNode encapsulates the node stored in DelayHeap +// WaitUntil is used as the sorting criteria +type DelayHeapNode struct { + // Node is the data object stored in the delay heap + Node HeapNode + // WaitUntil is the time delay associated with the node + // Objects in the heap are sorted by WaitUntil + WaitUntil time.Time + + index int +} + +type delayedHeapImp []*DelayHeapNode + +func (h delayedHeapImp) Len() int { + return len(h) +} + +func (h delayedHeapImp) Less(i, j int) bool { + // Two zero times should return false. + // Otherwise, zero is "greater" than any other time. + // (To sort it at the end of the list.) + // Sort such that zero times are at the end of the list. + iZero, jZero := h[i].WaitUntil.IsZero(), h[j].WaitUntil.IsZero() + if iZero && jZero { + return false + } else if iZero { + return false + } else if jZero { + return true + } + + return h[i].WaitUntil.Before(h[j].WaitUntil) +} + +func (h delayedHeapImp) Swap(i, j int) { + h[i], h[j] = h[j], h[i] + h[i].index = i + h[j].index = j +} + +func (h *delayedHeapImp) Push(x interface{}) { + node := x.(*DelayHeapNode) + n := len(*h) + node.index = n + *h = append(*h, node) +} + +func (h *delayedHeapImp) Pop() interface{} { + old := *h + n := len(old) + node := old[n-1] + node.index = -1 // for safety + *h = old[0 : n-1] + return node +} + +func NewDelayHeap() *DelayHeap { + return &DelayHeap{ + index: make(map[structs.NamespacedID]*DelayHeapNode), + heap: make(delayedHeapImp, 0), + } +} + +func (p *DelayHeap) Push(dataNode HeapNode, next time.Time) error { + tuple := structs.NamespacedID{ + ID: dataNode.ID(), + Namespace: dataNode.Namespace(), + } + if _, ok := p.index[tuple]; ok { + return fmt.Errorf("node %q (%s) already exists", dataNode.ID(), dataNode.Namespace()) + } + + delayHeapNode := &DelayHeapNode{dataNode, next, 0} + p.index[tuple] = delayHeapNode + heap.Push(&p.heap, delayHeapNode) + return nil +} + +func (p *DelayHeap) Pop() *DelayHeapNode { + if len(p.heap) == 0 { + return nil + } + + delayHeapNode := heap.Pop(&p.heap).(*DelayHeapNode) + tuple := structs.NamespacedID{ + ID: delayHeapNode.Node.ID(), + Namespace: delayHeapNode.Node.Namespace(), + } + delete(p.index, tuple) + return delayHeapNode +} + +func (p *DelayHeap) Peek() *DelayHeapNode { + if len(p.heap) == 0 { + return nil + } + + return p.heap[0] +} + +func (p *DelayHeap) Contains(heapNode HeapNode) bool { + tuple := structs.NamespacedID{ + ID: heapNode.ID(), + Namespace: heapNode.Namespace(), + } + _, ok := p.index[tuple] + return ok +} + +func (p *DelayHeap) Update(heapNode HeapNode, waitUntil time.Time) error { + tuple := structs.NamespacedID{ + ID: heapNode.ID(), + Namespace: heapNode.Namespace(), + } + if existingHeapNode, ok := p.index[tuple]; ok { + // Need to update the job as well because its spec can change. 
+ existingHeapNode.Node = heapNode + existingHeapNode.WaitUntil = waitUntil + heap.Fix(&p.heap, existingHeapNode.index) + return nil + } + + return fmt.Errorf("heap doesn't contain object with ID %q (%s)", heapNode.ID(), heapNode.Namespace()) +} + +func (p *DelayHeap) Remove(heapNode HeapNode) error { + tuple := structs.NamespacedID{ + ID: heapNode.ID(), + Namespace: heapNode.Namespace(), + } + if node, ok := p.index[tuple]; ok { + heap.Remove(&p.heap, node.index) + delete(p.index, tuple) + return nil + } + + return fmt.Errorf("heap doesn't contain object with ID %q (%s)", heapNode.ID(), heapNode.Namespace()) +} + +func (p *DelayHeap) Length() int { + return len(p.heap) +} diff --git a/nomad/delayheap/delay_heap_test.go b/nomad/delayheap/delay_heap_test.go new file mode 100644 index 00000000000..3bb87e377e9 --- /dev/null +++ b/nomad/delayheap/delay_heap_test.go @@ -0,0 +1,115 @@ +package delayheap + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// HeapNodeImpl satisfies the HeapNode interface +type heapNodeImpl struct { + dataObject interface{} + id string + namespace string +} + +func (d *heapNodeImpl) Data() interface{} { + return d.dataObject +} + +func (d *heapNodeImpl) ID() string { + return d.id +} + +func (d *heapNodeImpl) Namespace() string { + return d.namespace +} + +func TestDelayHeap_PushPop(t *testing.T) { + delayHeap := NewDelayHeap() + now := time.Now() + require := require.New(t) + // a dummy type to use as the inner object in the heap + type myObj struct { + a int + b string + } + dataNode1 := &heapNodeImpl{ + dataObject: &myObj{a: 0, b: "hey"}, + id: "101", + namespace: "default", + } + delayHeap.Push(dataNode1, now.Add(-10*time.Minute)) + + dataNode2 := &heapNodeImpl{ + dataObject: &myObj{a: 0, b: "hey"}, + id: "102", + namespace: "default", + } + delayHeap.Push(dataNode2, now.Add(10*time.Minute)) + + dataNode3 := &heapNodeImpl{ + dataObject: &myObj{a: 0, b: "hey"}, + id: "103", + namespace: "default", + } + delayHeap.Push(dataNode3, now.Add(-15*time.Second)) + + dataNode4 := &heapNodeImpl{ + dataObject: &myObj{a: 0, b: "hey"}, + id: "101", + namespace: "test-namespace", + } + delayHeap.Push(dataNode4, now.Add(2*time.Hour)) + + expectedWaitTimes := []time.Time{now.Add(-10 * time.Minute), now.Add(-15 * time.Second), now.Add(10 * time.Minute), now.Add(2 * time.Hour)} + entries := getHeapEntries(delayHeap, now) + for i, entry := range entries { + require.Equal(expectedWaitTimes[i], entry.WaitUntil) + } + +} + +func TestDelayHeap_Update(t *testing.T) { + delayHeap := NewDelayHeap() + now := time.Now() + require := require.New(t) + // a dummy type to use as the inner object in the heap + type myObj struct { + a int + b string + } + dataNode1 := &heapNodeImpl{ + dataObject: &myObj{a: 0, b: "hey"}, + id: "101", + namespace: "default", + } + delayHeap.Push(dataNode1, now.Add(-10*time.Minute)) + + dataNode2 := &heapNodeImpl{ + dataObject: &myObj{a: 0, b: "hey"}, + id: "102", + namespace: "default", + } + delayHeap.Push(dataNode2, now.Add(10*time.Minute)) + delayHeap.Update(dataNode1, now.Add(20*time.Minute)) + + expectedWaitTimes := []time.Time{now.Add(10 * time.Minute), now.Add(20 * time.Minute)} + expectedIdOrder := []string{"102", "101"} + entries := getHeapEntries(delayHeap, now) + for i, entry := range entries { + require.Equal(expectedWaitTimes[i], entry.WaitUntil) + require.Equal(expectedIdOrder[i], entry.Node.ID()) + } + +} + +func getHeapEntries(delayHeap *DelayHeap, now time.Time) []*DelayHeapNode { + var entries []*DelayHeapNode 
+ for node := delayHeap.Pop(); node != nil; { + entries = append(entries, node) + node = delayHeap.Pop() + } + return entries +} diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index f8ea14ff917..27efd4fe71c 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -8,8 +8,11 @@ import ( "sync" "time" + "context" + "github.com/armon/go-metrics" "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/delayheap" "github.com/hashicorp/nomad/nomad/structs" ) @@ -77,7 +80,19 @@ type EvalBroker struct { // timeWait has evaluations that are waiting for time to elapse timeWait map[string]*time.Timer - // initialNackDelay is the delay applied before reenqueuing a + // delayedEvalCancelFunc is used to stop the long running go routine + // that processes delayed evaluations + delayedEvalCancelFunc context.CancelFunc + + // delayHeap is a heap used to track incoming evaluations that are + // not eligible to enqueue until their WaitTime + delayHeap *delayheap.DelayHeap + + // delayedEvalsUpdateCh is used to trigger notifications for updates + // to the delayHeap + delayedEvalsUpdateCh chan struct{} + + // initialNackDelay is the delay applied before re-enqueuing a // Nacked evaluation for the first time. initialNackDelay time.Duration @@ -113,22 +128,25 @@ func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, return nil, fmt.Errorf("timeout cannot be negative") } b := &EvalBroker{ - nackTimeout: timeout, - deliveryLimit: deliveryLimit, - enabled: false, - stats: new(BrokerStats), - evals: make(map[string]int), - jobEvals: make(map[structs.NamespacedID]string), + nackTimeout: timeout, + deliveryLimit: deliveryLimit, + enabled: false, + stats: new(BrokerStats), + evals: make(map[string]int), + jobEvals: make(map[structs.NamespacedID]string), blocked: make(map[structs.NamespacedID]PendingEvaluations), - ready: make(map[string]PendingEvaluations), - unack: make(map[string]*unackEval), - waiting: make(map[string]chan struct{}), - requeue: make(map[string]*structs.Evaluation), - timeWait: make(map[string]*time.Timer), - initialNackDelay: initialNackDelay, - subsequentNackDelay: subsequentNackDelay, + ready: make(map[string]PendingEvaluations), + unack: make(map[string]*unackEval), + waiting: make(map[string]chan struct{}), + requeue: make(map[string]*structs.Evaluation), + timeWait: make(map[string]*time.Timer), + initialNackDelay: initialNackDelay, + subsequentNackDelay: subsequentNackDelay, + delayHeap: delayheap.NewDelayHeap(), + delayedEvalsUpdateCh: make(chan struct{}, 1), } b.stats.ByScheduler = make(map[string]*SchedulerStats) + return b, nil } @@ -144,6 +162,10 @@ func (b *EvalBroker) Enabled() bool { func (b *EvalBroker) SetEnabled(enabled bool) { b.l.Lock() b.enabled = enabled + // start the go routine for delayed evals + ctx, cancel := context.WithCancel(context.Background()) + b.delayedEvalCancelFunc = cancel + go b.runDelayedEvalsWatcher(ctx) b.l.Unlock() if !enabled { b.Flush() @@ -206,6 +228,16 @@ func (b *EvalBroker) processEnqueue(eval *structs.Evaluation, token string) { return } + if !eval.WaitUntil.IsZero() { + b.delayHeap.Push(&evalWrapper{eval}, eval.WaitUntil) + // Signal an update. 
+ select { + case b.delayedEvalsUpdateCh <- struct{}{}: + default: + } + return + } + b.enqueueLocked(eval, eval.Type) } @@ -663,6 +695,12 @@ func (b *EvalBroker) Flush() { wait.Stop() } + // Cancel the delayed evaluations goroutine + b.delayedEvalCancelFunc() + + // Clear out the update channel for delayed evaluations + b.delayedEvalsUpdateCh = make(chan struct{}, 1) + // Reset the broker b.stats.TotalReady = 0 b.stats.TotalUnacked = 0 @@ -675,6 +713,66 @@ func (b *EvalBroker) Flush() { b.ready = make(map[string]PendingEvaluations) b.unack = make(map[string]*unackEval) b.timeWait = make(map[string]*time.Timer) + b.delayHeap = delayheap.NewDelayHeap() +} + +// evalWrapper satisfies the HeapNode interface +type evalWrapper struct { + eval *structs.Evaluation +} + +func (d *evalWrapper) Data() interface{} { + return d.eval +} + +func (d *evalWrapper) ID() string { + return d.eval.ID +} + +func (d *evalWrapper) Namespace() string { + return d.eval.Namespace +} + +// runDelayedEvalsWatcher is a long-lived function that waits till a job's periodic spec is met and +// then creates an evaluation to run the job. +func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) { + var timerChannel <-chan time.Time + for b.enabled { + eval, waitUntil := b.nextDelayedEval() + if waitUntil.IsZero() { + timerChannel = nil + } else { + launchDur := waitUntil.Sub(time.Now().UTC()) + timerChannel = time.After(launchDur) + } + + select { + case <-ctx.Done(): + return + case <-timerChannel: + // remove from the heap since we can enqueue it now + b.delayHeap.Remove(&evalWrapper{eval}) + b.enqueueLocked(eval, eval.Type) + case <-b.delayedEvalsUpdateCh: + continue + } + } +} + +// nextDelayedEval returns the next delayed eval to launch and when it should be enqueued. +// This peeks at the heap to return the top. +func (b *EvalBroker) nextDelayedEval() (*structs.Evaluation, time.Time) { + // If there is nothing wait for an update. + if b.delayHeap.Length() == 0 { + return nil, time.Time{} + } + nextEval := b.delayHeap.Peek() + + if nextEval == nil { + return nil, time.Time{} + } + eval := nextEval.Node.Data().(*structs.Evaluation) + return eval, nextEval.WaitUntil } // Stats is used to query the state of the broker diff --git a/nomad/eval_broker_test.go b/nomad/eval_broker_test.go index 5275ed999f4..87f47936ebc 100644 --- a/nomad/eval_broker_test.go +++ b/nomad/eval_broker_test.go @@ -1141,7 +1141,52 @@ func TestEvalBroker_Wait(t *testing.T) { } } -// Ensure that priority is taken into account when enqueuing many evaluations. 
+// Test that delayed evaluations work as expected +func TestEvalBroker_WaitUntil(t *testing.T) { + t.Parallel() + require := require.New(t) + b := testBroker(t, 0) + b.SetEnabled(true) + + now := time.Now() + // Create a few of evals with WaitUntil set + eval1 := mock.Eval() + eval1.WaitUntil = now.Add(1 * time.Second) + eval1.CreateIndex = 1 + b.Enqueue(eval1) + + eval2 := mock.Eval() + eval2.WaitUntil = now.Add(100 * time.Millisecond) + // set CreateIndex to use as a tie breaker when eval2 + // and eval3 are both in the pending evals heap + eval2.CreateIndex = 2 + b.Enqueue(eval2) + + eval3 := mock.Eval() + eval3.WaitUntil = now.Add(20 * time.Millisecond) + eval3.CreateIndex = 1 + b.Enqueue(eval3) + + // sleep enough for two evals to be ready + time.Sleep(200 * time.Millisecond) + + // first dequeue should return eval3 + out, _, err := b.Dequeue(defaultSched, time.Second) + require.Nil(err) + require.Equal(eval3, out) + + // second dequeue should return eval2 + out, _, err = b.Dequeue(defaultSched, time.Second) + require.Nil(err) + require.Equal(eval2, out) + + // third dequeue should return eval1 + out, _, err = b.Dequeue(defaultSched, 2*time.Second) + require.Nil(err) + require.Equal(eval1, out) +} + +// Ensure that priority is taken into account when enqueueing many evaluations. func TestEvalBroker_EnqueueAll_Dequeue_Fair(t *testing.T) { t.Parallel() b := testBroker(t, 0) From 3eebacb53e76c67e4a35094c6b0df5956f63217b Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Fri, 2 Mar 2018 15:42:13 -0600 Subject: [PATCH 09/25] Remove unnecessary check against 5 second window for determining immediate scheduling eligibility --- scheduler/reconcile_util.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 99a7a07cc1b..e6d954935b7 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -283,10 +283,9 @@ func updateByReschedulable(alloc *structs.Allocation, reschedulePolicy *structs. shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict } rescheduleTime, eligible := alloc.NextRescheduleTime(reschedulePolicy) - timeDiff := rescheduleTime.UTC().UnixNano() - now.UTC().UnixNano() // we consider a time difference of less than 5 seconds to be eligible // because we collapse allocations that failed within 5 seconds into a single evaluation - if eligible && timeDiff < batchedFailedAllocWindowSize.Nanoseconds() { + if eligible && now.After(rescheduleTime) { rescheduleNow = true } else if shouldAllow { untainted = true From c841dc01a9c1d57f5e98b3bb44f2176df802643c Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 5 Mar 2018 21:46:54 -0600 Subject: [PATCH 10/25] Address some code review comments --- nomad/delayheap/delay_heap.go | 29 +++++++++++++++-------------- nomad/delayheap/delay_heap_test.go | 4 ++-- nomad/eval_broker.go | 25 ++++++++++++++++--------- nomad/eval_broker_test.go | 5 +++-- nomad/structs/structs.go | 2 +- 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/nomad/delayheap/delay_heap.go b/nomad/delayheap/delay_heap.go index 50ca56674d7..4e8ba669c09 100644 --- a/nomad/delayheap/delay_heap.go +++ b/nomad/delayheap/delay_heap.go @@ -9,21 +9,22 @@ import ( ) // DelayHeap wraps a heap and gives operations other than Push/Pop. 
-// The inner heap is sorted by the time in the WaitUntil field of DelayHeapNode +// The inner heap is sorted by the time in the WaitUntil field of delayHeapNode type DelayHeap struct { - index map[structs.NamespacedID]*DelayHeapNode + index map[structs.NamespacedID]*delayHeapNode heap delayedHeapImp } +// HeapNode is an interface type implemented by objects stored in the DelayHeap type HeapNode interface { - Data() interface{} - ID() string - Namespace() string + Data() interface{} // The data object + ID() string // ID of the object, used in conjunction with namespace for deduplication + Namespace() string // Namespace of the object, can be empty } -// DelayHeapNode encapsulates the node stored in DelayHeap +// delayHeapNode encapsulates the node stored in DelayHeap // WaitUntil is used as the sorting criteria -type DelayHeapNode struct { +type delayHeapNode struct { // Node is the data object stored in the delay heap Node HeapNode // WaitUntil is the time delay associated with the node @@ -33,7 +34,7 @@ type DelayHeapNode struct { index int } -type delayedHeapImp []*DelayHeapNode +type delayedHeapImp []*delayHeapNode func (h delayedHeapImp) Len() int { return len(h) @@ -63,7 +64,7 @@ func (h delayedHeapImp) Swap(i, j int) { } func (h *delayedHeapImp) Push(x interface{}) { - node := x.(*DelayHeapNode) + node := x.(*delayHeapNode) n := len(*h) node.index = n *h = append(*h, node) @@ -80,7 +81,7 @@ func (h *delayedHeapImp) Pop() interface{} { func NewDelayHeap() *DelayHeap { return &DelayHeap{ - index: make(map[structs.NamespacedID]*DelayHeapNode), + index: make(map[structs.NamespacedID]*delayHeapNode), heap: make(delayedHeapImp, 0), } } @@ -94,18 +95,18 @@ func (p *DelayHeap) Push(dataNode HeapNode, next time.Time) error { return fmt.Errorf("node %q (%s) already exists", dataNode.ID(), dataNode.Namespace()) } - delayHeapNode := &DelayHeapNode{dataNode, next, 0} + delayHeapNode := &delayHeapNode{dataNode, next, 0} p.index[tuple] = delayHeapNode heap.Push(&p.heap, delayHeapNode) return nil } -func (p *DelayHeap) Pop() *DelayHeapNode { +func (p *DelayHeap) Pop() *delayHeapNode { if len(p.heap) == 0 { return nil } - delayHeapNode := heap.Pop(&p.heap).(*DelayHeapNode) + delayHeapNode := heap.Pop(&p.heap).(*delayHeapNode) tuple := structs.NamespacedID{ ID: delayHeapNode.Node.ID(), Namespace: delayHeapNode.Node.Namespace(), @@ -114,7 +115,7 @@ func (p *DelayHeap) Pop() *DelayHeapNode { return delayHeapNode } -func (p *DelayHeap) Peek() *DelayHeapNode { +func (p *DelayHeap) Peek() *delayHeapNode { if len(p.heap) == 0 { return nil } diff --git a/nomad/delayheap/delay_heap_test.go b/nomad/delayheap/delay_heap_test.go index 3bb87e377e9..3b44acc19ca 100644 --- a/nomad/delayheap/delay_heap_test.go +++ b/nomad/delayheap/delay_heap_test.go @@ -105,8 +105,8 @@ func TestDelayHeap_Update(t *testing.T) { } -func getHeapEntries(delayHeap *DelayHeap, now time.Time) []*DelayHeapNode { - var entries []*DelayHeapNode +func getHeapEntries(delayHeap *DelayHeap, now time.Time) []*delayHeapNode { + var entries []*delayHeapNode for node := delayHeap.Pop(); node != nil; { entries = append(entries, node) node = delayHeap.Pop() diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index 27efd4fe71c..01f5971047e 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -161,14 +161,17 @@ func (b *EvalBroker) Enabled() bool { // should only be enabled on the active leader. 
func (b *EvalBroker) SetEnabled(enabled bool) { b.l.Lock() + prevEnabled := b.enabled b.enabled = enabled - // start the go routine for delayed evals - ctx, cancel := context.WithCancel(context.Background()) - b.delayedEvalCancelFunc = cancel - go b.runDelayedEvalsWatcher(ctx) + if !prevEnabled && enabled { + // start the go routine for delayed evals + ctx, cancel := context.WithCancel(context.Background()) + b.delayedEvalCancelFunc = cancel + go b.runDelayedEvalsWatcher(ctx) + } b.l.Unlock() if !enabled { - b.Flush() + b.flush() } } @@ -230,6 +233,7 @@ func (b *EvalBroker) processEnqueue(eval *structs.Evaluation, token string) { if !eval.WaitUntil.IsZero() { b.delayHeap.Push(&evalWrapper{eval}, eval.WaitUntil) + b.stats.TotalWaiting += 1 // Signal an update. select { case b.delayedEvalsUpdateCh <- struct{}{}: @@ -675,7 +679,7 @@ func (b *EvalBroker) ResumeNackTimeout(evalID, token string) error { } // Flush is used to clear the state of the broker -func (b *EvalBroker) Flush() { +func (b *EvalBroker) flush() { b.l.Lock() defer b.l.Unlock() @@ -733,11 +737,11 @@ func (d *evalWrapper) Namespace() string { return d.eval.Namespace } -// runDelayedEvalsWatcher is a long-lived function that waits till a job's periodic spec is met and -// then creates an evaluation to run the job. +// runDelayedEvalsWatcher is a long-lived function that waits till a time deadline is met for +// pending evaluations before enqueuing them func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) { var timerChannel <-chan time.Time - for b.enabled { + for { eval, waitUntil := b.nextDelayedEval() if waitUntil.IsZero() { timerChannel = nil @@ -752,7 +756,10 @@ func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) { case <-timerChannel: // remove from the heap since we can enqueue it now b.delayHeap.Remove(&evalWrapper{eval}) + b.l.Lock() + b.stats.TotalWaiting -= 1 b.enqueueLocked(eval, eval.Type) + b.l.Unlock() case <-b.delayedEvalsUpdateCh: continue } diff --git a/nomad/eval_broker_test.go b/nomad/eval_broker_test.go index 87f47936ebc..8f7f71510a9 100644 --- a/nomad/eval_broker_test.go +++ b/nomad/eval_broker_test.go @@ -1141,7 +1141,7 @@ func TestEvalBroker_Wait(t *testing.T) { } } -// Test that delayed evaluations work as expected +// Ensure that delayed evaluations work as expected func TestEvalBroker_WaitUntil(t *testing.T) { t.Parallel() require := require.New(t) @@ -1166,7 +1166,7 @@ func TestEvalBroker_WaitUntil(t *testing.T) { eval3.WaitUntil = now.Add(20 * time.Millisecond) eval3.CreateIndex = 1 b.Enqueue(eval3) - + require.Equal(3, b.stats.TotalWaiting) // sleep enough for two evals to be ready time.Sleep(200 * time.Millisecond) @@ -1184,6 +1184,7 @@ func TestEvalBroker_WaitUntil(t *testing.T) { out, _, err = b.Dequeue(defaultSched, 2*time.Second) require.Nil(err) require.Equal(eval1, out) + require.Equal(0, b.stats.TotalWaiting) } // Ensure that priority is taken into account when enqueueing many evaluations. 
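Note on the delay functions exercised by the reschedule changes above: the sketch below is a minimal, self-contained illustration (not part of these patches) of how a per-attempt reschedule delay might grow under the "linear", "exponential", and "fibonacci" delay functions with an upper bound. The helper name nextDelay and its exact growth rule are assumptions made for illustration; the authoritative computation is Allocation.NextDelay, introduced later in this series, which additionally resets the delay after a long gap between failures.

package main

import (
	"fmt"
	"time"
)

// nextDelay is a hypothetical helper showing one plausible progression:
// linear keeps the base delay, exponential doubles it per prior attempt,
// fibonacci sums the two previous delays, and all are capped at ceiling.
func nextDelay(base, ceiling time.Duration, fn string, priorAttempts int) time.Duration {
	d, prev := base, time.Duration(0)
	for i := 0; i < priorAttempts; i++ {
		switch fn {
		case "exponential":
			d *= 2
		case "fibonacci":
			d, prev = d+prev, d
		default:
			// "linear" keeps the base delay
		}
	}
	if ceiling > 0 && d > ceiling {
		d = ceiling
	}
	return d
}

func main() {
	for n := 0; n < 6; n++ {
		fmt.Printf("attempt %d: linear=%v exponential=%v fibonacci=%v\n", n,
			nextDelay(5*time.Second, 2*time.Minute, "linear", n),
			nextDelay(5*time.Second, 2*time.Minute, "exponential", n),
			nextDelay(5*time.Second, 2*time.Minute, "fibonacci", n))
	}
}
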
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b51428e3fd1..a35574735e3 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2825,7 +2825,7 @@ func (r *ReschedulePolicy) Validate() error { } - //Validate Interval and other delay parameters if attempts are limited + // Validate Interval and other delay parameters if attempts are limited if !r.Unlimited { if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) From 733c40e058f31b87cd7d141b3d213d925c23037e Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 7 Mar 2018 15:26:45 -0600 Subject: [PATCH 11/25] Add parsing test cases --- jobspec/parse_test.go | 38 ++++++++++++++++++- .../reschedule-job-unlimited.hcl | 20 ++++++++++ jobspec/test-fixtures/reschedule-job.hcl | 2 + 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 jobspec/test-fixtures/reschedule-job-unlimited.hcl diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 90901ba16cd..45831a577a2 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -679,8 +679,42 @@ func TestParse(t *testing.T) { Type: helper.StringToPtr("batch"), Datacenters: []string{"dc1"}, Reschedule: &api.ReschedulePolicy{ - Attempts: helper.IntToPtr(15), - Interval: helper.TimeToPtr(30 * time.Minute), + Attempts: helper.IntToPtr(15), + Interval: helper.TimeToPtr(30 * time.Minute), + DelayFunction: helper.StringToPtr("linear"), + Delay: helper.TimeToPtr(10 * time.Second), + }, + TaskGroups: []*api.TaskGroup{ + { + Name: helper.StringToPtr("bar"), + Count: helper.IntToPtr(3), + Tasks: []*api.Task{ + { + Name: "bar", + Driver: "raw_exec", + Config: map[string]interface{}{ + "command": "bash", + "args": []interface{}{"-c", "echo hi"}, + }, + }, + }, + }, + }, + }, + false, + }, + { + "reschedule-job-unlimited.hcl", + &api.Job{ + ID: helper.StringToPtr("foo"), + Name: helper.StringToPtr("foo"), + Type: helper.StringToPtr("batch"), + Datacenters: []string{"dc1"}, + Reschedule: &api.ReschedulePolicy{ + DelayFunction: helper.StringToPtr("exponential"), + Delay: helper.TimeToPtr(10 * time.Second), + DelayCeiling: helper.TimeToPtr(120 * time.Second), + Unlimited: helper.BoolToPtr(true), }, TaskGroups: []*api.TaskGroup{ { diff --git a/jobspec/test-fixtures/reschedule-job-unlimited.hcl b/jobspec/test-fixtures/reschedule-job-unlimited.hcl new file mode 100644 index 00000000000..b2840c123a1 --- /dev/null +++ b/jobspec/test-fixtures/reschedule-job-unlimited.hcl @@ -0,0 +1,20 @@ +job "foo" { + datacenters = ["dc1"] + type = "batch" + reschedule { + delay = "10s", + delay_function = "exponential" + delay_ceiling="120s" + unlimited = true + } + group "bar" { + count = 3 + task "bar" { + driver = "raw_exec" + config { + command = "bash" + args = ["-c", "echo hi"] + } + } + } +} diff --git a/jobspec/test-fixtures/reschedule-job.hcl b/jobspec/test-fixtures/reschedule-job.hcl index 323fef88267..48fd7167067 100644 --- a/jobspec/test-fixtures/reschedule-job.hcl +++ b/jobspec/test-fixtures/reschedule-job.hcl @@ -4,6 +4,8 @@ job "foo" { reschedule { attempts = 15 interval = "30m" + delay = "10s", + delay_function = "linear" } group "bar" { count = 3 From f57f5865bb3b1f171108d14bed7dd6d5f50d64bd Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 7 Mar 2018 15:35:10 -0600 Subject: [PATCH 12/25] Move delayheap to lib package --- {nomad/delayheap => lib}/delay_heap.go | 2 +- {nomad/delayheap => 
lib}/delay_heap_test.go | 2 +- nomad/eval_broker.go | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) rename {nomad/delayheap => lib}/delay_heap.go (99%) rename {nomad/delayheap => lib}/delay_heap_test.go (99%) diff --git a/nomad/delayheap/delay_heap.go b/lib/delay_heap.go similarity index 99% rename from nomad/delayheap/delay_heap.go rename to lib/delay_heap.go index 4e8ba669c09..15558319857 100644 --- a/nomad/delayheap/delay_heap.go +++ b/lib/delay_heap.go @@ -1,4 +1,4 @@ -package delayheap +package lib import ( "container/heap" diff --git a/nomad/delayheap/delay_heap_test.go b/lib/delay_heap_test.go similarity index 99% rename from nomad/delayheap/delay_heap_test.go rename to lib/delay_heap_test.go index 3b44acc19ca..ddf64067732 100644 --- a/nomad/delayheap/delay_heap_test.go +++ b/lib/delay_heap_test.go @@ -1,4 +1,4 @@ -package delayheap +package lib import ( "testing" diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index 01f5971047e..b3bee7159e0 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -12,7 +12,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/nomad/delayheap" + "github.com/hashicorp/nomad/lib" "github.com/hashicorp/nomad/nomad/structs" ) @@ -86,7 +86,7 @@ type EvalBroker struct { // delayHeap is a heap used to track incoming evaluations that are // not eligible to enqueue until their WaitTime - delayHeap *delayheap.DelayHeap + delayHeap *lib.DelayHeap // delayedEvalsUpdateCh is used to trigger notifications for updates // to the delayHeap @@ -142,7 +142,7 @@ func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, timeWait: make(map[string]*time.Timer), initialNackDelay: initialNackDelay, subsequentNackDelay: subsequentNackDelay, - delayHeap: delayheap.NewDelayHeap(), + delayHeap: lib.NewDelayHeap(), delayedEvalsUpdateCh: make(chan struct{}, 1), } b.stats.ByScheduler = make(map[string]*SchedulerStats) @@ -717,7 +717,7 @@ func (b *EvalBroker) flush() { b.ready = make(map[string]PendingEvaluations) b.unack = make(map[string]*unackEval) b.timeWait = make(map[string]*time.Timer) - b.delayHeap = delayheap.NewDelayHeap() + b.delayHeap = lib.NewDelayHeap() } // evalWrapper satisfies the HeapNode interface From 94e252eaa3a87682bc66d49e296efec8119f8997 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 7 Mar 2018 18:02:22 -0600 Subject: [PATCH 13/25] Extra comments, remove unnecessary if condition --- nomad/structs/structs.go | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a35574735e3..e426a3d596e 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -5502,7 +5502,7 @@ func (a *Allocation) LastEventTime() time.Time { return lastEventTime } -// NextRescheduleTime returns a time delta after which the allocation is eligible to be rescheduled +// NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled, // and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules func (a *Allocation) NextRescheduleTime(reschedulePolicy *ReschedulePolicy) (time.Time, bool) { failTime := a.LastEventTime() @@ -5527,6 +5527,8 @@ func (a *Allocation) NextRescheduleTime(reschedulePolicy *ReschedulePolicy) (tim return nextRescheduleTime, rescheduleEligible } +// NextDelay returns a duration after which the allocation can be 
rescheduled. +// It is calculated according to the delay function and previous reschedule attempts. func (a *Allocation) NextDelay(policy *ReschedulePolicy) time.Duration { delayDur := policy.Delay if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { @@ -5554,13 +5556,13 @@ func (a *Allocation) NextDelay(policy *ReschedulePolicy) time.Duration { if policy.DelayCeiling > 0 && delayDur > policy.DelayCeiling { delayDur = policy.DelayCeiling // check if delay needs to be reset - if len(a.RescheduleTracker.Events) > 0 { - lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] - timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime - if timeDiff > delayDur.Nanoseconds() { - delayDur = policy.Delay - } + + lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] + timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime + if timeDiff > delayDur.Nanoseconds() { + delayDur = policy.Delay } + } return delayDur @@ -5670,12 +5672,11 @@ type AllocListStub struct { ClientDescription string TaskStates map[string]*TaskState DeploymentStatus *AllocDeploymentStatus - - FollowupEvalID string - CreateIndex uint64 - ModifyIndex uint64 - CreateTime int64 - ModifyTime int64 + FollowupEvalID string + CreateIndex uint64 + ModifyIndex uint64 + CreateTime int64 + ModifyTime int64 } // SetEventDisplayMessage populates the display message if its not already set, From e5e2ec5a2fcf52c58e17e63feb7818dbd08d457f Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 7 Mar 2018 18:44:54 -0600 Subject: [PATCH 14/25] More small review feedback --- scheduler/generic_sched.go | 28 +++++++++++++++------------- scheduler/generic_sched_test.go | 2 +- scheduler/reconcile.go | 13 ++++++++++--- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 994c782c62b..603292b4b2a 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -42,6 +42,10 @@ const ( // blockedEvalFailedPlacements is the description used for blocked evals // that are a result of failing to place all allocations. 
blockedEvalFailedPlacements = "created to place remaining allocations" + + // maxPastRescheduleEvents is the maximum number of past reschedule event + // that we track when unlimited rescheduling is enabled + maxPastRescheduleEvents = 5 ) // SetStatusError is used to set the status of the evaluation to the given error @@ -206,6 +210,7 @@ func (s *GenericScheduler) process() (bool, error) { numTaskGroups = len(s.job.TaskGroups) } s.queuedAllocs = make(map[string]int, numTaskGroups) + s.followUpEvals = nil // Create a plan s.plan = s.eval.MakePlan(s.job) @@ -486,9 +491,10 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul // If the new allocation is replacing an older allocation then we // set the record the older allocation id so that they are chained if prevAllocation != nil { + now := time.Now() alloc.PreviousAllocation = prevAllocation.ID if missing.IsRescheduling() { - updateRescheduleTracker(alloc, prevAllocation, tg.ReschedulePolicy, time.Now()) + updateRescheduleTracker(alloc, prevAllocation, tg.ReschedulePolicy, now) } } @@ -544,8 +550,6 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs return selectOptions } -const max_past_reschedule_events = 5 - // updateRescheduleTracker carries over previous restart attempts and adds the most recent restart func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation, reschedPolicy *structs.ReschedulePolicy, now time.Time) { var rescheduleEvents []*structs.RescheduleEvent @@ -554,10 +558,10 @@ func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation if reschedPolicy != nil { interval = reschedPolicy.Interval } - // if attempts is set copy all events in the interval range + // If attempts is set copy all events in the interval range if reschedPolicy.Attempts > 0 { for _, reschedEvent := range prev.RescheduleTracker.Events { - timeDiff := time.Now().UTC().UnixNano() - reschedEvent.RescheduleTime + timeDiff := now.UnixNano() - reschedEvent.RescheduleTime // Only copy over events that are within restart interval // This keeps the list of events small in cases where there's a long chain of old restart events if interval > 0 && timeDiff <= interval.Nanoseconds() { @@ -565,16 +569,14 @@ func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation } } } else { - // for unlimited restarts, only copy the last n - copied := 0 - for i := len(prev.RescheduleTracker.Events) - 1; i >= 0 && copied < max_past_reschedule_events; i-- { + // Only copy the last n if unlimited is set + start := 0 + if len(prev.RescheduleTracker.Events) > maxPastRescheduleEvents { + start = len(prev.RescheduleTracker.Events) - maxPastRescheduleEvents + } + for i := start; i < len(prev.RescheduleTracker.Events); i++ { reschedEvent := prev.RescheduleTracker.Events[i] rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy()) - copied++ - } - // reverse it to get the correct order - for left, right := 0, len(rescheduleEvents)-1; left < right; left, right = left+1, right-1 { - rescheduleEvents[left], rescheduleEvents[right] = rescheduleEvents[right], rescheduleEvents[left] } } } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index ee7a775bcfd..be0d964e323 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -4390,7 +4390,7 @@ func Test_updateRescheduleTracker(t *testing.T) { Delay: 5 * time.Second, }, { - RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), + RescheduleTime: t1.Add(-70 * 
time.Minute).UnixNano(), PrevAllocID: prevAlloc.ID, PrevNodeID: prevAlloc.NodeID, Delay: 10 * time.Second, diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 5eedc91be82..0fc0b2a838e 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -12,6 +12,12 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +const ( + // batchedFailedAllocWindowSize is the window size used + // to batch up failed allocations before creating an eval + batchedFailedAllocWindowSize = 5 * time.Second +) + // allocUpdateType takes an existing allocation and a new job definition and // returns whether the allocation can ignore the change, requires a destructive // update, or can be inplace updated. If it can be inplace updated, an updated @@ -290,8 +296,6 @@ func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescript } } -const batchedFailedAllocWindowSize = 5 * time.Second - // computeGroup reconciles state for a particular task group. It returns whether // the deployment it is for is complete with regards to the task group. func (a *allocReconciler) computeGroup(group string, all allocSet) bool { @@ -343,7 +347,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy) // Create batched follow up evaluations for allocations that are reschedulable later - rescheduleLaterAllocs := make(map[string]*structs.Allocation) + var rescheduleLaterAllocs map[string]*structs.Allocation if len(rescheduleLater) > 0 { rescheduleLaterAllocs = a.handleDelayedReschedules(rescheduleLater, all, tg.Name) } @@ -831,6 +835,9 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted, re return } + +// handleDelayedReschedules creates batched followup evaluations with the WaitUntil field set +// for allocations that are eligible to be rescheduled later func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) allocSet { // Sort by time sort.Slice(rescheduleLater, func(i, j int) bool { From e80d0d8156623a7ffea72be30f0c5ce2992de89e Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 8 Mar 2018 08:33:44 -0600 Subject: [PATCH 15/25] Cleaner handling of batched evals --- scheduler/reconcile.go | 54 +++++++++++++++---------------------- scheduler/reconcile_util.go | 8 +++--- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 0fc0b2a838e..2bb6d9a5412 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -845,16 +845,29 @@ func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRes }) var evals []*structs.Evaluation - var allocIDs []string nextReschedTime := rescheduleLater[0].rescheduleTime allocIDToFollowupEvalID := make(map[string]string, len(rescheduleLater)) + // Create a new eval for the first batch + eval := &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: a.job.Namespace, + Priority: a.job.Priority, + Type: a.job.Type, + TriggeredBy: structs.EvalTriggerRetryFailedAlloc, + JobID: a.job.ID, + JobModifyIndex: a.job.ModifyIndex, + Status: structs.EvalStatusPending, + WaitUntil: nextReschedTime, + } + evals = append(evals, eval) for _, allocReschedInfo := range rescheduleLater { - if allocReschedInfo.rescheduleTime.UTC().UnixNano()-nextReschedTime.UTC().UnixNano() < batchedFailedAllocWindowSize.Nanoseconds() { - // add to batch - allocIDs = append(allocIDs, 
allocReschedInfo.allocID) + if allocReschedInfo.rescheduleTime.Sub(nextReschedTime) < batchedFailedAllocWindowSize { + allocIDToFollowupEvalID[allocReschedInfo.allocID] = eval.ID } else { - // create a new eval for the previous batch - eval := &structs.Evaluation{ + // Start a new batch + nextReschedTime = allocReschedInfo.rescheduleTime + // Create a new eval for the new batch + eval = &structs.Evaluation{ ID: uuid.Generate(), Namespace: a.job.Namespace, Priority: a.job.Priority, @@ -866,37 +879,14 @@ func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRes WaitUntil: nextReschedTime, } evals = append(evals, eval) - for _, allocID := range allocIDs { - allocIDToFollowupEvalID[allocID] = eval.ID - } - nextReschedTime = allocReschedInfo.rescheduleTime - // clear out this batch and start it again - allocIDs = nil - allocIDs = append(allocIDs, allocReschedInfo.allocID) - } - } - // Deal with the last batch - if len(allocIDs) > 0 { - eval := &structs.Evaluation{ - ID: uuid.Generate(), - Namespace: a.job.Namespace, - Priority: a.job.Priority, - Type: a.job.Type, - TriggeredBy: structs.EvalTriggerRetryFailedAlloc, - JobID: a.job.ID, - JobModifyIndex: a.job.ModifyIndex, - Status: structs.EvalStatusPending, - WaitUntil: nextReschedTime, - } - evals = append(evals, eval) - for _, allocID := range allocIDs { - allocIDToFollowupEvalID[allocID] = eval.ID + // Set the evalID for the first alloc in this new batch + allocIDToFollowupEvalID[allocReschedInfo.allocID] = eval.ID } } a.result.desiredFollowupEvals[tgName] = evals - // create inplace updates for every alloc ID that needs to be updated with its follow up eval ID + // Create in-place updates for every alloc ID that needs to be updated with its follow up eval ID rescheduleLaterAllocs := make(map[string]*structs.Allocation) for allocID, evalID := range allocIDToFollowupEvalID { existingAlloc := all[allocID] diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index e6d954935b7..bc3115bbfa9 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -252,11 +252,11 @@ func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs default: } if alloc.NextAllocation == "" { - //ignore allocs that have already been rescheduled + // Ignore allocs that have already been rescheduled isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, reschedulePolicy, now, true) } } else { - //ignore allocs that have already been rescheduled + // Ignore allocs that have already been rescheduled if alloc.NextAllocation == "" { isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, reschedulePolicy, now, false) } @@ -278,12 +278,12 @@ func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs func updateByReschedulable(alloc *structs.Allocation, reschedulePolicy *structs.ReschedulePolicy, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { shouldAllow := true if !batch { - // for service type jobs we ignore allocs whose desired state is stop/evict + // For service type jobs we ignore allocs whose desired state is stop/evict // everything else is either rescheduleable or untainted shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict } rescheduleTime, eligible := alloc.NextRescheduleTime(reschedulePolicy) - // we consider a time difference of less than 5 seconds to be eligible + // We 
consider a time difference of less than 5 seconds to be eligible // because we collapse allocations that failed within 5 seconds into a single evaluation if eligible && now.After(rescheduleTime) { rescheduleNow = true From 8736b9f068b1bf6862b68ab27d212097a9294444 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 8 Mar 2018 08:38:21 -0600 Subject: [PATCH 16/25] Update comment about WaitTime --- scheduler/generic_sched.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 603292b4b2a..fc6a1cbbd24 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -76,7 +76,7 @@ type GenericScheduler struct { ctx *EvalContext stack *GenericStack - // Deprecated, was used in pre Nomad 0.7 rolling update stanza + // Deprecated, was used in pre Nomad 0.7 rolling update stanza and in node draining prior to Nomad 0.8 followupEvalWait time.Duration nextEval *structs.Evaluation followUpEvals []*structs.Evaluation From abeab12b9e9418341f532ede4c639db0998a3130 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 8 Mar 2018 09:36:01 -0600 Subject: [PATCH 17/25] Get reschedule policy from the alloc directly --- nomad/structs/structs.go | 20 ++++++++++++++++---- nomad/structs/structs_test.go | 6 +++++- scheduler/generic_sched.go | 7 ++++--- scheduler/generic_sched_test.go | 3 ++- scheduler/reconcile.go | 2 +- scheduler/reconcile_util.go | 10 +++++----- 6 files changed, 33 insertions(+), 15 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e426a3d596e..8a2c09004f1 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -5502,14 +5502,25 @@ func (a *Allocation) LastEventTime() time.Time { return lastEventTime } +// ReschedulePolicy returns the reschedule policy based on the task group +func (a *Allocation) ReschedulePolicy() *ReschedulePolicy { + tg := a.Job.LookupTaskGroup(a.TaskGroup) + if tg == nil { + return nil + } + return tg.ReschedulePolicy +} + // NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled, // and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules -func (a *Allocation) NextRescheduleTime(reschedulePolicy *ReschedulePolicy) (time.Time, bool) { +func (a *Allocation) NextRescheduleTime() (time.Time, bool) { failTime := a.LastEventTime() - if a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() { + reschedulePolicy := a.ReschedulePolicy() + if a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil { return time.Time{}, false } - nextDelay := a.NextDelay(reschedulePolicy) + + nextDelay := a.NextDelay() nextRescheduleTime := failTime.Add(nextDelay) rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil) if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil { @@ -5529,7 +5540,8 @@ func (a *Allocation) NextRescheduleTime(reschedulePolicy *ReschedulePolicy) (tim // NextDelay returns a duration after which the allocation can be rescheduled. // It is calculated according to the delay function and previous reschedule attempts. 
-func (a *Allocation) NextDelay(policy *ReschedulePolicy) time.Duration { +func (a *Allocation) NextDelay() time.Duration { + policy := a.ReschedulePolicy() delayDur := policy.Delay if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { return delayDur diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 50c0ea0ade3..15cf24e42da 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -3476,7 +3476,11 @@ func TestAllocation_NextDelay(t *testing.T) { for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { require := require.New(t) - reschedTime, allowed := tc.alloc.NextRescheduleTime(tc.reschedulePolicy) + j := testJob() + j.TaskGroups[0].ReschedulePolicy = tc.reschedulePolicy + tc.alloc.Job = j + tc.alloc.TaskGroup = j.TaskGroups[0].Name + reschedTime, allowed := tc.alloc.NextRescheduleTime() require.Equal(tc.expectedRescheduleEligible, allowed) require.Equal(tc.expectedRescheduleTime, reschedTime) }) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index fc6a1cbbd24..4060e15b3e2 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -494,7 +494,7 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul now := time.Now() alloc.PreviousAllocation = prevAllocation.ID if missing.IsRescheduling() { - updateRescheduleTracker(alloc, prevAllocation, tg.ReschedulePolicy, now) + updateRescheduleTracker(alloc, prevAllocation, now) } } @@ -551,7 +551,8 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs } // updateRescheduleTracker carries over previous restart attempts and adds the most recent restart -func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation, reschedPolicy *structs.ReschedulePolicy, now time.Time) { +func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation, now time.Time) { + reschedPolicy := prev.ReschedulePolicy() var rescheduleEvents []*structs.RescheduleEvent if prev.RescheduleTracker != nil { var interval time.Duration @@ -580,7 +581,7 @@ func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation } } } - nextDelay := prev.NextDelay(reschedPolicy) + nextDelay := prev.NextDelay() rescheduleEvent := structs.NewRescheduleEvent(now.UnixNano(), prev.ID, prev.NodeID, nextDelay) rescheduleEvents = append(rescheduleEvents, rescheduleEvent) alloc.RescheduleTracker = &structs.RescheduleTracker{Events: rescheduleEvents} diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index be0d964e323..349a597822a 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -4437,7 +4437,8 @@ func Test_updateRescheduleTracker(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { require := require.New(t) prevAlloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.prevAllocEvents} - updateRescheduleTracker(alloc, prevAlloc, tc.reschedPolicy, tc.reschedTime) + prevAlloc.Job.LookupTaskGroup(prevAlloc.TaskGroup).ReschedulePolicy = tc.reschedPolicy + updateRescheduleTracker(alloc, prevAlloc, tc.reschedTime) require.Equal(tc.expectedRescheduleEvents, alloc.RescheduleTracker.Events) }) } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 2bb6d9a5412..89999c614ee 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -344,7 +344,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { untainted, migrate, lost := 
all.filterByTainted(a.taintedNodes) // Determine what set of terminal allocations need to be rescheduled - untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy) + untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch) // Create batched follow up evaluations for allocations that are reschedulable later var rescheduleLaterAllocs map[string]*structs.Allocation diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index bc3115bbfa9..db3a5ff1e3d 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -230,7 +230,7 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi // filterByRescheduleable filters the allocation set to return the set of allocations that are either // terminal or running, and a set of allocations that must be rescheduled now. Allocations that can be rescheduled // at a future time are also returned so that we can create follow up evaluations for them -func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs.ReschedulePolicy) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) { +func (a allocSet) filterByRescheduleable(isBatch bool) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) { untainted = make(map[string]*structs.Allocation) rescheduleNow = make(map[string]*structs.Allocation) @@ -253,12 +253,12 @@ func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs } if alloc.NextAllocation == "" { // Ignore allocs that have already been rescheduled - isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, reschedulePolicy, now, true) + isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, true) } } else { // Ignore allocs that have already been rescheduled if alloc.NextAllocation == "" { - isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, reschedulePolicy, now, false) + isUntainted, eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, false) } } if isUntainted { @@ -275,14 +275,14 @@ func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs // updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation // should be rescheduled now, later or left in the untainted set -func updateByReschedulable(alloc *structs.Allocation, reschedulePolicy *structs.ReschedulePolicy, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { +func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool) (untainted, rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) { shouldAllow := true if !batch { // For service type jobs we ignore allocs whose desired state is stop/evict // everything else is either rescheduleable or untainted shouldAllow = alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict } - rescheduleTime, eligible := alloc.NextRescheduleTime(reschedulePolicy) + rescheduleTime, eligible := alloc.NextRescheduleTime() // We consider a time difference of less than 5 seconds to be eligible // because we collapse allocations that failed within 5 seconds into a single evaluation if eligible && now.After(rescheduleTime) { From 9bccc53857ae5b75459f31ad81654201cdcbb03d Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 8 
Mar 2018 17:30:54 -0600 Subject: [PATCH 18/25] Avoids unnecessary timer object creation --- nomad/eval_broker.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index b3bee7159e0..5a6845f244a 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -741,13 +741,21 @@ func (d *evalWrapper) Namespace() string { // pending evaluations before enqueuing them func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) { var timerChannel <-chan time.Time + var delayTimer *time.Timer + init := false for { eval, waitUntil := b.nextDelayedEval() if waitUntil.IsZero() { timerChannel = nil } else { launchDur := waitUntil.Sub(time.Now().UTC()) - timerChannel = time.After(launchDur) + if !init { + delayTimer = time.NewTimer(launchDur) + init = true + } else { + delayTimer.Reset(launchDur) + } + timerChannel = delayTimer.C } select { From 854ae91ae24d9a607c4afea2cb87709bcc341695 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 12 Mar 2018 14:32:43 -0500 Subject: [PATCH 19/25] Address more code review feedback --- nomad/eval_broker.go | 8 ++++---- scheduler/generic_sched.go | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index 5a6845f244a..b8e13283dae 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -700,7 +700,9 @@ func (b *EvalBroker) flush() { } // Cancel the delayed evaluations goroutine - b.delayedEvalCancelFunc() + if b.delayedEvalCancelFunc != nil { + b.delayedEvalCancelFunc() + } // Clear out the update channel for delayed evaluations b.delayedEvalsUpdateCh = make(chan struct{}, 1) @@ -742,16 +744,14 @@ func (d *evalWrapper) Namespace() string { func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) { var timerChannel <-chan time.Time var delayTimer *time.Timer - init := false for { eval, waitUntil := b.nextDelayedEval() if waitUntil.IsZero() { timerChannel = nil } else { launchDur := waitUntil.Sub(time.Now().UTC()) - if !init { + if delayTimer == nil { delayTimer = time.NewTimer(launchDur) - init = true } else { delayTimer.Reset(launchDur) } diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 4060e15b3e2..94dbc8a4b60 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -429,6 +429,9 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul // Update the set of placement nodes s.stack.SetNodes(nodes) + // Capture current time to use as the start time for any rescheduled allocations + now := time.Now() + // Have to handle destructive changes first as we need to discount their // resources. To understand this imagine the resources were reduced and the // count was scaled up. 
@@ -491,7 +494,6 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul // If the new allocation is replacing an older allocation then we // set the record the older allocation id so that they are chained if prevAllocation != nil { - now := time.Now() alloc.PreviousAllocation = prevAllocation.ID if missing.IsRescheduling() { updateRescheduleTracker(alloc, prevAllocation, now) From 73783e023f9643632930e842222106436cb19b0e Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 12 Mar 2018 14:39:18 -0500 Subject: [PATCH 20/25] Update comment --- nomad/eval_broker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index b8e13283dae..b6dbedcc4c4 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -775,7 +775,7 @@ func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) { } // nextDelayedEval returns the next delayed eval to launch and when it should be enqueued. -// This peeks at the heap to return the top. +// This peeks at the heap to return the top. If the heap is empty, this returns nil and zero time. func (b *EvalBroker) nextDelayedEval() (*structs.Evaluation, time.Time) { // If there is nothing wait for an update. if b.delayHeap.Length() == 0 { From d4056c4489cf51b13cc1d895aa4eedc4fc99fd15 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 13 Mar 2018 10:06:26 -0500 Subject: [PATCH 21/25] Rename DelayCeiling to MaxDelay --- api/evaluations.go | 1 + api/jobs_test.go | 10 ++--- api/tasks.go | 12 ++--- api/tasks_test.go | 32 +++++++------- command/agent/job_endpoint.go | 2 +- command/agent/job_endpoint_test.go | 4 +- jobspec/parse.go | 2 +- jobspec/parse_test.go | 2 +- .../reschedule-job-unlimited.hcl | 2 +- nomad/structs/diff_test.go | 44 +++++++++---------- nomad/structs/structs.go | 32 +++++++------- nomad/structs/structs_test.go | 32 +++++++------- scheduler/generic_sched_test.go | 12 ++--- scheduler/reconcile_test.go | 4 +- 14 files changed, 96 insertions(+), 95 deletions(-) diff --git a/api/evaluations.go b/api/evaluations.go index 5aa893469ee..f8ec9432923 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -67,6 +67,7 @@ type Evaluation struct { Status string StatusDescription string Wait time.Duration + WaitUntil time.Time NextEval string PreviousEval string BlockedEval string diff --git a/api/jobs_test.go b/api/jobs_test.go index 41fb411402a..5d7a2675038 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -140,7 +140,7 @@ func TestJobs_Canonicalize(t *testing.T) { Interval: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr("exponential"), Delay: helper.TimeToPtr(30 * time.Second), - DelayCeiling: helper.TimeToPtr(1 * time.Hour), + MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, Tasks: []*Task{ @@ -210,7 +210,7 @@ func TestJobs_Canonicalize(t *testing.T) { Interval: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr("exponential"), Delay: helper.TimeToPtr(30 * time.Second), - DelayCeiling: helper.TimeToPtr(1 * time.Hour), + MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, Tasks: []*Task{ @@ -347,7 +347,7 @@ func TestJobs_Canonicalize(t *testing.T) { Interval: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr("exponential"), Delay: helper.TimeToPtr(30 * time.Second), - DelayCeiling: helper.TimeToPtr(1 * time.Hour), + MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, EphemeralDisk: &EphemeralDisk{ @@ -566,7 +566,7 @@ func TestJobs_Canonicalize(t 
*testing.T) { Interval: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr("exponential"), Delay: helper.TimeToPtr(30 * time.Second), - DelayCeiling: helper.TimeToPtr(1 * time.Hour), + MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, Update: &UpdateStrategy{ @@ -606,7 +606,7 @@ func TestJobs_Canonicalize(t *testing.T) { Interval: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr("exponential"), Delay: helper.TimeToPtr(30 * time.Second), - DelayCeiling: helper.TimeToPtr(1 * time.Hour), + MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, Update: &UpdateStrategy{ diff --git a/api/tasks.go b/api/tasks.go index ef123fa06d5..5bf95780055 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -95,8 +95,8 @@ type ReschedulePolicy struct { // attempts. Valid values are "exponential", "linear", and "fibonacci". DelayFunction *string `mapstructure:"delay_function"` - // DelayCeiling is an upper bound on the delay. - DelayCeiling *time.Duration `mapstructure:"delay_ceiling"` + // MaxDelay is an upper bound on the delay. + MaxDelay *time.Duration `mapstructure:"max_delay"` // Unlimited allows rescheduling attempts until they succeed Unlimited *bool `mapstructure:"unlimited"` @@ -115,8 +115,8 @@ func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { if rp.DelayFunction != nil { r.DelayFunction = rp.DelayFunction } - if rp.DelayCeiling != nil { - r.DelayCeiling = rp.DelayCeiling + if rp.MaxDelay != nil { + r.MaxDelay = rp.MaxDelay } if rp.Unlimited != nil { r.Unlimited = rp.Unlimited @@ -346,7 +346,7 @@ func (g *TaskGroup) Canonicalize(job *Job) { Interval: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Interval), Delay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.Delay), DelayFunction: helper.StringToPtr(structs.DefaultServiceJobReschedulePolicy.DelayFunction), - DelayCeiling: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.DelayCeiling), + MaxDelay: helper.TimeToPtr(structs.DefaultServiceJobReschedulePolicy.MaxDelay), Unlimited: helper.BoolToPtr(structs.DefaultServiceJobReschedulePolicy.Unlimited), } case "batch": @@ -355,7 +355,7 @@ func (g *TaskGroup) Canonicalize(job *Job) { Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), - DelayCeiling: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.DelayCeiling), + MaxDelay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.MaxDelay), Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), } default: diff --git a/api/tasks_test.go b/api/tasks_test.go index 284b6e8a46e..3280507ad59 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -288,7 +288,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), - DelayCeiling: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.DelayCeiling), + MaxDelay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.MaxDelay), Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), }, }, @@ -298,7 +298,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(0), Interval: 
helper.TimeToPtr(0), Delay: helper.TimeToPtr(0), - DelayCeiling: helper.TimeToPtr(0), + MaxDelay: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr(""), Unlimited: helper.BoolToPtr(false), }, @@ -307,7 +307,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(0), Interval: helper.TimeToPtr(0), Delay: helper.TimeToPtr(0), - DelayCeiling: helper.TimeToPtr(0), + MaxDelay: helper.TimeToPtr(0), DelayFunction: helper.StringToPtr(""), Unlimited: helper.BoolToPtr(false), }, @@ -318,7 +318,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(1), Interval: helper.TimeToPtr(20 * time.Second), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(10 * time.Minute), + MaxDelay: helper.TimeToPtr(10 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: helper.BoolToPtr(false), }, @@ -327,7 +327,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(1), Interval: helper.TimeToPtr(20 * time.Second), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(10 * time.Minute), + MaxDelay: helper.TimeToPtr(10 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: helper.BoolToPtr(false), }, @@ -339,7 +339,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(5), Interval: helper.TimeToPtr(2 * time.Minute), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(10 * time.Minute), + MaxDelay: helper.TimeToPtr(10 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: helper.BoolToPtr(false), }, @@ -347,7 +347,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(5), Interval: helper.TimeToPtr(2 * time.Minute), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(10 * time.Minute), + MaxDelay: helper.TimeToPtr(10 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: helper.BoolToPtr(false), }, @@ -355,9 +355,9 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { { desc: "Merge from job", jobReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), - Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(10 * time.Minute), + Attempts: helper.IntToPtr(1), + Delay: helper.TimeToPtr(20 * time.Second), + MaxDelay: helper.TimeToPtr(10 * time.Minute), }, taskReschedulePolicy: &ReschedulePolicy{ Interval: helper.TimeToPtr(5 * time.Minute), @@ -368,7 +368,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(1), Interval: helper.TimeToPtr(5 * time.Minute), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(10 * time.Minute), + MaxDelay: helper.TimeToPtr(10 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: helper.BoolToPtr(false), }, @@ -376,13 +376,13 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { { desc: "Override from group", jobReschedulePolicy: &ReschedulePolicy{ - Attempts: helper.IntToPtr(1), - DelayCeiling: helper.TimeToPtr(10 * time.Second), + Attempts: helper.IntToPtr(1), + MaxDelay: helper.TimeToPtr(10 * time.Second), }, taskReschedulePolicy: &ReschedulePolicy{ Attempts: helper.IntToPtr(5), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(20 * time.Minute), + MaxDelay: helper.TimeToPtr(20 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: 
helper.BoolToPtr(false), }, @@ -390,7 +390,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Attempts: helper.IntToPtr(5), Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), Delay: helper.TimeToPtr(20 * time.Second), - DelayCeiling: helper.TimeToPtr(20 * time.Minute), + MaxDelay: helper.TimeToPtr(20 * time.Minute), DelayFunction: helper.StringToPtr("linear"), Unlimited: helper.BoolToPtr(false), }, @@ -406,7 +406,7 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { Interval: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Interval), Delay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.Delay), DelayFunction: helper.StringToPtr(structs.DefaultBatchJobReschedulePolicy.DelayFunction), - DelayCeiling: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.DelayCeiling), + MaxDelay: helper.TimeToPtr(structs.DefaultBatchJobReschedulePolicy.MaxDelay), Unlimited: helper.BoolToPtr(structs.DefaultBatchJobReschedulePolicy.Unlimited), }, }, diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index a059343ab33..dc64afcfa3d 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -643,7 +643,7 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) { Interval: *taskGroup.ReschedulePolicy.Interval, Delay: *taskGroup.ReschedulePolicy.Delay, DelayFunction: *taskGroup.ReschedulePolicy.DelayFunction, - DelayCeiling: *taskGroup.ReschedulePolicy.DelayCeiling, + MaxDelay: *taskGroup.ReschedulePolicy.MaxDelay, Unlimited: *taskGroup.ReschedulePolicy.Unlimited, } diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index ccc58276a3b..dc50dd77e66 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -1177,7 +1177,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { DelayFunction: helper.StringToPtr("linear"), Delay: helper.TimeToPtr(30 * time.Second), Unlimited: helper.BoolToPtr(true), - DelayCeiling: helper.TimeToPtr(20 * time.Minute), + MaxDelay: helper.TimeToPtr(20 * time.Minute), }, EphemeralDisk: &api.EphemeralDisk{ SizeMB: helper.IntToPtr(100), @@ -1393,7 +1393,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { DelayFunction: "linear", Delay: 30 * time.Second, Unlimited: true, - DelayCeiling: 20 * time.Minute, + MaxDelay: 20 * time.Minute, }, EphemeralDisk: &structs.EphemeralDisk{ SizeMB: 100, diff --git a/jobspec/parse.go b/jobspec/parse.go index 20e06016cb3..d6f235e05f2 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -448,7 +448,7 @@ func parseReschedulePolicy(final **api.ReschedulePolicy, list *ast.ObjectList) e "interval", "unlimited", "delay", - "delay_ceiling", + "max_delay", "delay_function", } if err := helper.CheckHCLKeys(obj.Val, valid); err != nil { diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 45831a577a2..dbf1200570e 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -713,7 +713,7 @@ func TestParse(t *testing.T) { Reschedule: &api.ReschedulePolicy{ DelayFunction: helper.StringToPtr("exponential"), Delay: helper.TimeToPtr(10 * time.Second), - DelayCeiling: helper.TimeToPtr(120 * time.Second), + MaxDelay: helper.TimeToPtr(120 * time.Second), Unlimited: helper.BoolToPtr(true), }, TaskGroups: []*api.TaskGroup{ diff --git a/jobspec/test-fixtures/reschedule-job-unlimited.hcl b/jobspec/test-fixtures/reschedule-job-unlimited.hcl index b2840c123a1..c51be506a56 100644 --- a/jobspec/test-fixtures/reschedule-job-unlimited.hcl +++ 
b/jobspec/test-fixtures/reschedule-job-unlimited.hcl @@ -4,7 +4,7 @@ job "foo" { reschedule { delay = "10s", delay_function = "exponential" - delay_ceiling="120s" + max_delay="120s" unlimited = true } group "bar" { diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 52f28938bdb..09453b38246 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -1502,7 +1502,7 @@ func TestTaskGroupDiff(t *testing.T) { Attempts: 1, Interval: 15 * time.Second, Delay: 5 * time.Second, - DelayCeiling: 20 * time.Second, + MaxDelay: 20 * time.Second, DelayFunction: "exponential", Unlimited: false, }, @@ -1526,12 +1526,6 @@ func TestTaskGroupDiff(t *testing.T) { Old: "", New: "5000000000", }, - { - Type: DiffTypeAdded, - Name: "DelayCeiling", - Old: "", - New: "20000000000", - }, { Type: DiffTypeAdded, Name: "DelayFunction", @@ -1544,6 +1538,12 @@ func TestTaskGroupDiff(t *testing.T) { Old: "", New: "15000000000", }, + { + Type: DiffTypeAdded, + Name: "MaxDelay", + Old: "", + New: "20000000000", + }, { Type: DiffTypeAdded, Name: "Unlimited", @@ -1562,7 +1562,7 @@ func TestTaskGroupDiff(t *testing.T) { Attempts: 1, Interval: 15 * time.Second, Delay: 5 * time.Second, - DelayCeiling: 20 * time.Second, + MaxDelay: 20 * time.Second, DelayFunction: "exponential", Unlimited: false, }, @@ -1587,12 +1587,6 @@ func TestTaskGroupDiff(t *testing.T) { Old: "5000000000", New: "", }, - { - Type: DiffTypeDeleted, - Name: "DelayCeiling", - Old: "20000000000", - New: "", - }, { Type: DiffTypeDeleted, Name: "DelayFunction", @@ -1605,6 +1599,12 @@ func TestTaskGroupDiff(t *testing.T) { Old: "15000000000", New: "", }, + { + Type: DiffTypeDeleted, + Name: "MaxDelay", + Old: "20000000000", + New: "", + }, { Type: DiffTypeDeleted, Name: "Unlimited", @@ -1624,7 +1624,7 @@ func TestTaskGroupDiff(t *testing.T) { Interval: 1 * time.Second, DelayFunction: "exponential", Delay: 20 * time.Second, - DelayCeiling: 1 * time.Minute, + MaxDelay: 1 * time.Minute, Unlimited: false, }, }, @@ -1634,7 +1634,7 @@ func TestTaskGroupDiff(t *testing.T) { Interval: 2 * time.Second, DelayFunction: "linear", Delay: 30 * time.Second, - DelayCeiling: 1 * time.Minute, + MaxDelay: 1 * time.Minute, Unlimited: true, }, }, @@ -1713,12 +1713,6 @@ func TestTaskGroupDiff(t *testing.T) { Old: "0", New: "0", }, - { - Type: DiffTypeNone, - Name: "DelayCeiling", - Old: "0", - New: "0", - }, { Type: DiffTypeNone, Name: "DelayFunction", @@ -1731,6 +1725,12 @@ func TestTaskGroupDiff(t *testing.T) { Old: "1000000000", New: "2000000000", }, + { + Type: DiffTypeNone, + Name: "MaxDelay", + Old: "0", + New: "0", + }, { Type: DiffTypeNone, Name: "Unlimited", diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8a2c09004f1..2999026b618 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2660,7 +2660,7 @@ var ( DefaultServiceJobReschedulePolicy = ReschedulePolicy{ Delay: 30 * time.Second, DelayFunction: "exponential", - DelayCeiling: 1 * time.Hour, + MaxDelay: 1 * time.Hour, Unlimited: true, } DefaultBatchJobReschedulePolicy = ReschedulePolicy{ @@ -2770,8 +2770,8 @@ type ReschedulePolicy struct { // attempts. Valid values are "exponential", "linear", and "fibonacci". DelayFunction string - // DelayCeiling is an upper bound on the delay. - DelayCeiling time.Duration + // MaxDelay is an upper bound on the delay. + MaxDelay time.Duration // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set // between reschedule attempts. 
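
Patch 21 renames DelayCeiling to MaxDelay throughout the API, jobspec, and structs packages; the validation and viableAttempts hunks that follow reason about how each delay function grows and where MaxDelay caps it. The sketch below is a simplified, hypothetical illustration of those progressions, with a made-up boundedDelays helper; it is not the structs package code, and it ignores the series reset that NextDelay performs once the cap is reached:

package main

import (
	"fmt"
	"math"
	"time"
)

// boundedDelays illustrates the delay progressions the rescheduler reasons
// about: each attempt's delay grows per the delay function but is capped at
// maxDelay. Simplified sketch only.
func boundedDelays(fn string, base, maxDelay time.Duration, attempts int) []time.Duration {
	out := make([]time.Duration, 0, attempts)
	prev, prev2 := base, base // seeds for the fibonacci progression
	for i := 0; i < attempts; i++ {
		var d time.Duration
		switch fn {
		case "exponential":
			d = time.Duration(math.Pow(2, float64(i))) * base
		case "fibonacci":
			if i < 2 {
				d = base
			} else {
				d = prev + prev2
			}
		default: // "linear"
			d = base
		}
		if d > maxDelay {
			d = maxDelay // MaxDelay is the upper bound on any single delay
		}
		prev2, prev = prev, d
		out = append(out, d)
	}
	return out
}

func main() {
	fmt.Println(boundedDelays("exponential", 5*time.Second, 90*time.Second, 6))
	// [5s 10s 20s 40s 1m20s 1m30s]
	fmt.Println(boundedDelays("fibonacci", 5*time.Second, 50*time.Second, 7))
	// [5s 5s 10s 15s 25s 40s 50s]
}
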
@@ -2812,14 +2812,14 @@ func (r *ReschedulePolicy) Validate() error { delayPreCheck = false } - // Validate DelayCeiling if not using linear delay progression + // Validate MaxDelay if not using linear delay progression if r.DelayFunction != "linear" { - if r.DelayCeiling.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { + if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { multierror.Append(&mErr, fmt.Errorf("Delay Ceiling cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) delayPreCheck = false } - if r.DelayCeiling < r.Delay { - multierror.Append(&mErr, fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", r.Delay, r.DelayCeiling)) + if r.MaxDelay < r.Delay { + multierror.Append(&mErr, fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay)) delayPreCheck = false } @@ -2862,7 +2862,7 @@ func (r *ReschedulePolicy) validateDelayParams() error { "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction)) } else { multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ - "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.DelayCeiling)) + "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay)) } multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Minute), r.Attempts)) return mErr.ErrorOrNil() @@ -2882,8 +2882,8 @@ func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { case "exponential": for i := 0; i < r.Attempts; i++ { nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay - if nextDelay > r.DelayCeiling { - nextDelay = r.DelayCeiling + if nextDelay > r.MaxDelay { + nextDelay = r.MaxDelay recommendedInterval += nextDelay } else { recommendedInterval = nextDelay @@ -2904,11 +2904,11 @@ func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { var nextDelay time.Duration if reachedCeiling { //switch to linear - nextDelay = slots[i-1] + r.DelayCeiling + nextDelay = slots[i-1] + r.MaxDelay } else { nextDelay = slots[i-1] + slots[i-2] - if nextDelay > r.DelayCeiling { - nextDelay = r.DelayCeiling + if nextDelay > r.MaxDelay { + nextDelay = r.MaxDelay reachedCeiling = true } } @@ -5556,7 +5556,7 @@ func (a *Allocation) NextDelay() time.Duration { fibN2Delay := events[len(events)-2].Delay // Handle reset of delay ceiling which should cause // a new series to start - if fibN2Delay == policy.DelayCeiling && fibN1Delay == policy.Delay { + if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay { delayDur = fibN1Delay } else { delayDur = fibN1Delay + fibN2Delay @@ -5565,8 +5565,8 @@ func (a *Allocation) NextDelay() time.Duration { default: return delayDur } - if policy.DelayCeiling > 0 && delayDur > policy.DelayCeiling { - delayDur = policy.DelayCeiling + if policy.MaxDelay > 0 && delayDur > policy.MaxDelay { + delayDur = policy.MaxDelay // check if delay needs to be reset lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 15cf24e42da..d249d30c590 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -2464,7 +2464,7 @@ func TestReschedulePolicy_Validate(t *testing.T) { Attempts: 5, Interval: 1 * time.Hour, Delay: 30 * time.Second, - DelayCeiling: 5 * time.Minute, + MaxDelay: 5 * 
time.Minute, DelayFunction: "exponential"}, }, { @@ -2473,7 +2473,7 @@ func TestReschedulePolicy_Validate(t *testing.T) { Attempts: 5, Interval: 15 * time.Minute, Delay: 10 * time.Second, - DelayCeiling: 5 * time.Minute, + MaxDelay: 5 * time.Minute, DelayFunction: "fibonacci"}, }, { @@ -2495,7 +2495,7 @@ func TestReschedulePolicy_Validate(t *testing.T) { Interval: 8 * time.Second, DelayFunction: "exponential", Delay: 15 * time.Second, - DelayCeiling: 5 * time.Second}, + MaxDelay: 5 * time.Second}, errors: []error{ fmt.Errorf("Delay Ceiling cannot be less than Delay %v (got %v)", 15*time.Second, 5*time.Second), @@ -2535,7 +2535,7 @@ func TestReschedulePolicy_Validate(t *testing.T) { Attempts: 10, Interval: 30 * time.Minute, Delay: 5 * time.Minute, - DelayCeiling: 40 * time.Minute, + MaxDelay: 40 * time.Minute, DelayFunction: "exponential", }, errors: []error{ @@ -2554,7 +2554,7 @@ func TestReschedulePolicy_Validate(t *testing.T) { Attempts: 10, Interval: 1 * time.Hour, Delay: 20 * time.Minute, - DelayCeiling: 80 * time.Minute, + MaxDelay: 80 * time.Minute, DelayFunction: "fibonacci", }, errors: []error{ @@ -2572,7 +2572,7 @@ func TestReschedulePolicy_Validate(t *testing.T) { Unlimited: true, DelayFunction: "exponential", Delay: 5 * time.Minute, - DelayCeiling: 1 * time.Hour, + MaxDelay: 1 * time.Hour, }, }, { @@ -3115,7 +3115,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "exponential", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3132,7 +3132,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "exponential", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3164,7 +3164,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "exponential", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3205,7 +3205,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "exponential", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3253,7 +3253,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "fibonacci", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3269,7 +3269,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "fibonacci", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3297,7 +3297,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "fibonacci", Delay: 5 * time.Second, - DelayCeiling: 90 * time.Second, + MaxDelay: 90 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3337,7 +3337,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "fibonacci", Delay: 5 * time.Second, - DelayCeiling: 50 * time.Second, + MaxDelay: 50 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3381,7 +3381,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: 
"fibonacci", Delay: 5 * time.Second, - DelayCeiling: 50 * time.Second, + MaxDelay: 50 * time.Second, Unlimited: true, }, alloc: &Allocation{ @@ -3425,7 +3425,7 @@ func TestAllocation_NextDelay(t *testing.T) { reschedulePolicy: &ReschedulePolicy{ DelayFunction: "fibonacci", Delay: 5 * time.Second, - DelayCeiling: 50 * time.Second, + MaxDelay: 50 * time.Second, Unlimited: true, }, alloc: &Allocation{ diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 349a597822a..5b21034eb9c 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -2733,7 +2733,7 @@ func TestServiceSched_Reschedule_OnceNow(t *testing.T) { Attempts: 1, Interval: 15 * time.Minute, Delay: 5 * time.Second, - DelayCeiling: 1 * time.Minute, + MaxDelay: 1 * time.Minute, DelayFunction: "linear", } tgName := job.TaskGroups[0].Name @@ -2846,7 +2846,7 @@ func TestServiceSched_Reschedule_Later(t *testing.T) { Attempts: 1, Interval: 15 * time.Minute, Delay: delayDuration, - DelayCeiling: 1 * time.Minute, + MaxDelay: 1 * time.Minute, DelayFunction: "linear", } tgName := job.TaskGroups[0].Name @@ -3072,7 +3072,7 @@ func TestServiceSched_Reschedule_PruneEvents(t *testing.T) { job.TaskGroups[0].Count = 2 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ DelayFunction: "exponential", - DelayCeiling: 1 * time.Hour, + MaxDelay: 1 * time.Hour, Delay: 5 * time.Second, Unlimited: true, } @@ -4270,7 +4270,7 @@ func Test_updateRescheduleTracker(t *testing.T) { PrevAllocID: prevAlloc.ID, PrevNodeID: prevAlloc.NodeID, Delay: 5 * time.Second}}, - reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", DelayCeiling: 60 * time.Second}, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 60 * time.Second}, reschedTime: t1, expectedRescheduleEvents: []*structs.RescheduleEvent{ { @@ -4339,7 +4339,7 @@ func Test_updateRescheduleTracker(t *testing.T) { Delay: 105 * time.Second, }, }, - reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", DelayCeiling: 240 * time.Second}, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 240 * time.Second}, reschedTime: t1, expectedRescheduleEvents: []*structs.RescheduleEvent{ { @@ -4408,7 +4408,7 @@ func Test_updateRescheduleTracker(t *testing.T) { Delay: 40 * time.Second, }, }, - reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", DelayCeiling: 240 * time.Second}, + reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", MaxDelay: 240 * time.Second}, reschedTime: t1, expectedRescheduleEvents: []*structs.RescheduleEvent{ { diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 7866a20964e..34f6eddbfa0 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -1456,7 +1456,7 @@ func TestReconciler_RescheduleLater_Service(t *testing.T) { now := time.Now() // Set up reschedule policy delayDur := 15 * time.Second - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, DelayCeiling: 1 * time.Hour} + job.TaskGroups[0].ReschedulePolicy 
= &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour} // Create 5 existing allocations var allocs []*structs.Allocation @@ -1526,7 +1526,7 @@ func TestReconciler_RescheduleNow_Service(t *testing.T) { tgName := job.TaskGroups[0].Name now := time.Now() // Set up reschedule policy - job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayCeiling: 1 * time.Hour} + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: 5 * time.Second, MaxDelay: 1 * time.Hour} // Create 5 existing allocations var allocs []*structs.Allocation From 74bd789c29d8350a4ffa0c425c6fe73f257444ec Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 13 Mar 2018 10:27:07 -0500 Subject: [PATCH 22/25] Fix formatting --- command/agent/job_endpoint.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index dc64afcfa3d..2012513a79b 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -643,7 +643,7 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) { Interval: *taskGroup.ReschedulePolicy.Interval, Delay: *taskGroup.ReschedulePolicy.Delay, DelayFunction: *taskGroup.ReschedulePolicy.DelayFunction, - MaxDelay: *taskGroup.ReschedulePolicy.MaxDelay, + MaxDelay: *taskGroup.ReschedulePolicy.MaxDelay, Unlimited: *taskGroup.ReschedulePolicy.Unlimited, } From 69c4a58c340a9f08ede7b1ca3c00fc6f97813a6c Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Fri, 2 Mar 2018 17:43:49 -0600 Subject: [PATCH 23/25] Show reschedule eligibility time in alloc-status when followup evalid is available --- command/alloc_status.go | 18 ++++++++++++++++++ command/helpers.go | 14 +++++++++++--- command/helpers_test.go | 1 + 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/command/alloc_status.go b/command/alloc_status.go index 1bea22610d7..b38c9676081 100644 --- a/command/alloc_status.go +++ b/command/alloc_status.go @@ -283,6 +283,13 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength basic = append(basic, fmt.Sprintf("Replacement Alloc ID|%s", limit(alloc.NextAllocation, uuidLength))) } + if alloc.FollowupEvalID != "" { + nextEvalTime := futureEvalTimePretty(alloc.FollowupEvalID, client) + if nextEvalTime != "" { + basic = append(basic, + fmt.Sprintf("Reschedule Eligibility|%s", nextEvalTime)) + } + } if verbose { basic = append(basic, @@ -296,6 +303,17 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength return formatKV(basic), nil } +// lookupEvalTime returns a human +func futureEvalTimePretty(evalID string, client *api.Client) string { + evaluation, _, err := client.Evaluations().Info(evalID, nil) + // Eval time is not a critical output, + // don't return it on errors, if its not set or already in the past + if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { + return "" + } + return prettyTimeDiff(evaluation.WaitUntil, time.Now()) +} + // outputTaskDetails prints task details for each task in the allocation, // optionally printing verbose statistics if displayStats is set func (c *AllocStatusCommand) outputTaskDetails(alloc *api.Allocation, stats *api.AllocResourceUsage, displayStats bool) { diff --git a/command/helpers.go b/command/helpers.go index 68f0f15bc1a..9b1b6fed86b 100644 --- a/command/helpers.go +++ b/command/helpers.go 
@@ -103,7 +103,15 @@ func prettyTimeDiff(first, second time.Time) string { second = second.Round(time.Second) // calculate time difference in seconds - d := second.Sub(first) + var d time.Duration + messageSuffix := "ago" + if second.Equal(first) || second.After(first) { + d = second.Sub(first) + } else { + d = first.Sub(second) + messageSuffix = "from now" + } + u := uint64(d.Seconds()) var buf [32]byte @@ -183,9 +191,9 @@ func prettyTimeDiff(first, second time.Time) string { end = indexes[num_periods-3] } if start == end { //edge case when time difference is less than a second - return "0s ago" + return "0s " + messageSuffix } else { - return string(buf[start:end]) + " ago" + return string(buf[start:end]) + " " + messageSuffix } } diff --git a/command/helpers_test.go b/command/helpers_test.go index a639a6dfd3c..2bf88939d06 100644 --- a/command/helpers_test.go +++ b/command/helpers_test.go @@ -310,6 +310,7 @@ func TestPrettyTimeDiff(t *testing.T) { {now, now.Add(-60 * time.Minute), "1h ago"}, {now, now.Add(-80 * time.Minute), "1h20m ago"}, {now, now.Add(-6 * time.Hour), "6h ago"}, + {now.Add(-6 * time.Hour), now, "6h from now"}, {now, now.Add(-22165 * time.Second), "6h9m ago"}, {now, now.Add(-100 * time.Hour), "4d4h ago"}, {now, now.Add(-438000 * time.Minute), "10mo4d ago"}, From 2ab03a13e05e24998c30d2493f5b395963828ea8 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 13 Mar 2018 18:05:44 -0500 Subject: [PATCH 24/25] fix method comment --- command/alloc_status.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/command/alloc_status.go b/command/alloc_status.go index b38c9676081..77455cdebf8 100644 --- a/command/alloc_status.go +++ b/command/alloc_status.go @@ -303,7 +303,8 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength return formatKV(basic), nil } -// lookupEvalTime returns a human +// futureEvalTimePretty returns when the eval is eligible to reschedule +// relative to current time, based on the WaitUntil field func futureEvalTimePretty(evalID string, client *api.Client) string { evaluation, _, err := client.Evaluations().Info(evalID, nil) // Eval time is not a critical output, From b1fd173c13e3b61c0994797d19f1926be4f7d8cd Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 14 Mar 2018 16:12:22 -0500 Subject: [PATCH 25/25] Fix linting warning --- nomad/eval_broker.go | 2 +- scheduler/reconcile.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/nomad/eval_broker.go b/nomad/eval_broker.go index b6dbedcc4c4..0217e966dc4 100644 --- a/nomad/eval_broker.go +++ b/nomad/eval_broker.go @@ -134,7 +134,7 @@ func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, stats: new(BrokerStats), evals: make(map[string]int), jobEvals: make(map[structs.NamespacedID]string), - blocked: make(map[structs.NamespacedID]PendingEvaluations), + blocked: make(map[structs.NamespacedID]PendingEvaluations), ready: make(map[string]PendingEvaluations), unack: make(map[string]*unackEval), waiting: make(map[string]chan struct{}), diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 89999c614ee..3bfd1a89e14 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -352,7 +352,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { rescheduleLaterAllocs = a.handleDelayedReschedules(rescheduleLater, all, tg.Name) } - // Create a structure for choosing names. 
Seed with the taken names which is // the union of untainted and migrating nodes (includes canaries) nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, rescheduleNow))
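
Patch 23 makes prettyTimeDiff work in both directions so alloc-status can render a future WaitUntil as "<duration> from now" rather than only "<duration> ago". Below is a simplified sketch of that idea; relativeTime is a made-up name, and unlike the real helper it does not produce the compact multi-unit output (e.g. "1h20m"), it only picks the appropriate suffix:

package main

import (
	"fmt"
	"time"
)

// relativeTime formats the distance between t and now, choosing the suffix
// based on whether t is in the past ("ago") or the future ("from now").
// Simplified sketch only.
func relativeTime(t, now time.Time) string {
	suffix := "ago"
	d := now.Sub(t)
	if d < 0 {
		d = -d
		suffix = "from now"
	}
	d = d.Round(time.Second)
	if d == 0 {
		return "0s " + suffix
	}
	return fmt.Sprintf("%s %s", d, suffix)
}

func main() {
	now := time.Now()
	fmt.Println(relativeTime(now.Add(-6*time.Hour), now))   // "6h0m0s ago"
	fmt.Println(relativeTime(now.Add(30*time.Second), now)) // "30s from now"
}
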