Skip to content

Commit

Permalink
Merge pull request #4004 from hashicorp/b-rescheduling-fixes
Browse files Browse the repository at this point in the history
Make suggested interval round to seconds
  • Loading branch information
Preetha authored Mar 19, 2018
2 parents 82fb214 + f4f178b commit 61d8ce1
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 7 deletions.
21 changes: 21 additions & 0 deletions e2e/rescheduling/input/rescheduling_default.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Test job spec: a service-type job named "test" in datacenter "dc1" with a
# single group of three identical raw_exec tasks.
# NOTE(review): no reschedule stanza is declared here; presumably the
# server-side default reschedule policy applies (the file name suggests so) — confirm.
job "test" {
datacenters = ["dc1"]
type = "service"

group "t" {
count = 3
task "t" {
driver = "raw_exec"
config {
command = "bash"
# NOTE(review): `lol` is presumably not a real command, so bash exits with
# an error right away and the task fails — confirm this is intentional.
args = ["-c", "lol 5000"]
}
}
# Zero restart attempts with mode "fail".
# NOTE(review): presumably this keeps the client from restarting the task
# locally so the failure surfaces to the server — verify against the Nomad
# restart stanza documentation.
restart {
attempts = 0
delay = "0s"
mode = "fail"
}

}
}
20 changes: 20 additions & 0 deletions e2e/rescheduling/input/rescheduling_system.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Test job spec: a system-type job named "test" in datacenter "dc1" with a
# single group holding one raw_exec task.
job "test" {
datacenters = ["dc1"]
type = "system"

group "t" {
count = 1
task "t" {
driver = "raw_exec"
config {
command = "bash"
# NOTE(review): `lol` is presumably not a real command, so bash exits with
# an error right away and the task fails — confirm this is intentional.
args = ["-c", "lol 5000"]
}
}
# Zero restart attempts with mode "fail".
# NOTE(review): presumably this keeps the client from restarting the task
# locally so the failure surfaces to the server — verify against the Nomad
# restart stanza documentation.
restart {
attempts = 0
delay = "0s"
mode = "fail"
}
}
}
1 change: 1 addition & 0 deletions e2e/rescheduling/server_side_restarts_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
)

// Command-line switches for this test package.
var (
	// integration registers the -integration boolean flag (default false).
	integration = flag.Bool("integration", false, "run integration tests")

	// slow registers the -slow boolean flag (default false).
	slow = flag.Bool("slow", false, "runs slower integration tests")
)

func TestServerSideRestarts(t *testing.T) {
if !*integration {
Expand Down
46 changes: 40 additions & 6 deletions e2e/rescheduling/server_side_restarts_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package rescheduling

import (
"sort"
"time"

"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/jobspec"
_ "github.com/hashicorp/nomad/jobspec"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/uuid"
)

var _ = Describe("Server Side Restart Tests", func() {
Expand All @@ -28,6 +32,7 @@ var _ = Describe("Server Side Restart Tests", func() {
for _, a := range allocs {
ret = append(ret, a.ClientStatus)
}
sort.Strings(ret)
return ret
}

Expand Down Expand Up @@ -59,7 +64,7 @@ var _ = Describe("Server Side Restart Tests", func() {
JustBeforeEach(func() {
job, err = jobspec.ParseFile(specFile)
Expect(err).ShouldNot(HaveOccurred())

job.ID = helper.StringToPtr(uuid.Generate())
resp, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Expect(resp.EvalID).ShouldNot(BeEmpty())
Expand All @@ -84,20 +89,49 @@ var _ = Describe("Server Side Restart Tests", func() {
})
})

// Registers the system job spec and checks that the job ends up with a single
// failed allocation and nothing else.
Context("System jobs should never be rescheduled", func() {
	BeforeEach(func() {
		specFile = "input/rescheduling_system.hcl"
	})

	It("Should have exactly one failed alloc", func() {
		// Poll allocStatuses once per second for up to ten seconds.
		wantStatuses := []string{"failed"}
		Eventually(allocStatuses, 10*time.Second, time.Second).Should(ConsistOf(wantStatuses))
	})
})

// Registers the job spec that declares no reschedule settings of its own and
// checks the allocation statuses at two points in time.
Context("Default Rescheduling", func() {
	BeforeEach(func() {
		specFile = "input/rescheduling_default.hcl"
	})

	It("Should have exactly three allocs and all failed after 5 secs", func() {
		initialAllocs := []string{"failed", "failed", "failed"}
		Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf(initialAllocs))
	})

	// This spec has to outlast the first exponential delay before a reschedule
	// is attempted, so it only runs when the -slow flag is set.
	It("Should have exactly six allocs and all failed after 35 secs", func() {
		if !*slow {
			Skip("Skipping slow test")
		}
		afterReschedule := []string{"failed", "failed", "failed", "failed", "failed", "failed"}
		Eventually(allocStatuses, 35*time.Second, time.Second).Should(ConsistOf(afterReschedule))
	})
})

Context("Reschedule attempts maxed out", func() {
BeforeEach(func() {
specFile = "input/rescheduling_fail.hcl"
})
// Expect 3 original plus 6 rescheduled allocs from 2 attempts
var expected []string
for i := 0; i < 9; i++ {
expected = append(expected, "failed")
}
It("Should have all failed", func() {
Eventually(allocStatuses, 6*time.Second, time.Second).ShouldNot(
SatisfyAll(ContainElement("pending"),
ContainElement("running")))
})
Context("Updating job to change its version", func() {
It("Should have running allocs now", func() {
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatuses, 5*time.Second, time.Second).Should(ContainElement("running"))
})
})
})

Context("Reschedule attempts succeeded", func() {
Expand Down
2 changes: 1 addition & 1 deletion nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -2892,7 +2892,7 @@ func (r *ReschedulePolicy) validateDelayParams() error {
multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+
"delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay))
}
multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Minute), r.Attempts))
multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts))
return mErr.ErrorOrNil()
}

Expand Down

0 comments on commit 61d8ce1

Please sign in to comment.