-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add CLI and API support for forcing rescheduling of failed allocs #4274
Changes from 12 commits
242cc19
3b7d23f
268a99e
1bad719
4f9d92c
2d0e273
b5e18b6
879c2c9
53c05c5
e2f13d2
b2006cc
ae5d8fd
2ea09b8
a5ca379
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,8 +90,25 @@ func (s *HTTPServer) jobForceEvaluate(resp http.ResponseWriter, req *http.Reques | |
if req.Method != "PUT" && req.Method != "POST" { | ||
return nil, CodedError(405, ErrInvalidMethod) | ||
} | ||
args := structs.JobEvaluateRequest{ | ||
JobID: jobName, | ||
var args structs.JobEvaluateRequest | ||
|
||
// TODO(preetha): remove in 0.9 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 0.10? I guess this code will go out in 0.9 :) |
||
// COMPAT: For backwards compatibility allow using this endpoint without a payload | ||
if req.ContentLength == 0 { | ||
args = structs.JobEvaluateRequest{ | ||
JobID: jobName, | ||
} | ||
} else { | ||
if err := decodeBody(req, &args); err != nil { | ||
return nil, CodedError(400, err.Error()) | ||
} | ||
if args.JobID == "" { | ||
return nil, CodedError(400, "Job ID must be specified") | ||
} | ||
|
||
if jobName != "" && args.JobID != jobName { | ||
return nil, CodedError(400, "JobID not same as job name") | ||
} | ||
} | ||
s.parseWriteRequest(req, &args.WriteRequest) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
package command | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
|
||
"github.com/hashicorp/nomad/api" | ||
"github.com/hashicorp/nomad/api/contexts" | ||
"github.com/posener/complete" | ||
) | ||
|
||
type JobEvalCommand struct { | ||
Meta | ||
forceRescheduling bool | ||
} | ||
|
||
func (c *JobEvalCommand) Help() string { | ||
helpText := ` | ||
Usage: nomad job eval [options] <job_id> | ||
|
||
Force an evaluation of the provided job ID. Forcing an evaluation will trigger the scheduler | ||
to re-evaluate the job. The force flags allow operators to force the scheduler to create | ||
new allocations under certain scenarios. | ||
|
||
General Options: | ||
|
||
` + generalOptionsUsage() + ` | ||
|
||
Eval Options: | ||
|
||
-force-reschedule | ||
Force reschedule failed allocations even if they are not currently | ||
eligible for rescheduling. | ||
|
||
-detach | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Blank line above |
||
Return immediately instead of entering monitor mode. The ID | ||
of the evaluation created will be printed to the screen, which can be | ||
used to examine the evaluation using the eval-status command. | ||
|
||
-verbose | ||
Display full information. | ||
` | ||
return strings.TrimSpace(helpText) | ||
} | ||
|
||
func (c *JobEvalCommand) Synopsis() string { | ||
return "Force an evaluation for the job" | ||
} | ||
|
||
func (c *JobEvalCommand) AutocompleteFlags() complete.Flags { | ||
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), | ||
complete.Flags{ | ||
"-force-reschedule": complete.PredictNothing, | ||
"-detach": complete.PredictNothing, | ||
"-verbose": complete.PredictNothing, | ||
}) | ||
} | ||
|
||
func (c *JobEvalCommand) AutocompleteArgs() complete.Predictor { | ||
return complete.PredictFunc(func(a complete.Args) []string { | ||
client, err := c.Meta.Client() | ||
if err != nil { | ||
return nil | ||
} | ||
|
||
resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil) | ||
if err != nil { | ||
return []string{} | ||
} | ||
return resp.Matches[contexts.Jobs] | ||
}) | ||
} | ||
|
||
func (c *JobEvalCommand) Name() string { return "job eval" } | ||
|
||
func (c *JobEvalCommand) Run(args []string) int { | ||
var detach, verbose bool | ||
|
||
flags := c.Meta.FlagSet(c.Name(), FlagSetClient) | ||
flags.Usage = func() { c.Ui.Output(c.Help()) } | ||
flags.BoolVar(&c.forceRescheduling, "force-reschedule", false, "") | ||
flags.BoolVar(&detach, "detach", false, "") | ||
flags.BoolVar(&verbose, "verbose", false, "") | ||
|
||
if err := flags.Parse(args); err != nil { | ||
return 1 | ||
} | ||
|
||
// Check that we either got no jobs or exactly one. | ||
args = flags.Args() | ||
if len(args) != 1 { | ||
c.Ui.Error("This command takes one argument: <job>") | ||
c.Ui.Error(commandErrorText(c)) | ||
return 1 | ||
} | ||
|
||
// Get the HTTP client | ||
client, err := c.Meta.Client() | ||
if err != nil { | ||
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) | ||
return 1 | ||
} | ||
|
||
// Truncate the id unless full length is requested | ||
length := shortId | ||
if verbose { | ||
length = fullId | ||
} | ||
// Call eval endpoint | ||
jobID := args[0] | ||
|
||
opts := api.EvalOptions{ | ||
ForceReschedule: c.forceRescheduling, | ||
} | ||
evalId, _, err := client.Jobs().EvaluateWithOpts(jobID, opts, nil) | ||
if err != nil { | ||
c.Ui.Error(fmt.Sprintf("Error evaluating job: %s", err)) | ||
return 1 | ||
} | ||
|
||
if detach { | ||
c.Ui.Output(fmt.Sprintf("Created eval ID: %q ", limit(evalId, length))) | ||
return 0 | ||
} | ||
|
||
mon := newMonitor(c.Ui, client, length) | ||
return mon.monitor(evalId, false) | ||
return 0 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
package command | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"fmt" | ||
|
||
"github.com/hashicorp/nomad/nomad/mock" | ||
"github.com/hashicorp/nomad/nomad/structs" | ||
"github.com/hashicorp/nomad/testutil" | ||
"github.com/mitchellh/cli" | ||
"github.com/posener/complete" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestJobEvalCommand_Implements(t *testing.T) { | ||
t.Parallel() | ||
var _ cli.Command = &JobEvalCommand{} | ||
} | ||
|
||
func TestJobEvalCommand_Fails(t *testing.T) { | ||
t.Parallel() | ||
ui := new(cli.MockUi) | ||
cmd := &JobEvalCommand{Meta: Meta{Ui: ui}} | ||
|
||
// Fails on misuse | ||
if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 { | ||
t.Fatalf("expected exit code 1, got: %d", code) | ||
} | ||
if out := ui.ErrorWriter.String(); !strings.Contains(out, commandErrorText(cmd)) { | ||
t.Fatalf("expected help output, got: %s", out) | ||
} | ||
ui.ErrorWriter.Reset() | ||
|
||
// Fails when job ID is not specified | ||
if code := cmd.Run([]string{}); code != 1 { | ||
t.Fatalf("expect exit 1, got: %d", code) | ||
} | ||
if out := ui.ErrorWriter.String(); !strings.Contains(out, "This command takes one argument") { | ||
t.Fatalf("unexpected error: %v", out) | ||
} | ||
ui.ErrorWriter.Reset() | ||
|
||
} | ||
|
||
func TestJobEvalCommand_Run(t *testing.T) { | ||
t.Parallel() | ||
srv, client, url := testServer(t, true, nil) | ||
defer srv.Shutdown() | ||
|
||
// Wait for a node to be ready | ||
testutil.WaitForResult(func() (bool, error) { | ||
nodes, _, err := client.Nodes().List(nil) | ||
if err != nil { | ||
return false, err | ||
} | ||
for _, node := range nodes { | ||
if node.Status == structs.NodeStatusReady { | ||
return true, nil | ||
} | ||
} | ||
return false, fmt.Errorf("no ready nodes") | ||
}, func(err error) { | ||
t.Fatalf("err: %v", err) | ||
}) | ||
|
||
ui := new(cli.MockUi) | ||
cmd := &JobEvalCommand{Meta: Meta{Ui: ui}} | ||
require := require.New(t) | ||
|
||
state := srv.Agent.Server().State() | ||
|
||
// Create a job | ||
job := mock.Job() | ||
err := state.UpsertJob(11, job) | ||
require.Nil(err) | ||
|
||
job, err = state.JobByID(nil, structs.DefaultNamespace, job.ID) | ||
require.Nil(err) | ||
|
||
// Create a failed alloc for the job | ||
alloc := mock.Alloc() | ||
alloc.Job = job | ||
alloc.JobID = job.ID | ||
alloc.TaskGroup = job.TaskGroups[0].Name | ||
alloc.Namespace = job.Namespace | ||
alloc.ClientStatus = structs.AllocClientStatusFailed | ||
err = state.UpsertAllocs(12, []*structs.Allocation{alloc}) | ||
require.Nil(err) | ||
|
||
if code := cmd.Run([]string{"-address=" + url, "-force-reschedule", "-detach", job.ID}); code != 0 { | ||
t.Fatalf("expected exit 0, got: %d", code) | ||
} | ||
|
||
// Lookup alloc again | ||
alloc, err = state.AllocByID(nil, alloc.ID) | ||
require.NotNil(alloc) | ||
require.Nil(err) | ||
require.True(*alloc.DesiredTransition.ForceReschedule) | ||
|
||
} | ||
|
||
func TestJobEvalCommand_AutocompleteArgs(t *testing.T) { | ||
assert := assert.New(t) | ||
t.Parallel() | ||
|
||
srv, _, url := testServer(t, true, nil) | ||
defer srv.Shutdown() | ||
|
||
ui := new(cli.MockUi) | ||
cmd := &JobEvalCommand{Meta: Meta{Ui: ui, flagAddress: url}} | ||
|
||
// Create a fake job | ||
state := srv.Agent.Server().State() | ||
j := mock.Job() | ||
assert.Nil(state.UpsertJob(1000, j)) | ||
|
||
prefix := j.ID[:len(j.ID)-5] | ||
args := complete.Args{Last: prefix} | ||
predictor := cmd.AutocompleteArgs() | ||
|
||
res := predictor.Predict(args) | ||
assert.Equal(1, len(res)) | ||
assert.Equal(j.ID, res[0]) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not add a test that submits a job and runs the force eval against it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wrong method name in doc