diff --git a/api/jobs.go b/api/jobs.go index d3a255d06b8..2543229770d 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -233,6 +233,22 @@ func (j *Jobs) ForceEvaluate(jobID string, q *WriteOptions) (string, *WriteMeta, return resp.EvalID, wm, nil } +// EvaluateWithOpts is used to force-evaluate an existing job and takes additional options +// for whether to force reschedule failed allocations +func (j *Jobs) EvaluateWithOpts(jobID string, opts EvalOptions, q *WriteOptions) (string, *WriteMeta, error) { + req := &JobEvaluateRequest{ + JobID: jobID, + EvalOptions: opts, + } + + var resp JobRegisterResponse + wm, err := j.client.write("/v1/job/"+jobID+"/evaluate", req, &resp, q) + if err != nil { + return "", nil, err + } + return resp.EvalID, wm, nil +} + // PeriodicForce spawns a new instance of the periodic job and returns the eval ID func (j *Jobs) PeriodicForce(jobID string, q *WriteOptions) (string, *WriteMeta, error) { var resp periodicForceResponse @@ -1032,3 +1048,15 @@ type JobStabilityResponse struct { JobModifyIndex uint64 WriteMeta } + +// JobEvaluateRequest is used when we just need to re-evaluate a target job +type JobEvaluateRequest struct { + JobID string + EvalOptions EvalOptions + WriteRequest +} + +// EvalOptions is used to encapsulate options when forcing a job evaluation +type EvalOptions struct { + ForceReschedule bool +} diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 9ff26acc66d..5976c7c985e 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -90,8 +90,25 @@ func (s *HTTPServer) jobForceEvaluate(resp http.ResponseWriter, req *http.Reques if req.Method != "PUT" && req.Method != "POST" { return nil, CodedError(405, ErrInvalidMethod) } - args := structs.JobEvaluateRequest{ - JobID: jobName, + var args structs.JobEvaluateRequest + + // TODO(preetha): remove in 0.9 + // COMPAT: For backwards compatibility allow using this endpoint without a payload + if req.ContentLength == 0 { + args = structs.JobEvaluateRequest{ + JobID: jobName, + } + } else { + if err := decodeBody(req, &args); err != nil { + return nil, CodedError(400, err.Error()) + } + if args.JobID == "" { + return nil, CodedError(400, "Job ID must be specified") + } + + if jobName != "" && args.JobID != jobName { + return nil, CodedError(400, "JobID not same as job name") + } } s.parseWriteRequest(req, &args.WriteRequest) diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index d92433f4ed7..5908c70b81c 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -609,6 +609,57 @@ func TestHTTP_JobForceEvaluate(t *testing.T) { }) } +func TestHTTP_JobEvaluate_ForceReschedule(t *testing.T) { + t.Parallel() + httpTest(t, nil, func(s *TestAgent) { + // Create the job + job := mock.Job() + args := structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: structs.DefaultNamespace, + }, + } + var resp structs.JobRegisterResponse + if err := s.Agent.RPC("Job.Register", &args, &resp); err != nil { + t.Fatalf("err: %v", err) + } + jobEvalReq := api.JobEvaluateRequest{ + JobID: job.ID, + EvalOptions: api.EvalOptions{ + ForceReschedule: true, + }, + } + + buf := encodeReq(jobEvalReq) + + // Make the HTTP request + req, err := http.NewRequest("POST", "/v1/job/"+job.ID+"/evaluate", buf) + if err != nil { + t.Fatalf("err: %v", err) + } + respW := httptest.NewRecorder() + + // Make the request + obj, err := s.Server.JobSpecificRequest(respW, req) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Check the response + reg := obj.(structs.JobRegisterResponse) + if reg.EvalID == "" { + t.Fatalf("bad: %v", reg) + } + + // Check for the index + if respW.HeaderMap.Get("X-Nomad-Index") == "" { + t.Fatalf("missing index") + } + }) +} + func TestHTTP_JobEvaluations(t *testing.T) { t.Parallel() httpTest(t, nil, func(s *TestAgent) { diff --git a/command/commands.go b/command/commands.go index c4104ca4548..27f33b15913 100644 --- a/command/commands.go +++ b/command/commands.go @@ -270,6 +270,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "job eval": func() (cli.Command, error) { + return &JobEvalCommand{ + Meta: meta, + }, nil + }, "job history": func() (cli.Command, error) { return &JobHistoryCommand{ Meta: meta, diff --git a/command/job_eval.go b/command/job_eval.go new file mode 100644 index 00000000000..2b3f4e64805 --- /dev/null +++ b/command/job_eval.go @@ -0,0 +1,128 @@ +package command + +import ( + "fmt" + "strings" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/api/contexts" + "github.com/posener/complete" +) + +type JobEvalCommand struct { + Meta + forceRescheduling bool +} + +func (c *JobEvalCommand) Help() string { + helpText := ` +Usage: nomad job eval [options] + + Force an evaluation of the provided job ID. Forcing an evaluation will trigger the scheduler + to re-evaluate the job. The force flags allow operators to force the scheduler to create + new allocations under certain scenarios. + +General Options: + + ` + generalOptionsUsage() + ` + +Eval Options: + + -force-reschedule + Force reschedule failed allocations even if they are not currently + eligible for rescheduling. + + -detach + Return immediately instead of entering monitor mode. The ID + of the evaluation created will be printed to the screen, which can be + used to examine the evaluation using the eval-status command. + + -verbose + Display full information. +` + return strings.TrimSpace(helpText) +} + +func (c *JobEvalCommand) Synopsis() string { + return "Force an evaluation for the job" +} + +func (c *JobEvalCommand) AutocompleteFlags() complete.Flags { + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-force-reschedule": complete.PredictNothing, + "-detach": complete.PredictNothing, + "-verbose": complete.PredictNothing, + }) +} + +func (c *JobEvalCommand) AutocompleteArgs() complete.Predictor { + return complete.PredictFunc(func(a complete.Args) []string { + client, err := c.Meta.Client() + if err != nil { + return nil + } + + resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil) + if err != nil { + return []string{} + } + return resp.Matches[contexts.Jobs] + }) +} + +func (c *JobEvalCommand) Name() string { return "job eval" } + +func (c *JobEvalCommand) Run(args []string) int { + var detach, verbose bool + + flags := c.Meta.FlagSet(c.Name(), FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + flags.BoolVar(&c.forceRescheduling, "force-reschedule", false, "") + flags.BoolVar(&detach, "detach", false, "") + flags.BoolVar(&verbose, "verbose", false, "") + + if err := flags.Parse(args); err != nil { + return 1 + } + + // Check that we either got no jobs or exactly one. + args = flags.Args() + if len(args) != 1 { + c.Ui.Error("This command takes one argument: ") + c.Ui.Error(commandErrorText(c)) + return 1 + } + + // Get the HTTP client + client, err := c.Meta.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) + return 1 + } + + // Truncate the id unless full length is requested + length := shortId + if verbose { + length = fullId + } + // Call eval endpoint + jobID := args[0] + + opts := api.EvalOptions{ + ForceReschedule: c.forceRescheduling, + } + evalId, _, err := client.Jobs().EvaluateWithOpts(jobID, opts, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error evaluating job: %s", err)) + return 1 + } + + if detach { + c.Ui.Output(fmt.Sprintf("Created eval ID: %q ", limit(evalId, length))) + return 0 + } + + mon := newMonitor(c.Ui, client, length) + return mon.monitor(evalId, false) +} diff --git a/command/job_eval_test.go b/command/job_eval_test.go new file mode 100644 index 00000000000..ef5019e360e --- /dev/null +++ b/command/job_eval_test.go @@ -0,0 +1,127 @@ +package command + +import ( + "strings" + "testing" + + "fmt" + + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/mitchellh/cli" + "github.com/posener/complete" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestJobEvalCommand_Implements(t *testing.T) { + t.Parallel() + var _ cli.Command = &JobEvalCommand{} +} + +func TestJobEvalCommand_Fails(t *testing.T) { + t.Parallel() + ui := new(cli.MockUi) + cmd := &JobEvalCommand{Meta: Meta{Ui: ui}} + + // Fails on misuse + if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 { + t.Fatalf("expected exit code 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, commandErrorText(cmd)) { + t.Fatalf("expected help output, got: %s", out) + } + ui.ErrorWriter.Reset() + + // Fails when job ID is not specified + if code := cmd.Run([]string{}); code != 1 { + t.Fatalf("expect exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "This command takes one argument") { + t.Fatalf("unexpected error: %v", out) + } + ui.ErrorWriter.Reset() + +} + +func TestJobEvalCommand_Run(t *testing.T) { + t.Parallel() + srv, client, url := testServer(t, true, nil) + defer srv.Shutdown() + + // Wait for a node to be ready + testutil.WaitForResult(func() (bool, error) { + nodes, _, err := client.Nodes().List(nil) + if err != nil { + return false, err + } + for _, node := range nodes { + if node.Status == structs.NodeStatusReady { + return true, nil + } + } + return false, fmt.Errorf("no ready nodes") + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + ui := new(cli.MockUi) + cmd := &JobEvalCommand{Meta: Meta{Ui: ui}} + require := require.New(t) + + state := srv.Agent.Server().State() + + // Create a job + job := mock.Job() + err := state.UpsertJob(11, job) + require.Nil(err) + + job, err = state.JobByID(nil, structs.DefaultNamespace, job.ID) + require.Nil(err) + + // Create a failed alloc for the job + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.Namespace = job.Namespace + alloc.ClientStatus = structs.AllocClientStatusFailed + err = state.UpsertAllocs(12, []*structs.Allocation{alloc}) + require.Nil(err) + + if code := cmd.Run([]string{"-address=" + url, "-force-reschedule", "-detach", job.ID}); code != 0 { + t.Fatalf("expected exit 0, got: %d", code) + } + + // Lookup alloc again + alloc, err = state.AllocByID(nil, alloc.ID) + require.NotNil(alloc) + require.Nil(err) + require.True(*alloc.DesiredTransition.ForceReschedule) + +} + +func TestJobEvalCommand_AutocompleteArgs(t *testing.T) { + assert := assert.New(t) + t.Parallel() + + srv, _, url := testServer(t, true, nil) + defer srv.Shutdown() + + ui := new(cli.MockUi) + cmd := &JobEvalCommand{Meta: Meta{Ui: ui, flagAddress: url}} + + // Create a fake job + state := srv.Agent.Server().State() + j := mock.Job() + assert.Nil(state.UpsertJob(1000, j)) + + prefix := j.ID[:len(j.ID)-5] + args := complete.Args{Last: prefix} + predictor := cmd.AutocompleteArgs() + + res := predictor.Predict(args) + assert.Equal(1, len(res)) + assert.Equal(j.ID, res[0]) +} diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 4acc885fffa..164899e5873 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -39,6 +39,13 @@ var ( RTarget: ">= 0.6.1", Operand: structs.ConstraintVersion, } + + // allowRescheduleTransition is the transition that allows failed + // allocations to be force rescheduled. We create a one off + // variable to avoid creating a new object for every request. + allowForceRescheduleTransition = &structs.DesiredTransition{ + ForceReschedule: helper.BoolToPtr(true), + } ) // Job endpoint is used for job interactions @@ -538,6 +545,28 @@ func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegis return fmt.Errorf("can't evaluate parameterized job") } + forceRescheduleAllocs := make(map[string]*structs.DesiredTransition) + + if args.EvalOptions.ForceReschedule { + // Find any failed allocs that could be force rescheduled + allocs, err := snap.AllocsByJob(ws, args.RequestNamespace(), args.JobID, false) + if err != nil { + return err + } + + for _, alloc := range allocs { + taskGroup := job.LookupTaskGroup(alloc.TaskGroup) + // Forcing rescheduling is only allowed if task group has rescheduling enabled + if taskGroup == nil || !taskGroup.ReschedulePolicy.Enabled() { + continue + } + + if alloc.NextAllocation == "" && alloc.ClientStatus == structs.AllocClientStatusFailed && !alloc.DesiredTransition.ShouldForceReschedule() { + forceRescheduleAllocs[alloc.ID] = allowForceRescheduleTransition + } + } + } + // Create a new evaluation eval := &structs.Evaluation{ ID: uuid.Generate(), @@ -549,13 +578,14 @@ func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegis JobModifyIndex: job.ModifyIndex, Status: structs.EvalStatusPending, } - update := &structs.EvalUpdateRequest{ - Evals: []*structs.Evaluation{eval}, - WriteRequest: structs.WriteRequest{Region: args.Region}, + + // Create a AllocUpdateDesiredTransitionRequest request with the eval and any forced rescheduled allocs + updateTransitionReq := &structs.AllocUpdateDesiredTransitionRequest{ + Allocs: forceRescheduleAllocs, + Evals: []*structs.Evaluation{eval}, } + _, evalIndex, err := j.srv.raftApply(structs.AllocUpdateDesiredTransitionRequestType, updateTransitionReq) - // Commit this evaluation via Raft - _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) if err != nil { j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err) return err diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 92067040ad9..fe51fbabee2 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -1297,6 +1297,81 @@ func TestJobEndpoint_Evaluate(t *testing.T) { } } +func TestJobEndpoint_ForceRescheduleEvaluate(t *testing.T) { + require := require.New(t) + t.Parallel() + s1 := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + job := mock.Job() + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: job.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) + require.Nil(err) + require.NotEqual(0, resp.Index) + + state := s1.fsm.State() + job, err = state.JobByID(nil, structs.DefaultNamespace, job.ID) + require.Nil(err) + + // Create a failed alloc + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.Namespace = job.Namespace + alloc.ClientStatus = structs.AllocClientStatusFailed + err = s1.State().UpsertAllocs(resp.Index+1, []*structs.Allocation{alloc}) + require.Nil(err) + + // Force a re-evaluation + reEval := &structs.JobEvaluateRequest{ + JobID: job.ID, + EvalOptions: structs.EvalOptions{ForceReschedule: true}, + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: job.Namespace, + }, + } + + // Fetch the response + err = msgpackrpc.CallWithCodec(codec, "Job.Evaluate", reEval, &resp) + require.Nil(err) + require.NotEqual(0, resp.Index) + + // Lookup the evaluation + ws := memdb.NewWatchSet() + eval, err := state.EvalByID(ws, resp.EvalID) + require.Nil(err) + require.NotNil(eval) + require.Equal(eval.CreateIndex, resp.EvalCreateIndex) + require.Equal(eval.Priority, job.Priority) + require.Equal(eval.Type, job.Type) + require.Equal(eval.TriggeredBy, structs.EvalTriggerJobRegister) + require.Equal(eval.JobID, job.ID) + require.Equal(eval.JobModifyIndex, resp.JobModifyIndex) + require.Equal(eval.Status, structs.EvalStatusPending) + + // Lookup the alloc, verify DesiredTransition ForceReschedule + alloc, err = state.AllocByID(ws, alloc.ID) + require.NotNil(alloc) + require.Nil(err) + require.True(*alloc.DesiredTransition.ForceReschedule) +} + func TestJobEndpoint_Evaluate_ACL(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 615921f5bf0..c52ef84507d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -465,10 +465,16 @@ type JobDeregisterOptions struct { // JobEvaluateRequest is used when we just need to re-evaluate a target job type JobEvaluateRequest struct { - JobID string + JobID string + EvalOptions EvalOptions WriteRequest } +// EvalOptions is used to encapsulate options when forcing a job evaluation +type EvalOptions struct { + ForceReschedule bool +} + // JobSpecificRequest is used when we just need to specify a target job type JobSpecificRequest struct { JobID string @@ -2990,13 +2996,17 @@ func (r *ReschedulePolicy) Copy() *ReschedulePolicy { return nrp } +func (r *ReschedulePolicy) Enabled() bool { + enabled := r != nil && (r.Attempts > 0 || r.Unlimited) + return enabled +} + // Validate uses different criteria to validate the reschedule policy // Delay must be a minimum of 5 seconds // Delay Ceiling is ignored if Delay Function is "constant" // Number of possible attempts is validated, given the interval, delay and delay function func (r *ReschedulePolicy) Validate() error { - enabled := r != nil && (r.Attempts > 0 || r.Unlimited) - if !enabled { + if !r.Enabled() { return nil } var mErr multierror.Error @@ -5610,6 +5620,11 @@ type DesiredTransition struct { // automatically eligible. An example is an allocation that is part of a // deployment. Reschedule *bool + + // ForceReschedule is used to indicate that this allocation must be rescheduled. + // This field is only used when operators want to force a placement even if + // a failed allocation is not eligible to be rescheduled + ForceReschedule *bool } // Merge merges the two desired transitions, preferring the values from the @@ -5622,6 +5637,10 @@ func (d *DesiredTransition) Merge(o *DesiredTransition) { if o.Reschedule != nil { d.Reschedule = o.Reschedule } + + if o.ForceReschedule != nil { + d.ForceReschedule = o.ForceReschedule + } } // ShouldMigrate returns whether the transition object dictates a migration. @@ -5635,6 +5654,15 @@ func (d *DesiredTransition) ShouldReschedule() bool { return d.Reschedule != nil && *d.Reschedule } +// ShouldForceReschedule returns whether the transition object dictates a +// forced rescheduling. +func (d *DesiredTransition) ShouldForceReschedule() bool { + if d == nil { + return false + } + return d.ForceReschedule != nil && *d.ForceReschedule +} + const ( AllocDesiredStatusRun = "run" // Allocation should run AllocDesiredStatusStop = "stop" // Allocation should stop diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 9d22439e2ff..3aded05b93c 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -4570,3 +4570,75 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T }) assertPlaceResultsHavePreviousAllocs(t, 10, r.place) } + +// Tests force rescheduling a failed alloc that is past its reschedule limit +func TestReconciler_ForceReschedule_Service(t *testing.T) { + require := require.New(t) + + // Set desired 5 + job := mock.Job() + job.TaskGroups[0].Count = 5 + tgName := job.TaskGroups[0].Name + + // Set up reschedule policy and update stanza + job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ + Attempts: 1, + Interval: 24 * time.Hour, + Delay: 5 * time.Second, + DelayFunction: "", + MaxDelay: 1 * time.Hour, + Unlimited: false, + } + job.TaskGroups[0].Update = noCanaryUpdate + + // Create 5 existing allocations + var allocs []*structs.Allocation + for i := 0; i < 5; i++ { + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = uuid.Generate() + alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) + allocs = append(allocs, alloc) + alloc.ClientStatus = structs.AllocClientStatusRunning + } + + // Mark one as failed and past its reschedule limit so not eligible to reschedule + allocs[0].ClientStatus = structs.AllocClientStatusFailed + allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ + {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), + PrevAllocID: uuid.Generate(), + PrevNodeID: uuid.Generate(), + }, + }} + + // Mark DesiredTransition ForceReschedule + allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)} + + reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") + r := reconciler.Compute() + + // Verify that no follow up evals were created + evals := r.desiredFollowupEvals[tgName] + require.Nil(evals) + + // Verify that one rescheduled alloc was created because of the forced reschedule + assertResults(t, r, &resultExpectation{ + createDeployment: nil, + deploymentUpdates: nil, + place: 1, + inplace: 0, + stop: 0, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + job.TaskGroups[0].Name: { + Place: 1, + Ignore: 4, + }, + }, + }) + + // Rescheduled allocs should have previous allocs + assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) + assertPlaceResultsHavePreviousAllocs(t, 1, r.place) + assertPlacementsAreRescheduled(t, 1, r.place) +} diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index b59fd8209cc..87fa4f93644 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -327,6 +327,11 @@ func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID stri return } + // Check if the allocation is marked as it should be force rescheduled + if alloc.DesiredTransition.ShouldForceReschedule() { + rescheduleNow = true + } + // Reschedule if the eval ID matches the alloc's followup evalID or if its close to its reschedule time rescheduleTime, eligible := alloc.NextRescheduleTime() if eligible && (alloc.FollowupEvalID == evalID || rescheduleTime.Sub(now) <= rescheduleWindowSize) { diff --git a/website/source/api/jobs.html.md b/website/source/api/jobs.html.md index 284830937d8..a1270fe70ea 100644 --- a/website/source/api/jobs.html.md +++ b/website/source/api/jobs.html.md @@ -1361,7 +1361,9 @@ $ curl \ ## Create Job Evaluation This endpoint creates a new evaluation for the given job. This can be used to -force run the scheduling logic if necessary. +force run the scheduling logic if necessary. Since Nomad 0.8.4, this endpoint +supports a JSON payload with additional options. Support for calling this end point +without a JSON payload will be removed in Nomad 0.9. | Method | Path | Produces | | ------- | -------------------------- | -------------------------- | @@ -1380,11 +1382,30 @@ The table below shows this endpoint's support for - `:job_id` `(string: )` - Specifies the ID of the job (as specified in the job file during submission). This is specified as part of the path. +- `JobID` `(string: )` - Specify the ID of the job in the JSON payload + +- `EvalOptions` `()` - Specify additional options to be used during the forced evaluation. + - `ForceReschedule` `(bool: false)` - If set, failed allocations of the job are rescheduled + immediately. This is useful for operators to force immediate placement even if the failed allocations are past + their reschedule limit, or are delayed by several hours because the allocation's reschedule policy has exponential delay. + +### Sample Payload + +```json +{ + "JobID": "my-job", + "EvalOptions": { + "ForceReschedule":true + } +} +``` + ### Sample Request ```text $ curl \ --request POST \ + -d @sample.json \ https://localhost:4646/v1/job/my-job/evaluate ``` diff --git a/website/source/docs/commands/job.html.md.erb b/website/source/docs/commands/job.html.md.erb index 1677fd51486..acaccf7a12a 100644 --- a/website/source/docs/commands/job.html.md.erb +++ b/website/source/docs/commands/job.html.md.erb @@ -19,6 +19,7 @@ subcommands are available: * [`job deployments`][deployments] - List deployments for a job * [`job dispatch`][dispatch] - Dispatch an instance of a parameterized job +* [`job eval`][eval] - Force an evaluation for a job * [`job history`][history] - Display all tracked versions of a job * [`job promote`][promote] - Promote a job's canaries * [`job revert`][revert] - Revert to a prior version of the job @@ -26,6 +27,7 @@ subcommands are available: [deployments]: /docs/commands/job/deployments.html "List deployments for a job" [dispatch]: /docs/commands/job/dispatch.html "Dispatch an instance of a parameterized job" +[eval]: /docs/commands/job/eval.html "Force an evaluation for a job" [history]: /docs/commands/job/history.html "Display all tracked versions of a job" [promote]: /docs/commands/job/promote.html "Promote a job's canaries" [revert]: /docs/commands/job/revert.html "Revert to a prior version of the job" diff --git a/website/source/docs/commands/job/eval.html.md.erb b/website/source/docs/commands/job/eval.html.md.erb new file mode 100644 index 00000000000..8ede561c166 --- /dev/null +++ b/website/source/docs/commands/job/eval.html.md.erb @@ -0,0 +1,69 @@ +--- +layout: "docs" +page_title: "Commands: job eval" +sidebar_current: "docs-commands-job-eval" +description: > + The job eval command is used to force an evaluation of a job +--- + +# Command: job eval + +The `job eval` command is used to force an evaluation of a job, given the job ID. + +## Usage + +``` +nomad job eval [options] +``` + +The `job eval` command requires a single argument, specifying the job ID to evaluate. +If there is an exact match based on the provided job ID, then +the job will be evaluated, forcing a scheduler run. + +## General Options + +<%= partial "docs/commands/_general_options" %> + +## Eval Options + +* `-force-reschedule`: `force-reschedule` is used to force placement of failed allocations. +If this is set, failed allocations that are past their reschedule limit, and those that are +scheduled to be replaced at a future time are placed immediately. This option only places failed +allocations if the task group has rescheduling enabled. + +* `-detach`: Return immediately instead of monitoring. A new evaluation ID + will be output, which can be used to examine the evaluation using the + [eval status](/docs/commands/eval-status.html) command + +* `-verbose`: Show full information. + +## Examples + +Evaluate the job with ID "job1": + +``` +$ nomad job eval job1 +==> Monitoring evaluation "0f3bc0f3" + Evaluation triggered by job "test" + Evaluation within deployment: "51baf5c8" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "0f3bc0f3" finished with status "complete" +``` + +Evaluate the job with ID "job1" and return immediately: + +``` +$ nomad job eval -detach job1 +Created eval ID: "4947e728" +``` + +Evaluate the job with ID "job1", and reschedule any eligible failed allocations: + +``` +$ nomad job eval -force-reschedule job1 +==> Monitoring evaluation "0f3bc0f3" + Evaluation triggered by job "test" + Evaluation within deployment: "51baf5c8" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "0f3bc0f3" finished with status "complete" +``` \ No newline at end of file diff --git a/website/source/layouts/docs.erb b/website/source/layouts/docs.erb index 4f48b934069..b0e3b7250fc 100644 --- a/website/source/layouts/docs.erb +++ b/website/source/layouts/docs.erb @@ -231,6 +231,9 @@ > dispatch + > + eval + > history