From 38dbe768e7f813f198abbc4f2ea2c321d9200cfd Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 25 May 2016 14:11:14 -0700 Subject: [PATCH 1/7] Add eval-status and remove eval-monitor --- command/alloc_status.go | 1 + command/eval_monitor.go | 81 -------- command/eval_status.go | 184 ++++++++++++++++++ ...al_monitor_test.go => eval_status_test.go} | 10 +- command/fs.go | 2 + command/plan.go | 2 +- command/server_members.go | 2 +- commands.go | 4 +- scheduler/generic_sched.go | 11 ++ 9 files changed, 207 insertions(+), 90 deletions(-) delete mode 100644 command/eval_monitor.go create mode 100644 command/eval_status.go rename command/{eval_monitor_test.go => eval_status_test.go} (83%) diff --git a/command/alloc_status.go b/command/alloc_status.go index 700a175469c..76e6d84cbb4 100644 --- a/command/alloc_status.go +++ b/command/alloc_status.go @@ -27,6 +27,7 @@ General Options: ` + generalOptionsUsage() + ` +Alloc Status Options: -short Display short output. Shows only the most recent task event. diff --git a/command/eval_monitor.go b/command/eval_monitor.go deleted file mode 100644 index 72d4ffa6f07..00000000000 --- a/command/eval_monitor.go +++ /dev/null @@ -1,81 +0,0 @@ -package command - -import ( - "fmt" - "strings" -) - -type EvalMonitorCommand struct { - Meta -} - -func (c *EvalMonitorCommand) Help() string { - helpText := ` -Usage: nomad eval-monitor [options] - - Start an interactive monitoring session for an existing evaluation. - The monitor command periodically polls for information about the - provided evaluation, including status updates, new allocations, - updates to allocations, and failures. Status is printed in near - real-time to the terminal. - - The command will exit when the given evaluation reaches a terminal - state (completed or failed). Exit code 0 is returned on successful - evaluation, and if there are no scheduling problems. 
If there are - job placement issues encountered (unsatisfiable constraints, - resource exhaustion, etc), then the exit code will be 2. Any other - errors, including client connection issues or internal errors, are - indicated by exit code 1. - -General Options: - - ` + generalOptionsUsage() + ` - -Eval Monitor Options: - - -verbose - Show full information. -` - return strings.TrimSpace(helpText) -} - -func (c *EvalMonitorCommand) Synopsis() string { - return "Monitor an evaluation interactively" -} - -func (c *EvalMonitorCommand) Run(args []string) int { - var verbose bool - - flags := c.Meta.FlagSet("eval-monitor", FlagSetClient) - flags.Usage = func() { c.Ui.Output(c.Help()) } - flags.BoolVar(&verbose, "verbose", false, "") - - if err := flags.Parse(args); err != nil { - return 1 - } - - // Truncate the id unless full length is requested - length := shortId - if verbose { - length = fullId - } - - // Check that we got exactly one eval ID - args = flags.Args() - if len(args) != 1 { - c.Ui.Error(c.Help()) - return 1 - } - evalID := args[0] - - // Get the HTTP client - client, err := c.Meta.Client() - if err != nil { - c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) - return 1 - } - - // Start monitoring - mon := newMonitor(c.Ui, client, length) - return mon.monitor(evalID, true) -} diff --git a/command/eval_status.go b/command/eval_status.go new file mode 100644 index 00000000000..4d17280691b --- /dev/null +++ b/command/eval_status.go @@ -0,0 +1,184 @@ +package command + +import ( + "fmt" + "strings" + + "github.com/hashicorp/nomad/api" +) + +type EvalStatusCommand struct { + Meta +} + +func (c *EvalStatusCommand) Help() string { + helpText := ` +Usage: nomad eval-status [options] + + Display information about evaluations. This command can be used to inspect the + current status of an evaluation as well as determine the reason an evaluation + did not place all allocations. 
+ +General Options: + + ` + generalOptionsUsage() + ` + +Eval Status Options: + + -monitor + Monitor an outstanding evaluation + + -verbose + Show full information. +` + + return strings.TrimSpace(helpText) +} + +func (c *EvalStatusCommand) Synopsis() string { + return "Display evaluation status information and placement failure reasons" +} + +func (c *EvalStatusCommand) Run(args []string) int { + var monitor, verbose bool + + flags := c.Meta.FlagSet("eval-status", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + flags.BoolVar(&monitor, "monitor", false, "") + flags.BoolVar(&verbose, "verbose", false, "") + + if err := flags.Parse(args); err != nil { + return 1 + } + + // Check that we got exactly one evaluation ID + args = flags.Args() + if len(args) != 1 { + c.Ui.Error(c.Help()) + return 1 + } + evalID := args[0] + + // Get the HTTP client + client, err := c.Meta.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) + return 1 + } + + // Truncate the id unless full length is requested + length := shortId + if verbose { + length = fullId + } + + // Query the allocation info + if len(evalID) == 1 { + c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) + return 1 + } + if len(evalID)%2 == 1 { + // Identifiers must be of even length, so we strip off the last byte + // to provide a consistent user experience. 
+ evalID = evalID[:len(evalID)-1] + } + + evals, _, err := client.Evaluations().PrefixList(evalID) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying evaluation: %v", err)) + return 1 + } + if len(evals) == 0 { + c.Ui.Error(fmt.Sprintf("No evaluation(s) with prefix or id %q found", evalID)) + return 1 + } + if len(evals) > 1 { + // Format the evals + out := make([]string, len(evals)+1) + out[0] = "ID|Priority|Triggered By|Status|Placement Failures" + for i, eval := range evals { + out[i+1] = fmt.Sprintf("%s|%d|%s|%s|%t", + limit(eval.ID, length), + eval.Priority, + eval.TriggeredBy, + eval.Status, + len(eval.FailedTGAllocs) != 0, + ) + } + c.Ui.Output(fmt.Sprintf("Prefix matched multiple evaluations\n\n%s", formatList(out))) + return 0 + } + + // If we are in monitor mode, monitor and exit + if monitor { + mon := newMonitor(c.Ui, client, length) + return mon.monitor(evals[0].ID, true) + } + + // Prefix lookup matched a single evaluation + eval, _, err := client.Evaluations().Info(evals[0].ID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying evaluation: %s", err)) + return 1 + } + + failures := len(eval.FailedTGAllocs) != 0 + triggerNoun, triggerSubj := getTriggerDetails(eval) + statusDesc := eval.StatusDescription + if statusDesc == "" { + statusDesc = eval.Status + } + + // Format the allocation data + basic := []string{ + fmt.Sprintf("ID|%s", limit(eval.ID, length)), + fmt.Sprintf("Status|%s", eval.Status), + fmt.Sprintf("Status Description|%s", statusDesc), + fmt.Sprintf("Type|%s", eval.Type), + fmt.Sprintf("TriggeredBy|%s", eval.TriggeredBy), + fmt.Sprintf("%s|%s", triggerNoun, triggerSubj), + fmt.Sprintf("Priority|%d", eval.Priority), + fmt.Sprintf("Placement Failures|%t", failures), + } + + if verbose { + // NextEval, PreviousEval, BlockedEval + basic = append(basic, + fmt.Sprintf("Previous Eval|%s", eval.PreviousEval), + fmt.Sprintf("Next Eval|%s", eval.NextEval), + fmt.Sprintf("Blocked Eval|%s", eval.BlockedEval)) + } + 
c.Ui.Output(formatKV(basic)) + + if failures { + c.Ui.Output("\n==> Failed Allocations") + for tg, metrics := range eval.FailedTGAllocs { + noun := "allocation" + if metrics.CoalescedFailures > 0 { + noun += "s" + } + c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) + dumpAllocMetrics(c.Ui, metrics, false) + } + + if eval.BlockedEval != "" { + c.Ui.Output(fmt.Sprintf("\nEvaluation %q waiting for additional capacity to place remainder", + limit(eval.BlockedEval, length))) + } + } + + return 0 +} + +func getTriggerDetails(eval *api.Evaluation) (noun, subject string) { + switch eval.TriggeredBy { + case "job-register", "job-deregister", "periodic-job", "rolling-update": + return "Job ID", eval.JobID + case "node-update": + return "Node ID", eval.NodeID + case "max-plan-attempts": + return "Previous Eval", eval.PreviousEval + default: + return "", "" + } +} diff --git a/command/eval_monitor_test.go b/command/eval_status_test.go similarity index 83% rename from command/eval_monitor_test.go rename to command/eval_status_test.go index 26231b1ea0d..29ea4634f8c 100644 --- a/command/eval_monitor_test.go +++ b/command/eval_status_test.go @@ -7,16 +7,16 @@ import ( "github.com/mitchellh/cli" ) -func TestEvalMonitorCommand_Implements(t *testing.T) { - var _ cli.Command = &EvalMonitorCommand{} +func TestEvalStatusCommand_Implements(t *testing.T) { + var _ cli.Command = &EvalStatusCommand{} } -func TestEvalMonitorCommand_Fails(t *testing.T) { +func TestEvalStatusCommand_Fails(t *testing.T) { srv, _, url := testServer(t, nil) defer srv.Stop() ui := new(cli.MockUi) - cmd := &EvalMonitorCommand{Meta: Meta{Ui: ui}} + cmd := &EvalStatusCommand{Meta: Meta{Ui: ui}} // Fails on misuse if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 { @@ -40,7 +40,7 @@ func TestEvalMonitorCommand_Fails(t *testing.T) { if code := cmd.Run([]string{"-address=nope", "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { t.Fatalf("expected 
exit code 1, got: %d", code) } - if out := ui.ErrorWriter.String(); !strings.Contains(out, "Error reading evaluation") { + if out := ui.ErrorWriter.String(); !strings.Contains(out, "Error querying evaluation") { t.Fatalf("expected failed query error, got: %s", out) } } diff --git a/command/fs.go b/command/fs.go index 2c66b0f41c6..9d73e7310e3 100644 --- a/command/fs.go +++ b/command/fs.go @@ -28,6 +28,8 @@ General Options: ` + generalOptionsUsage() + ` +FS Specific Options: + -H Machine friendly output. diff --git a/command/plan.go b/command/plan.go index 677bfcfb85b..7742d5a2536 100644 --- a/command/plan.go +++ b/command/plan.go @@ -47,7 +47,7 @@ General Options: ` + generalOptionsUsage() + ` -Run Options: +Plan Options: -diff Defaults to true, but can be toggled off to omit diff output. diff --git a/command/server_members.go b/command/server_members.go index 40e375f3f1a..94f5aadfc7f 100644 --- a/command/server_members.go +++ b/command/server_members.go @@ -23,7 +23,7 @@ General Options: ` + generalOptionsUsage() + ` -Agent Members Options: +Server Members Options: -detailed Show detailed information about each member. 
This dumps diff --git a/commands.go b/commands.go index b402f429bc8..69d96273ffd 100644 --- a/commands.go +++ b/commands.go @@ -54,8 +54,8 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory { Meta: meta, }, nil }, - "eval-monitor": func() (cli.Command, error) { - return &command.EvalMonitorCommand{ + "eval-status": func() (cli.Command, error) { + return &command.EvalStatusCommand{ Meta: meta, }, nil }, diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index bad43d21ce6..0a942cd1398 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -29,6 +29,14 @@ const ( // allocInPlace is the status used when speculating on an in-place update allocInPlace = "alloc updating in-place" + + // blockedEvalMaxPlanDesc is the description used for blocked evals that are + // a result of hitting the max number of plan attempts + blockedEvalMaxPlanDesc = "created due to placement conflicts" + + // blockedEvalFailedPlacements is the description used for blocked evals + // that are a result of failing to place all allocations. 
+ blockedEvalFailedPlacements = "created to place remaining allocations" ) // SetStatusError is used to set the status of the evaluation to the given error @@ -154,6 +162,9 @@ func (s *GenericScheduler) createBlockedEval(planFailure bool) error { s.blocked = s.eval.CreateBlockedEval(classEligibility, escaped) if planFailure { s.blocked.TriggeredBy = structs.EvalTriggerMaxPlans + s.blocked.StatusDescription = blockedEvalMaxPlanDesc + } else { + s.blocked.StatusDescription = blockedEvalFailedPlacements } return s.planner.CreateEval(s.blocked) From edbbcd5deb561c52f52f0a0b99e418cc3234de5d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 25 May 2016 17:06:20 -0700 Subject: [PATCH 2/7] Show failure reason in status --- command/eval_status.go | 18 ++++++- command/status.go | 106 +++++++++++++++++++++++++++++++++++------ 2 files changed, 107 insertions(+), 17 deletions(-) diff --git a/command/eval_status.go b/command/eval_status.go index 4d17280691b..951133b00c8 100644 --- a/command/eval_status.go +++ b/command/eval_status.go @@ -2,6 +2,7 @@ package command import ( "fmt" + "sort" "strings" "github.com/hashicorp/nomad/api" @@ -151,14 +152,18 @@ func (c *EvalStatusCommand) Run(args []string) int { c.Ui.Output(formatKV(basic)) if failures { - c.Ui.Output("\n==> Failed Allocations") - for tg, metrics := range eval.FailedTGAllocs { + c.Ui.Output("\n==> Failed Placements") + sorted := sortedTaskGroupFromMetrics(eval.FailedTGAllocs) + for _, tg := range sorted { + metrics := eval.FailedTGAllocs[tg] + noun := "allocation" if metrics.CoalescedFailures > 0 { noun += "s" } c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) dumpAllocMetrics(c.Ui, metrics, false) + c.Ui.Output("") } if eval.BlockedEval != "" { @@ -170,6 +175,15 @@ func (c *EvalStatusCommand) Run(args []string) int { return 0 } +func sortedTaskGroupFromMetrics(groups map[string]*api.AllocationMetric) []string { + tgs := make([]string, 0, len(groups)) + for 
tg, _ := range groups { + tgs = append(tgs, tg) + } + sort.Strings(tgs) + return tgs +} + func getTriggerDetails(eval *api.Evaluation) (noun, subject string) { switch eval.TriggeredBy { case "job-register", "job-deregister", "periodic-job", "rolling-update": diff --git a/command/status.go b/command/status.go index 9eedf966a77..e9f203689e7 100644 --- a/command/status.go +++ b/command/status.go @@ -11,9 +11,16 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +const ( + // maxFailedTGs is the maximum number of task groups we show failure reasons + // for before deferring to eval-status + maxFailedTGs = 5 +) + type StatusCommand struct { Meta - length int + length int + showEvals, verbose bool } func (c *StatusCommand) Help() string { @@ -31,8 +38,10 @@ Status Options: -short Display short output. Used only when a single job is being - queried, and drops verbose information about allocations - and evaluations. + queried, and drops verbose information about allocations. + + -evals + Display the evaluations associated with the job. -verbose Display full information. 
@@ -45,12 +54,13 @@ func (c *StatusCommand) Synopsis() string { } func (c *StatusCommand) Run(args []string) int { - var short, verbose bool + var short bool flags := c.Meta.FlagSet("status", FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.BoolVar(&short, "short", false, "") - flags.BoolVar(&verbose, "verbose", false, "") + flags.BoolVar(&c.showEvals, "evals", false, "") + flags.BoolVar(&c.verbose, "verbose", false, "") if err := flags.Parse(args); err != nil { return 1 @@ -65,7 +75,7 @@ func (c *StatusCommand) Run(args []string) int { // Truncate the id unless full length is requested c.length = shortId - if verbose { + if c.verbose { c.length = fullId } @@ -221,27 +231,65 @@ func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) err func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { var evals, allocs []string + // Query the allocations + jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil) + if err != nil { + return fmt.Errorf("Error querying job allocations: %s", err) + } + // Query the evaluations jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil) if err != nil { return fmt.Errorf("Error querying job evaluations: %s", err) } - // Query the allocations - jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil) - if err != nil { - return fmt.Errorf("Error querying job allocations: %s", err) + // Determine latest evaluation with failures whose follow up hasn't + // completed. 
+ evalsByID := make(map[string]*api.Evaluation, len(jobEvals)) + for _, eval := range jobEvals { + evalsByID[eval.ID] = eval + } + + var latestFailedPlacement *api.Evaluation + for _, eval := range evalsByID { + if len(eval.FailedTGAllocs) == 0 { + // Skip evals without failures + continue + } + + // Check if created blocked eval is finished + if blocked, ok := evalsByID[eval.BlockedEval]; ok { + if blocked.Status == "complete" { + continue + } + } + + if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { + latestFailedPlacement = eval + } + } // Format the evals evals = make([]string, len(jobEvals)+1) - evals[0] = "ID|Priority|Triggered By|Status" + evals[0] = "ID|Priority|Triggered By|Status|Placement Failures" for i, eval := range jobEvals { - evals[i+1] = fmt.Sprintf("%s|%d|%s|%s", + evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%t", limit(eval.ID, c.length), eval.Priority, eval.TriggeredBy, - eval.Status) + eval.Status, + len(eval.FailedTGAllocs) != 0, + ) + } + + if c.verbose || c.showEvals { + c.Ui.Output("\n==> Evaluations") + c.Ui.Output(formatList(evals)) + } + + if latestFailedPlacement != nil { + c.outputFailedPlacements(latestFailedPlacement) } // Format the allocs @@ -257,13 +305,41 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { alloc.ClientStatus) } - c.Ui.Output("\n==> Evaluations") - c.Ui.Output(formatList(evals)) c.Ui.Output("\n==> Allocations") c.Ui.Output(formatList(allocs)) return nil } +func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { + if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 { + return + } + + c.Ui.Output("\n==> Last Placement Failure") + + sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs) + for i, tg := range sorted { + if i >= maxFailedTGs { + break + } + + metrics := failedEval.FailedTGAllocs[tg] + + noun := "allocation" + if metrics.CoalescedFailures > 0 { + noun += "s" + } + c.Ui.Output(fmt.Sprintf("Task Group %q 
(failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) + dumpAllocMetrics(c.Ui, metrics, false) + c.Ui.Output("") + } + + if len(sorted) > maxFailedTGs { + trunc := fmt.Sprintf("Placement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) + c.Ui.Output(trunc) + } +} + // convertApiJob is used to take a *api.Job and convert it to an *struct.Job. // This function is just a hammer and probably needs to be revisited. func convertApiJob(in *api.Job) (*structs.Job, error) { From 63ee89ec26b293c412d2c4d5fa468be5f2ebbb33 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 25 May 2016 18:06:47 -0700 Subject: [PATCH 3/7] Fix check of completed next eval --- command/status.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/status.go b/command/status.go index e9f203689e7..65aaa91d320 100644 --- a/command/status.go +++ b/command/status.go @@ -259,7 +259,7 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { // Check if created blocked eval is finished if blocked, ok := evalsByID[eval.BlockedEval]; ok { - if blocked.Status == "complete" { + if blocked.Status != "blocked" { continue } } From a932717c1c8c0667cd0078b164cbce6fb8cb72e7 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 25 May 2016 18:19:39 -0700 Subject: [PATCH 4/7] Handle the no allocation case in status --- command/status.go | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/command/status.go b/command/status.go index 65aaa91d320..24a3a6c81cf 100644 --- a/command/status.go +++ b/command/status.go @@ -293,20 +293,24 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { } // Format the allocs - allocs = make([]string, len(jobAllocs)+1) - allocs[0] = "ID|Eval ID|Node ID|Task Group|Desired|Status" - for i, alloc := range jobAllocs { - allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s", - limit(alloc.ID, c.length), - limit(alloc.EvalID, c.length), 
- limit(alloc.NodeID, c.length), - alloc.TaskGroup, - alloc.DesiredStatus, - alloc.ClientStatus) - } - c.Ui.Output("\n==> Allocations") - c.Ui.Output(formatList(allocs)) + if len(jobAllocs) > 0 { + allocs = make([]string, len(jobAllocs)+1) + allocs[0] = "ID|Eval ID|Node ID|Task Group|Desired|Status" + for i, alloc := range jobAllocs { + allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s", + limit(alloc.ID, c.length), + limit(alloc.EvalID, c.length), + limit(alloc.NodeID, c.length), + alloc.TaskGroup, + alloc.DesiredStatus, + alloc.ClientStatus) + } + + c.Ui.Output(formatList(allocs)) + } else { + c.Ui.Output("No allocations placed") + } return nil } From c993b51c4b9fd5620618bc2c053bf732513dfccc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 25 May 2016 18:52:14 -0700 Subject: [PATCH 5/7] eval-status website docs --- command/eval_status.go | 2 +- command/run.go | 7 +- command/stop.go | 6 +- .../docs/commands/eval-monitor.html.md.erb | 56 ------------- .../docs/commands/eval-status.html.md.erb | 83 +++++++++++++++++++ website/source/docs/commands/run.html.md.erb | 20 +++-- website/source/docs/commands/stop.html.md.erb | 7 +- .../intro/getting-started/install.html.md | 4 +- website/source/layouts/docs.erb | 4 +- 9 files changed, 110 insertions(+), 79 deletions(-) delete mode 100644 website/source/docs/commands/eval-monitor.html.md.erb create mode 100644 website/source/docs/commands/eval-status.html.md.erb diff --git a/command/eval_status.go b/command/eval_status.go index 951133b00c8..3d5b7a36ace 100644 --- a/command/eval_status.go +++ b/command/eval_status.go @@ -37,7 +37,7 @@ Eval Status Options: } func (c *EvalStatusCommand) Synopsis() string { - return "Display evaluation status information and placement failure reasons" + return "Display evaluation status and placement failure reasons" } func (c *EvalStatusCommand) Run(args []string) int { diff --git a/command/run.go b/command/run.go index f5745f7aa7a..d557d212fa8 100644 --- a/command/run.go +++ b/command/run.go 
@@ -44,10 +44,9 @@ General Options: Run Options: -detach - Return immediately instead of entering monitor mode. After job - submission, the evaluation ID will be printed to the screen. - You can use this ID to start a monitor using the eval-monitor - command later if needed. + Return immediately instead of entering monitor mode. After job submission, + the evaluation ID will be printed to the screen, which can be used to + examine the evaluation using the eval-status command. -verbose Display full information. diff --git a/command/stop.go b/command/stop.go index 4312f330108..3a1625397c4 100644 --- a/command/stop.go +++ b/command/stop.go @@ -27,9 +27,9 @@ Stop Options: -detach Return immediately instead of entering monitor mode. After the - deregister command is submitted, a new evaluation ID is printed - to the screen, which can be used to call up a monitor later if - needed using the eval-monitor command. + deregister command is submitted, a new evaluation ID is printed to the + screen, which can be used to examine the evaluation using the eval-status + command. -yes Automatic yes to prompts. diff --git a/website/source/docs/commands/eval-monitor.html.md.erb b/website/source/docs/commands/eval-monitor.html.md.erb deleted file mode 100644 index bf7360cea20..00000000000 --- a/website/source/docs/commands/eval-monitor.html.md.erb +++ /dev/null @@ -1,56 +0,0 @@ ---- -layout: "docs" -page_title: "Commands: eval-monitor" -sidebar_current: "docs-commands-eval-monitor" -description: > - The eval-monitor command is used to attach a log monitor to an existing - evaluation using its ID. ---- - -# Command: eval-monitor - -The `eval-monitor` command is used to monitor an existing [evaluation](#). -Logs will be output describing state changes to the evaluation or its -associated [allocations](#). The monitor will exit when the evaluation -reaches a terminal state. - -## Usage - -``` -nomad eval-monitor [options] -``` - -An evaluation ID or prefix must be provided. 
If there is an exact match, the -the evaluation will be monitored. Otherwise, a list of matching evaluations and -information will be displayed. - -An interactive monitoring session will be started in the terminal. It is safe -to exit the monitor at any time using ctrl+c. - -The command will exit when the given evaluation reaches a terminal -state (completed or failed). Exit code 0 is returned on successful -evaluation, and if there are no scheduling problems. If there are -job placement issues encountered (unsatisfiable constraints, -resource exhaustion, etc), then the exit code will be 2. Any other -errors, including client connection issues or internal errors, are -indicated by exit code 1. - -## General Options - -<%= general_options_usage %> - -## Status Options - -* `-verbose`: Show full information. - -## Examples - -Monitor an existing evaluation - -``` -$ nomad eval-monitor 8262bc83 -==> Monitoring evaluation "8262bc83" - Allocation "bd6bd0de" created: node "6f299da5", group "group1" - Evaluation status changed: "pending" -> "complete" -==> Evaluation "8262bc83" finished with status "complete" -``` diff --git a/website/source/docs/commands/eval-status.html.md.erb b/website/source/docs/commands/eval-status.html.md.erb new file mode 100644 index 00000000000..7455d6dbff9 --- /dev/null +++ b/website/source/docs/commands/eval-status.html.md.erb @@ -0,0 +1,83 @@ +--- +layout: "docs" +page_title: "Commands: eval-status" +sidebar_current: "docs-commands-eval-status" +description: > + The eval-status command is used to see the status and potential failed + allocations of an evaluation. +--- + +# Command: eval-status + +The `eval-status` command is used to display information about an existing +evaluation. In the case an evaluation could not place all the requested +allocations, this command can be used to determine the failure reasons. + +Optionally, it can also be invoked in a monitor mode to track an outstanding +evaluation. 
In this mode, logs will be output describing state changes to the +evaluation or its associated allocations. The monitor will exit when the +evaluation reaches a terminal state. + +## Usage + +``` +nomad eval-status [options] +``` + +An evaluation ID or prefix must be provided. If there is an exact match, the +status will be shown. Otherwise, a list of matching evaluations and +information will be displayed. + +If the `-monitor` flag is passed, an interactive monitoring session will be +started in the terminal. It is safe to exit the monitor at any time using +ctrl+c. The command will exit when the given evaluation reaches a terminal +state (completed or failed). Exit code 0 is returned on successful +evaluation, and if there are no scheduling problems. If there are +job placement issues encountered (unsatisfiable constraints, +resource exhaustion, etc), then the exit code will be 2. Any other +errors, including client connection issues or internal errors, are +indicated by exit code 1. + +## General Options + +<%= general_options_usage %> + +## Status Options + +* `-monitor`: Monitor an outstanding evaluation + +* `-verbose`: Show full information. 
+ +## Examples + +Show the status of an evaluation that has placement failures + +``` +$ nomad eval-status 2ae0e6a5 +ID = 2ae0e6a5 +Status = complete +Status Description = complete +Type = service +TriggeredBy = job-register +Job ID = example +Priority = 50 +Placement Failures = true + +==> Failed Placements +Task Group "cache" (failed to place 1 allocation): + * Class "foo" filtered 1 nodes + * Constraint "${attr.kernel.name} = windows" filtered 1 nodes + + +Evaluation "67493a64" waiting for additional capacity to place remainder +``` + +Monitor an existing evaluation + +``` +$ nomad eval-status -monitor 8262bc83 +==> Monitoring evaluation "8262bc83" + Allocation "bd6bd0de" created: node "6f299da5", group "group1" + Evaluation status changed: "pending" -> "complete" +==> Evaluation "8262bc83" finished with status "complete" +``` diff --git a/website/source/docs/commands/run.html.md.erb b/website/source/docs/commands/run.html.md.erb index 6c1b6e4aae7..0c3d7d5f6f7 100644 --- a/website/source/docs/commands/run.html.md.erb +++ b/website/source/docs/commands/run.html.md.erb @@ -39,8 +39,8 @@ client connection issues or internal errors, are indicated by exit code 1. ## Run Options * `-detach`: Return immediately instead of monitoring. A new evaluation ID - will be output, which can be used to call the monitor later using the - [eval-monitor](/docs/commands/eval-monitor.html) command. + will be output, which can be used to examine the evaluation using the + [eval-status](/docs/commands/eval-status.html) command. * `-output`: Output the JSON that would be submitted to the HTTP API without submitting the job. @@ -69,15 +69,17 @@ $ nomad run -detach job1.nomad 4947e728 ``` -Schedule a job which cannot get placement. This results in a scheduling failure -and the specifics of the placement are printed: +Schedule a job which cannot be successfully placed. 
This results in a scheduling +failure and the specifics of the placement are printed: ``` $ nomad run failing.nomad -==> Monitoring evaluation "0d7447d9" - Scheduling error for group "group1" (failed to find a node for placement) - Allocation "a739288e" status "failed" (1/1 nodes filtered) - * Constraint "$attr.kernel.name = linux" filtered 1 nodes +==> Monitoring evaluation "2ae0e6a5" + Evaluation triggered by job "example" Evaluation status changed: "pending" -> "complete" -==> Evaluation "0d7447d9" finished with status "complete" +==> Evaluation "2ae0e6a5" finished with status "complete" but failed to place all allocations: + Task Group "cache" (failed to place 1 allocation): + * Class "foo" filtered 1 nodes + * Constraint "${attr.kernel.name} = linux" filtered 1 nodes + Evaluation "67493a64" waiting for additional capacity to place remainder ``` diff --git a/website/source/docs/commands/stop.html.md.erb b/website/source/docs/commands/stop.html.md.erb index dd56683b857..641cb636fb8 100644 --- a/website/source/docs/commands/stop.html.md.erb +++ b/website/source/docs/commands/stop.html.md.erb @@ -33,9 +33,10 @@ reached a terminal state. It is safe to exit the monitor early using ctrl+c. ## Stop Options -* `-detach`: Return immediately instead of monitoring. A new evaluation ID - will be output, which can be used to call the monitor later using the - [eval-monitor](/docs/commands/eval-monitor.html) command. +* `-detach`: Return immediately instead of entering monitor mode. After the + deregister command is submitted, a new evaluation ID is printed to the screen, + which can be used to examine the evaluation using the + [eval-status](/docs/commands/eval-status.html) command. 
## Status Options diff --git a/website/source/intro/getting-started/install.html.md b/website/source/intro/getting-started/install.html.md index 8cb223e3194..386d4a880b9 100644 --- a/website/source/intro/getting-started/install.html.md +++ b/website/source/intro/getting-started/install.html.md @@ -49,11 +49,13 @@ Available commands are: agent-info Display status information about the local agent alloc-status Display allocation status information and metadata client-config View or modify client configuration details - eval-monitor Monitor an evaluation interactively + eval-status Display evaluation status and placement failure reasons fs Inspect the contents of an allocation directory init Create an example job file + inspect Inspect a submitted job node-drain Toggle drain mode on a given node node-status Display status information about nodes + plan Dry-run a job update to determine its effects run Run a new job or update an existing job server-force-leave Force a server into the 'left' state server-join Join server nodes together diff --git a/website/source/layouts/docs.erb b/website/source/layouts/docs.erb index 718a92a16ae..0abef6c64db 100644 --- a/website/source/layouts/docs.erb +++ b/website/source/layouts/docs.erb @@ -120,8 +120,8 @@ > client-config - > - eval-monitor + > + eval-status > init From ad00346640833905634420f1ec40cf84cdea15f6 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 25 May 2016 18:57:49 -0700 Subject: [PATCH 6/7] test fix --- command/status_test.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/command/status_test.go b/command/status_test.go index 4cdd96293a2..c4288bcaf1d 100644 --- a/command/status_test.go +++ b/command/status_test.go @@ -60,6 +60,35 @@ func TestStatusCommand_Run(t *testing.T) { if strings.Contains(out, "job1_sfx") || !strings.Contains(out, "job2_sfx") { t.Fatalf("expected only job2_sfx, got: %s", out) } + if !strings.Contains(out, "Allocations") { + t.Fatalf("should dump 
allocations") + } + ui.OutputWriter.Reset() + + // Query a single job showing evals + if code := cmd.Run([]string{"-address=" + url, "-evals", "job2_sfx"}); code != 0 { + t.Fatalf("expected exit 0, got: %d", code) + } + out = ui.OutputWriter.String() + if strings.Contains(out, "job1_sfx") || !strings.Contains(out, "job2_sfx") { + t.Fatalf("expected only job2_sfx, got: %s", out) + } + if !strings.Contains(out, "Evaluations") { + t.Fatalf("should dump evaluations") + } + if !strings.Contains(out, "Allocations") { + t.Fatalf("should dump allocations") + } + ui.OutputWriter.Reset() + + // Query a single job in verbose mode + if code := cmd.Run([]string{"-address=" + url, "-verbose", "job2_sfx"}); code != 0 { + t.Fatalf("expected exit 0, got: %d", code) + } + out = ui.OutputWriter.String() + if strings.Contains(out, "job1_sfx") || !strings.Contains(out, "job2_sfx") { + t.Fatalf("expected only job2_sfx, got: %s", out) + } if !strings.Contains(out, "Evaluations") { t.Fatalf("should dump evaluations") } From f5398d25e208cdbacb9061297a3575c491b2ae24 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 26 May 2016 17:37:08 -0700 Subject: [PATCH 7/7] Clean failed placement output --- command/eval_status.go | 4 +-- command/status.go | 56 +++++++++++++++++------------------------- 2 files changed, 24 insertions(+), 36 deletions(-) diff --git a/command/eval_status.go b/command/eval_status.go index 3d5b7a36ace..2e827f7f07c 100644 --- a/command/eval_status.go +++ b/command/eval_status.go @@ -130,7 +130,7 @@ func (c *EvalStatusCommand) Run(args []string) int { statusDesc = eval.Status } - // Format the allocation data + // Format the evaluation data basic := []string{ fmt.Sprintf("ID|%s", limit(eval.ID, length)), fmt.Sprintf("Status|%s", eval.Status), @@ -167,7 +167,7 @@ func (c *EvalStatusCommand) Run(args []string) int { } if eval.BlockedEval != "" { - c.Ui.Output(fmt.Sprintf("\nEvaluation %q waiting for additional capacity to place remainder", + 
c.Ui.Output(fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder", limit(eval.BlockedEval, length))) } } diff --git a/command/status.go b/command/status.go index 24a3a6c81cf..6c4115210ba 100644 --- a/command/status.go +++ b/command/status.go @@ -244,31 +244,9 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { } // Determine latest evaluation with failures whose follow up hasn't - // completed. - evalsByID := make(map[string]*api.Evaluation, len(jobEvals)) - for _, eval := range jobEvals { - evalsByID[eval.ID] = eval - } - + // completed, this is done while formatting var latestFailedPlacement *api.Evaluation - for _, eval := range evalsByID { - if len(eval.FailedTGAllocs) == 0 { - // Skip evals without failures - continue - } - - // Check if created blocked eval is finished - if blocked, ok := evalsByID[eval.BlockedEval]; ok { - if blocked.Status != "blocked" { - continue - } - } - - if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { - latestFailedPlacement = eval - } - - } + blockedEval := false // Format the evals evals = make([]string, len(jobEvals)+1) @@ -281,6 +259,19 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { eval.Status, len(eval.FailedTGAllocs) != 0, ) + + if eval.Status == "blocked" { + blockedEval = true + } + + if len(eval.FailedTGAllocs) == 0 { + // Skip evals without failures + continue + } + + if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { + latestFailedPlacement = eval + } } if c.verbose || c.showEvals { @@ -288,7 +279,7 @@ func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { c.Ui.Output(formatList(evals)) } - if latestFailedPlacement != nil { + if blockedEval && latestFailedPlacement != nil { c.outputFailedPlacements(latestFailedPlacement) } @@ -319,7 +310,7 @@ func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { return } - 
c.Ui.Output("\n==> Last Placement Failure") + c.Ui.Output("\n==> Placement Failure") sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs) for i, tg := range sorted { @@ -327,19 +318,16 @@ func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { break } + c.Ui.Output(fmt.Sprintf("Task Group %q:", tg)) metrics := failedEval.FailedTGAllocs[tg] - - noun := "allocation" - if metrics.CoalescedFailures > 0 { - noun += "s" - } - c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) dumpAllocMetrics(c.Ui, metrics, false) - c.Ui.Output("") + if i != len(sorted)-1 { + c.Ui.Output("") + } } if len(sorted) > maxFailedTGs { - trunc := fmt.Sprintf("Placement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) + trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) c.Ui.Output(trunc) } }