From 1e39c324329aef6386dcffba5c41b39910c7d8c8 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 26 Jan 2022 17:14:08 -0800 Subject: [PATCH] core: prevent malformed plans from crashing leader The Plan.Submit endpoint assumed PlanRequest.Plan was never nil. While there is no evidence it ever has been nil, we should not panic if a nil plan is ever submitted because that would crash the leader. --- nomad/plan_endpoint.go | 5 +++ nomad/plan_endpoint_test.go | 78 +++++++++++++++++++++++++++++++++++++ nomad/structs/structs.go | 4 +- 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/nomad/plan_endpoint.go b/nomad/plan_endpoint.go index bf2c461fdff..9d2ea30ede0 100644 --- a/nomad/plan_endpoint.go +++ b/nomad/plan_endpoint.go @@ -1,6 +1,7 @@ package nomad import ( + "fmt" "time" metrics "github.com/armon/go-metrics" @@ -22,6 +23,10 @@ func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) er } defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now()) + if args.Plan == nil { + return fmt.Errorf("cannot submit nil plan") + } + // Pause the Nack timer for the eval as it is making progress as long as it // is in the plan queue. We resume immediately after we get a result to // handle the case that the receiving worker dies. diff --git a/nomad/plan_endpoint_test.go b/nomad/plan_endpoint_test.go index 039512bc3e9..a3fb596a6d4 100644 --- a/nomad/plan_endpoint_test.go +++ b/nomad/plan_endpoint_test.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/require" ) func TestPlanEndpoint_Submit(t *testing.T) { @@ -49,3 +50,80 @@ func TestPlanEndpoint_Submit(t *testing.T) { t.Fatalf("missing result") } } + +// TestPlanEndpoint_Submit_Bad asserts that the Plan.Submit endpoint rejects +// bad data with an error instead of panicking. +func TestPlanEndpoint_Submit_Bad(t *testing.T) { + t.Parallel() + + s1, cleanupS1 := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer cleanupS1() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Mock a valid eval being dequeued by a worker + eval := mock.Eval() + s1.evalBroker.Enqueue(eval) + + evalOut, _, err := s1.evalBroker.Dequeue([]string{eval.Type}, time.Second) + require.NoError(t, err) + require.Equal(t, eval, evalOut) + + cases := []struct { + Name string + Plan *structs.Plan + Err string + }{ + { + Name: "Nil", + Plan: nil, + Err: "cannot submit nil plan", + }, + { + Name: "Empty", + Plan: &structs.Plan{}, + Err: "evaluation is not outstanding", + }, + { + Name: "BadEvalID", + Plan: &structs.Plan{ + EvalID: "1234", // does not exist + }, + Err: "evaluation is not outstanding", + }, + { + Name: "MissingToken", + Plan: &structs.Plan{ + EvalID: eval.ID, + }, + Err: "evaluation token does not match", + }, + { + Name: "InvalidToken", + Plan: &structs.Plan{ + EvalID: eval.ID, + EvalToken: "1234", // invalid + }, + Err: "evaluation token does not match", + }, + } + + for i := range cases { + tc := cases[i] + t.Run(tc.Name, func(t *testing.T) { + req := &structs.PlanRequest{ + Plan: tc.Plan, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp structs.PlanResponse + err := msgpackrpc.CallWithCodec(codec, "Plan.Submit", req, &resp) + require.EqualError(t, err, tc.Err) + require.Nil(t, resp.Result) + }) + } + + // Ensure no plans were enqueued + require.Zero(t, s1.planner.planQueue.Stats().Depth) +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 2c20932a24f..5b62935832f 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -10803,7 +10803,9 @@ type Plan struct { func (p *Plan) GoString() string { out := fmt.Sprintf("(eval %s", p.EvalID[:8]) - out += fmt.Sprintf(", job %s", p.Job.ID) + if p.Job != nil { + out += fmt.Sprintf(", job %s", p.Job.ID) + } if p.Deployment != nil { out += fmt.Sprintf(", deploy %s", p.Deployment.ID[:8]) }