Skip to content

Commit

Permalink
Handle Scaling Policies in Job Plan endpoint (#8567)
Browse files Browse the repository at this point in the history
Fixes #8544

This PR fixes a bug where `nomad job plan ...` always reports no change if the submitted job contains a scaling policy.

The issue has three contributing factors:
1. The plan endpoint doesn't populate the required scaling policy ID, unlike the job register endpoint
2. The plan endpoint suppresses errors on job insertion - the job insertion fails here, because the scaling policy is missing the required ID
3. The scheduler reports no update necessary when the relevant job isn't in store (because the insertion failed)

This PR fixes the first two factors.  Changing the scheduler to be more strict might make sense, but may violate some idempotency invariant or make the scheduler more brittle.
  • Loading branch information
Mahmood Ali authored Jul 30, 2020
1 parent ac7fed9 commit e57c252
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ FEATURES:

BUG FIXES:

* core: Fixed a bug where `nomad job plan` reports success and no updates if the job contains a scaling policy [[GH-8567](https://github.com/hashicorp/nomad/issues/8567)]
* api: Added missing namespace field to scaling status GET response object [[GH-8527](https://github.com/hashicorp/nomad/issues/8527)]
* api: Do not allow submission of jobs of type `system` that include task groups with scaling stanzas [[GH-8481](https://github.com/hashicorp/nomad/issues/8481)]
* vault: Fixed a bug where upgrades from pre-0.11.3 that use Vault can lead to memory spikes and write large Raft messages. [[GH-8553](https://github.com/hashicorp/nomad/issues/8553)]
Expand Down
14 changes: 12 additions & 2 deletions nomad/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,11 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse)
return err
}

// Ensure that all scaling policies have an appropriate ID
if err := propagateScalingPolicyIDs(oldJob, args.Job); err != nil {
return err
}

var index uint64
var updatedIndex uint64

Expand All @@ -1683,11 +1688,16 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse)
if oldJob.SpecChanged(args.Job) {
// Insert the updated Job into the snapshot
updatedIndex = oldJob.JobModifyIndex + 1
snap.UpsertJob(updatedIndex, args.Job)
if err := snap.UpsertJob(updatedIndex, args.Job); err != nil {
return err
}
}
} else if oldJob == nil {
// Insert the updated Job into the snapshot
snap.UpsertJob(100, args.Job)
err := snap.UpsertJob(100, args.Job)
if err != nil {
return err
}
}

// Create an eval and mark it as requiring annotations and insert that as well
Expand Down
36 changes: 36 additions & 0 deletions nomad/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5369,6 +5369,42 @@ func TestJobEndpoint_Plan_NoDiff(t *testing.T) {
}
}

// TestJobEndpoint_Plan_Scaling asserts that the plan endpoint handles
// jobs that contain a scaling stanza: the Job.Plan RPC must succeed and
// must report the task group under FailedTGAllocs rather than silently
// reporting nothing.
func TestJobEndpoint_Plan_Scaling(t *testing.T) {
t.Parallel()

s1, cleanupS1 := TestServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer cleanupS1()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)

// Create a plan request for a mock job whose first task group carries a
// scaling policy.
job := mock.Job()
tg := job.TaskGroups[0]
// NOTE(review): presumably this memory ask is far larger than any node can
// satisfy, so the group cannot be placed and shows up in FailedTGAllocs.
tg.Tasks[0].Resources.MemoryMB = 999999999
scaling := &structs.ScalingPolicy{Min: 1, Max: 100}
tg.Scaling = scaling.TargetTaskGroup(job, tg)
planReq := &structs.JobPlanRequest{
Job: job,
Diff: false,
WriteRequest: structs.WriteRequest{
Region: "global",
Namespace: job.Namespace,
},
}

// Call the Plan RPC; the request itself is expected to succeed.
var planResp structs.JobPlanResponse
err := msgpackrpc.CallWithCodec(codec, "Job.Plan", planReq, &planResp)
require.NoError(t, err)

// The response must record a failed allocation keyed by the task group name.
require.NotEmpty(t, planResp.FailedTGAllocs)
require.Contains(t, planResp.FailedTGAllocs, tg.Name)
}

func TestJobEndpoint_ImplicitConstraints_Vault(t *testing.T) {
t.Parallel()

Expand Down

0 comments on commit e57c252

Please sign in to comment.