From 2757448d0258788e4c6030570fb05540db3e5670 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 14 Jun 2021 08:17:04 -0400 Subject: [PATCH 1/2] quotas: evaluate quota feasibility last in scheduler The `QuotaIterator` is used as the source of nodes passed into feasibility checking for constraints. Every node that passes the quota check counts the allocation resources agains the quota, and as a result we count nodes which will be later filtered out by constraints. Therefore for jobs with constraints, nodes that are feasibility checked but fail have been counted against quotas. This failure mode is order dependent; if all the unfiltered nodes happen to be quota checked first, everything works as expected. This changeset moves the `QuotaIterator` to happen last among all feasibility checkers (but before ranking). The `QuotaIterator` will never receive filtered nodes so it will calculate quotas correctly. --- scheduler/stack.go | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/scheduler/stack.go b/scheduler/stack.go index b8ef95a9e49..856e0c2a3e8 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -208,10 +208,6 @@ func NewSystemStack(ctx Context) *SystemStack { // have to evaluate on all nodes. s.source = NewStaticIterator(ctx, nil) - // Create the quota iterator to determine if placements would result in the - // quota attached to the namespace of the job to go over. - s.quota = NewQuotaIterator(ctx, s.source) - // Attach the job constraints. The job is filled in later. s.jobConstraint = NewConstraintChecker(ctx, nil) @@ -243,13 +239,20 @@ func NewSystemStack(ctx Context) *SystemStack { s.taskGroupDevices, s.taskGroupNetwork} avail := []FeasibilityChecker{s.taskGroupCSIVolumes} - s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) + s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs, avail) // Filter on distinct property constraints. s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.wrappedChecks) + // Create the quota iterator to determine if placements would result in + // the quota attached to the namespace of the job to go over. + // Note: the quota iterator must be the last feasibility iterator before + // we upgrade to ranking, or our quota usage will include ineligible + // nodes! + s.quota = NewQuotaIterator(ctx, s.distinctPropertyConstraint) + // Upgrade from feasible to rank iterator - rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint) + rankSource := NewFeasibleRankIterator(ctx, s.quota) // Apply the bin packing, this depends on the resources needed // by a particular task group. Enable eviction as system jobs are high @@ -330,10 +333,6 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { // balancing across eligible nodes. s.source = NewRandomIterator(ctx, nil) - // Create the quota iterator to determine if placements would result in the - // quota attached to the namespace of the job to go over. - s.quota = NewQuotaIterator(ctx, s.source) - // Attach the job constraints. The job is filled in later. s.jobConstraint = NewConstraintChecker(ctx, nil) @@ -366,7 +365,7 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { s.taskGroupDevices, s.taskGroupNetwork} avail := []FeasibilityChecker{s.taskGroupCSIVolumes} - s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) + s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs, avail) // Filter on distinct host constraints. s.distinctHostsConstraint = NewDistinctHostsIterator(ctx, s.wrappedChecks) @@ -374,8 +373,15 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { // Filter on distinct property constraints. s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.distinctHostsConstraint) + // Create the quota iterator to determine if placements would result in + // the quota attached to the namespace of the job to go over. + // Note: the quota iterator must be the last feasibility iterator before + // we upgrade to ranking, or our quota usage will include ineligible + // nodes! + s.quota = NewQuotaIterator(ctx, s.distinctPropertyConstraint) + // Upgrade from feasible to rank iterator - rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint) + rankSource := NewFeasibleRankIterator(ctx, s.quota) // Apply the bin packing, this depends on the resources needed // by a particular task group. From 27134c34da363e76d557be0c60a1a72954b34544 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 14 Jun 2021 08:28:37 -0400 Subject: [PATCH 2/2] changelog entry --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 697c6e03844..54d7cd17d96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ IMPROVEMENTS: * cli: Added `-monitor` flag to `deployment status` command and automatically monitor deployments from `job run` command. [[GH-10661](https://github.com/hashicorp/nomad/pull/10661)] +BUG FIXES: +* quotas (Enterprise): Fixed a bug where quotas were evaluated before constraints, resulting in quota capacity being used up by filtered nodes. [[GH-10753](https://github.com/hashicorp/nomad/issues/10753)] + ## 1.1.1 (June 9, 2021) FEATURES: @@ -114,6 +117,10 @@ BUG FIXES: * server: Fixed a panic that may arise on submission of jobs containing invalid service checks [[GH-10154](https://github.com/hashicorp/nomad/issues/10154)] * ui: Fixed the rendering of interstitial components shown after processing a dynamic application sizing recommendation. [[GH-10094](https://github.com/hashicorp/nomad/pull/10094)] +## 1.0.8 (Unreleased) +* quotas (Enterprise): Fixed a bug where quotas were evaluated before constraints, resulting in quota capacity being used up by filtered nodes. [[GH-10753](https://github.com/hashicorp/nomad/issues/10753)] +* quotas (Enterprise): Fixed a bug where stopped allocations for a failed deployment can be double-credited to quota limits, resulting in a quota limit bypass. [[GH-10694](https://github.com/hashicorp/nomad/issues/10694) + ## 1.0.7 (June 9, 2021) BUG FIXES: @@ -121,7 +128,6 @@ BUG FIXES: * cli: Fixed a bug where `plugin status` did not validate the passed `type` flag correctly [[GH-10712](https://github.com/hashicorp/nomad/pull/10712)] * cli: Fixed a bug where `alloc exec` may fail with "unexpected EOF" without returning the exit code after a command [[GH-10657](https://github.com/hashicorp/nomad/issues/10657)] * client: Fixed a bug where `alloc exec` sessions may terminate abruptly after a few minutes [[GH-10710](https://github.com/hashicorp/nomad/issues/10710)] -* quotas (Enterprise): Fixed a bug where stopped allocations for a failed deployment can be double-credited to quota limits, resulting in a quota limit bypass. [[GH-10694](https://github.com/hashicorp/nomad/issues/10694)] * drivers/exec: Fixed a bug where `exec` and `java` tasks inherit the Nomad agent's `oom_score_adj` value [[GH-10698](https://github.com/hashicorp/nomad/issues/10698)] * ui: Fixed a bug where exec would not work across regions. [[GH-10539](https://github.com/hashicorp/nomad/issues/10539)] * ui: Fixed global-search shortcut for non-english keyboards. [[GH-10714](https://github.com/hashicorp/nomad/issues/10714)]