Skip to content

Commit

Permalink
multiregion: all regions start in running if no max_parallel (#8209)
Browse files Browse the repository at this point in the history
If `max_parallel` is not set, all regions should begin in a `running` state
rather than a `pending` state. Otherwise the first region is set to `running`
and then all the remaining regions once it enters `blocked. That behavior is
technically correct in that we have at most `max_parallel` regions running,
but definitely not what a user expects.
  • Loading branch information
tgross authored Jun 19, 2020
1 parent ef7b7b2 commit 5a068e6
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 4 deletions.
24 changes: 24 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -4008,6 +4008,30 @@ func (j *Job) IsMultiregion() bool {
return j.Multiregion != nil && j.Multiregion.Regions != nil && len(j.Multiregion.Regions) > 0
}

// IsMultiregionStarter returns whether a regional job should begin
// in the running state
func (j *Job) IsMultiregionStarter() bool {
if !j.IsMultiregion() {
return true
}
if j.Type == "system" || j.Type == "batch" {
return true
}
if j.Multiregion.Strategy == nil || j.Multiregion.Strategy.MaxParallel == 0 {
return true
}
for i, region := range j.Multiregion.Regions {
if j.Region == region.Name {
if i < j.Multiregion.Strategy.MaxParallel {
return true
} else {
break
}
}
}
return false
}

// VaultPolicies returns the set of Vault policies per task group, per task
func (j *Job) VaultPolicies() map[string]map[string]*Vault {
policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
Expand Down
58 changes: 58 additions & 0 deletions nomad/structs/structs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5281,6 +5281,64 @@ func TestMultiregion_CopyCanonicalize(t *testing.T) {
require.False(old.Diff(nonEmptyOld))
}

func TestMultiregion_Starter(t *testing.T) {
require := require.New(t)

j := &Job{}
j.Type = "service"
j.Region = "north"
require.True(j.IsMultiregionStarter())

tc := &Multiregion{
Strategy: &MultiregionStrategy{},
Regions: []*MultiregionRegion{
{Name: "north"},
{Name: "south"},
{Name: "east"},
{Name: "west"},
},
}

b := &Job{}
b.Type = "batch"
b.Multiregion = tc
b.Region = "west"
require.True(j.IsMultiregionStarter())

j.Multiregion = tc
j.Region = "north"
require.True(j.IsMultiregionStarter())
j.Region = "south"
require.True(j.IsMultiregionStarter())
j.Region = "east"
require.True(j.IsMultiregionStarter())
j.Region = "west"
require.True(j.IsMultiregionStarter())

tc.Strategy = &MultiregionStrategy{MaxParallel: 1}
j.Multiregion = tc
j.Region = "north"
require.True(j.IsMultiregionStarter())
j.Region = "south"
require.False(j.IsMultiregionStarter())
j.Region = "east"
require.False(j.IsMultiregionStarter())
j.Region = "west"
require.False(j.IsMultiregionStarter())

tc.Strategy = &MultiregionStrategy{MaxParallel: 2}
j.Multiregion = tc
j.Region = "north"
require.True(j.IsMultiregionStarter())
j.Region = "south"
require.True(j.IsMultiregionStarter())
j.Region = "east"
require.False(j.IsMultiregionStarter())
j.Region = "west"
require.False(j.IsMultiregionStarter())

}

func TestNodeResources_Merge(t *testing.T) {
res := &NodeResources{
Cpu: NodeCpuResources{
Expand Down
7 changes: 3 additions & 4 deletions scheduler/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ func (a *allocReconciler) Compute() *reconcileResults {
// Detect if the deployment is paused
if a.deployment != nil {
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
//|| a.deployment.Status == structs.DeploymentStatusPending
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
}
if a.deployment == nil {
Expand Down Expand Up @@ -555,9 +554,9 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
// A previous group may have made the deployment already
if a.deployment == nil {
a.deployment = structs.NewDeployment(a.job)
// only the first region of a multiregion job starts in the
// running state
if a.job.IsMultiregion() && a.job.Region != a.job.Multiregion.Regions[0].Name {
// in a multiregion job, if max_parallel is set, only the first
// region starts in the running state
if !a.job.IsMultiregionStarter() {
a.deployment.Status = structs.DeploymentStatusPending
a.deployment.StatusDescription = structs.DeploymentStatusDescriptionPendingForPeer
}
Expand Down

0 comments on commit 5a068e6

Please sign in to comment.