Skip to content

Commit

Permalink
Scale-up only new machineSet while scale-down all active mSs proporti…
Browse files Browse the repository at this point in the history
…onally (#765)

* scale-up only new mS while scale-down all active mSs proportionally

Co-authored-by: Madhav Bhargava <[email protected]>
  • Loading branch information
himanshu-kun and unmarshall authored Jan 23, 2023
1 parent 7ec9a42 commit dbafe5d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 40 deletions.
2 changes: 2 additions & 0 deletions pkg/controller/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ func (dc *controller) reconcileClusterMachineDeployment(key string) error {
}

if d.Spec.Paused {
klog.V(3).Infof("Scaling detected for machineDeployment %s which is paused", d.Name)
return dc.sync(ctx, d, machineSets, machineMap)
}

Expand All @@ -544,6 +545,7 @@ func (dc *controller) reconcileClusterMachineDeployment(key string) error {
return err
}
if scalingEvent {
klog.V(3).Infof("Scaling detected for machineDeployment %s", d.Name)
return dc.sync(ctx, d, machineSets, machineMap)
}

Expand Down
97 changes: 57 additions & 40 deletions pkg/controller/deployment_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,25 +400,22 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD
return createdIS, err
}

// scale scales proportionally in order to mitigate risk. Otherwise, scaling up can increase the size
// of the new machine set and scaling down can decrease the sizes of the old ones, both of which would
// have the effect of hastening the rollout progress, which could produce a higher proportion of unavailable
// replicas in the event of a problem with the rolled out template. Should run only on scaling events or
// when a deployment is paused and not during the normal rollout process.
func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDeployment, newIS *v1alpha1.MachineSet, oldISs []*v1alpha1.MachineSet) error {
// If there is only one active machine set then we should scale that up to the full count of the
// deployment. If there is no active machine set, then we should scale up the newest machine set.
if activeOrLatest := FindActiveOrLatest(newIS, oldISs); activeOrLatest != nil {
if (activeOrLatest.Spec.Replicas) == (deployment.Spec.Replicas) {
return nil
}
_, _, err := dc.scaleMachineSetAndRecordEvent(ctx, activeOrLatest, (deployment.Spec.Replicas), deployment)
klog.V(3).Infof("Scaling latest/theOnlyActive machineSet %s", activeOrLatest.Name)
_, _, err := dc.scaleMachineSetAndRecordEvent(ctx, activeOrLatest, deployment.Spec.Replicas, deployment)
return err
}

// If the new machine set is saturated, old machine sets should be fully scaled down.
// This case handles machine set adoption during a saturated new machine set.
if IsSaturated(deployment, newIS) {
klog.V(3).Infof("Scaling old active machineSets as new machineSet %s is saturated", newIS.Name)
for _, old := range FilterActiveMachineSets(oldISs) {
if _, _, err := dc.scaleMachineSetAndRecordEvent(ctx, old, 0, deployment); err != nil {
return err
Expand All @@ -428,9 +425,11 @@ func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDep
}

// There are old machine sets with machines and the new machine set is not saturated.
// We need to proportionally scale all machine sets (new and old) in case of a
// rolling deployment.
// So the scaling is handled this way:
// - Scale up ? -> scale up only the new machineSet
// - Scale down ? -> scale down the old machineSets proportionally
if IsRollingUpdate(deployment) {
klog.V(3).Infof("Scaling all active machineSets proportionally for scale-in, while scaling up latest machineSet only for scale-out, machineDeployment %s", deployment.Name)
allISs := FilterActiveMachineSets(append(oldISs, newIS))
allISsReplicas := GetReplicaCountForMachineSets(allISs)

Expand All @@ -441,56 +440,39 @@ func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDep

// Number of additional replicas that can be either added or removed from the total
// replicas count. These replicas should be distributed proportionally to the active
// machine sets.
// machine sets in case of scale-in, while added only to the new machineSet during scale-out
klog.V(3).Infof("machineDeployment: %s , replicasToAdd: %d, maxAllowedSize: %d, allMachineSetReplicas: %d", deployment.Name, allowedSize, allISsReplicas)
deploymentReplicasToAdd := allowedSize - allISsReplicas

// The additional replicas should be distributed proportionally amongst the active
// machine sets from the larger to the smaller in size machine set. Scaling direction
// drives what happens in case we are trying to scale machine sets of the same size.
// In such a case when scaling up, we should scale up newer machine sets first, and
// when scaling down, we should scale down older machine sets first.
// During scale-in, the additional replicas should be distributed proportionally amongst the active
// machine sets from the larger to the smaller in size machine set.
// We should scale down older machine sets first if machine sets are of equal size.

var scalingOperation string
nameToSize := make(map[string]int32)
deploymentReplicasAdded := int32(0)
switch {
case deploymentReplicasToAdd > 0:
sort.Sort(MachineSetsBySizeNewer(allISs))
scalingOperation = "up"

nameToSize = dc.scaleNewMachineSet(newIS, allISs, deploymentReplicasToAdd, deployment)
deploymentReplicasAdded = deploymentReplicasToAdd
case deploymentReplicasToAdd < 0:
sort.Sort(MachineSetsBySizeOlder(allISs))
scalingOperation = "down"
sort.Sort(MachineSetsBySizeOlder(allISs))
nameToSize, deploymentReplicasAdded = dc.scaleMachineSetsProportionally(allISs, deploymentReplicasToAdd, deployment)
}

// Iterate over all active machine sets and estimate proportions for each of them.
// The absolute value of deploymentReplicasAdded should never exceed the absolute
// value of deploymentReplicasToAdd.
deploymentReplicasAdded := int32(0)
nameToSize := make(map[string]int32)
for i := range allISs {
is := allISs[i]

// Estimate proportions if we have replicas to add, otherwise simply populate
// nameToSize with the current sizes for each machine set.
if deploymentReplicasToAdd != 0 {
proportion := GetProportion(is, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded)

nameToSize[is.Name] = (is.Spec.Replicas) + proportion
deploymentReplicasAdded += proportion
} else {
nameToSize[is.Name] = (is.Spec.Replicas)
}
}

// Update all machine sets
for i := range allISs {
is := allISs[i]

// Add/remove any leftovers to the largest machine set.
// Incorporate any leftovers to the largest machine set.
if i == 0 && deploymentReplicasToAdd != 0 {
leftover := deploymentReplicasToAdd - deploymentReplicasAdded
nameToSize[is.Name] = nameToSize[is.Name] + leftover
if nameToSize[is.Name] < 0 {
nameToSize[is.Name] = 0
}
klog.V(3).Infof("leftover proportion increase of %d done in largest machineSet %s", leftover, is.Name)
}

// TODO: Use transactions when we have them.
Expand All @@ -503,6 +485,41 @@ func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDep
return nil
}

func (dc *controller) scaleNewMachineSet(newIS *v1alpha1.MachineSet, allISs []*v1alpha1.MachineSet, deploymentReplicasToAdd int32, deployment *v1alpha1.MachineDeployment) map[string]int32 {
nameToSize := make(map[string]int32)
for _, is := range allISs {
nameToSize[is.Name] = is.Spec.Replicas
}

nameToSize[newIS.Name] = newIS.Spec.Replicas + deploymentReplicasToAdd

return nameToSize
}

func (dc *controller) scaleMachineSetsProportionally(allISs []*v1alpha1.MachineSet, deploymentReplicasToAdd int32, deployment *v1alpha1.MachineDeployment) (map[string]int32, int32) {
// Iterate over all active machine sets and estimate proportions for each of them.
// The absolute value of deploymentReplicasAdded should never exceed the absolute
// value of deploymentReplicasToAdd.

nameToSize := make(map[string]int32)
deploymentReplicasAdded := int32(0)
for i := range allISs {
is := allISs[i]
// Estimate proportions if we have replicas to add, otherwise simply populate
// nameToSize with the current sizes for each machine set.
if deploymentReplicasToAdd != 0 {
proportion := GetProportion(is, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded)
klog.V(3).Infof("final proportion increase for machineSet %s due to parent deployment's replica update is %d", is.Name, proportion)
nameToSize[is.Name] = (is.Spec.Replicas) + proportion
deploymentReplicasAdded += proportion
} else {
nameToSize[is.Name] = (is.Spec.Replicas)
}
}

return nameToSize, deploymentReplicasAdded
}

func (dc *controller) scaleMachineSetAndRecordEvent(ctx context.Context, is *v1alpha1.MachineSet, newScale int32, deployment *v1alpha1.MachineDeployment) (bool, *v1alpha1.MachineSet, error) {
// No need to scale
if (is.Spec.Replicas) == newScale {
Expand Down Expand Up @@ -644,7 +661,7 @@ func calculateDeploymentStatus(allISs []*v1alpha1.MachineSet, newIS *v1alpha1.Ma
}

// isScalingEvent checks whether the provided deployment has been updated with a scaling event
// by looking at the desired-replicas annotation in the active machine sets of the deployment.
// by looking at the desired-replicas annotation in the active machine sets of the deployment, and returns if there was scale-out or not.
//
// rsList should come from getReplicaSetsForDeployment(d).
// machineMap should come from getmachineMapForDeployment(d, rsList).
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/deployment_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,8 @@ func SetReplicasAnnotations(is *v1alpha1.MachineSet, desiredReplicas, maxReplica
is.Annotations[MaxReplicasAnnotation] = maxString
updated = true
}

klog.V(4).Infof("(SetReplicasAnnotations) ms.Name: %s desired: %s , max: %s , updated: %d", is.Name, desiredString, maxString, updated)
return updated
}

Expand Down Expand Up @@ -656,6 +658,7 @@ func GetProportion(is *v1alpha1.MachineSet, d v1alpha1.MachineDeployment, deploy
isFraction := getMachineSetFraction(*is, d)
allowed := deploymentReplicasToAdd - deploymentReplicasAdded

klog.V(4).Infof("allowed proportion increase= %d, proposed proportion increase= %d", allowed, isFraction)
if deploymentReplicasToAdd > 0 {
// Use the minimum between the machine set fraction and the maximum allowed replicas
// when scaling up. This way we ensure we will not scale up more than the allowed
Expand Down Expand Up @@ -689,6 +692,8 @@ func getMachineSetFraction(is v1alpha1.MachineSet, d v1alpha1.MachineDeployment)
// We should never proportionally scale up from zero which means rs.spec.replicas and annotatedReplicas
// will never be zero here.
newISsize := (float64((is.Spec.Replicas) * deploymentReplicas)) / float64(annotatedReplicas)

klog.V(4).Infof("calculating proportion increase for machineSet %s. ms.desired=%d, maxDeploymentSizePossible=%d, maxDeploymentSizePossibleAsPerAnnotation=%d", is.Name, deploymentReplicas, annotatedReplicas)
return integer.RoundToInt32(newISsize) - (is.Spec.Replicas)
}

Expand Down

0 comments on commit dbafe5d

Please sign in to comment.