Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale-up only new machineSet while scale-down all active mSs proportionally #765

Merged
merged 4 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/controller/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ func (dc *controller) reconcileClusterMachineDeployment(key string) error {
}

if d.Spec.Paused {
klog.V(3).Infof("Scaling detected for machineDeployment %s which is paused", d.Name)
return dc.sync(ctx, d, machineSets, machineMap)
}

Expand All @@ -544,6 +545,7 @@ func (dc *controller) reconcileClusterMachineDeployment(key string) error {
return err
}
if scalingEvent {
klog.V(3).Infof("Scaling detected for machineDeployment %s", d.Name)
return dc.sync(ctx, d, machineSets, machineMap)
}

Expand Down
97 changes: 57 additions & 40 deletions pkg/controller/deployment_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,25 +400,22 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD
return createdIS, err
}

// scale scales proportionally in order to mitigate risk. Otherwise, scaling up can increase the size
// of the new machine set and scaling down can decrease the sizes of the old ones, both of which would
// have the effect of hastening the rollout progress, which could produce a higher proportion of unavailable
// replicas in the event of a problem with the rolled out template. Should run only on scaling events or
// when a deployment is paused and not during the normal rollout process.
func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDeployment, newIS *v1alpha1.MachineSet, oldISs []*v1alpha1.MachineSet) error {
// If there is only one active machine set then we should scale that up to the full count of the
// deployment. If there is no active machine set, then we should scale up the newest machine set.
if activeOrLatest := FindActiveOrLatest(newIS, oldISs); activeOrLatest != nil {
if (activeOrLatest.Spec.Replicas) == (deployment.Spec.Replicas) {
return nil
}
_, _, err := dc.scaleMachineSetAndRecordEvent(ctx, activeOrLatest, (deployment.Spec.Replicas), deployment)
klog.V(3).Infof("Scaling latest/theOnlyActive machineSet %s", activeOrLatest.Name)
_, _, err := dc.scaleMachineSetAndRecordEvent(ctx, activeOrLatest, deployment.Spec.Replicas, deployment)
return err
}

// If the new machine set is saturated, old machine sets should be fully scaled down.
// This case handles machine set adoption during a saturated new machine set.
if IsSaturated(deployment, newIS) {
klog.V(3).Infof("Scaling old active machineSets as new machineSet %s is saturated", newIS.Name)
for _, old := range FilterActiveMachineSets(oldISs) {
if _, _, err := dc.scaleMachineSetAndRecordEvent(ctx, old, 0, deployment); err != nil {
return err
Expand All @@ -428,9 +425,11 @@ func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDep
}

// There are old machine sets with machines and the new machine set is not saturated.
// We need to proportionally scale all machine sets (new and old) in case of a
// rolling deployment.
// So the scaling is handled this way:
// - Scale up ? -> scale up only the new machineSet
// - Scale down ? -> scale down the old machineSets proportionally
himanshu-kun marked this conversation as resolved.
Show resolved Hide resolved
if IsRollingUpdate(deployment) {
klog.V(3).Infof("Scaling all active machineSets proportionally for scale-in, while scaling up latest machineSet only for scale-out, machineDeployment %s", deployment.Name)
allISs := FilterActiveMachineSets(append(oldISs, newIS))
allISsReplicas := GetReplicaCountForMachineSets(allISs)

Expand All @@ -441,56 +440,39 @@ func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDep

// Number of additional replicas that can be either added or removed from the total
// replicas count. These replicas should be distributed proportionally to the active
// machine sets.
// machine sets in case of scale-in, while added only to the new machineSet during scale-out
klog.V(3).Infof("machineDeployment: %s , replicasToAdd: %d, maxAllowedSize: %d, allMachineSetReplicas: %d", deployment.Name, allowedSize, allISsReplicas)
deploymentReplicasToAdd := allowedSize - allISsReplicas

// The additional replicas should be distributed proportionally amongst the active
// machine sets from the larger to the smaller in size machine set. Scaling direction
// drives what happens in case we are trying to scale machine sets of the same size.
// In such a case when scaling up, we should scale up newer machine sets first, and
// when scaling down, we should scale down older machine sets first.
// During scale-in, the additional replicas should be distributed proportionally amongst the active
// machine sets from the larger to the smaller in size machine set.
// We should scale down older machine sets first if machine sets are of equal size.

var scalingOperation string
nameToSize := make(map[string]int32)
deploymentReplicasAdded := int32(0)
switch {
case deploymentReplicasToAdd > 0:
sort.Sort(MachineSetsBySizeNewer(allISs))
scalingOperation = "up"

nameToSize = dc.scaleNewMachineSet(newIS, allISs, deploymentReplicasToAdd, deployment)
deploymentReplicasAdded = deploymentReplicasToAdd
case deploymentReplicasToAdd < 0:
sort.Sort(MachineSetsBySizeOlder(allISs))
scalingOperation = "down"
sort.Sort(MachineSetsBySizeOlder(allISs))
nameToSize, deploymentReplicasAdded = dc.scaleMachineSetsProportionally(allISs, deploymentReplicasToAdd, deployment)
}

// Iterate over all active machine sets and estimate proportions for each of them.
// The absolute value of deploymentReplicasAdded should never exceed the absolute
// value of deploymentReplicasToAdd.
deploymentReplicasAdded := int32(0)
nameToSize := make(map[string]int32)
for i := range allISs {
is := allISs[i]

// Estimate proportions if we have replicas to add, otherwise simply populate
// nameToSize with the current sizes for each machine set.
if deploymentReplicasToAdd != 0 {
proportion := GetProportion(is, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded)

nameToSize[is.Name] = (is.Spec.Replicas) + proportion
deploymentReplicasAdded += proportion
} else {
nameToSize[is.Name] = (is.Spec.Replicas)
}
}

// Update all machine sets
for i := range allISs {
is := allISs[i]

// Add/remove any leftovers to the largest machine set.
// Incorporate any leftovers to the largest machine set.
if i == 0 && deploymentReplicasToAdd != 0 {
leftover := deploymentReplicasToAdd - deploymentReplicasAdded
nameToSize[is.Name] = nameToSize[is.Name] + leftover
if nameToSize[is.Name] < 0 {
nameToSize[is.Name] = 0
}
klog.V(3).Infof("leftover proportion increase of %d done in largest machineSet %s", leftover, is.Name)
}

// TODO: Use transactions when we have them.
Expand All @@ -503,6 +485,41 @@ func (dc *controller) scale(ctx context.Context, deployment *v1alpha1.MachineDep
return nil
}

func (dc *controller) scaleNewMachineSet(newIS *v1alpha1.MachineSet, allISs []*v1alpha1.MachineSet, deploymentReplicasToAdd int32, deployment *v1alpha1.MachineDeployment) map[string]int32 {
nameToSize := make(map[string]int32)
for _, is := range allISs {
nameToSize[is.Name] = is.Spec.Replicas
}

nameToSize[newIS.Name] = newIS.Spec.Replicas + deploymentReplicasToAdd

return nameToSize
}

func (dc *controller) scaleMachineSetsProportionally(allISs []*v1alpha1.MachineSet, deploymentReplicasToAdd int32, deployment *v1alpha1.MachineDeployment) (map[string]int32, int32) {
// Iterate over all active machine sets and estimate proportions for each of them.
// The absolute value of deploymentReplicasAdded should never exceed the absolute
// value of deploymentReplicasToAdd.

nameToSize := make(map[string]int32)
deploymentReplicasAdded := int32(0)
for i := range allISs {
is := allISs[i]
// Estimate proportions if we have replicas to add, otherwise simply populate
// nameToSize with the current sizes for each machine set.
if deploymentReplicasToAdd != 0 {
proportion := GetProportion(is, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded)
klog.V(3).Infof("final proportion increase for machineSet %s due to parent deployment's replica update is %d", is.Name, proportion)
nameToSize[is.Name] = (is.Spec.Replicas) + proportion
deploymentReplicasAdded += proportion
} else {
nameToSize[is.Name] = (is.Spec.Replicas)
}
}

return nameToSize, deploymentReplicasAdded
}

func (dc *controller) scaleMachineSetAndRecordEvent(ctx context.Context, is *v1alpha1.MachineSet, newScale int32, deployment *v1alpha1.MachineDeployment) (bool, *v1alpha1.MachineSet, error) {
// No need to scale
if (is.Spec.Replicas) == newScale {
Expand Down Expand Up @@ -644,7 +661,7 @@ func calculateDeploymentStatus(allISs []*v1alpha1.MachineSet, newIS *v1alpha1.Ma
}

// isScalingEvent checks whether the provided deployment has been updated with a scaling event
// by looking at the desired-replicas annotation in the active machine sets of the deployment.
// by looking at the desired-replicas annotation in the active machine sets of the deployment, and returns if there was scale-out or not.
//
// rsList should come from getReplicaSetsForDeployment(d).
// machineMap should come from getmachineMapForDeployment(d, rsList).
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/deployment_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,8 @@ func SetReplicasAnnotations(is *v1alpha1.MachineSet, desiredReplicas, maxReplica
is.Annotations[MaxReplicasAnnotation] = maxString
updated = true
}

klog.V(4).Infof("(SetReplicasAnnotations) ms.Name: %s desired: %s , max: %s , updated: %d", is.Name, desiredString, maxString, updated)
return updated
}

Expand Down Expand Up @@ -656,6 +658,7 @@ func GetProportion(is *v1alpha1.MachineSet, d v1alpha1.MachineDeployment, deploy
isFraction := getMachineSetFraction(*is, d)
allowed := deploymentReplicasToAdd - deploymentReplicasAdded

klog.V(4).Infof("allowed proportion increase= %d, proposed proportion increase= %d", allowed, isFraction)
if deploymentReplicasToAdd > 0 {
// Use the minimum between the machine set fraction and the maximum allowed replicas
// when scaling up. This way we ensure we will not scale up more than the allowed
Expand Down Expand Up @@ -689,6 +692,8 @@ func getMachineSetFraction(is v1alpha1.MachineSet, d v1alpha1.MachineDeployment)
// We should never proportionally scale up from zero which means rs.spec.replicas and annotatedReplicas
// will never be zero here.
newISsize := (float64((is.Spec.Replicas) * deploymentReplicas)) / float64(annotatedReplicas)

klog.V(4).Infof("calculating proportion increase for machineSet %s. ms.desired=%d, maxDeploymentSizePossible=%d, maxDeploymentSizePossibleAsPerAnnotation=%d", is.Name, deploymentReplicas, annotatedReplicas)
return integer.RoundToInt32(newISsize) - (is.Spec.Replicas)
}

Expand Down