Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Progressing condition on machineDeployment for nil progressDeadlineSeconds #762

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pkg/apis/machine/v1alpha1/machinedeployment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ type MachineDeploymentSpec struct {
// process failed MachineDeployments and a condition with a ProgressDeadlineExceeded
// reason will be surfaced in the MachineDeployment status. Note that progress will
// not be estimated during the time a MachineDeployment is paused. This is not set
// by default.
// by default, which is treated as infinite deadline.
// +optional
ProgressDeadlineSeconds *int32 `json:"progressDeadlineSeconds,omitempty"`
}
Expand Down Expand Up @@ -219,8 +219,8 @@ const (

// Progressing means the MachineDeployment is progressing. Progress for a MachineDeployment is
// considered when a new machine set is created or adopted, and when new machines scale
// up or old machines scale down. Progress is not estimated for paused MachineDeployments or
// when progressDeadlineSeconds is not specified.
// up or old machines scale down. Progress is not estimated for paused MachineDeployments. It is also updated
// if progressDeadlineSeconds is not specified(treated as infinite deadline), in which case it would never be updated to "false".
MachineDeploymentProgressing MachineDeploymentConditionType = "Progressing"

// ReplicaFailure is added in a MachineDeployment when one of its machines fails to be created
Expand Down
13 changes: 5 additions & 8 deletions pkg/controller/deployment_progress.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,13 @@ import (
func (dc *controller) syncRolloutStatus(ctx context.Context, allISs []*v1alpha1.MachineSet, newIS *v1alpha1.MachineSet, d *v1alpha1.MachineDeployment) error {
newStatus := calculateDeploymentStatus(allISs, newIS, d)

// If there is no progressDeadlineSeconds set, remove any Progressing condition.
if d.Spec.ProgressDeadlineSeconds == nil {
RemoveMachineDeploymentCondition(&newStatus, v1alpha1.MachineDeploymentProgressing)
}

// If there is only one machine set that is active then that means we are not running
// a new rollout and this is a resync where we don't need to estimate any progress.
// In such a case, we should simply not estimate any progress for this deployment.
currentCond := GetMachineDeploymentCondition(d.Status, v1alpha1.MachineDeploymentProgressing)
isCompleteDeployment := newStatus.Replicas == newStatus.UpdatedReplicas && currentCond != nil && currentCond.Reason == NewISAvailableReason
// Check for progress only if there is a progress deadline set and the latest rollout
// hasn't completed yet.
if d.Spec.ProgressDeadlineSeconds != nil && !isCompleteDeployment {
// Check for progress only if the latest rollout hasn't completed yet.
if !isCompleteDeployment {
switch {
case MachineDeploymentComplete(d, &newStatus):
// Update the deployment conditions with a message for the new machine set that
Expand Down Expand Up @@ -87,6 +81,8 @@ func (dc *controller) syncRolloutStatus(ctx context.Context, allISs []*v1alpha1.
}
SetMachineDeploymentCondition(&newStatus, *condition)

// the case -> (mark `Progressing` condition reason as `ProgressDeadlineExceeded` only when non-infinite progressDeadline is set)
// is already handled inside `MachineDeploymentTimedOut()`
case MachineDeploymentTimedOut(d, &newStatus):
// Update the deployment with a timeout condition. If the condition already exists,
// we ignore this update.
Expand Down Expand Up @@ -161,6 +157,7 @@ func (dc *controller) getReplicaFailures(allISs []*v1alpha1.MachineSet, newIS *v
// requeueStuckDeployment checks whether the provided deployment needs to be synced for a progress
// check. It returns the time after the deployment will be requeued for the progress check, 0 if it
// will be requeued now, or -1 if it does not need to be requeued.
// This method is just to make sure that as soon as `progressDeadlineSeconds` timeout happens we update the `Progressing` Condition.
func (dc *controller) requeueStuckMachineDeployment(d *v1alpha1.MachineDeployment, newStatus v1alpha1.MachineDeploymentStatus) time.Duration {
currentCond := GetMachineDeploymentCondition(d.Status, v1alpha1.MachineDeploymentProgressing)
// Can't estimate progress if there is no deadline in the spec or progressing condition in the current status.
Expand Down
67 changes: 33 additions & 34 deletions pkg/controller/deployment_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ func (dc *controller) sync(ctx context.Context, d *v1alpha1.MachineDeployment, i
// These conditions are needed so that we won't accidentally report lack of progress for resumed deployments
// that were paused for longer than progressDeadlineSeconds.
func (dc *controller) checkPausedConditions(ctx context.Context, d *v1alpha1.MachineDeployment) error {
if d.Spec.ProgressDeadlineSeconds == nil {
return nil
}
cond := GetMachineDeploymentCondition(d.Status, v1alpha1.MachineDeploymentProgressing)
if cond != nil && cond.Reason == TimedOutReason {
// If we have reported lack of progress, do not overwrite it with a paused condition.
Expand Down Expand Up @@ -265,11 +262,8 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD

// Should use the revision in existingNewRS's annotation, since it set by before
needsUpdate := SetMachineDeploymentRevision(d, isCopy.Annotations[RevisionAnnotation])
// If no other Progressing condition has been recorded and we need to estimate the progress
// of this deployment then it is likely that old users started caring about progress. In that
// case we need to take into account the first time we noticed their new machine set.
cond := GetMachineDeploymentCondition(d.Status, v1alpha1.MachineDeploymentProgressing)
if d.Spec.ProgressDeadlineSeconds != nil && cond == nil {
if cond == nil {
msg := fmt.Sprintf("Found new machine set %q", isCopy.Name)
condition := NewMachineDeploymentCondition(v1alpha1.MachineDeploymentProgressing, v1alpha1.ConditionTrue, FoundNewISReason, msg)
SetMachineDeploymentCondition(&d.Status, *condition)
Expand Down Expand Up @@ -340,7 +334,7 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD
createdIS, err := dc.controlMachineClient.MachineSets(newIS.Namespace).Create(ctx, &newIS, metav1.CreateOptions{})
switch {
// We may end up hitting this due to a slow cache or a fast resync of the Deployment.
// Fetch a copy of the ReplicaSet. If its machineTemplateSpec is semantically deep equal
// Fetch a copy of the ReplicaSet. If its owner is our deployment and its machineTemplateSpec is semantically deep equal
// with the machineTemplateSpec of the Deployment, then that is our new ReplicaSet. Otherwise,
// this is a hash collision and we need to increment the collisionCount field in the
// status of the Deployment and try the creation again.
Expand All @@ -350,37 +344,42 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD
if isErr != nil {
return nil, isErr
}
isEqual := EqualIgnoreHash(&d.Spec.Template, &is.Spec.Template)

// bought these changes from current latest k/k deployment code (https://github.com/kubernetes/kubernetes/blob/0e19bbb91644885a6db38a77ea3d697730269802/pkg/controller/deployment/sync.go#L240-L243)
controllerRef := metav1.GetControllerOf(is)
if controllerRef != nil && controllerRef.UID == d.UID && EqualIgnoreHash(&d.Spec.Template, &is.Spec.Template) {
// Pass through the matching ReplicaSet as the new ReplicaSet.
createdIS = is
err = nil
break
}

// Matching ReplicaSet is not equal - increment the collisionCount in the DeploymentStatus
// and requeue the Deployment.
if !isEqual {
if d.Status.CollisionCount == nil {
d.Status.CollisionCount = new(int32)
}
preCollisionCount := *d.Status.CollisionCount
*d.Status.CollisionCount++
// Update the collisionCount for the Deployment and let it requeue by returning the original
// error.
_, dErr := dc.controlMachineClient.MachineDeployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
if dErr == nil {
klog.V(2).Infof("Found a hash collision for machine deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount)
}
return nil, err
if d.Status.CollisionCount == nil {
d.Status.CollisionCount = new(int32)
}
preCollisionCount := *d.Status.CollisionCount
*d.Status.CollisionCount++
// Update the collisionCount for the Deployment and let it requeue by returning the original
// error.
_, dErr := dc.controlMachineClient.MachineDeployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
if dErr == nil {
klog.V(2).Infof("Found a hash collision for machine deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount)
}
// Pass through the matching ReplicaSet as the new ReplicaSet.
createdIS = is
err = nil
return nil, err
// bought this case from current latest k/k deployment code (https://github.com/kubernetes/kubernetes/blob/0e19bbb91644885a6db38a77ea3d697730269802/pkg/controller/deployment/sync.go#L260-L262)
case errors.HasStatusCause(err, v1.NamespaceTerminatingCause):
// if the namespace is terminating, all subsequent creates will fail and we can safely do nothing
return nil, err
case err != nil:
msg := fmt.Sprintf("Failed to create new machine set %q: %v", newIS.Name, err)
if d.Spec.ProgressDeadlineSeconds != nil {
cond := NewMachineDeploymentCondition(v1alpha1.MachineDeploymentProgressing, v1alpha1.ConditionFalse, FailedISCreateReason, msg)
SetMachineDeploymentCondition(&d.Status, *cond)
// We don't really care about this error at this point, since we have a bigger issue to report.
// TODO: Identify which errors are permanent and switch DeploymentIsFailed to take into account
// these reasons as well. Related issue: https://github.com/kubernetes/kubernetes/issues/18568
_, _ = dc.controlMachineClient.MachineDeployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
}
cond := NewMachineDeploymentCondition(v1alpha1.MachineDeploymentProgressing, v1alpha1.ConditionFalse, FailedISCreateReason, msg)
SetMachineDeploymentCondition(&d.Status, *cond)
// We don't really care about this error at this point, since we have a bigger issue to report.
// TODO: Identify which errors are permanent and switch DeploymentIsFailed to take into account
// these reasons as well. Related issue: https://github.com/kubernetes/kubernetes/issues/18568
_, _ = dc.controlMachineClient.MachineDeployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
dc.recorder.Eventf(d, v1.EventTypeWarning, FailedISCreateReason, msg)
return nil, err
}
Expand All @@ -389,7 +388,7 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD
}

needsUpdate := SetMachineDeploymentRevision(d, newRevision)
if !alreadyExists && d.Spec.ProgressDeadlineSeconds != nil {
if !alreadyExists {
msg := fmt.Sprintf("Created new machine set %q", createdIS.Name)
condition := NewMachineDeploymentCondition(v1alpha1.MachineDeploymentProgressing, v1alpha1.ConditionTrue, NewMachineSetReason, msg)
SetMachineDeploymentCondition(&d.Status, *condition)
Expand Down
2 changes: 2 additions & 0 deletions pkg/controller/machine_safety.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ func (c *controller) checkAndFreezeORUnfreezeMachineSets(ctx context.Context) er
lowerThreshold := higherThreshold - c.safetyOptions.SafetyDown

machineDeployments := c.getMachineDeploymentsForMachineSet(machineSet)
// if we have a parent machineDeployment than we use a different higherThreshold and lowerThreshold,
// keeping in mind the rolling update scenario, as we won't want to freeze during a normal rolling update.
if len(machineDeployments) >= 1 {
machineDeployment := machineDeployments[0]
if machineDeployment != nil {
Expand Down