-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[release-1.12] Don't drop traffic when upgrading a deployment fails #14840
Changes from all commits
896233b
3bb86a0
5d6068f
fecc0a0
f50114f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,12 +40,14 @@ func TestTransformDeploymentStatus(t *testing.T) { | |
Conditions: []apis.Condition{{ | ||
Type: DeploymentConditionProgressing, | ||
Status: corev1.ConditionUnknown, | ||
Reason: "Deploying", | ||
}, { | ||
Type: DeploymentConditionReplicaSetReady, | ||
Status: corev1.ConditionTrue, | ||
}, { | ||
Type: DeploymentConditionReady, | ||
Status: corev1.ConditionUnknown, | ||
Reason: "Deploying", | ||
}}, | ||
}, | ||
}, { | ||
|
@@ -147,7 +149,7 @@ func TestTransformDeploymentStatus(t *testing.T) { | |
Type: appsv1.DeploymentReplicaFailure, | ||
Status: corev1.ConditionTrue, | ||
Reason: "ReplicaSetReason", | ||
Message: "Something bag happened", | ||
Message: "Something bad happened", | ||
}}, | ||
}, | ||
want: &duckv1.Status{ | ||
|
@@ -158,12 +160,45 @@ func TestTransformDeploymentStatus(t *testing.T) { | |
Type: DeploymentConditionReplicaSetReady, | ||
Status: corev1.ConditionFalse, | ||
Reason: "ReplicaSetReason", | ||
Message: "Something bag happened", | ||
Message: "Something bad happened", | ||
}, { | ||
Type: DeploymentConditionReady, | ||
Status: corev1.ConditionFalse, | ||
Reason: "ReplicaSetReason", | ||
Message: "Something bag happened", | ||
Message: "Something bad happened", | ||
}}, | ||
}, | ||
}, { | ||
name: "replica failure has priority over progressing", | ||
ds: &appsv1.DeploymentStatus{ | ||
Conditions: []appsv1.DeploymentCondition{{ | ||
Type: appsv1.DeploymentReplicaFailure, | ||
Status: corev1.ConditionTrue, | ||
Reason: "ReplicaSetReason", | ||
Message: "Something really bad happened", | ||
}, { | ||
Type: appsv1.DeploymentProgressing, | ||
Status: corev1.ConditionFalse, | ||
Reason: "ProgressingReason", | ||
Message: "Something bad happened", | ||
}}, | ||
}, | ||
want: &duckv1.Status{ | ||
Conditions: []apis.Condition{{ | ||
Type: DeploymentConditionProgressing, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reading this I was kind of confused seeing that:
condition types defined in knative.dev/pkg are just two:
Also going from deployment conditions to duckv1 conditions and back
I am wondering if mapping deployment conditions directly to revision conditions would be more readable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm open to folks cleaning this up in a follow-up PR. The thing with the deployment conditions is their polarity is weird - ReplicaCreateFailure=False is actually good There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, I had to read that three times to get it :D |
||
Status: corev1.ConditionFalse, | ||
Reason: "ProgressingReason", | ||
Message: "Something bad happened", | ||
}, { | ||
Type: DeploymentConditionReplicaSetReady, | ||
Status: corev1.ConditionFalse, | ||
Reason: "ReplicaSetReason", | ||
Message: "Something really bad happened", | ||
}, { | ||
Type: DeploymentConditionReady, | ||
Status: corev1.ConditionFalse, | ||
Reason: "ReplicaSetReason", | ||
Message: "Something really bad happened", | ||
}}, | ||
}, | ||
}} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,6 @@ package v1 | |
import ( | ||
"time" | ||
|
||
appsv1 "k8s.io/api/apps/v1" | ||
corev1 "k8s.io/api/core/v1" | ||
net "knative.dev/networking/pkg/apis/networking" | ||
"knative.dev/pkg/kmeta" | ||
|
@@ -144,9 +143,3 @@ func (rs *RevisionStatus) IsActivationRequired() bool { | |
c := revisionCondSet.Manage(rs).GetCondition(RevisionConditionActive) | ||
return c != nil && c.Status != corev1.ConditionTrue | ||
} | ||
|
||
// IsReplicaSetFailure returns true if the deployment replicaset failed to create | ||
func (rs *RevisionStatus) IsReplicaSetFailure(deploymentStatus *appsv1.DeploymentStatus) bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where do we cover this part? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We always propagate the status now - and this is surfaced as a deployment condition |
||
ds := serving.TransformDeploymentStatus(deploymentStatus) | ||
return ds != nil && ds.GetCondition(serving.DeploymentConditionReplicaSetReady).IsFalse() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,8 @@ func (rs *RevisionStatus) PropagateDeploymentStatus(original *appsv1.DeploymentS | |
|
||
// PropagateAutoscalerStatus propagates autoscaler's status to the revision's status. | ||
func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodAutoscalerStatus) { | ||
resUnavailable := rs.GetCondition(RevisionConditionResourcesAvailable).IsFalse() | ||
|
||
// Reflect the PA status in our own. | ||
cond := ps.GetCondition(autoscalingv1alpha1.PodAutoscalerConditionReady) | ||
rs.ActualReplicas = nil | ||
|
@@ -183,20 +185,29 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA | |
} | ||
|
||
if cond == nil { | ||
rs.MarkActiveUnknown("Deploying", "") | ||
rs.MarkActiveUnknown(ReasonDeploying, "") | ||
|
||
if !resUnavailable { | ||
rs.MarkResourcesAvailableUnknown(ReasonDeploying, "") | ||
} | ||
return | ||
} | ||
|
||
// Don't mark the resources available, if deployment status already determined | ||
// it isn't so. | ||
resUnavailable := rs.GetCondition(RevisionConditionResourcesAvailable).IsFalse() | ||
if ps.IsScaleTargetInitialized() && !resUnavailable { | ||
// Precondition for PA being initialized is SKS being active and | ||
// that implies that |service.endpoints| > 0. | ||
rs.MarkResourcesAvailableTrue() | ||
rs.MarkContainerHealthyTrue() | ||
} | ||
|
||
// Mark resource unavailable if we don't have a Service Name and the deployment is ready | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we somehow combine this with the above statements (from https://github.com/knative/serving/pull/14840/files#diff-831a9383e7db7880978acf31f7dfec777beb08b900b1d0e1c55a5aed42e602cbR173 down)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or in other words, the full function is a bit hard to grasp. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do you want to combine it? My hope here is to keep the conditionals straight forward. Keeping them separate helps with that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm I'd need more time to fiddle around with the current code. But maybe better to keep it here and do it on |
||
// This can happen when we have initial scale set to 0 | ||
if rs.GetCondition(RevisionConditionResourcesAvailable).IsTrue() && ps.ServiceName == "" { | ||
rs.MarkResourcesAvailableUnknown(ReasonDeploying, "") | ||
} | ||
|
||
switch cond.Status { | ||
case corev1.ConditionUnknown: | ||
rs.MarkActiveUnknown(cond.Reason, cond.Message) | ||
|
@@ -222,14 +233,6 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA | |
rs.MarkActiveFalse(cond.Reason, cond.Message) | ||
case corev1.ConditionTrue: | ||
rs.MarkActiveTrue() | ||
|
||
// Precondition for PA being active is SKS being active and | ||
// that implies that |service.endpoints| > 0. | ||
// | ||
// Note: This is needed for backwards compatibility as we're adding the new | ||
// ScaleTargetInitialized condition to gate readiness. | ||
rs.MarkResourcesAvailableTrue() | ||
rs.MarkContainerHealthyTrue() | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you elaborate where priority is defined, I see that
DeploymentConditionProgressing
is the same before and after, so no change there.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
'priority' here means that the replica-failure message is the last one applied, so it is the one surfaced to the deployment's Ready condition.