Skip to content

Commit

Permalink
fix canary rollback behaviour
Browse files Browse the repository at this point in the history
Prevents the canary from being triggered when a canary deployment is
updated to match the primary deployment after an analysis fails.

Signed-off-by: Sanskar Jaiswal <[email protected]>
  • Loading branch information
Sanskar Jaiswal committed Apr 15, 2022
1 parent 67cc965 commit c7c0c76
Show file tree
Hide file tree
Showing 8 changed files with 256 additions and 1 deletion.
3 changes: 3 additions & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,9 @@ spec:
lastAppliedSpec:
description: LastAppliedSpec of this canary
type: string
lastPromotedSpec:
description: LastPromotedSpec of this canary
type: string
lastTransitionTime:
description: LastTransitionTime of this canary
format: date-time
Expand Down
3 changes: 3 additions & 0 deletions charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,9 @@ spec:
lastAppliedSpec:
description: LastAppliedSpec of this canary
type: string
lastPromotedSpec:
description: LastPromotedSpec of this canary
type: string
lastTransitionTime:
description: LastTransitionTime of this canary
format: date-time
Expand Down
3 changes: 3 additions & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,9 @@ spec:
iterations:
description: Iteration count of the current canary analysis
type: number
lastPromotedSpec:
description: LastPromotedSpec of this canary
type: string
lastAppliedSpec:
description: LastAppliedSpec of this canary
type: string
Expand Down
3 changes: 3 additions & 0 deletions pkg/canary/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ func syncCanaryStatus(flaggerClient clientset.Interface, cd *flaggerv1.Canary, s
cdCopy.Status.FailedChecks = status.FailedChecks
cdCopy.Status.Iterations = status.Iterations
cdCopy.Status.LastAppliedSpec = hash
if status.Phase == flaggerv1.CanaryPhaseInitialized {
cdCopy.Status.LastPromotedSpec = hash
}
cdCopy.Status.LastTransitionTime = metav1.Now()
setAll(cdCopy)

Expand Down
8 changes: 8 additions & 0 deletions pkg/controller/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,14 @@ func (c *Controller) shouldAdvance(canary *flaggerv1.Canary, canaryController ca
return true, nil
}

// Make sure to sync lastAppliedSpec even if the canary is in a failed state.
if canary.Status.Phase == flaggerv1.CanaryPhaseFailed {
if err := canaryController.SyncStatus(canary, canary.Status); err != nil {
c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace)).Errorf("%v", err)
return false, err
}
}

newTarget, err := canaryController.HasTargetChanged(canary)
if err != nil {
return false, err
Expand Down
2 changes: 1 addition & 1 deletion test/nginx/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ helm upgrade -i flagger ${REPO_ROOT}/charts/flagger \
--set prometheus.install=true \
--set meshProvider=nginx

# kubectl -n ingress-nginx set image deployment/flagger flagger=test/flagger:latest
kubectl -n ingress-nginx set image deployment/flagger flagger=test/flagger:latest

kubectl -n ingress-nginx rollout status deployment/flagger
kubectl -n ingress-nginx rollout status deployment/flagger-prometheus
2 changes: 2 additions & 0 deletions test/nginx/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ DIR="$(cd "$(dirname "$0")" && pwd)"

"$REPO_ROOT"/test/workloads/init.sh
"$DIR"/test-canary.sh
"$REPO_ROOT"/test/workloads/init.sh
"$DIR"/test-lifecycle.sh
233 changes: 233 additions & 0 deletions test/nginx/test-lifecycle.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#!/usr/bin/env bash

# This script runs e2e tests for Canary initialization, analysis and promotion
# Prerequisites: Kubernetes Kind, Helm and NGINX ingress controller

# Abort on the first command that exits non-zero.
set -o errexit

# Top-level directory of the git checkout this script runs from.
REPO_ROOT=$(git rev-parse --show-toplevel)

# Ingress routing app.example.com/ to the podinfo service on port 80.
# The manifest is fed to kubectl directly (no intermediate cat) and the
# heredoc delimiter is quoted so the YAML is passed through literally.
kubectl apply -f - <<'EOF'
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: podinfo
  namespace: test
  labels:
    app: podinfo
  annotations:
    kubernetes.io/ingress.class: "nginx"
spec:
  rules:
    - host: "app.example.com"
      http:
        paths:
          - pathType: Prefix
            path: "/"
            backend:
              service:
                name: podinfo
                port:
                  number: 80
EOF

# Canary for the podinfo Deployment, wired to the podinfo Ingress.
# The load-test webhook requests /status/500; the script later waits for
# the Canary to report 'Failed', so this configuration is presumably meant
# to make the request-success-rate check fail.
kubectl apply -f - <<'EOF'
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: podinfo
  namespace: test
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: podinfo
  ingressRef:
    apiVersion: networking.k8s.io/v1
    kind: Ingress
    name: podinfo
  progressDeadlineSeconds: 60
  service:
    port: 80
    targetPort: http
  analysis:
    interval: 10s
    threshold: 2
    maxWeight: 40
    stepWeight: 20
    metrics:
      - name: request-success-rate
        thresholdRange:
          min: 1
        interval: 30s
    webhooks:
      - name: load-test
        url: http://flagger-loadtester.test/
        metadata:
          type: cmd
          cmd: "hey -z 2m -q 10 -c 2 -host app.example.com http://ingress-nginx-controller.ingress-nginx/status/500"
EOF

echo '>>> Waiting for primary to be ready'
retries=50
count=0
# Poll until the Canary reports 'Initialized'. The check is the loop
# condition itself, so a match on the last allowed attempt counts as
# success and no sleep is spent once the phase has been reached.
until kubectl -n test get canary/podinfo | grep 'Initialized'; do
  count=$((count + 1))
  if [[ ${count} -ge ${retries} ]]; then
    # Out of attempts: dump the controller logs for diagnosis and fail.
    kubectl -n ingress-nginx logs deployment/flagger
    echo "No more retries left"
    exit 1
  fi
  sleep 5
done

echo '✔ Canary initialization test passed'

echo '>>> Triggering canary deployment'
kubectl -n test set image deployment/podinfo podinfod=ghcr.io/stefanprodan/podinfo:6.0.1

echo '>>> Waiting for canary rollback'
retries=50
count=0
# Poll until the Canary reports 'Failed'. Using the check as the loop
# condition means a match on the final attempt is treated as success
# instead of falling through to the "no more retries" exit.
until kubectl -n test get canary/podinfo | grep 'Failed'; do
  count=$((count + 1))
  if [[ ${count} -ge ${retries} ]]; then
    # Out of attempts: dump the controller logs for diagnosis and fail.
    kubectl -n ingress-nginx logs deployment/flagger
    echo "No more retries left"
    exit 1
  fi
  sleep 10
  # Show the controller's latest log line as progress output.
  kubectl -n ingress-nginx logs deployment/flagger --tail 1
done

echo '✔ Canary rollback test passed'

# Record the pod-template-hash label of the first primary pod before the revert.
pod_hash=$(kubectl get pods -l app=podinfo-primary -n test -o=jsonpath='{.items[0].metadata.labels.pod-template-hash}')

echo '>>> Reverting canary deployment to match primary'
kubectl -n test set image deployment/podinfo podinfod=ghcr.io/stefanprodan/podinfo:6.0.0

# Fixed pause before re-reading the primary pod hash and the Canary status.
sleep 15

new_pod_hash=$(kubectl get pods -l app=podinfo-primary -n test -o=jsonpath='{.items[0].metadata.labels.pod-template-hash}')

# The Canary must still report 'Failed'. An explicit if avoids the
# `cmd && a || b` pitfall and no longer touches the unrelated `ok` variable.
failed=false
if kubectl -n test get canary/podinfo | grep 'Failed'; then
  failed=true
fi

# Pass only if the primary pods were left untouched and the Canary stayed Failed.
if [[ "$new_pod_hash" == "$pod_hash" && "$failed" == true ]]; then
  echo '✔ Canary not triggered upon reverting canary image to match primary '
else
  echo '⨯ Canary got triggered upon reverting canary image to match primary'
  exit 1
fi

echo '>>> Triggering canary deployment again'
kubectl -n test set image deployment/podinfo podinfod=ghcr.io/stefanprodan/podinfo:6.0.1

echo '>>> Waiting for canary to start progress'
retries=50
count=0
# Poll until the Canary reports 'Progressing'. Using the check as the loop
# condition means a match on the final attempt is treated as success.
until kubectl -n test get canary/podinfo | grep 'Progressing'; do
  count=$((count + 1))
  if [[ ${count} -ge ${retries} ]]; then
    kubectl -n ingress-nginx logs deployment/flagger
    # Dump the Ingress this test routes through. The previous `httpproxy`
    # lookup names a resource this script never creates, and under errexit
    # its failure aborted the script before the message below was printed.
    kubectl -n test get ingress podinfo -oyaml
    echo "No more retries left"
    exit 1
  fi
  sleep 1
done

echo '>>> Waiting for canary rollback'
retries=50
count=0
# Poll until the Canary reports 'Failed'. Using the check as the loop
# condition means a match on the final attempt is treated as success
# instead of falling through to the "no more retries" exit.
until kubectl -n test get canary/podinfo | grep 'Failed'; do
  count=$((count + 1))
  if [[ ${count} -ge ${retries} ]]; then
    # Out of attempts: dump the controller logs for diagnosis and fail.
    kubectl -n ingress-nginx logs deployment/flagger
    echo "No more retries left"
    exit 1
  fi
  sleep 10
  # Show the controller's latest log line as progress output.
  kubectl -n ingress-nginx logs deployment/flagger --tail 1
done

# Re-apply the Canary with a 15s analysis interval, a failure threshold of 5,
# and a load test that targets "/" instead of "/status/500". The script then
# waits for promotion, so this configuration is presumably meant to let the
# request-success-rate check pass.
kubectl apply -f - <<'EOF'
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: podinfo
  namespace: test
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: podinfo
  ingressRef:
    apiVersion: networking.k8s.io/v1
    kind: Ingress
    name: podinfo
  progressDeadlineSeconds: 60
  service:
    port: 80
    targetPort: http
  analysis:
    interval: 15s
    threshold: 5
    maxWeight: 40
    stepWeight: 20
    metrics:
      - name: request-success-rate
        thresholdRange:
          min: 1
        interval: 30s
    webhooks:
      - name: load-test
        url: http://flagger-loadtester.test/
        metadata:
          type: cmd
          cmd: "hey -z 2m -q 10 -c 2 -host app.example.com http://ingress-nginx-controller.ingress-nginx/"
EOF

echo '>>> Retrying failed canary run'
# Add a pod-template annotation to the target Deployment via a JSON patch.
kubectl -n test patch deploy/podinfo -p '[{"op": "add", "path":"/spec/template/metadata/annotations", "value": {"thisis": "theway"}}]' --type=json

echo '>>> Waiting for canary promotion'
retries=50
count=0
# Poll until the primary Deployment's description mentions 6.0.1. Using the
# check as the loop condition means a match on the final attempt is treated
# as success instead of falling through to the "no more retries" exit.
until kubectl -n test describe deployment/podinfo-primary | grep '6.0.1'; do
  count=$((count + 1))
  if [[ ${count} -ge ${retries} ]]; then
    # Out of attempts: dump the controller logs for diagnosis and fail.
    kubectl -n ingress-nginx logs deployment/flagger
    echo "No more retries left"
    exit 1
  fi
  sleep 10
  # Show the controller's latest log line as progress output.
  kubectl -n ingress-nginx logs deployment/flagger --tail 1
done

echo '>>> Waiting for canary finalization'
retries=50
count=0
# Poll until the Canary reports 'Succeeded'. Using the check as the loop
# condition means a match on the final attempt is treated as success
# instead of falling through to the "no more retries" exit.
until kubectl -n test get canary/podinfo | grep 'Succeeded'; do
  count=$((count + 1))
  if [[ ${count} -ge ${retries} ]]; then
    # Out of attempts: dump the controller logs for diagnosis and fail.
    kubectl -n ingress-nginx logs deployment/flagger
    echo "No more retries left"
    exit 1
  fi
  sleep 5
done

echo '✔ Canary promotion test passed'

0 comments on commit c7c0c76

Please sign in to comment.