Do not promote when not ready on skip analysis #695

Merged: 9 commits, Sep 29, 2020
2 changes: 2 additions & 0 deletions .circleci/config.yml
@@ -94,7 +94,9 @@ jobs:
- run: test/container-build.sh
- run: test/e2e-kind.sh v1.18.2
- run: test/e2e-istio.sh
- run: test/e2e-istio-dependencies.sh
- run: test/e2e-istio-tests.sh
- run: test/e2e-istio-tests-skip-analysis.sh

e2e-gloo-testing:
machine: true
13 changes: 11 additions & 2 deletions pkg/controller/scheduler.go
@@ -236,7 +236,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
}

// check if analysis should be skipped
if skip := c.shouldSkipAnalysis(cd, canaryController, meshRouter); skip {
if skip := c.shouldSkipAnalysis(cd, canaryController, meshRouter, err, retriable); skip {
return
}

@@ -616,11 +616,20 @@ func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool {
return true
}

func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface) bool {
func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface, err error, retriable bool) bool {
if !canary.SkipAnalysis() {
return false
}

// regardless if analysis is being skipped, rollback if canary failed to progress
Contributor Author:

without this block, flagger immediately tries to promote the new version, which at this point has 0 healthy pods. (See the standalone sketch after this diff.)

if !retriable || canary.Status.FailedChecks >= canary.GetAnalysisThreshold() {
c.recordEventWarningf(canary, "Rolling back %s.%s progress deadline exceeded %v", canary.Name, canary.Namespace, err)
c.alert(canary, fmt.Sprintf("Progress deadline exceeded %v", err), false, flaggerv1.SeverityError)
c.rollback(canary, canaryController, meshRouter)

return true
}

// route all traffic to primary
primaryWeight := 100
canaryWeight := 0
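To make the new guard easier to read outside the diff, here is a minimal, self-contained sketch of the condition it adds. The standalone shape, the shouldRollback name, and its parameters are illustrative assumptions, not Flagger's actual API; in the PR the check lives inside shouldSkipAnalysis, records a warning event, fires an alert, and calls c.rollback before returning true to the caller.

package main

import "fmt"

// shouldRollback distills the guard added above: even when analysis is
// skipped, a canary that is no longer retriable (e.g. its progress deadline
// was exceeded) or that has exhausted its failed-check budget must be rolled
// back instead of promoted. Name and standalone form are illustrative only.
func shouldRollback(retriable bool, failedChecks, threshold int) bool {
	return !retriable || failedChecks >= threshold
}

func main() {
	// Progress deadline exceeded: not retriable, so roll back even with
	// zero recorded failed checks.
	fmt.Println(shouldRollback(false, 0, 15)) // true

	// Healthy, retriable canary under the threshold: analysis is skipped
	// and promotion proceeds as before.
	fmt.Println(shouldRollback(true, 0, 15)) // false
}

The threshold of 15 here simply mirrors the analysis.threshold value used in the new e2e test manifest further down.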
19 changes: 19 additions & 0 deletions test/e2e-istio-dependencies.sh
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

# This script sets up the scenarios for the Istio tests by creating a Kubernetes namespace, installing the load tester, and deploying a test workload (podinfo)
# Prerequisites: Kubernetes Kind and Istio

set -o errexit

REPO_ROOT=$(git rev-parse --show-toplevel)

echo '>>> Creating test namespace'
kubectl create namespace test
kubectl label namespace test istio-injection=enabled

echo '>>> Installing the load tester'
kubectl apply -k ${REPO_ROOT}/kustomize/tester
kubectl -n test rollout status deployment/flagger-loadtester

echo '>>> Deploy podinfo'
kubectl apply -f ${REPO_ROOT}/test/e2e-workload.yaml
138 changes: 138 additions & 0 deletions test/e2e-istio-tests-skip-analysis.sh
@@ -0,0 +1,138 @@
#!/usr/bin/env bash

# This script runs e2e tests for when the canary analysis is skipped
# Prerequisites: Kubernetes Kind and Istio

set -o errexit

echo '>>> Initialising canary'
cat <<EOF | kubectl apply -f -
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: podinfo
  namespace: test
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: podinfo
  progressDeadlineSeconds: 60
  service:
    port: 9898
    portDiscovery: true
  skipAnalysis: true
  analysis:
    interval: 15s
    threshold: 15
    maxWeight: 30
    stepWeight: 10
    webhooks:
      - name: load-test
        url: http://flagger-loadtester.test/
        timeout: 5s
        metadata:
          type: cmd
          cmd: "hey -z 10m -q 10 -c 2 http://podinfo.test:9898/"
          logCmdOutput: "true"
EOF

echo '>>> Waiting for primary to be ready'
retries=50
count=0
ok=false
until ${ok}; do
    kubectl -n test get canary/podinfo | grep 'Initialized' && ok=true || ok=false
    sleep 5
    count=$(($count + 1))
    if [[ ${count} -eq ${retries} ]]; then
        kubectl -n istio-system logs deployment/flagger
        echo "No more retries left"
        exit 1
    fi
done

echo '✔ Canary initialization test passed'

echo '>>> Triggering canary deployment'
kubectl -n test set image deployment/podinfo podinfod=stefanprodan/podinfo:3.1.1

echo '>>> Waiting for canary promotion'
retries=50
count=0
ok=false
until ${ok}; do
    kubectl -n test describe deployment/podinfo-primary | grep '3.1.1' && ok=true || ok=false
    sleep 10
    kubectl -n istio-system logs deployment/flagger --tail 1
    count=$(($count + 1))
    if [[ ${count} -eq ${retries} ]]; then
        kubectl -n test describe deployment/podinfo
        kubectl -n test describe deployment/podinfo-primary
        kubectl -n istio-system logs deployment/flagger
        echo "No more retries left"
        exit 1
    fi
done

echo '>>> Waiting for canary finalization'
retries=50
count=0
ok=false
until ${ok}; do
    kubectl -n test get canary/podinfo | grep 'Succeeded' && ok=true || ok=false
    sleep 5
    count=$(($count + 1))
    if [[ ${count} -eq ${retries} ]]; then
        kubectl -n istio-system logs deployment/flagger
        echo "No more retries left"
        exit 1
    fi
done

echo '✔ Canary promotion test passed'

if [[ "$1" = "canary" ]]; then
exit 0
fi

echo '>>> Triggering canary deployment with a bad release (non existent docker image)'
kubectl -n test set image deployment/podinfo podinfod=stefanprodan/potato:1.0.0

echo '>>> Waiting for canary to fail'
retries=50
count=0
ok=false
until ${ok}; do
    kubectl get canary/podinfo -n test -o=jsonpath='{.status.phase}' | grep 'Failed' && ok=true || ok=false
    sleep 10
    kubectl -n istio-system logs deployment/flagger --tail 1
    count=$(($count + 1))
    if [[ ${count} -eq ${retries} ]]; then
        kubectl -n test describe deployment/podinfo
        kubectl -n test describe deployment/podinfo-primary
        kubectl -n istio-system logs deployment/flagger
        echo "No more retries left"
        exit 1
    fi
done

echo '>>> Confirm primary pod is still running and with correct version'
retries=50
count=0
okRunning=false
okImage=false
until ${okImage} && ${okRunning}; do
    kubectl get deployment podinfo-primary -n test -o jsonpath='{.spec.replicas}' | grep 1 && okRunning=true || okRunning=false
    kubectl -n test describe deployment/podinfo-primary | grep '3.1.1' && okImage=true || okImage=false
    sleep 5
    count=$(($count + 1))
    if [[ ${count} -eq ${retries} ]]; then
        kubectl -n istio-system logs deployment/flagger
        echo "No more retries left"
        exit 1
    fi
done

kubectl -n istio-system logs deployment/flagger

echo '✔ All tests passed'
13 changes: 0 additions & 13 deletions test/e2e-istio-tests.sh
@@ -5,19 +5,6 @@

set -o errexit
Member: Can you please add back set -o errexit to all scripts?

Contributor Author: Of course.

PS: I don't know if this new structure of tests makes sense. I'm happy to change this if you want.


REPO_ROOT=$(git rev-parse --show-toplevel)

echo '>>> Creating test namespace'
kubectl create namespace test
kubectl label namespace test istio-injection=enabled

echo '>>> Installing the load tester'
kubectl apply -k ${REPO_ROOT}/kustomize/tester
kubectl -n test rollout status deployment/flagger-loadtester

echo '>>> Deploy podinfo'
kubectl apply -f ${REPO_ROOT}/test/e2e-workload.yaml

echo '>>> Create latency metric template'
cat <<EOF | kubectl apply -f -
apiVersion: flagger.app/v1beta1