From 29075264526234d057f191bc11c9da51781f75d5 Mon Sep 17 00:00:00 2001 From: Daniel Albuquerque Date: Mon, 14 Sep 2020 19:46:35 +0100 Subject: [PATCH 1/9] Do not promote when not ready on skip analysis --- pkg/controller/scheduler.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 11fac989b..db9edfa1b 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -236,7 +236,7 @@ func (c *Controller) advanceCanary(name string, namespace string) { } // check if analysis should be skipped - if skip := c.shouldSkipAnalysis(cd, canaryController, meshRouter); skip { + if skip := c.shouldSkipAnalysis(cd, canaryController, meshRouter, err, retriable); skip { return } @@ -616,11 +616,20 @@ func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool { return true } -func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface) bool { +func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface, err error, retriable bool) bool { if !canary.SkipAnalysis() { return false } + // regardless if analysis is being skipped, rollback if canary failed to progress + if !retriable || canary.Status.FailedChecks >= canary.GetAnalysisThreshold() { + c.recordEventWarningf(canary, "Rolling back %s.%s progress deadline exceeded %v", canary.Name, canary.Namespace, err) + c.alert(canary, fmt.Sprintf("Progress deadline exceeded %v", err), false, flaggerv1.SeverityError) + c.rollback(canary, canaryController, meshRouter) + + return true + } + // route all traffic to primary primaryWeight := 100 canaryWeight := 0 From 013949a9f45576b63995bcd8a6afe80226e468f6 Mon Sep 17 00:00:00 2001 From: Daniel Albuquerque Date: Fri, 18 Sep 2020 17:59:16 +0100 Subject: [PATCH 2/9] Add tests for when canary analysis is skipped --- .circleci/config.yml | 1 + pkg/apis/flagger/v1beta1/canary.go | 4 +- test/e2e-istio-tests-skip-analysis.sh | 192 ++++++++++++++++++++++++++ 3 files changed, 196 insertions(+), 1 deletion(-) create mode 100755 test/e2e-istio-tests-skip-analysis.sh diff --git a/.circleci/config.yml b/.circleci/config.yml index 09c6a5982..1ab3bd284 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -95,6 +95,7 @@ jobs: - run: test/e2e-kind.sh v1.18.2 - run: test/e2e-istio.sh - run: test/e2e-istio-tests.sh + - run: test/e2e-istio-tests-skip-analysis.sh e2e-gloo-testing: machine: true diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 3451e796b..52bc64a47 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -428,7 +428,9 @@ func (c *Canary) GetMetricInterval() string { // SkipAnalysis returns true if the analysis is nil // or if spec.SkipAnalysis is true func (c *Canary) SkipAnalysis() bool { - if c.Spec.Analysis == nil && c.Spec.CanaryAnalysis == nil { + // log.Printf("#1 SkipAnalysis, analysis=%v canaryanalysis=%v", c.Spec.Analysis, c.Spec.CanaryAnalysis) + + if c.Spec.Analysis == nil || c.Spec.CanaryAnalysis == nil { return true } return c.Spec.SkipAnalysis diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh new file mode 100755 index 000000000..7ad419dde --- /dev/null +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -0,0 +1,192 @@ +#!/usr/bin/env bash + +# This script runs e2e tests for when the canary analysis is skipped +# Prerequisites: Kubernetes Kind and Istio + +set -o errexit + +REPO_ROOT=$(git rev-parse --show-toplevel) + +echo '>>> Creating test namespace' +kubectl create namespace test +kubectl label namespace test istio-injection=enabled + +echo '>>> Installing the load tester' +kubectl apply -k ${REPO_ROOT}/kustomize/tester +kubectl -n test rollout status deployment/flagger-loadtester + +echo '>>> Deploy podinfo' +kubectl apply -f ${REPO_ROOT}/test/e2e-workload.yaml + +echo '>>> Create latency metric template' +cat <>> Initialising canary' +cat <>> Waiting for primary to be ready' +retries=50 +count=0 +ok=false +until ${ok}; do + kubectl -n test get canary/podinfo | grep 'Initialized' && ok=true || ok=false + sleep 5 + count=$(($count + 1)) + if [[ ${count} -eq ${retries} ]]; then + kubectl -n istio-system logs deployment/flagger + echo "No more retries left" + exit 1 + fi +done + +echo '✔ Canary initialization test passed' + +kubectl -n test get svc/podinfo -oyaml | grep annotations-test +kubectl -n test get svc/podinfo -oyaml | grep labels-test + +echo '✔ Canary service custom metadata test passed' + +echo '>>> Triggering canary deployment' +kubectl -n test set image deployment/podinfo podinfod=stefanprodan/podinfo:3.1.1 + +echo '>>> Waiting for canary promotion' +retries=50 +count=0 +ok=false +until ${ok}; do + kubectl -n test describe deployment/podinfo-primary | grep '3.1.1' && ok=true || ok=false + sleep 10 + kubectl -n istio-system logs deployment/flagger --tail 1 + count=$(($count + 1)) + if [[ ${count} -eq ${retries} ]]; then + kubectl -n test describe deployment/podinfo + kubectl -n test describe deployment/podinfo-primary + kubectl -n istio-system logs deployment/flagger + echo "No more retries left" + exit 1 + fi +done + +echo '>>> Waiting for canary finalization' +retries=50 +count=0 +ok=false +until ${ok}; do + kubectl -n test get canary/podinfo | grep 'Succeeded' && ok=true || ok=false + sleep 5 + count=$(($count + 1)) + if [[ ${count} -eq ${retries} ]]; then + kubectl -n istio-system logs deployment/flagger + echo "No more retries left" + exit 1 + fi +done + +echo '✔ Canary promotion test passed' + +if [[ "$1" = "canary" ]]; then + exit 0 +fi + +echo '>>> Triggering canary deployment with a bad release (unknown docker image)' +kubectl -n test set image deployment/podinfo podinfod=stefanprodan/potato:1.0.0 + +echo '>>> Waiting for canary to fail' +retries=50 +count=0 +ok=false +until ${ok}; do + kubectl get canary/podinfo -n test -o=jsonpath='{.status.phase}' | grep 'Failed' && ok=true || ok=false + sleep 10 + kubectl -n istio-system logs deployment/flagger --tail 1 + count=$(($count + 1)) + if [[ ${count} -eq ${retries} ]]; then + kubectl -n test describe deployment/podinfo + kubectl -n test describe deployment/podinfo-primary + kubectl -n istio-system logs deployment/flagger + echo "No more retries left" + exit 1 + fi +done + +echo '>>> Confirm primary pod still running correct version' +retries=50 +count=0 +ok=false +until ${ok}; do + kubectl -n test describe deployment/podinfo-primary | grep '3.1.1' && ok=true || ok=false + sleep 5 + count=$(($count + 1)) + if [[ ${count} -eq ${retries} ]]; then + kubectl -n istio-system logs deployment/flagger + echo "No more retries left" + exit 1 + fi +done + +kubectl -n istio-system logs deployment/flagger + +echo '✔ All tests passed' From 8119acb40a7fcc314bf01ac62a0344be4a02b59a Mon Sep 17 00:00:00 2001 From: Daniel Albuquerque Date: Fri, 18 Sep 2020 18:00:38 +0100 Subject: [PATCH 3/9] Remove comment :) --- pkg/apis/flagger/v1beta1/canary.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 52bc64a47..703c44056 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -428,8 +428,6 @@ func (c *Canary) GetMetricInterval() string { // SkipAnalysis returns true if the analysis is nil // or if spec.SkipAnalysis is true func (c *Canary) SkipAnalysis() bool { - // log.Printf("#1 SkipAnalysis, analysis=%v canaryanalysis=%v", c.Spec.Analysis, c.Spec.CanaryAnalysis) - if c.Spec.Analysis == nil || c.Spec.CanaryAnalysis == nil { return true } From 4b098cc7a259d3e1044b306b30df66f7a7743a92 Mon Sep 17 00:00:00 2001 From: Daniel Albuquerque Date: Fri, 18 Sep 2020 18:17:50 +0100 Subject: [PATCH 4/9] Better assertion for new tests --- test/e2e-istio-tests-skip-analysis.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh index 7ad419dde..66cc9e716 100755 --- a/test/e2e-istio-tests-skip-analysis.sh +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -151,7 +151,7 @@ if [[ "$1" = "canary" ]]; then exit 0 fi -echo '>>> Triggering canary deployment with a bad release (unknown docker image)' +echo '>>> Triggering canary deployment with a bad release (non existent docker image)' kubectl -n test set image deployment/podinfo podinfod=stefanprodan/potato:1.0.0 echo '>>> Waiting for canary to fail' @@ -177,7 +177,7 @@ retries=50 count=0 ok=false until ${ok}; do - kubectl -n test describe deployment/podinfo-primary | grep '3.1.1' && ok=true || ok=false + kubectl get deployment podinfo-primary -n test -o jsonpath='{.spec.replicas}' | grep 1 && ok=true || ok=false sleep 5 count=$(($count + 1)) if [[ ${count} -eq ${retries} ]]; then From 0eee5b74023cdb320ae5984187ea6becf936c5d3 Mon Sep 17 00:00:00 2001 From: Daniel Albuquerque Date: Fri, 18 Sep 2020 18:43:27 +0100 Subject: [PATCH 5/9] Revert changes in skip analysis condition --- pkg/apis/flagger/v1beta1/canary.go | 2 +- test/e2e-istio-tests-skip-analysis.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 703c44056..3451e796b 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -428,7 +428,7 @@ func (c *Canary) GetMetricInterval() string { // SkipAnalysis returns true if the analysis is nil // or if spec.SkipAnalysis is true func (c *Canary) SkipAnalysis() bool { - if c.Spec.Analysis == nil || c.Spec.CanaryAnalysis == nil { + if c.Spec.Analysis == nil && c.Spec.CanaryAnalysis == nil { return true } return c.Spec.SkipAnalysis diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh index 66cc9e716..c6f83db60 100755 --- a/test/e2e-istio-tests-skip-analysis.sh +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -71,8 +71,8 @@ spec: x-envoy-upstream-rq-timeout-ms: "15000" x-envoy-max-retries: "10" x-envoy-retry-on: "gateway-error,connect-failure,refused-stream" + skipAnalysis: true analysis: - skipAnalysis: true interval: 15s threshold: 15 maxWeight: 30 From 26d53dcd44d140e26ba011566e6a301990353121 Mon Sep 17 00:00:00 2001 From: Daniel Albuquerque Date: Fri, 18 Sep 2020 19:05:45 +0100 Subject: [PATCH 6/9] diff test stucture for istio --- .circleci/config.yml | 1 + test/e2e-istio-dependencies.sh | 19 +++++++++++++++++++ test/e2e-istio-tests-skip-analysis.sh | 15 --------------- test/e2e-istio-tests.sh | 15 --------------- 4 files changed, 20 insertions(+), 30 deletions(-) create mode 100755 test/e2e-istio-dependencies.sh diff --git a/.circleci/config.yml b/.circleci/config.yml index 1ab3bd284..047423cb8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -94,6 +94,7 @@ jobs: - run: test/container-build.sh - run: test/e2e-kind.sh v1.18.2 - run: test/e2e-istio.sh + - run: test/e2e-istio-dependencies.sh - run: test/e2e-istio-tests.sh - run: test/e2e-istio-tests-skip-analysis.sh diff --git a/test/e2e-istio-dependencies.sh b/test/e2e-istio-dependencies.sh new file mode 100755 index 000000000..7bfe93cb0 --- /dev/null +++ b/test/e2e-istio-dependencies.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# This script setups the scenarios for istio tests by creating a Kubernetes namespace, installing the load tester and a test workload (podinfo) +# Prerequisites: Kubernetes Kind and Istio + +set -o errexit + +REPO_ROOT=$(git rev-parse --show-toplevel) + +echo '>>> Creating test namespace' +kubectl create namespace test +kubectl label namespace test istio-injection=enabled + +echo '>>> Installing the load tester' +kubectl apply -k ${REPO_ROOT}/kustomize/tester +kubectl -n test rollout status deployment/flagger-loadtester + +echo '>>> Deploy podinfo' +kubectl apply -f ${REPO_ROOT}/test/e2e-workload.yaml diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh index c6f83db60..6cada2eb6 100755 --- a/test/e2e-istio-tests-skip-analysis.sh +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -3,21 +3,6 @@ # This script runs e2e tests for when the canary analysis is skipped # Prerequisites: Kubernetes Kind and Istio -set -o errexit - -REPO_ROOT=$(git rev-parse --show-toplevel) - -echo '>>> Creating test namespace' -kubectl create namespace test -kubectl label namespace test istio-injection=enabled - -echo '>>> Installing the load tester' -kubectl apply -k ${REPO_ROOT}/kustomize/tester -kubectl -n test rollout status deployment/flagger-loadtester - -echo '>>> Deploy podinfo' -kubectl apply -f ${REPO_ROOT}/test/e2e-workload.yaml - echo '>>> Create latency metric template' cat <>> Creating test namespace' -kubectl create namespace test -kubectl label namespace test istio-injection=enabled - -echo '>>> Installing the load tester' -kubectl apply -k ${REPO_ROOT}/kustomize/tester -kubectl -n test rollout status deployment/flagger-loadtester - -echo '>>> Deploy podinfo' -kubectl apply -f ${REPO_ROOT}/test/e2e-workload.yaml - echo '>>> Create latency metric template' cat < Date: Fri, 18 Sep 2020 19:51:03 +0100 Subject: [PATCH 7/9] Remove custom metrics (not needed for tests) --- test/e2e-istio-tests-skip-analysis.sh | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh index 6cada2eb6..5b94026b4 100755 --- a/test/e2e-istio-tests-skip-analysis.sh +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -3,32 +3,6 @@ # This script runs e2e tests for when the canary analysis is skipped # Prerequisites: Kubernetes Kind and Istio -echo '>>> Create latency metric template' -cat <>> Initialising canary' cat < Date: Sat, 19 Sep 2020 15:15:39 +0100 Subject: [PATCH 8/9] Add set -o errexit --- test/e2e-istio-tests-skip-analysis.sh | 2 ++ test/e2e-istio-tests.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh index 5b94026b4..f6f1937ee 100755 --- a/test/e2e-istio-tests-skip-analysis.sh +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -3,6 +3,8 @@ # This script runs e2e tests for when the canary analysis is skipped # Prerequisites: Kubernetes Kind and Istio +set -o errexit + echo '>>> Initialising canary' cat <>> Create latency metric template' cat < Date: Sat, 19 Sep 2020 17:39:54 +0100 Subject: [PATCH 9/9] Remove metadata tests (unrelated to skip analysis) --- test/e2e-istio-tests-skip-analysis.sh | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/test/e2e-istio-tests-skip-analysis.sh b/test/e2e-istio-tests-skip-analysis.sh index f6f1937ee..621fe285c 100755 --- a/test/e2e-istio-tests-skip-analysis.sh +++ b/test/e2e-istio-tests-skip-analysis.sh @@ -21,17 +21,6 @@ spec: service: port: 9898 portDiscovery: true - apex: - annotations: - test: "annotations-test" - labels: - test: "labels-test" - headers: - request: - add: - x-envoy-upstream-rq-timeout-ms: "15000" - x-envoy-max-retries: "10" - x-envoy-retry-on: "gateway-error,connect-failure,refused-stream" skipAnalysis: true analysis: interval: 15s @@ -65,11 +54,6 @@ done echo '✔ Canary initialization test passed' -kubectl -n test get svc/podinfo -oyaml | grep annotations-test -kubectl -n test get svc/podinfo -oyaml | grep labels-test - -echo '✔ Canary service custom metadata test passed' - echo '>>> Triggering canary deployment' kubectl -n test set image deployment/podinfo podinfod=stefanprodan/podinfo:3.1.1 @@ -133,12 +117,13 @@ until ${ok}; do fi done -echo '>>> Confirm primary pod still running correct version' +echo '>>> Confirm primary pod is still running and with correct version' retries=50 count=0 ok=false -until ${ok}; do - kubectl get deployment podinfo-primary -n test -o jsonpath='{.spec.replicas}' | grep 1 && ok=true || ok=false +until ${okImage} && ${okRunning}; do + kubectl get deployment podinfo-primary -n test -o jsonpath='{.spec.replicas}' | grep 1 && okRunning=true || okRunning=false + kubectl -n test describe deployment/podinfo-primary | grep '3.1.3' && okImage=true || okImage=false sleep 5 count=$(($count + 1)) if [[ ${count} -eq ${retries} ]]; then