diff --git a/artifacts/ab-testing/canary.yaml b/artifacts/ab-testing/canary.yaml index eb96fd220..4bb89b573 100644 --- a/artifacts/ab-testing/canary.yaml +++ b/artifacts/ab-testing/canary.yaml @@ -36,12 +36,12 @@ spec: iterations: 10 # canary match condition match: - - headers: - user-agent: - regex: "^(?!.*Chrome)(?=.*\bSafari\b).*$" - headers: cookie: regex: "^(.*?;)?(type=insider)(;.*)?$" + - headers: + user-agent: + regex: "(?=.*Safari)(?!.*Chrome).*$" metrics: - name: request-success-rate # minimum req success rate (non 5xx responses) diff --git a/docs/diagrams/flagger-abtest-steps.png b/docs/diagrams/flagger-abtest-steps.png index db3ed35d0..6ed4b60d0 100644 Binary files a/docs/diagrams/flagger-abtest-steps.png and b/docs/diagrams/flagger-abtest-steps.png differ diff --git a/docs/gitbook/usage/appmesh-progressive-delivery.md b/docs/gitbook/usage/appmesh-progressive-delivery.md index 495902dda..d872add1e 100644 --- a/docs/gitbook/usage/appmesh-progressive-delivery.md +++ b/docs/gitbook/usage/appmesh-progressive-delivery.md @@ -106,18 +106,18 @@ spec: interval: 30s # testing (optional) webhooks: - - name: acceptance-test - type: pre-rollout - url: http://flagger-loadtester.test/ - timeout: 30s - metadata: - type: bash - cmd: "curl -sd 'test' http://podinfo-canary.test:9898/token | grep token" - - name: load-test - url: http://flagger-loadtester.test/ - timeout: 5s - metadata: - cmd: "hey -z 1m -q 10 -c 2 http://podinfo.test:9898/" + - name: acceptance-test + type: pre-rollout + url: http://flagger-loadtester.test/ + timeout: 30s + metadata: + type: bash + cmd: "curl -sd 'test' http://podinfo-canary.test:9898/token | grep token" + - name: load-test + url: http://flagger-loadtester.test/ + timeout: 5s + metadata: + cmd: "hey -z 1m -q 10 -c 2 http://podinfo-canary.test:9898/" ``` Save the above resource as podinfo-canary.yaml and then apply it: @@ -320,3 +320,74 @@ If you’ve enabled the Slack notifications, you’ll receive a message if the p or if the analysis 
reached the maximum number of failed checks: ![Flagger Slack Notifications](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/screens/slack-canary-failed.png) + +### A/B Testing + +Besides weighted routing, Flagger can be configured to route traffic to the canary based on HTTP match conditions. +In an A/B testing scenario, you'll be using HTTP headers or cookies to target a certain segment of your users. +This is particularly useful for frontend applications that require session affinity. + +![Flagger A/B Testing Stages](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-abtest-steps.png) + +Edit the canary analysis, remove the max/step weight and add the match conditions and iterations: + +```yaml + canaryAnalysis: + interval: 1m + threshold: 10 + iterations: 10 + match: + - headers: + x-canary: + exact: "insider" + webhooks: + - name: load-test + url: http://flagger-loadtester.test/ + metadata: + cmd: "hey -z 1m -q 10 -c 2 -H 'X-Canary: insider' http://podinfo.test:9898/" +``` + +The above configuration will run an analysis for ten minutes targeting users that have an `X-Canary: insider` header. + +You can also use an HTTP cookie. To target all users with a `canary` cookie set to `insider`, the match condition should be: + +```yaml +match: +- headers: + cookie: + regex: "^(.*?;)?(canary=insider)(;.*)?$" +webhooks: +- name: load-test + url: http://flagger-loadtester.test/ + metadata: + cmd: "hey -z 1m -q 10 -c 2 -H 'Cookie: canary=insider' http://podinfo.test:9898/" +``` + +Trigger a canary deployment by updating the container image: + +```bash +kubectl -n test set image deployment/podinfo \ +podinfod=stefanprodan/podinfo:3.1.3 +``` + +Flagger detects that the deployment revision changed and starts the A/B test: + +```text +kubectl -n appmesh-system logs deploy/flagger -f | jq .msg + +New revision detected! 
Starting canary analysis for podinfo.test +Advance podinfo.test canary iteration 1/10 +Advance podinfo.test canary iteration 2/10 +Advance podinfo.test canary iteration 3/10 +Advance podinfo.test canary iteration 4/10 +Advance podinfo.test canary iteration 5/10 +Advance podinfo.test canary iteration 6/10 +Advance podinfo.test canary iteration 7/10 +Advance podinfo.test canary iteration 8/10 +Advance podinfo.test canary iteration 9/10 +Advance podinfo.test canary iteration 10/10 +Copying podinfo.test template spec to podinfo-primary.test +Waiting for podinfo-primary.test rollout to finish: 1 of 2 updated replicas are available +Routing all traffic to primary +Promotion completed! Scaling down podinfo.test +``` diff --git a/docs/gitbook/usage/nginx-progressive-delivery.md b/docs/gitbook/usage/nginx-progressive-delivery.md index 8426248c3..2dcb117a9 100644 --- a/docs/gitbook/usage/nginx-progressive-delivery.md +++ b/docs/gitbook/usage/nginx-progressive-delivery.md @@ -159,7 +159,6 @@ spec: url: http://flagger-loadtester.test/ timeout: 5s metadata: - type: cmd cmd: "hey -z 1m -q 10 -c 2 http://app.example.com/" ``` @@ -382,12 +381,10 @@ Edit the canary analysis, remove the max/step weight and add the match condition interval: 1m webhooks: - name: load-test - url: http://localhost:8888/ + url: http://flagger-loadtester.test/ timeout: 5s metadata: - type: cmd cmd: "hey -z 1m -q 10 -c 2 -H 'Cookie: canary=always' http://app.example.com/" - logCmdOutput: "true" ``` The above configuration will run an analysis for ten minutes targeting users that have a `canary` cookie set to `always` or diff --git a/pkg/router/appmesh.go b/pkg/router/appmesh.go index f087f242c..a30df7cec 100644 --- a/pkg/router/appmesh.go +++ b/pkg/router/appmesh.go @@ -178,26 +178,43 @@ func (ar *AppMeshRouter) reconcileVirtualService(canary *flaggerv1.Canary, name routePrefix = canary.Spec.Service.Match[0].Uri.Prefix } - vsSpec := appmeshv1.VirtualServiceSpec{ - MeshName: canary.Spec.Service.MeshName, - 
VirtualRouter: &appmeshv1.VirtualRouter{ + // Canary progressive traffic shift + routes := []appmeshv1.Route{ + { Name: routerName, - Listeners: []appmeshv1.VirtualRouterListener{ - { - PortMapping: appmeshv1.PortMapping{ - Port: int64(canary.Spec.Service.Port), - Protocol: protocol, + Http: &appmeshv1.HttpRoute{ + Match: appmeshv1.HttpRouteMatch{ + Prefix: routePrefix, + }, + RetryPolicy: makeRetryPolicy(canary), + Action: appmeshv1.HttpRouteAction{ + WeightedTargets: []appmeshv1.WeightedTarget{ + { + VirtualNodeName: canaryVirtualNode, + Weight: canaryWeight, + }, + { + VirtualNodeName: primaryVirtualNode, + Weight: 100 - canaryWeight, + }, }, }, }, }, - Routes: []appmeshv1.Route{ + } + + // A/B testing - header based routing + if len(canary.Spec.CanaryAnalysis.Match) > 0 && canaryWeight == 0 { + routes = []appmeshv1.Route{ { - Name: routerName, + Name: fmt.Sprintf("%s-a", targetName), + Priority: int64p(10), Http: &appmeshv1.HttpRoute{ Match: appmeshv1.HttpRouteMatch{ - Prefix: routePrefix, + Prefix: routePrefix, + Headers: makeHeaders(canary), }, + RetryPolicy: makeRetryPolicy(canary), Action: appmeshv1.HttpRouteAction{ WeightedTargets: []appmeshv1.WeightedTarget{ { @@ -212,34 +229,41 @@ func (ar *AppMeshRouter) reconcileVirtualService(canary *flaggerv1.Canary, name }, }, }, - }, - } - - // add retry policy (default: one retry on gateway error with a 250ms timeout) - if canary.Spec.Service.Retries != nil { - timeout := int64(250) - if d, err := time.ParseDuration(canary.Spec.Service.Retries.PerTryTimeout); err == nil { - timeout = d.Milliseconds() - } - - attempts := int64(1) - if canary.Spec.Service.Retries.Attempts > 0 { - attempts = int64(canary.Spec.Service.Retries.Attempts) - } - - retryPolicy := &appmeshv1.HttpRetryPolicy{ - PerRetryTimeoutMillis: int64p(timeout), - MaxRetries: int64p(attempts), + { + Name: fmt.Sprintf("%s-b", targetName), + Priority: int64p(20), + Http: &appmeshv1.HttpRoute{ + Match: appmeshv1.HttpRouteMatch{ + Prefix: routePrefix, + }, 
+ RetryPolicy: makeRetryPolicy(canary), + Action: appmeshv1.HttpRouteAction{ + WeightedTargets: []appmeshv1.WeightedTarget{ + { + VirtualNodeName: primaryVirtualNode, + Weight: 100, + }, + }, + }, + }, + }, } + } - events := []string{"gateway-error"} - if len(canary.Spec.Service.Retries.RetryOn) > 0 { - events = strings.Split(canary.Spec.Service.Retries.RetryOn, ",") - } - for _, value := range events { - retryPolicy.HttpRetryPolicyEvents = append(retryPolicy.HttpRetryPolicyEvents, appmeshv1.HttpRetryPolicyEvent(value)) - } - vsSpec.Routes[0].Http.RetryPolicy = retryPolicy + vsSpec := appmeshv1.VirtualServiceSpec{ + MeshName: canary.Spec.Service.MeshName, + VirtualRouter: &appmeshv1.VirtualRouter{ + Name: routerName, + Listeners: []appmeshv1.VirtualRouterListener{ + { + PortMapping: appmeshv1.PortMapping{ + Port: int64(canary.Spec.Service.Port), + Protocol: protocol, + }, + }, + }, + }, + Routes: routes, } virtualService, err := ar.appmeshClient.AppmeshV1beta1().VirtualServices(canary.Namespace).Get(name, metav1.GetOptions{}) @@ -375,6 +399,60 @@ func (ar *AppMeshRouter) SetRoutes( return nil } +// makeRetryPolicy creates an App Mesh HttpRetryPolicy from Canary.Spec.Service.Retries and returns nil when no retries are configured +// defaults for unset or unparsable fields: one retry, on gateway-error, with a 250ms per-try timeout +func makeRetryPolicy(canary *flaggerv1.Canary) *appmeshv1.HttpRetryPolicy { + if canary.Spec.Service.Retries != nil { + timeout := int64(250) + if d, err := time.ParseDuration(canary.Spec.Service.Retries.PerTryTimeout); err == nil { + timeout = d.Milliseconds() + } + + attempts := int64(1) + if canary.Spec.Service.Retries.Attempts > 0 { + attempts = int64(canary.Spec.Service.Retries.Attempts) + } + + retryPolicy := &appmeshv1.HttpRetryPolicy{ + PerRetryTimeoutMillis: int64p(timeout), + MaxRetries: int64p(attempts), + } + + events := []string{"gateway-error"} + if len(canary.Spec.Service.Retries.RetryOn) > 0 { + events = strings.Split(canary.Spec.Service.Retries.RetryOn, ",") + } + for _, value := range events { + 
retryPolicy.HttpRetryPolicyEvents = append(retryPolicy.HttpRetryPolicyEvents, appmeshv1.HttpRetryPolicyEvent(value)) + } + return retryPolicy + } + + return nil +} + +// makeHeaders creates the App Mesh HttpRouteHeader list from the Canary.CanaryAnalysis.Match header conditions (empty match values are left unset) +func makeHeaders(canary *flaggerv1.Canary) []appmeshv1.HttpRouteHeader { + headers := []appmeshv1.HttpRouteHeader{} + + for _, m := range canary.Spec.CanaryAnalysis.Match { + for key, value := range m.Headers { + header := appmeshv1.HttpRouteHeader{ + Name: key, + Match: &appmeshv1.HeaderMatchMethod{ + Exact: stringp(value.Exact), + Prefix: stringp(value.Prefix), + Regex: stringp(value.Regex), + Suffix: stringp(value.Suffix), + }, + } + headers = append(headers, header) + } + } + + return headers +} + func getProtocol(canary *flaggerv1.Canary) string { if strings.Contains(canary.Spec.Service.PortName, "grpc") { return "grpc" @@ -385,3 +463,10 @@ func getProtocol(canary *flaggerv1.Canary) string { func int64p(i int64) *int64 { return &i } + +func stringp(s string) *string { + if s != "" { + return &s + } + return nil +} diff --git a/pkg/router/appmesh_test.go b/pkg/router/appmesh_test.go index 56e1bdd8a..003b127bd 100644 --- a/pkg/router/appmesh_test.go +++ b/pkg/router/appmesh_test.go @@ -184,3 +184,45 @@ func TestAppmeshRouter_GetSetRoutes(t *testing.T) { t.Errorf("Got mirror %v wanted %v", m, false) } } + +func TestAppmeshRouter_ABTest(t *testing.T) { + mocks := setupfakeClients() + router := &AppMeshRouter{ + logger: mocks.logger, + flaggerClient: mocks.flaggerClient, + appmeshClient: mocks.meshClient, + kubeClient: mocks.kubeClient, + } + + err := router.Reconcile(mocks.abtest) + if err != nil { + t.Fatal(err.Error()) + } + + // check virtual service + vsName := fmt.Sprintf("%s.%s", mocks.abtest.Spec.TargetRef.Name, mocks.abtest.Namespace) + vs, err := router.appmeshClient.AppmeshV1beta1().VirtualServices("default").Get(vsName, metav1.GetOptions{}) + if err != nil { + t.Fatal(err.Error()) + } + + // check 
virtual service routes (fatal: the assertions below index Routes[0]) + if len(vs.Spec.Routes) != 2 { + t.Fatalf("Got routes %v wanted %v", len(vs.Spec.Routes), 2) + } + + // check headers (fatal: the assertions below index Headers[0]) + if len(vs.Spec.Routes[0].Http.Match.Headers) < 1 { + t.Fatalf("Got no http match headers") + } + + header := vs.Spec.Routes[0].Http.Match.Headers[0].Name + if header != "x-user-type" { + t.Errorf("Got http match header %v wanted %v", header, "x-user-type") + } + + exactMatch := *vs.Spec.Routes[0].Http.Match.Headers[0].Match.Exact + if exactMatch != "test" { + t.Errorf("Got http match header exact %v wanted %v", exactMatch, "test") + } +} diff --git a/pkg/router/router_test.go b/pkg/router/router_test.go index 67360a60e..f781f6925 100644 --- a/pkg/router/router_test.go +++ b/pkg/router/router_test.go @@ -77,10 +77,15 @@ func newMockCanaryAppMesh() *flaggerv1.Canary { MaxWeight: 50, Metrics: []flaggerv1.CanaryMetric{ { - Name: "appmesh_requests_total", + Name: "request-success-rate", Threshold: 99, Interval: "1m", }, + { + Name: "request-duration", + Threshold: 500, + Interval: "1m", + }, }, }, }, @@ -122,12 +127,12 @@ func newMockCanary() *flaggerv1.Canary { MaxWeight: 50, Metrics: []flaggerv1.CanaryMetric{ { - Name: "istio_requests_total", + Name: "request-success-rate", Threshold: 99, Interval: "1m", }, { - Name: "istio_request_duration_seconds_bucket", + Name: "request-duration", Threshold: 500, Interval: "1m", }, @@ -158,7 +163,8 @@ func newMockABTest() *flaggerv1.Canary { Kind: "Deployment", }, Service: flaggerv1.CanaryService{ - Port: 9898, + Port: 9898, + MeshName: "global", }, CanaryAnalysis: flaggerv1.CanaryAnalysis{ Threshold: 10, Iterations: 2, @@ -173,12 +179,12 @@ func newMockABTest() *flaggerv1.Canary { }, Metrics: []flaggerv1.CanaryMetric{ { - Name: "istio_requests_total", + Name: "request-success-rate", Threshold: 99, Interval: "1m", }, { - Name: "istio_request_duration_seconds_bucket", + Name: "request-duration", Threshold: 500, Interval: "1m", },