Skip to content

Commit

Permalink
Merge pull request #420 from ta924/manualrollback
Browse files Browse the repository at this point in the history
Add support for gated rollback
  • Loading branch information
stefanprodan authored Feb 6, 2020
2 parents e31ecbe + 402dda7 commit e457b6d
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 39 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,4 @@ If you have any questions about Flagger and progressive delivery:
hands-on training and meetups in your area.
* File an [issue](https://github.com/weaveworks/flagger/issues/new).
Your feedback is always welcome!
Your feedback is always welcome!
1 change: 1 addition & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ spec:
- confirm-promotion
- post-rollout
- event
- rollback
url:
description: URL address of this webhook
type: string
Expand Down
1 change: 1 addition & 0 deletions charts/flagger/templates/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ spec:
- confirm-promotion
- post-rollout
- event
- rollback
url:
description: URL address of this webhook
type: string
Expand Down
20 changes: 20 additions & 0 deletions docs/gitbook/how-it-works.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,9 @@ The canary promotion is paused until the hooks return HTTP 200.
While the promotion is paused, Flagger will continue to run the metrics checks and rollout hooks.
* Post-rollout hooks are executed after the canary has been promoted or rolled back.
If a post rollout hook fails the error is logged.
* Rollback hooks are executed while a canary deployment is in either Progressing or Waiting status.
This provides the ability to roll back during analysis or while waiting for a confirmation. If a rollback hook
returns a successful HTTP status code, Flagger will roll back the canary deployment.
* Event hooks are executed every time Flagger emits a Kubernetes event. When configured,
every action that Flagger takes during a canary deployment will be sent as JSON via an HTTP POST request.

Expand Down Expand Up @@ -584,6 +587,9 @@ Spec:
timeout: 5s
metadata:
some: "message"
- name: "rollback gate"
type: rollback
url: http://flagger-loadtester.test/gate/halt
- name: "send to Slack"
type: event
url: http://event-recevier.notifications/slack
Expand Down Expand Up @@ -830,6 +836,10 @@ For manual approval of a canary deployment you can use the `confirm-rollout` and
The confirmation rollout hooks are executed before the pre-rollout hooks.
Flagger will halt the canary traffic shifting and analysis until the confirm webhook returns HTTP status 200.

For manual rollback of a canary deployment you can use the `rollback` webhook. The rollback hook will be called
during the analysis and confirmation states. If a rollback webhook returns a successful HTTP status code, Flagger
will shift all traffic back to the primary instance and fail the canary.

Manual gating with Flagger's tester:

```yaml
Expand Down Expand Up @@ -898,4 +908,14 @@ While the promotion is paused, Flagger will continue to run the metrics checks a
url: http://flagger-loadtester.test/gate/halt
```

The `rollback` hook type can be used to manually roll back the canary promotion.

```yaml
canaryAnalysis:
webhooks:
- name: "rollback"
type: rollback
url: http://flagger-loadtester.test/gate/halt
```

If you have notifications enabled, Flagger will post a message to Slack or MS Teams if a canary promotion is waiting for approval.
1 change: 1 addition & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ spec:
- confirm-promotion
- post-rollout
- event
- rollback
url:
description: URL address of this webhook
type: string
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/flagger/v1alpha3/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ const (
ConfirmPromotionHook HookType = "confirm-promotion"
// EventHook dispatches Flagger events to the specified endpoint
EventHook HookType = "event"
// RollbackHook rolls back the canary analysis if the webhook returns HTTP 200
RollbackHook HookType = "rollback"
)

// CanaryWebhook holds the reference to external checks used for canary analysis
Expand Down
104 changes: 66 additions & 38 deletions pkg/controller/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,17 @@ func (c *Controller) advanceCanary(name string, namespace string, skipLivenessCh
return
}

// check if we should rollback
if cd.Status.Phase == flaggerv1.CanaryPhaseProgressing ||
cd.Status.Phase == flaggerv1.CanaryPhaseWaiting {
if ok := c.runRollbackHooks(cd, cd.Status.Phase); ok {
c.recordEventWarningf(cd, "Rolling back %s.%s manual webhook invoked", cd.Name, cd.Namespace)
c.sendNotification(cd, "Rolling back manual webhook invoked", false, true)
c.rollback(cd, canaryController, meshRouter)
return
}
}

// route all traffic to primary if analysis has succeeded
if cd.Status.Phase == flaggerv1.CanaryPhasePromoting {
if provider != "kubernetes" {
Expand Down Expand Up @@ -267,50 +278,13 @@ func (c *Controller) advanceCanary(name string, namespace string, skipLivenessCh
// check if the number of failed checks reached the threshold
if cd.Status.Phase == flaggerv1.CanaryPhaseProgressing &&
(!retriable || cd.Status.FailedChecks >= cd.Spec.CanaryAnalysis.Threshold) {

if cd.Status.FailedChecks >= cd.Spec.CanaryAnalysis.Threshold {
c.recordEventWarningf(cd, "Rolling back %s.%s failed checks threshold reached %v",
cd.Name, cd.Namespace, cd.Status.FailedChecks)
c.sendNotification(cd, fmt.Sprintf("Failed checks threshold reached %v", cd.Status.FailedChecks),
false, true)
}

if !retriable {
c.recordEventWarningf(cd, "Rolling back %s.%s progress deadline exceeded %v",
cd.Name, cd.Namespace, err)
c.sendNotification(cd, fmt.Sprintf("Progress deadline exceeded %v", err),
false, true)
}

// route all traffic back to primary
primaryWeight = 100
canaryWeight = 0
if err := meshRouter.SetRoutes(cd, primaryWeight, canaryWeight, false); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}

canaryPhaseFailed := cd.DeepCopy()
canaryPhaseFailed.Status.Phase = flaggerv1.CanaryPhaseFailed
c.recordEventWarningf(canaryPhaseFailed, "Canary failed! Scaling down %s.%s",
canaryPhaseFailed.Name, canaryPhaseFailed.Namespace)

c.recorder.SetWeight(cd, primaryWeight, canaryWeight)

// shutdown canary
if err := canaryController.Scale(cd, 0); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}

// mark canary as failed
if err := canaryController.SyncStatus(cd, flaggerv1.CanaryStatus{Phase: flaggerv1.CanaryPhaseFailed, CanaryWeight: 0}); err != nil {
c.logger.With("canary", fmt.Sprintf("%s.%s", cd.Name, cd.Namespace)).Errorf("%v", err)
return
}

c.recorder.SetStatus(cd, flaggerv1.CanaryPhaseFailed)
c.runPostRolloutHooks(cd, flaggerv1.CanaryPhaseFailed)
c.rollback(cd, canaryController, meshRouter)
return
}

Expand Down Expand Up @@ -757,6 +731,21 @@ func (c *Controller) runPostRolloutHooks(canary *flaggerv1.Canary, phase flagger
return true
}

// runRollbackHooks calls, in declaration order, every webhook of type
// RollbackHook listed in the canary analysis spec and reports whether one of
// them signalled a rollback.
//
// A webhook call that returns no error is treated as the rollback signal: a
// warning event is recorded, the remaining hooks are skipped and true is
// returned. A call that returns an error only records an info event and the
// next hook is tried. False is returned when no hook signalled a rollback.
func (c *Controller) runRollbackHooks(canary *flaggerv1.Canary, phase flaggerv1.CanaryPhase) bool {
	for _, hook := range canary.Spec.CanaryAnalysis.Webhooks {
		// only rollback gates are of interest here
		if hook.Type != flaggerv1.RollbackHook {
			continue
		}

		if err := CallWebhook(canary.Name, canary.Namespace, phase, hook); err != nil {
			// an error from the gate means "do not roll back"
			c.recordEventInfof(canary, "Rollback hook %s not signaling a rollback", hook.Name)
			continue
		}

		c.recordEventWarningf(canary, "Rollback check %s passed", hook.Name)
		return true
	}

	return false
}

func (c *Controller) runAnalysis(r *flaggerv1.Canary) bool {
// run external checks
for _, webhook := range r.Spec.CanaryAnalysis.Webhooks {
Expand Down Expand Up @@ -878,3 +867,42 @@ func (c *Controller) runAnalysis(r *flaggerv1.Canary) bool {

return true
}

// rollback fails the given canary: it routes all traffic to the primary,
// scales the canary workload to zero, syncs the status to CanaryPhaseFailed
// and finally runs the post-rollout hooks.
//
// When the failed checks counter has reached the analysis threshold, a warning
// event and a notification are emitted first. The function returns early,
// without running the remaining steps, if setting the routes, scaling down or
// syncing the status returns an error.
func (c *Controller) rollback(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface) {
	// explain the rollback when it was caused by too many failed checks
	if canary.Status.FailedChecks >= canary.Spec.CanaryAnalysis.Threshold {
		c.recordEventWarningf(canary, "Rolling back %s.%s failed checks threshold reached %v",
			canary.Name, canary.Namespace, canary.Status.FailedChecks)
		reason := fmt.Sprintf("Failed checks threshold reached %v", canary.Status.FailedChecks)
		c.sendNotification(canary, reason, false, true)
	}

	// route all traffic back to primary
	primaryWeight, canaryWeight := 100, 0
	err := meshRouter.SetRoutes(canary, primaryWeight, canaryWeight, false)
	if err != nil {
		c.recordEventWarningf(canary, "%v", err)
		return
	}

	// the event is recorded against a copy that already carries the failed phase
	failedCopy := canary.DeepCopy()
	failedCopy.Status.Phase = flaggerv1.CanaryPhaseFailed
	c.recordEventWarningf(failedCopy, "Canary failed! Scaling down %s.%s",
		failedCopy.Name, failedCopy.Namespace)

	c.recorder.SetWeight(canary, primaryWeight, canaryWeight)

	// shutdown canary
	err = canaryController.Scale(canary, 0)
	if err != nil {
		c.recordEventWarningf(canary, "%v", err)
		return
	}

	// mark canary as failed
	failedStatus := flaggerv1.CanaryStatus{Phase: flaggerv1.CanaryPhaseFailed, CanaryWeight: 0}
	err = canaryController.SyncStatus(canary, failedStatus)
	if err != nil {
		log := c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace))
		log.Errorf("%v", err)
		return
	}

	c.recorder.SetStatus(canary, flaggerv1.CanaryPhaseFailed)
	c.runPostRolloutHooks(canary, flaggerv1.CanaryPhaseFailed)
}

0 comments on commit e457b6d

Please sign in to comment.