diff --git a/pkg/controller/rollout/rollout_canary.go b/pkg/controller/rollout/rollout_canary.go index 73472d16..d1c6fded 100644 --- a/pkg/controller/rollout/rollout_canary.go +++ b/pkg/controller/rollout/rollout_canary.go @@ -386,53 +386,29 @@ func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, erro return true, nil } klog.Infof("rollout(%s/%s) Finalising Step is %s", c.Rollout.Namespace, c.Rollout.Name, canaryStatus.FinalisingStep) - // the steps. order is maitained by the nextStep - switch canaryStatus.FinalisingStep { - // call the FinalisingTrafficRouting function to: - // 1.restore stable service selector to select all pods - // 2.restore network api(ingress/ gateway api/ istio) configuration - // 3.delete canary service - case v1beta1.FinalisingStepTypeTrafficRouting: - done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr) - if err != nil || !done { - canaryStatus.LastUpdateTime = tr.LastUpdateTime - return done, err - } + var retry bool + // the order of steps is maitained by calculating thenextStep + switch canaryStatus.FinalisingStep { // set workload.pause=false; set workload.partition=0 case v1beta1.FinalisingStepTypeBatchRelease: - done, err := m.finalizingBatchRelease(c) - if err != nil || !done { - return done, err - } + retry, err = m.finalizingBatchRelease(c) // delete batchRelease case v1beta1.FinalisingStepTypeDeleteBR: - done, err := m.removeBatchRelease(c) - if err != nil { - klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error()) - return false, err - } else if !done { - return false, nil - } + retry, err = m.removeBatchRelease(c) // restore the gateway resources (ingress/gatewayAPI/Istio), that means // only stable Service will accept the traffic case v1beta1.FinalisingStepTypeGateway: - retry, err := m.trafficRoutingManager.RestoreGateway(tr) - if err != nil || retry { - return false, err - } + retry, err = m.trafficRoutingManager.RestoreGateway(tr) // restore the stable service case v1beta1.FinalisingStepTypeStableService: - retry, err := m.trafficRoutingManager.RestoreStableService(tr) - if err != nil || retry { - return false, err - } + retry, err = m.trafficRoutingManager.RestoreStableService(tr) // remove canary service case v1beta1.FinalisingStepTypeRemoveCanaryService: - retry, err := m.trafficRoutingManager.RemoveCanaryService(tr) - if err != nil || retry { - return false, err - } + retry, err = m.trafficRoutingManager.RemoveCanaryService(tr) + } + if err != nil || retry { + return false, err } // current step is done, run the next step canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()} @@ -549,37 +525,39 @@ func createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32, return br } +// bool means if we need retry; if error is not nil, always retry func (m *canaryReleaseManager) removeBatchRelease(c *RolloutContext) (bool, error) { batch := &v1beta1.BatchRelease{} err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Rollout.Namespace, Name: c.Rollout.Name}, batch) if err != nil && errors.IsNotFound(err) { - return true, nil + return false, nil } else if err != nil { klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name) - return false, err + return true, err } if !batch.DeletionTimestamp.IsZero() { klog.Infof("rollout(%s/%s) BatchRelease is terminating, and wait a moment", c.Rollout.Namespace, c.Rollout.Name) - return false, nil + return true, nil } //delete batchRelease err = m.Delete(context.TODO(), batch) if err != nil { klog.Errorf("rollout(%s/%s) delete BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error()) - return false, err + return true, err } klog.Infof("rollout(%s/%s) deleting BatchRelease, and wait a moment", c.Rollout.Namespace, c.Rollout.Name) - return false, nil + return true, nil } +// bool means if we need retry; if error is not nil, always retry func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, error) { br, err := m.fetchBatchRelease(c.Rollout.Namespace, c.Rollout.Name) if err != nil { if errors.IsNotFound(err) { - return true, nil + return false, nil } - return false, err + return true, err } waitReady := c.WaitReady // The Completed phase means batchRelease controller has processed all it @@ -587,7 +565,7 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, if br.Spec.ReleasePlan.BatchPartition == nil && br.Status.Phase == v1beta1.RolloutPhaseCompleted { klog.Infof("rollout(%s/%s) finalizing batchRelease(%s) done", c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status)) - return true, nil + return false, nil } // If BatchPartition is nil, BatchRelease will directly resume workload via: @@ -600,11 +578,11 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, switch br.Spec.ReleasePlan.FinalizingPolicy { case v1beta1.WaitResumeFinalizingPolicyType: if waitReady { // no need to patch again - return false, nil + return true, nil } default: if !waitReady { // no need to patch again - return false, nil + return true, nil } } } @@ -618,10 +596,10 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, // Patch BatchPartition and FinalizingPolicy, BatchPartition always patch null here. body := fmt.Sprintf(`{"spec":{"releasePlan":{"batchPartition":null,"finalizingPolicy":"%s"}}}`, policy) if err = m.Patch(context.TODO(), br, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { - return false, err + return true, err } klog.Infof("rollout(%s/%s) patch batchRelease(%s) success", c.Rollout.Namespace, c.Rollout.Name, body) - return false, nil + return true, nil } // syncBatchRelease sync status of br to canaryStatus, and sync rollout-id of canaryStatus to br. @@ -654,14 +632,15 @@ func nextTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.Fin v1beta1.FinalisingStepTypeGateway, // route all traffic to stable version v1beta1.FinalisingStepTypeBatchRelease, // scale up old, scale down new v1beta1.FinalisingStepTypeDeleteBR, - // v1beta1.FinalisingStepTypeTrafficRouting, // do cleaning works(restore stable Service, remove canary Service) v1beta1.FinalisingStepTypeStableService, v1beta1.FinalisingStepTypeRemoveCanaryService, } default: // others: success/disabled/deleting rollout taskSequence = []v1beta1.FinalisingStepType{ - v1beta1.FinalisingStepTypeTrafficRouting, // remove selector of stable Service - v1beta1.FinalisingStepTypeBatchRelease, // scale up new, scale down old + v1beta1.FinalisingStepTypeStableService, + v1beta1.FinalisingStepTypeGateway, + v1beta1.FinalisingStepTypeRemoveCanaryService, + v1beta1.FinalisingStepTypeBatchRelease, // scale up new, scale down old v1beta1.FinalisingStepTypeDeleteBR, } } diff --git a/pkg/controller/rollout/rollout_progressing.go b/pkg/controller/rollout/rollout_progressing.go index d47b5ada..a1214cbf 100644 --- a/pkg/controller/rollout/rollout_progressing.go +++ b/pkg/controller/rollout/rollout_progressing.go @@ -420,11 +420,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error) } // if no trafficRouting exists, simply remove batchRelease if !c.Rollout.Spec.Strategy.HasTrafficRoutings() { - done, err := releaseManager.removeBatchRelease(c) + retry, err := releaseManager.removeBatchRelease(c) if err != nil { klog.Errorf("rollout(%s/%s) DoFinalising batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error()) return false, err - } else if !done { + } else if retry { return false, nil } return true, nil @@ -458,11 +458,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error) // canary deployment, for other release, the v2 pods won't be deleted immediately // in both cases, only the stable pods (v1) accept the traffic case v1beta1.FinalisingStepTypeDeleteBR: - done, err := releaseManager.removeBatchRelease(c) + retry, err := releaseManager.removeBatchRelease(c) if err != nil { klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error()) return false, err - } else if !done { + } else if retry { return false, nil } klog.Infof("rollout(%s/%s) in step (%s), and success", c.Rollout.Namespace, c.Rollout.Name, subStatus.FinalisingStep)