diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index f0dc08266b..d2c896972d 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -936,6 +936,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.logger, s.logLevel, newPluginManager, + controller.NewRestoreTracker(), backupStoreGetter, s.metrics, s.config.formatFlag.Parse(), diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index b3be1d0a33..520045dd5c 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -234,7 +234,11 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr case "", velerov1api.BackupPhaseNew: // only process new backups case velerov1api.BackupPhaseInProgress: - // if backup is in progress, we should not process it again + if b.backupTracker.Contains(original.Namespace, original.Name) { + log.Debug("Backup is in progress in another reconcile, skipping") + return ctrl.Result{}, nil + } + // if backup phase is in progress, we should not process it again // we want to mark it as failed to avoid it being stuck in progress // if so, mark it as failed, last loop did not successfully complete the backup log.Debug("Backup has in progress status from prior reconcile, marking it as failed") @@ -246,6 +250,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr return ctrl.Result{}, err } // patch to mark it as failed succeeded, do not requeue + b.backupTracker.Delete(original.Namespace, original.Name) return ctrl.Result{}, nil default: b.logger.WithFields(logrus.Fields{ @@ -266,6 +271,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr // update status if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil { + // if we fail to patch to inprogress, we will try again in the next reconcile loop return ctrl.Result{}, errors.Wrapf(err, "error updating Backup status to %s", request.Status.Phase) } // store ref to just-updated item for creating patch @@ -323,6 +329,8 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr log.Info("Updating backup's final status") if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil { log.WithError(err).Error("error updating backup's final status") + // delete from tracker so next reconcile fails the backup + b.backupTracker.Delete(original.Namespace, original.Name) // return the error so the status can be re-processed; it's currently still not completed or failed return ctrl.Result{}, err } diff --git a/pkg/controller/restore_controller.go b/pkg/controller/restore_controller.go index d58c21cb3a..b7fa65fc77 100644 --- a/pkg/controller/restore_controller.go +++ b/pkg/controller/restore_controller.go @@ -106,6 +106,7 @@ type restoreReconciler struct { newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager backupStoreGetter persistence.ObjectBackupStoreGetter + restoreTracker RestoreTracker } type backupInfo struct { @@ -121,6 +122,7 @@ func NewRestoreReconciler( logger logrus.FieldLogger, restoreLogLevel logrus.Level, newPluginManager func(logrus.FieldLogger) clientmgmt.Manager, + restoreTracker RestoreTracker, backupStoreGetter persistence.ObjectBackupStoreGetter, metrics *metrics.ServerMetrics, logFormat logging.Format, @@ -134,6 +136,7 @@ func NewRestoreReconciler( kbClient: kbClient, logger: logger, restoreLogLevel: restoreLogLevel, + restoreTracker: restoreTracker, metrics: metrics, logFormat: logFormat, clock: &clock.RealClock{}, @@ -210,6 +213,10 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct case "", api.RestorePhaseNew: // only process new restores case api.RestorePhaseInProgress: + if r.restoreTracker.Contains(restore.Namespace, restore.Name) { + log.Debug("Restore has in progress in another reconcile, skipping") + return ctrl.Result{}, nil + } // if restore is in progress, we should not process it again // we want to mark it as failed to avoid it being stuck in progress // if so, mark it as failed, last loop did not successfully complete the restore @@ -222,6 +229,7 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } // patch to mark it as failed succeeded, do not requeue + r.restoreTracker.Delete(restore.Namespace, restore.Name) return ctrl.Result{}, nil default: r.logger.WithFields(logrus.Fields{ @@ -283,6 +291,8 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil { log.WithError(errors.WithStack(err)).Info("Error updating restore's final status") + // delete from tracker so next reconcile fails the restore + r.restoreTracker.Delete(restore.Namespace, restore.Name) // return the error so the status can be re-processed; it's currently still not completed or failed return ctrl.Result{}, err } diff --git a/pkg/controller/restore_controller_test.go b/pkg/controller/restore_controller_test.go index 9437f1d1c9..68661510b0 100644 --- a/pkg/controller/restore_controller_test.go +++ b/pkg/controller/restore_controller_test.go @@ -111,6 +111,7 @@ func TestFetchBackupInfo(t *testing.T) { logger, logrus.InfoLevel, func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, + NewRestoreTracker(), NewFakeSingleObjectBackupStoreGetter(backupStore), metrics.NewServerMetrics(), formatFlag, @@ -189,6 +190,7 @@ func TestProcessQueueItemSkips(t *testing.T) { logger, logrus.InfoLevel, nil, + NewRestoreTracker(), nil, // backupStoreGetter metrics.NewServerMetrics(), formatFlag, @@ -444,6 +446,7 @@ func TestRestoreReconcile(t *testing.T) { logger, logrus.InfoLevel, func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, + NewRestoreTracker(), NewFakeSingleObjectBackupStoreGetter(backupStore), metrics.NewServerMetrics(), formatFlag, @@ -617,6 +620,7 @@ func TestValidateAndCompleteWhenScheduleNameSpecified(t *testing.T) { logger, logrus.DebugLevel, func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, + NewRestoreTracker(), NewFakeSingleObjectBackupStoreGetter(backupStore), metrics.NewServerMetrics(), formatFlag, @@ -710,6 +714,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) { logger, logrus.DebugLevel, func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, + NewRestoreTracker(), NewFakeSingleObjectBackupStoreGetter(backupStore), metrics.NewServerMetrics(), formatFlag, diff --git a/pkg/controller/restore_tracker.go b/pkg/controller/restore_tracker.go new file mode 100644 index 0000000000..6f20f6fe69 --- /dev/null +++ b/pkg/controller/restore_tracker.go @@ -0,0 +1,10 @@ +package controller + +type RestoreTracker interface { + BackupTracker +} + +// NewRestoreTracker returns a new RestoreTracker. +func NewRestoreTracker() RestoreTracker { + return NewBackupTracker() +}