From e703a2b9627f1164e656564b536d0a4c0fc1733f Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Thu, 27 Jun 2024 14:47:56 -0400 Subject: [PATCH] oadp-1.3: OADP-4265: Reconcile To Fail: Add backup/restore trackers (#324) * OADP-4265: Reconcile To Fail: Add backup/restore trackers Signed-off-by: Tiger Kaovilai * Apply suggestions from code review: backupTracker * Address restoreTracker feedback Signed-off-by: Tiger Kaovilai * s/delete from/add to/ in the comment * unit test fix Signed-off-by: Tiger Kaovilai * backup_controller unit test Signed-off-by: Tiger Kaovilai * restore_controller unit test Signed-off-by: Tiger Kaovilai * `make update` Signed-off-by: Tiger Kaovilai * mock patch to fail failure due to connection refused Signed-off-by: Tiger Kaovilai --------- Signed-off-by: Tiger Kaovilai --- pkg/controller/backup_controller.go | 9 +- pkg/controller/backup_controller_test.go | 76 ++++++- pkg/controller/restore_controller.go | 9 + pkg/controller/restore_controller_test.go | 76 +++++++ pkg/controller/restore_tracker.go | 10 + pkg/test/mocks.go | 17 ++ pkg/test/mocks/Client.go | 264 ++++++++++++++++++++++ 7 files changed, 457 insertions(+), 4 deletions(-) create mode 100644 pkg/controller/restore_tracker.go create mode 100644 pkg/test/mocks/Client.go diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index b3be1d0a33..a3cf54718e 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -234,7 +234,11 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr case "", velerov1api.BackupPhaseNew: // only process new backups case velerov1api.BackupPhaseInProgress: - // if backup is in progress, we should not process it again + if b.backupTracker.Contains(original.Namespace, original.Name) { + log.Debug("Backup is in progress, skipping") + return ctrl.Result{}, nil + } + // if backup phase is in progress, we should not process it again // we want to mark it as failed to avoid it being stuck in progress // if so, mark it as failed, last loop did not successfully complete the backup log.Debug("Backup has in progress status from prior reconcile, marking it as failed") @@ -266,6 +270,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr // update status if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil { + // if we fail to patch to inprogress, we will try again in the next reconcile loop return ctrl.Result{}, errors.Wrapf(err, "error updating Backup status to %s", request.Status.Phase) } // store ref to just-updated item for creating patch @@ -323,6 +328,8 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr log.Info("Updating backup's final status") if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil { log.WithError(err).Error("error updating backup's final status") + // delete from tracker so next reconcile fails the backup + b.backupTracker.Delete(original.Namespace, original.Name) // return the error so the status can be re-processed; it's currently still not completed or failed return ctrl.Result{}, err } diff --git a/pkg/controller/backup_controller_test.go b/pkg/controller/backup_controller_test.go index df2e22a22a..576b536472 100644 --- a/pkg/controller/backup_controller_test.go +++ b/pkg/controller/backup_controller_test.go @@ -24,6 +24,7 @@ import ( "reflect" "sort" "strings" + "syscall" "testing" "time" @@ -43,6 +44,7 @@ import ( "k8s.io/utils/clock" testclocks "k8s.io/utils/clock/testing" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" kbclient "sigs.k8s.io/controller-runtime/pkg/client" kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube" @@ -63,6 +65,7 @@ import ( pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks" biav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/backupitemaction/v2" velerotest "github.com/vmware-tanzu/velero/pkg/test" + velerotestmocks "github.com/vmware-tanzu/velero/pkg/test/mocks" "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/logging" ) @@ -129,9 +132,10 @@ func TestProcessBackupNonProcessedItems(t *testing.T) { ) c := &backupReconciler{ - kbClient: velerotest.NewFakeControllerRuntimeClient(t), - formatFlag: formatFlag, - logger: logger, + kbClient: velerotest.NewFakeControllerRuntimeClient(t), + formatFlag: formatFlag, + logger: logger, + backupTracker: NewBackupTracker(), } if test.backup != nil { require.NoError(t, c.kbClient.Create(context.Background(), test.backup)) @@ -148,6 +152,72 @@ func TestProcessBackupNonProcessedItems(t *testing.T) { } } +// OADP Carry: Test that backup that has status inProgress on reconcile is changed to failed if velero has no memory of it still in-progress. +func TestProcessBackupInProgressFailOnSecondReconcile(t *testing.T) { + tests := []struct { + name string + tracked bool + reconciledPhase velerov1api.BackupPhase + expectedErr error + mockFailedPatch bool + }{ + { + name: "InProgress backup tracked as being in-progress is not processed", + tracked: true, + reconciledPhase: velerov1api.BackupPhaseInProgress, + }, + { + name: "InProgress backup untracked as being in-progress is marked as failed", + tracked: false, + reconciledPhase: velerov1api.BackupPhaseFailed, + }, + { + name: "InProgress backup untracked is marked as failed, if patch fails, err is returned from reconcile to retry patch again, backup still inprogress", + tracked: false, + reconciledPhase: velerov1api.BackupPhaseInProgress, + mockFailedPatch: true, + expectedErr: syscall.ECONNREFUSED, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + formatFlag := logging.FormatText + var ( + logger = logging.DefaultLogger(logrus.DebugLevel, formatFlag) + ) + backup := defaultBackup().Phase(velerov1api.BackupPhaseInProgress).Result() + var kclient kbclient.Client + fakeKclient := velerotest.NewFakeControllerRuntimeClient(t, backup) + if test.mockFailedPatch { + mockClient := velerotestmocks.NewClient(t) + mockClient.On("Patch", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(syscall.ECONNREFUSED).Once() + mockClient.On("Get", mock.Anything, mock.AnythingOfType("types.NamespacedName"), mock.AnythingOfType("*v1.Backup")).Return(func(ctx context.Context, key client.ObjectKey, obj client.Object) error { + return fakeKclient.Get(ctx, key, obj) + }) + kclient = mockClient + } else { + kclient = fakeKclient + } + c := &backupReconciler{ + kbClient: kclient, + formatFlag: formatFlag, + logger: logger, + backupTracker: NewBackupTracker(), + } + if test.tracked { + c.backupTracker.Add(backup.Namespace, backup.Name) + } + _, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: backup.Namespace, Name: backup.Name}}) + assert.Equal(t, test.expectedErr, err) + reconciledBackup := velerov1api.Backup{} + err = c.kbClient.Get(context.Background(), types.NamespacedName{Namespace: backup.Namespace, Name: backup.Name}, &reconciledBackup) + assert.Nil(t, err) + assert.Equal(t, test.reconciledPhase, reconciledBackup.Status.Phase) + }) + } +} + func TestProcessBackupValidationFailures(t *testing.T) { defaultBackupLocation := builder.ForBackupStorageLocation("velero", "loc-1").Result() diff --git a/pkg/controller/restore_controller.go b/pkg/controller/restore_controller.go index d58c21cb3a..8671bd17f3 100644 --- a/pkg/controller/restore_controller.go +++ b/pkg/controller/restore_controller.go @@ -106,6 +106,7 @@ type restoreReconciler struct { newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager backupStoreGetter persistence.ObjectBackupStoreGetter + failingTracker RestoreTracker } type backupInfo struct { @@ -134,6 +135,7 @@ func NewRestoreReconciler( kbClient: kbClient, logger: logger, restoreLogLevel: restoreLogLevel, + failingTracker: NewRestoreTracker(), metrics: metrics, logFormat: logFormat, clock: &clock.RealClock{}, @@ -210,6 +212,10 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct case "", api.RestorePhaseNew: // only process new restores case api.RestorePhaseInProgress: + if !r.failingTracker.Contains(restore.Namespace, restore.Name) { + log.Debug("Restore in progress, skipping") + return ctrl.Result{}, nil + } // if restore is in progress, we should not process it again // we want to mark it as failed to avoid it being stuck in progress // if so, mark it as failed, last loop did not successfully complete the restore @@ -222,6 +228,7 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } // patch to mark it as failed succeeded, do not requeue + r.failingTracker.Delete(restore.Namespace, restore.Name) return ctrl.Result{}, nil default: r.logger.WithFields(logrus.Fields{ @@ -283,6 +290,8 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil { log.WithError(errors.WithStack(err)).Info("Error updating restore's final status") + // add to failureTracker so next reconcile fails the restore + r.failingTracker.Add(restore.Namespace, restore.Name) // return the error so the status can be re-processed; it's currently still not completed or failed return ctrl.Result{}, err } diff --git a/pkg/controller/restore_controller_test.go b/pkg/controller/restore_controller_test.go index 9437f1d1c9..de029296d5 100644 --- a/pkg/controller/restore_controller_test.go +++ b/pkg/controller/restore_controller_test.go @@ -20,6 +20,7 @@ import ( "bytes" "context" "io" + "syscall" "testing" "time" @@ -34,6 +35,7 @@ import ( clocktesting "k8s.io/utils/clock/testing" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + kbclient "sigs.k8s.io/controller-runtime/pkg/client" corev1 "k8s.io/api/core/v1" @@ -48,6 +50,7 @@ import ( riav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/restoreitemaction/v2" pkgrestore "github.com/vmware-tanzu/velero/pkg/restore" velerotest "github.com/vmware-tanzu/velero/pkg/test" + velerotestmocks "github.com/vmware-tanzu/velero/pkg/test/mocks" "github.com/vmware-tanzu/velero/pkg/util/logging" "github.com/vmware-tanzu/velero/pkg/util/results" "github.com/vmware-tanzu/velero/pkg/volume" @@ -922,6 +925,79 @@ func TestMostRecentCompletedBackup(t *testing.T) { assert.Equal(t, expected, mostRecentCompletedBackup(backups)) } +// OADP Carry: Test that restore that has status inProgress on reconcile is changed to failed if velero has memory of it failing. +func TestProcessRestoreInProgressFailOnSecondReconcile(t *testing.T) { + tests := []struct { + name string + trackedAsFailed bool + reconciledPhase velerov1api.RestorePhase + expectedErr error + mockFailedPatch bool + }{ + { + name: "InProgress restore not tracked as failing is not processed", + trackedAsFailed: false, + reconciledPhase: velerov1api.RestorePhaseInProgress, + }, + { + name: "InProgress restore tracked as failing is marked as failed", + trackedAsFailed: true, + reconciledPhase: velerov1api.RestorePhaseFailed, + }, + { + name: "InProgress restore tracked as failing is marked as failed, if patch fails, err is returned from reconcile to retry patch again, restore still inprogress", + trackedAsFailed: true, + reconciledPhase: velerov1api.RestorePhaseInProgress, + mockFailedPatch: true, + expectedErr: syscall.ECONNREFUSED, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + formatFlag := logging.FormatText + var ( + logger = logging.DefaultLogger(logrus.DebugLevel, formatFlag) + ) + restore := &velerov1api.Restore{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1api.DefaultNamespace, + Name: "restore-1", + }, + Status: velerov1api.RestoreStatus{ + Phase: velerov1api.RestorePhaseInProgress, + }, + } + var kclient kbclient.Client + fakeKclient := velerotest.NewFakeControllerRuntimeClient(t, restore) + if test.mockFailedPatch { + mockClient := velerotestmocks.NewClient(t) + mockClient.On("Patch", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(syscall.ECONNREFUSED).Once() + mockClient.On("Get", mock.Anything, mock.AnythingOfType("types.NamespacedName"), mock.AnythingOfType("*v1.Restore")).Return(func(ctx context.Context, key client.ObjectKey, obj client.Object) error { + return fakeKclient.Get(ctx, key, obj) + }) + kclient = mockClient + } else { + kclient = fakeKclient + } + c := &restoreReconciler{ + kbClient: kclient, + logger: logger, + failingTracker: NewRestoreTracker(), + } + if test.trackedAsFailed { + c.failingTracker.Add(restore.Namespace, restore.Name) + } + _, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: restore.Namespace, Name: restore.Name}}) + assert.Equal(t, test.expectedErr, err) + reconciledRestore := velerov1api.Restore{} + err = c.kbClient.Get(context.Background(), types.NamespacedName{Namespace: restore.Namespace, Name: restore.Name}, &reconciledRestore) + assert.Nil(t, err) + assert.Equal(t, test.reconciledPhase, reconciledRestore.Status.Phase) + }) + } +} + func NewRestore(ns, name, backup, includeNS, includeResource string, phase velerov1api.RestorePhase) *builder.RestoreBuilder { restore := builder.ForRestore(ns, name).Phase(phase).Backup(backup).ItemOperationTimeout(60 * time.Minute) diff --git a/pkg/controller/restore_tracker.go b/pkg/controller/restore_tracker.go new file mode 100644 index 0000000000..6f20f6fe69 --- /dev/null +++ b/pkg/controller/restore_tracker.go @@ -0,0 +1,10 @@ +package controller + +type RestoreTracker interface { + BackupTracker +} + +// NewRestoreTracker returns a new RestoreTracker. +func NewRestoreTracker() RestoreTracker { + return NewBackupTracker() +} diff --git a/pkg/test/mocks.go b/pkg/test/mocks.go index 9a86d2b705..a77d2f61d5 100644 --- a/pkg/test/mocks.go +++ b/pkg/test/mocks.go @@ -3,7 +3,10 @@ package test import ( snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) // VolumeSnapshotLister helps list VolumeSnapshots. @@ -18,3 +21,17 @@ type VolumeSnapshotLister interface { VolumeSnapshots(namespace string) snapshotv1listers.VolumeSnapshotNamespaceLister snapshotv1listers.VolumeSnapshotListerExpansion } + +// Client knows how to perform CRUD operations on Kubernetes objects. + +//go:generate mockery --name Client +type Client interface { + client.Reader + client.Writer + client.StatusClient + + // Scheme returns the scheme this client is using. + Scheme() *runtime.Scheme + // RESTMapper returns the rest this client is using. + RESTMapper() meta.RESTMapper +} diff --git a/pkg/test/mocks/Client.go b/pkg/test/mocks/Client.go new file mode 100644 index 0000000000..4c77df06bc --- /dev/null +++ b/pkg/test/mocks/Client.go @@ -0,0 +1,264 @@ +// Code generated by mockery v2.42.1. DO NOT EDIT. + +package mocks + +import ( + context "context" + + client "sigs.k8s.io/controller-runtime/pkg/client" + + meta "k8s.io/apimachinery/pkg/api/meta" + + mock "github.com/stretchr/testify/mock" + + runtime "k8s.io/apimachinery/pkg/runtime" + + types "k8s.io/apimachinery/pkg/types" +) + +// Client is an autogenerated mock type for the Client type +type Client struct { + mock.Mock +} + +// Create provides a mock function with given fields: ctx, obj, opts +func (_m *Client) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + if len(ret) == 0 { + panic("no return value specified for Create") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.CreateOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Delete provides a mock function with given fields: ctx, obj, opts +func (_m *Client) Delete(ctx context.Context, obj client.Object, opts ...client.DeleteOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + if len(ret) == 0 { + panic("no return value specified for Delete") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.DeleteOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// DeleteAllOf provides a mock function with given fields: ctx, obj, opts +func (_m *Client) DeleteAllOf(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + if len(ret) == 0 { + panic("no return value specified for DeleteAllOf") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.DeleteAllOfOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Get provides a mock function with given fields: ctx, key, obj +func (_m *Client) Get(ctx context.Context, key types.NamespacedName, obj client.Object) error { + ret := _m.Called(ctx, key, obj) + + if len(ret) == 0 { + panic("no return value specified for Get") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, types.NamespacedName, client.Object) error); ok { + r0 = rf(ctx, key, obj) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// List provides a mock function with given fields: ctx, list, opts +func (_m *Client) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, list) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + if len(ret) == 0 { + panic("no return value specified for List") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.ObjectList, ...client.ListOption) error); ok { + r0 = rf(ctx, list, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Patch provides a mock function with given fields: ctx, obj, patch, opts +func (_m *Client) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj, patch) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + if len(ret) == 0 { + panic("no return value specified for Patch") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, client.Patch, ...client.PatchOption) error); ok { + r0 = rf(ctx, obj, patch, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// RESTMapper provides a mock function with given fields: +func (_m *Client) RESTMapper() meta.RESTMapper { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for RESTMapper") + } + + var r0 meta.RESTMapper + if rf, ok := ret.Get(0).(func() meta.RESTMapper); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(meta.RESTMapper) + } + } + + return r0 +} + +// Scheme provides a mock function with given fields: +func (_m *Client) Scheme() *runtime.Scheme { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Scheme") + } + + var r0 *runtime.Scheme + if rf, ok := ret.Get(0).(func() *runtime.Scheme); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*runtime.Scheme) + } + } + + return r0 +} + +// Status provides a mock function with given fields: +func (_m *Client) Status() client.StatusWriter { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Status") + } + + var r0 client.StatusWriter + if rf, ok := ret.Get(0).(func() client.StatusWriter); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(client.StatusWriter) + } + } + + return r0 +} + +// Update provides a mock function with given fields: ctx, obj, opts +func (_m *Client) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + if len(ret) == 0 { + panic("no return value specified for Update") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.UpdateOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// NewClient creates a new instance of Client. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewClient(t interface { + mock.TestingT + Cleanup(func()) +}) *Client { + mock := &Client{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +}