Skip to content

Commit

Permalink
oadp-1.3: OADP-4265: Reconcile To Fail: Add backup/restore trackers (#…
Browse files Browse the repository at this point in the history
…324)

* OADP-4265: Reconcile To Fail: Add backup/restore trackers

Signed-off-by: Tiger Kaovilai <[email protected]>

* Apply suggestions from code review: backupTracker

* Address restoreTracker feedback

Signed-off-by: Tiger Kaovilai <[email protected]>

* s/delete from/add to/ in the comment

* unit test fix

Signed-off-by: Tiger Kaovilai <[email protected]>

* backup_controller unit test

Signed-off-by: Tiger Kaovilai <[email protected]>

* restore_controller unit test

Signed-off-by: Tiger Kaovilai <[email protected]>

* `make update`

Signed-off-by: Tiger Kaovilai <[email protected]>

* mock patch to fail failure due to connection refused

Signed-off-by: Tiger Kaovilai <[email protected]>

---------

Signed-off-by: Tiger Kaovilai <[email protected]>
  • Loading branch information
kaovilai authored Jun 27, 2024
1 parent 4b5cf07 commit e703a2b
Show file tree
Hide file tree
Showing 7 changed files with 457 additions and 4 deletions.
9 changes: 8 additions & 1 deletion pkg/controller/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,11 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
case "", velerov1api.BackupPhaseNew:
// only process new backups
case velerov1api.BackupPhaseInProgress:
// if backup is in progress, we should not process it again
if b.backupTracker.Contains(original.Namespace, original.Name) {
log.Debug("Backup is in progress, skipping")
return ctrl.Result{}, nil
}
// if backup phase is in progress, we should not process it again
// we want to mark it as failed to avoid it being stuck in progress
// if so, mark it as failed, last loop did not successfully complete the backup
log.Debug("Backup has in progress status from prior reconcile, marking it as failed")
Expand Down Expand Up @@ -266,6 +270,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr

// update status
if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil {
// if we fail to patch to inprogress, we will try again in the next reconcile loop
return ctrl.Result{}, errors.Wrapf(err, "error updating Backup status to %s", request.Status.Phase)
}
// store ref to just-updated item for creating patch
Expand Down Expand Up @@ -323,6 +328,8 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
log.Info("Updating backup's final status")
if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil {
log.WithError(err).Error("error updating backup's final status")
// delete from tracker so next reconcile fails the backup
b.backupTracker.Delete(original.Namespace, original.Name)
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err
}
Expand Down
76 changes: 73 additions & 3 deletions pkg/controller/backup_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"reflect"
"sort"
"strings"
"syscall"
"testing"
"time"

Expand All @@ -43,6 +44,7 @@ import (
"k8s.io/utils/clock"
testclocks "k8s.io/utils/clock/testing"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"

kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
Expand All @@ -63,6 +65,7 @@ import (
pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks"
biav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/backupitemaction/v2"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
velerotestmocks "github.com/vmware-tanzu/velero/pkg/test/mocks"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)
Expand Down Expand Up @@ -129,9 +132,10 @@ func TestProcessBackupNonProcessedItems(t *testing.T) {
)

c := &backupReconciler{
kbClient: velerotest.NewFakeControllerRuntimeClient(t),
formatFlag: formatFlag,
logger: logger,
kbClient: velerotest.NewFakeControllerRuntimeClient(t),
formatFlag: formatFlag,
logger: logger,
backupTracker: NewBackupTracker(),
}
if test.backup != nil {
require.NoError(t, c.kbClient.Create(context.Background(), test.backup))
Expand All @@ -148,6 +152,72 @@ func TestProcessBackupNonProcessedItems(t *testing.T) {
}
}

// OADP Carry: Test that backup that has status inProgress on reconcile is changed to failed if velero has no memory of it still in-progress.
func TestProcessBackupInProgressFailOnSecondReconcile(t *testing.T) {
tests := []struct {
name string
tracked bool
reconciledPhase velerov1api.BackupPhase
expectedErr error
mockFailedPatch bool
}{
{
name: "InProgress backup tracked as being in-progress is not processed",
tracked: true,
reconciledPhase: velerov1api.BackupPhaseInProgress,
},
{
name: "InProgress backup untracked as being in-progress is marked as failed",
tracked: false,
reconciledPhase: velerov1api.BackupPhaseFailed,
},
{
name: "InProgress backup untracked is marked as failed, if patch fails, err is returned from reconcile to retry patch again, backup still inprogress",
tracked: false,
reconciledPhase: velerov1api.BackupPhaseInProgress,
mockFailedPatch: true,
expectedErr: syscall.ECONNREFUSED,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
formatFlag := logging.FormatText
var (
logger = logging.DefaultLogger(logrus.DebugLevel, formatFlag)
)
backup := defaultBackup().Phase(velerov1api.BackupPhaseInProgress).Result()
var kclient kbclient.Client
fakeKclient := velerotest.NewFakeControllerRuntimeClient(t, backup)
if test.mockFailedPatch {
mockClient := velerotestmocks.NewClient(t)
mockClient.On("Patch", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(syscall.ECONNREFUSED).Once()
mockClient.On("Get", mock.Anything, mock.AnythingOfType("types.NamespacedName"), mock.AnythingOfType("*v1.Backup")).Return(func(ctx context.Context, key client.ObjectKey, obj client.Object) error {
return fakeKclient.Get(ctx, key, obj)
})
kclient = mockClient
} else {
kclient = fakeKclient
}
c := &backupReconciler{
kbClient: kclient,
formatFlag: formatFlag,
logger: logger,
backupTracker: NewBackupTracker(),
}
if test.tracked {
c.backupTracker.Add(backup.Namespace, backup.Name)
}
_, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: backup.Namespace, Name: backup.Name}})
assert.Equal(t, test.expectedErr, err)
reconciledBackup := velerov1api.Backup{}
err = c.kbClient.Get(context.Background(), types.NamespacedName{Namespace: backup.Namespace, Name: backup.Name}, &reconciledBackup)
assert.Nil(t, err)
assert.Equal(t, test.reconciledPhase, reconciledBackup.Status.Phase)
})
}
}

func TestProcessBackupValidationFailures(t *testing.T) {
defaultBackupLocation := builder.ForBackupStorageLocation("velero", "loc-1").Result()

Expand Down
9 changes: 9 additions & 0 deletions pkg/controller/restore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ type restoreReconciler struct {

newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter
failingTracker RestoreTracker
}

type backupInfo struct {
Expand Down Expand Up @@ -134,6 +135,7 @@ func NewRestoreReconciler(
kbClient: kbClient,
logger: logger,
restoreLogLevel: restoreLogLevel,
failingTracker: NewRestoreTracker(),
metrics: metrics,
logFormat: logFormat,
clock: &clock.RealClock{},
Expand Down Expand Up @@ -210,6 +212,10 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
case "", api.RestorePhaseNew:
// only process new restores
case api.RestorePhaseInProgress:
if !r.failingTracker.Contains(restore.Namespace, restore.Name) {
log.Debug("Restore in progress, skipping")
return ctrl.Result{}, nil
}
// if restore is in progress, we should not process it again
// we want to mark it as failed to avoid it being stuck in progress
// if so, mark it as failed, last loop did not successfully complete the restore
Expand All @@ -222,6 +228,7 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}
// patch to mark it as failed succeeded, do not requeue
r.failingTracker.Delete(restore.Namespace, restore.Name)
return ctrl.Result{}, nil
default:
r.logger.WithFields(logrus.Fields{
Expand Down Expand Up @@ -283,6 +290,8 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct

if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.WithError(errors.WithStack(err)).Info("Error updating restore's final status")
// add to failureTracker so next reconcile fails the restore
r.failingTracker.Add(restore.Namespace, restore.Name)
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err
}
Expand Down
76 changes: 76 additions & 0 deletions pkg/controller/restore_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bytes"
"context"
"io"
"syscall"
"testing"
"time"

Expand All @@ -34,6 +35,7 @@ import (
clocktesting "k8s.io/utils/clock/testing"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"

corev1 "k8s.io/api/core/v1"

Expand All @@ -48,6 +50,7 @@ import (
riav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/restoreitemaction/v2"
pkgrestore "github.com/vmware-tanzu/velero/pkg/restore"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
velerotestmocks "github.com/vmware-tanzu/velero/pkg/test/mocks"
"github.com/vmware-tanzu/velero/pkg/util/logging"
"github.com/vmware-tanzu/velero/pkg/util/results"
"github.com/vmware-tanzu/velero/pkg/volume"
Expand Down Expand Up @@ -922,6 +925,79 @@ func TestMostRecentCompletedBackup(t *testing.T) {
assert.Equal(t, expected, mostRecentCompletedBackup(backups))
}

// OADP Carry: Test that restore that has status inProgress on reconcile is changed to failed if velero has memory of it failing.
func TestProcessRestoreInProgressFailOnSecondReconcile(t *testing.T) {
tests := []struct {
name string
trackedAsFailed bool
reconciledPhase velerov1api.RestorePhase
expectedErr error
mockFailedPatch bool
}{
{
name: "InProgress restore not tracked as failing is not processed",
trackedAsFailed: false,
reconciledPhase: velerov1api.RestorePhaseInProgress,
},
{
name: "InProgress restore tracked as failing is marked as failed",
trackedAsFailed: true,
reconciledPhase: velerov1api.RestorePhaseFailed,
},
{
name: "InProgress restore tracked as failing is marked as failed, if patch fails, err is returned from reconcile to retry patch again, restore still inprogress",
trackedAsFailed: true,
reconciledPhase: velerov1api.RestorePhaseInProgress,
mockFailedPatch: true,
expectedErr: syscall.ECONNREFUSED,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
formatFlag := logging.FormatText
var (
logger = logging.DefaultLogger(logrus.DebugLevel, formatFlag)
)
restore := &velerov1api.Restore{
ObjectMeta: metav1.ObjectMeta{
Namespace: velerov1api.DefaultNamespace,
Name: "restore-1",
},
Status: velerov1api.RestoreStatus{
Phase: velerov1api.RestorePhaseInProgress,
},
}
var kclient kbclient.Client
fakeKclient := velerotest.NewFakeControllerRuntimeClient(t, restore)
if test.mockFailedPatch {
mockClient := velerotestmocks.NewClient(t)
mockClient.On("Patch", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(syscall.ECONNREFUSED).Once()
mockClient.On("Get", mock.Anything, mock.AnythingOfType("types.NamespacedName"), mock.AnythingOfType("*v1.Restore")).Return(func(ctx context.Context, key client.ObjectKey, obj client.Object) error {
return fakeKclient.Get(ctx, key, obj)
})
kclient = mockClient
} else {
kclient = fakeKclient
}
c := &restoreReconciler{
kbClient: kclient,
logger: logger,
failingTracker: NewRestoreTracker(),
}
if test.trackedAsFailed {
c.failingTracker.Add(restore.Namespace, restore.Name)
}
_, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: restore.Namespace, Name: restore.Name}})
assert.Equal(t, test.expectedErr, err)
reconciledRestore := velerov1api.Restore{}
err = c.kbClient.Get(context.Background(), types.NamespacedName{Namespace: restore.Namespace, Name: restore.Name}, &reconciledRestore)
assert.Nil(t, err)
assert.Equal(t, test.reconciledPhase, reconciledRestore.Status.Phase)
})
}
}

func NewRestore(ns, name, backup, includeNS, includeResource string, phase velerov1api.RestorePhase) *builder.RestoreBuilder {
restore := builder.ForRestore(ns, name).Phase(phase).Backup(backup).ItemOperationTimeout(60 * time.Minute)

Expand Down
10 changes: 10 additions & 0 deletions pkg/controller/restore_tracker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package controller

type RestoreTracker interface {
BackupTracker
}

// NewRestoreTracker returns a new RestoreTracker.
func NewRestoreTracker() RestoreTracker {
return NewBackupTracker()
}
17 changes: 17 additions & 0 deletions pkg/test/mocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package test
import (
snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// VolumeSnapshotLister helps list VolumeSnapshots.
Expand All @@ -18,3 +21,17 @@ type VolumeSnapshotLister interface {
VolumeSnapshots(namespace string) snapshotv1listers.VolumeSnapshotNamespaceLister
snapshotv1listers.VolumeSnapshotListerExpansion
}

// Client knows how to perform CRUD operations on Kubernetes objects.

//go:generate mockery --name Client
type Client interface {
client.Reader
client.Writer
client.StatusClient

// Scheme returns the scheme this client is using.
Scheme() *runtime.Scheme
// RESTMapper returns the rest this client is using.
RESTMapper() meta.RESTMapper
}
Loading

0 comments on commit e703a2b

Please sign in to comment.