Skip to content

Commit

Permalink
Don't fail backup deletion if downloading tarball fails (vmware-tanzu…
Browse files Browse the repository at this point in the history
…#2993)

* Don't fail backup if downloading tarball fails

Previously, we would always attempt to download the tarball for a backup
for processing DeleteItemAction plugins, even if there weren't any.
This caused an issue for some users in the case where the backup tarball
had been deleted from object storage as the backup deletion would fail.

Now, we only attempt to download the tarball in the case where there are
DeleteItemAction plugins. If downloading that tarball fails, we log
the error, skip the processing of the DeleteItemAction plugins and
proceed with the rest of the deletion.

Signed-off-by: Bridget McErlean <[email protected]>

* Skip file removal in closeAndRemoveFile if nil

Signed-off-by: Bridget McErlean <[email protected]>
  • Loading branch information
zubron authored and georgettica committed Dec 23, 2020
1 parent 3ac2fd5 commit 1fcfedb
Show file tree
Hide file tree
Showing 4 changed files with 289 additions and 20 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/2993-zubron
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed an issue where the deletion of a backup would fail if the backup tarball couldn't be downloaded from object storage. Now the tarball is only downloaded if there are associated DeleteItemAction plugins and if downloading the tarball fails, the plugins are skipped.
4 changes: 4 additions & 0 deletions pkg/controller/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,10 @@ func persistBackup(backup *pkgbackup.Request,
}

func closeAndRemoveFile(file *os.File, log logrus.FieldLogger) {
if file == nil {
log.Debug("Skipping removal of file due to nil file pointer")
return
}
if err := file.Close(); err != nil {
log.WithError(err).WithField("file", file.Name()).Error("error closing file")
}
Expand Down
43 changes: 23 additions & 20 deletions pkg/controller/backup_deletion_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,34 +295,37 @@ func (c *backupDeletionController) processRequest(req *velerov1api.DeleteBackupR
errs = append(errs, err.Error())
}

// Download the tarball
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
if err != nil {
return errors.Wrap(err, "error downloading backup")
}
defer closeAndRemoveFile(backupFile, c.logger)

actions, err := pluginManager.GetDeleteItemActions()
log.Debugf("%d actions before invoking actions", len(actions))
if err != nil {
return errors.Wrap(err, "error getting delete item actions")
}
// don't defer CleanupClients here, since it was already called above.

ctx := &delete.Context{
Backup: backup,
BackupReader: backupFile,
Actions: actions,
Log: c.logger,
DiscoveryHelper: c.helper,
Filesystem: filesystem.NewFileSystem(),
}
if len(actions) > 0 {
// Download the tarball
backupFile, err := downloadToTempFile(backup.Name, backupStore, log)
defer closeAndRemoveFile(backupFile, c.logger)

// Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects.
// but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources.
err = delete.InvokeDeleteActions(ctx)
if err != nil {
return errors.Wrap(err, "error invoking delete item actions")
if err != nil {
log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name)
} else {
ctx := &delete.Context{
Backup: backup,
BackupReader: backupFile,
Actions: actions,
Log: c.logger,
DiscoveryHelper: c.helper,
Filesystem: filesystem.NewFileSystem(),
}

// Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects.
// but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources.
err = delete.InvokeDeleteActions(ctx)
if err != nil {
return errors.Wrap(err, "error invoking delete item actions")
}
}
}

if backupStore != nil {
Expand Down
261 changes: 261 additions & 0 deletions pkg/controller/backup_deletion_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ import (
persistencemocks "github.com/vmware-tanzu/velero/pkg/persistence/mocks"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
"github.com/vmware-tanzu/velero/pkg/plugin/velero/mocks"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/volume"
)
Expand Down Expand Up @@ -739,6 +741,265 @@ func TestBackupDeletionControllerProcessRequest(t *testing.T) {
// Make sure snapshot was deleted
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
})

t.Run("backup is not downloaded when there are no DeleteItemAction plugins", func(t *testing.T) {
backup := builder.ForBackup(velerov1api.DefaultNamespace, "foo").Result()
backup.UID = "uid"
backup.Spec.StorageLocation = "primary"

td := setupBackupDeletionControllerTest(t, backup)

location := &velerov1api.BackupStorageLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: backup.Spec.StorageLocation,
},
Spec: velerov1api.BackupStorageLocationSpec{
Provider: "objStoreProvider",
StorageType: velerov1api.StorageType{
ObjectStorage: &velerov1api.ObjectStorageLocation{
Bucket: "bucket",
},
},
},
}

require.NoError(t, td.fakeClient.Create(context.Background(), location))

snapshotLocation := &velerov1api.VolumeSnapshotLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: "vsl-1",
},
Spec: velerov1api.VolumeSnapshotLocationSpec{
Provider: "provider-1",
},
}
require.NoError(t, td.sharedInformers.Velero().V1().VolumeSnapshotLocations().Informer().GetStore().Add(snapshotLocation))

// Clear out req labels to make sure the controller adds them and does not
// panic when encountering a nil Labels map
// (https://github.com/vmware-tanzu/velero/issues/1546)
td.req.Labels = nil

td.client.PrependReactor("get", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")

td.client.PrependReactor("patch", "deletebackuprequests", func(action core.Action) (bool, runtime.Object, error) {
return true, td.req, nil
})

td.client.PrependReactor("patch", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})

snapshots := []*volume.Snapshot{
{
Spec: volume.SnapshotSpec{
Location: "vsl-1",
},
Status: volume.SnapshotStatus{
ProviderSnapshotID: "snap-1",
},
},
}

pluginManager := &pluginmocks.Manager{}
pluginManager.On("GetVolumeSnapshotter", "provider-1").Return(td.volumeSnapshotter, nil)
pluginManager.On("GetDeleteItemActions").Return([]velero.DeleteItemAction{}, nil)
pluginManager.On("CleanupClients")
td.controller.newPluginManager = func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }

td.backupStore.On("GetBackupVolumeSnapshots", td.req.Spec.BackupName).Return(snapshots, nil)
td.backupStore.On("DeleteBackup", td.req.Spec.BackupName).Return(nil)

err := td.controller.processRequest(td.req)
require.NoError(t, err)

td.backupStore.AssertNotCalled(t, "GetBackupContents", td.req.Spec.BackupName)

expectedActions := []core.Action{
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-name":"foo"}},"status":{"phase":"InProgress"}}`),
),
core.NewGetAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-uid":"uid"}}}`),
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
types.MergePatchType,
[]byte(`{"status":{"phase":"Deleting"}}`),
),
core.NewDeleteAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"status":{"phase":"Processed"}}`),
),
core.NewDeleteCollectionAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
pkgbackup.NewDeleteBackupRequestListOptions(td.req.Spec.BackupName, "uid"),
),
}

velerotest.CompareActions(t, expectedActions, td.client.Actions())

// Make sure snapshot was deleted
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
})

t.Run("backup is still deleted if downloading tarball fails for DeleteItemAction plugins", func(t *testing.T) {
backup := builder.ForBackup(velerov1api.DefaultNamespace, "foo").Result()
backup.UID = "uid"
backup.Spec.StorageLocation = "primary"

td := setupBackupDeletionControllerTest(t, backup)

location := &velerov1api.BackupStorageLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: backup.Spec.StorageLocation,
},
Spec: velerov1api.BackupStorageLocationSpec{
Provider: "objStoreProvider",
StorageType: velerov1api.StorageType{
ObjectStorage: &velerov1api.ObjectStorageLocation{
Bucket: "bucket",
},
},
},
}

require.NoError(t, td.fakeClient.Create(context.Background(), location))

snapshotLocation := &velerov1api.VolumeSnapshotLocation{
ObjectMeta: metav1.ObjectMeta{
Namespace: backup.Namespace,
Name: "vsl-1",
},
Spec: velerov1api.VolumeSnapshotLocationSpec{
Provider: "provider-1",
},
}
require.NoError(t, td.sharedInformers.Velero().V1().VolumeSnapshotLocations().Informer().GetStore().Add(snapshotLocation))

// Clear out req labels to make sure the controller adds them and does not
// panic when encountering a nil Labels map
// (https://github.com/vmware-tanzu/velero/issues/1546)
td.req.Labels = nil

td.client.PrependReactor("get", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})
td.volumeSnapshotter.SnapshotsTaken.Insert("snap-1")

td.client.PrependReactor("patch", "deletebackuprequests", func(action core.Action) (bool, runtime.Object, error) {
return true, td.req, nil
})

td.client.PrependReactor("patch", "backups", func(action core.Action) (bool, runtime.Object, error) {
return true, backup, nil
})

snapshots := []*volume.Snapshot{
{
Spec: volume.SnapshotSpec{
Location: "vsl-1",
},
Status: volume.SnapshotStatus{
ProviderSnapshotID: "snap-1",
},
},
}

pluginManager := &pluginmocks.Manager{}
pluginManager.On("GetVolumeSnapshotter", "provider-1").Return(td.volumeSnapshotter, nil)
pluginManager.On("GetDeleteItemActions").Return([]velero.DeleteItemAction{new(mocks.DeleteItemAction)}, nil)
pluginManager.On("CleanupClients")
td.controller.newPluginManager = func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }

td.backupStore.On("GetBackupVolumeSnapshots", td.req.Spec.BackupName).Return(snapshots, nil)
td.backupStore.On("GetBackupContents", td.req.Spec.BackupName).Return(nil, fmt.Errorf("error downloading tarball"))
td.backupStore.On("DeleteBackup", td.req.Spec.BackupName).Return(nil)

err := td.controller.processRequest(td.req)
require.NoError(t, err)

expectedActions := []core.Action{
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-name":"foo"}},"status":{"phase":"InProgress"}}`),
),
core.NewGetAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"metadata":{"labels":{"velero.io/backup-uid":"uid"}}}`),
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
types.MergePatchType,
[]byte(`{"status":{"phase":"Deleting"}}`),
),
core.NewDeleteAction(
velerov1api.SchemeGroupVersion.WithResource("backups"),
td.req.Namespace,
td.req.Spec.BackupName,
),
core.NewPatchAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
td.req.Name,
types.MergePatchType,
[]byte(`{"status":{"phase":"Processed"}}`),
),
core.NewDeleteCollectionAction(
velerov1api.SchemeGroupVersion.WithResource("deletebackuprequests"),
td.req.Namespace,
pkgbackup.NewDeleteBackupRequestListOptions(td.req.Spec.BackupName, "uid"),
),
}

velerotest.CompareActions(t, expectedActions, td.client.Actions())

// Make sure snapshot was deleted
assert.Equal(t, 0, td.volumeSnapshotter.SnapshotsTaken.Len())
})
}

func TestBackupDeletionControllerDeleteExpiredRequests(t *testing.T) {
Expand Down

0 comments on commit 1fcfedb

Please sign in to comment.