From 457fcc689366276c20c282f26c825115035ade1a Mon Sep 17 00:00:00 2001 From: Scott Seago Date: Wed, 13 Sep 2023 13:24:09 -0400 Subject: [PATCH] issue #6807: Retry failed create when using generateName When creating resources with generateName, apimachinery does not guarantee uniqueness when it appends the random suffix to the generateName stub, so if it fails with already exists error, we need to retry. Signed-off-by: Scott Seago --- changelogs/unreleased/6943-sseago | 1 + pkg/client/retry.go | 44 +++++++++++++++++++++++ pkg/cmd/cli/backup/delete.go | 2 +- pkg/cmd/cli/serverstatus/server_status.go | 3 +- pkg/controller/gc_controller.go | 3 +- pkg/podvolume/backupper.go | 7 +++- pkg/podvolume/restorer.go | 6 +++- pkg/repository/ensurer.go | 3 +- pkg/restore/dataupload_retrieve_action.go | 3 +- 9 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 changelogs/unreleased/6943-sseago create mode 100644 pkg/client/retry.go diff --git a/changelogs/unreleased/6943-sseago b/changelogs/unreleased/6943-sseago new file mode 100644 index 0000000000..2055df15a2 --- /dev/null +++ b/changelogs/unreleased/6943-sseago @@ -0,0 +1 @@ +Retry failed create when using generateName diff --git a/pkg/client/retry.go b/pkg/client/retry.go new file mode 100644 index 0000000000..f9674e1edd --- /dev/null +++ b/pkg/client/retry.go @@ -0,0 +1,44 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package client + +import ( + "context" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/retry" + kbclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +func CreateRetryGenerateName(client kbclient.Client, ctx context.Context, obj kbclient.Object) error { + return CreateRetryGenerateNameWithFunc(obj, func() error { + return client.Create(ctx, obj, &kbclient.CreateOptions{}) + }) +} + +func CreateRetryGenerateNameWithFunc(obj kbclient.Object, createFn func() error) error { + retryCreateFn := func() error { + // needed to ensure that the name from the failed create isn't left on the object between retries + obj.SetName("") + return createFn() + } + if obj.GetGenerateName() != "" && obj.GetName() == "" { + return retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, retryCreateFn) + } else { + return createFn() + } +} diff --git a/pkg/cmd/cli/backup/delete.go b/pkg/cmd/cli/backup/delete.go index 5d235bd72f..4c81477585 100644 --- a/pkg/cmd/cli/backup/delete.go +++ b/pkg/cmd/cli/backup/delete.go @@ -124,7 +124,7 @@ func Run(o *cli.DeleteOptions) error { ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, label.GetValidName(b.Name), velerov1api.BackupUIDLabel, string(b.UID)), builder.WithGenerateName(b.Name+"-")).Result() - if err := o.Client.Create(context.TODO(), deleteRequest, &controllerclient.CreateOptions{}); err != nil { + if err := client.CreateRetryGenerateName(o.Client, context.TODO(), deleteRequest); err != nil { errs = append(errs, err) continue } diff --git a/pkg/cmd/cli/serverstatus/server_status.go b/pkg/cmd/cli/serverstatus/server_status.go index 5bf99aff33..628c4a5453 100644 --- a/pkg/cmd/cli/serverstatus/server_status.go +++ b/pkg/cmd/cli/serverstatus/server_status.go @@ -26,6 +26,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" ) type Getter interface { @@ -40,7 +41,7 @@ type DefaultServerStatusGetter struct { func (g *DefaultServerStatusGetter) GetServerStatus(kbClient kbclient.Client) (*velerov1api.ServerStatusRequest, error) { created := builder.ForServerStatusRequest(g.Namespace, "", "0").ObjectMeta(builder.WithGenerateName("velero-cli-")).Result() - if err := kbClient.Create(context.Background(), created, &kbclient.CreateOptions{}); err != nil { + if err := veleroclient.CreateRetryGenerateName(kbClient, context.Background(), created); err != nil { return nil, errors.WithStack(err) } diff --git a/pkg/controller/gc_controller.go b/pkg/controller/gc_controller.go index 3d1fe48c74..9d38e5d1b7 100644 --- a/pkg/controller/gc_controller.go +++ b/pkg/controller/gc_controller.go @@ -32,6 +32,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" pkgbackup "github.com/vmware-tanzu/velero/pkg/backup" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/util/kube" ) @@ -187,7 +188,7 @@ func (c *gcReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Re log.Info("Creating a new deletion request") ndbr := pkgbackup.NewDeleteBackupRequest(backup.Name, string(backup.UID)) ndbr.SetNamespace(backup.Namespace) - if err := c.Create(ctx, ndbr); err != nil { + if err := veleroclient.CreateRetryGenerateName(c, ctx, ndbr); err != nil { log.WithError(err).Error("error creating DeleteBackupRequests") return ctrl.Result{}, errors.Wrap(err, "error creating DeleteBackupRequest") } diff --git a/pkg/podvolume/backupper.go b/pkg/podvolume/backupper.go index 2955b06b50..e6fe25e245 100644 --- a/pkg/podvolume/backupper.go +++ b/pkg/podvolume/backupper.go @@ -31,6 +31,7 @@ import ( "github.com/vmware-tanzu/velero/internal/resourcepolicies" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/nodeagent" @@ -297,7 +298,11 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api. } volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier, b.uploaderType, pvc) - if _, err = b.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(context.TODO(), volumeBackup, metav1.CreateOptions{}); err != nil { + // TODO: once backupper is refactored to use controller-runtime, just pass client instead of anonymous func + if err := veleroclient.CreateRetryGenerateNameWithFunc(volumeBackup, func() error { + _, err := b.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(context.TODO(), volumeBackup, metav1.CreateOptions{}) + return err + }); err != nil { errs = append(errs, err) continue } diff --git a/pkg/podvolume/restorer.go b/pkg/podvolume/restorer.go index 0011f3d473..7befbf1f6d 100644 --- a/pkg/podvolume/restorer.go +++ b/pkg/podvolume/restorer.go @@ -31,6 +31,7 @@ import ( "k8s.io/client-go/tools/cache" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/nodeagent" @@ -172,7 +173,10 @@ func (r *restorer) RestorePodVolumes(data RestoreData) []error { volumeRestore := newPodVolumeRestore(data.Restore, data.Pod, data.BackupLocation, volume, backupInfo.snapshotID, repo.Spec.ResticIdentifier, backupInfo.uploaderType, data.SourceNamespace, pvc) - if err := errorOnly(r.veleroClient.VeleroV1().PodVolumeRestores(volumeRestore.Namespace).Create(context.TODO(), volumeRestore, metav1.CreateOptions{})); err != nil { + // TODO: once restorer is refactored to use controller-runtime, just pass client instead of anonymous func + if err := veleroclient.CreateRetryGenerateNameWithFunc(volumeRestore, func() error { + return errorOnly(r.veleroClient.VeleroV1().PodVolumeRestores(volumeRestore.Namespace).Create(context.TODO(), volumeRestore, metav1.CreateOptions{})) + }); err != nil { errs = append(errs, errors.WithStack(err)) continue } diff --git a/pkg/repository/ensurer.go b/pkg/repository/ensurer.go index 885363328c..255f49d038 100644 --- a/pkg/repository/ensurer.go +++ b/pkg/repository/ensurer.go @@ -28,6 +28,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" ) // Ensurer ensures that backup repositories are created and ready. @@ -107,7 +108,7 @@ func (r *Ensurer) repoLock(key BackupRepositoryKey) *sync.Mutex { func (r *Ensurer) createBackupRepositoryAndWait(ctx context.Context, namespace string, backupRepoKey BackupRepositoryKey) (*velerov1api.BackupRepository, error) { toCreate := NewBackupRepository(namespace, backupRepoKey) - if err := r.repoClient.Create(ctx, toCreate, &client.CreateOptions{}); err != nil { + if err := veleroclient.CreateRetryGenerateName(r.repoClient, ctx, toCreate); err != nil { return nil, errors.Wrap(err, "unable to create backup repository resource") } diff --git a/pkg/restore/dataupload_retrieve_action.go b/pkg/restore/dataupload_retrieve_action.go index c345e908ff..79193411e7 100644 --- a/pkg/restore/dataupload_retrieve_action.go +++ b/pkg/restore/dataupload_retrieve_action.go @@ -30,6 +30,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" velerov2alpha1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/plugin/velero" ) @@ -104,7 +105,7 @@ func (d *DataUploadRetrieveAction) Execute(input *velero.RestoreItemActionExecut }, } - err = d.client.Create(context.Background(), &cm, &client.CreateOptions{}) + err = veleroclient.CreateRetryGenerateName(d.client, context.Background(), &cm) if err != nil { d.logger.Errorf("fail to create DataUploadResult ConfigMap %s/%s: %s", cm.Namespace, cm.Name, err.Error()) return nil, errors.Wrap(err, "fail to create DataUploadResult ConfigMap")