From ec4a7072b3220cf0d9ac78ac4a0eb6d0e0a0ca35 Mon Sep 17 00:00:00 2001 From: Emily McMullan Date: Mon, 27 Feb 2023 18:13:31 -0500 Subject: [PATCH] add server setting for default timeouts Signed-off-by: Emily McMullan --- changelogs/unreleased/5926-eemcmullan | 1 + pkg/cmd/server/server.go | 10 ++++++++-- pkg/controller/backup_controller.go | 5 ++++- pkg/repository/ensurer.go | 16 +++++++++------- pkg/restore/restore.go | 10 +++++++--- 5 files changed, 29 insertions(+), 13 deletions(-) create mode 100644 changelogs/unreleased/5926-eemcmullan diff --git a/changelogs/unreleased/5926-eemcmullan b/changelogs/unreleased/5926-eemcmullan new file mode 100644 index 0000000000..90c375e9bc --- /dev/null +++ b/changelogs/unreleased/5926-eemcmullan @@ -0,0 +1 @@ +Add a configurable `--resource-timeout` server flag that sets the default timeout for resource operations not covered by other timeout settings \ No newline at end of file diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index bc87608fab..91d03b5297 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -103,6 +103,8 @@ const ( defaultCSISnapshotTimeout = 10 * time.Minute defaultItemOperationTimeout = 60 * time.Minute + resourceTimeout = 10 * time.Minute + // defaultCredentialsDirectory is the path on disk where credential // files will be written to defaultCredentialsDirectory = "/tmp/credentials" @@ -113,7 +115,7 @@ type serverConfig struct { pluginDir, metricsAddress, defaultBackupLocation string backupSyncPeriod, podVolumeOperationTimeout, resourceTerminatingTimeout time.Duration defaultBackupTTL, storeValidationFrequency, defaultCSISnapshotTimeout time.Duration - defaultItemOperationTimeout time.Duration + defaultItemOperationTimeout, resourceTimeout time.Duration restoreResourcePriorities restore.Priorities defaultVolumeSnapshotLocations map[string]string restoreOnly bool @@ -148,6 +150,7 @@ func NewCommand(f client.Factory) *cobra.Command { defaultBackupTTL: defaultBackupTTL, defaultCSISnapshotTimeout: defaultCSISnapshotTimeout, defaultItemOperationTimeout: 
defaultItemOperationTimeout, + resourceTimeout: resourceTimeout, storeValidationFrequency: defaultStoreValidationFrequency, podVolumeOperationTimeout: defaultPodVolumeOperationTimeout, restoreResourcePriorities: defaultRestorePriorities, @@ -227,6 +230,7 @@ func NewCommand(f client.Factory) *cobra.Command { command.Flags().BoolVar(&config.defaultVolumesToFsBackup, "default-volumes-to-fs-backup", config.defaultVolumesToFsBackup, "Backup all volumes with pod volume file system backup by default.") command.Flags().StringVar(&config.uploaderType, "uploader-type", config.uploaderType, "Type of uploader to handle the transfer of data of pod volumes") command.Flags().DurationVar(&config.defaultItemOperationTimeout, "default-item-operation-timeout", config.defaultItemOperationTimeout, "How long to wait on asynchronous BackupItemActions and RestoreItemActions to complete before timing out.") + command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. 
Default is 10 minutes.") return command } @@ -568,7 +572,7 @@ func (s *server) initRepoManager() error { } s.repoLocker = repository.NewRepoLocker() - s.repoEnsurer = repository.NewRepositoryEnsurer(s.mgr.GetClient(), s.logger) + s.repoEnsurer = repository.NewRepositoryEnsurer(s.mgr.GetClient(), s.logger, s.config.resourceTimeout) s.repoManager = repository.NewManager(s.namespace, s.mgr.GetClient(), s.repoLocker, s.repoEnsurer, s.credentialFileStore, s.credentialSecretStore, s.logger) @@ -736,6 +740,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string s.config.defaultVolumesToFsBackup, s.config.defaultBackupTTL, s.config.defaultCSISnapshotTimeout, + s.config.resourceTimeout, s.config.defaultItemOperationTimeout, defaultVolumeSnapshotLocations, s.metrics, @@ -863,6 +868,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string ), s.config.podVolumeOperationTimeout, s.config.resourceTerminatingTimeout, + s.config.resourceTimeout, s.logger, podexec.NewPodCommandExecutor(s.kubeClientConfig, s.kubeClient.CoreV1().RESTClient()), s.kubeClient.CoreV1().RESTClient(), diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index cb587d4844..48180c78b8 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -84,6 +84,7 @@ type backupReconciler struct { defaultVolumesToFsBackup bool defaultBackupTTL time.Duration defaultCSISnapshotTimeout time.Duration + resourceTimeout time.Duration defaultItemOperationTimeout time.Duration defaultSnapshotLocations map[string]string metrics *metrics.ServerMetrics @@ -107,6 +108,7 @@ func NewBackupReconciler( defaultVolumesToFsBackup bool, defaultBackupTTL time.Duration, defaultCSISnapshotTimeout time.Duration, + resourceTimeout time.Duration, defaultItemOperationTimeout time.Duration, defaultSnapshotLocations map[string]string, metrics *metrics.ServerMetrics, @@ -131,6 +133,7 @@ func NewBackupReconciler( 
defaultVolumesToFsBackup: defaultVolumesToFsBackup, defaultBackupTTL: defaultBackupTTL, defaultCSISnapshotTimeout: defaultCSISnapshotTimeout, + resourceTimeout: resourceTimeout, defaultItemOperationTimeout: defaultItemOperationTimeout, defaultSnapshotLocations: defaultSnapshotLocations, metrics: metrics, @@ -1057,7 +1060,7 @@ func (b *backupReconciler) deleteVolumeSnapshot(volumeSnapshots []snapshotv1api. // Set VolumeSnapshotRef's UID to nil will let the csi-controller finds out the related VS is gone, then // VSC can be deleted. func (b *backupReconciler) recreateVolumeSnapshotContent(vsc snapshotv1api.VolumeSnapshotContent) error { - timeout := 1 * time.Minute + timeout := b.resourceTimeout interval := 1 * time.Second err := b.kbClient.Delete(context.TODO(), &vsc) diff --git a/pkg/repository/ensurer.go b/pkg/repository/ensurer.go index 7d7bd3ffbc..b1a155e118 100644 --- a/pkg/repository/ensurer.go +++ b/pkg/repository/ensurer.go @@ -37,15 +37,17 @@ type RepositoryEnsurer struct { // repoLocksMu synchronizes reads/writes to the repoLocks map itself // since maps are not threadsafe. 
- repoLocksMu sync.Mutex - repoLocks map[BackupRepositoryKey]*sync.Mutex + repoLocksMu sync.Mutex + repoLocks map[BackupRepositoryKey]*sync.Mutex + resourceTimeout time.Duration } -func NewRepositoryEnsurer(repoClient client.Client, log logrus.FieldLogger) *RepositoryEnsurer { +func NewRepositoryEnsurer(repoClient client.Client, log logrus.FieldLogger, resourceTimeout time.Duration) *RepositoryEnsurer { return &RepositoryEnsurer{ - log: log, - repoClient: repoClient, - repoLocks: make(map[BackupRepositoryKey]*sync.Mutex), + log: log, + repoClient: repoClient, + repoLocks: make(map[BackupRepositoryKey]*sync.Mutex), + resourceTimeout: resourceTimeout, } } @@ -124,7 +126,7 @@ func (r *RepositoryEnsurer) createBackupRepositoryAndWait(ctx context.Context, n } } - err := wait.PollWithContext(ctx, time.Millisecond*500, time.Minute*5, checkFunc) + err := wait.PollWithContext(ctx, time.Millisecond*500, r.resourceTimeout, checkFunc) if err != nil { return nil, errors.Wrap(err, "failed to wait BackupRepository") } else { diff --git a/pkg/restore/restore.go b/pkg/restore/restore.go index 22029b358f..79cf3ab98b 100644 --- a/pkg/restore/restore.go +++ b/pkg/restore/restore.go @@ -96,6 +96,7 @@ type kubernetesRestorer struct { podVolumeRestorerFactory podvolume.RestorerFactory podVolumeTimeout time.Duration resourceTerminatingTimeout time.Duration + resourceTimeout time.Duration resourcePriorities Priorities fileSystem filesystem.Interface pvRenamer func(string) (string, error) @@ -115,6 +116,7 @@ func NewKubernetesRestorer( podVolumeRestorerFactory podvolume.RestorerFactory, podVolumeTimeout time.Duration, resourceTerminatingTimeout time.Duration, + resourceTimeout time.Duration, logger logrus.FieldLogger, podCommandExecutor podexec.PodCommandExecutor, podGetter cache.Getter, @@ -128,6 +130,7 @@ func NewKubernetesRestorer( podVolumeRestorerFactory: podVolumeRestorerFactory, podVolumeTimeout: podVolumeTimeout, resourceTerminatingTimeout: resourceTerminatingTimeout, + 
resourceTimeout: resourceTimeout, resourcePriorities: resourcePriorities, logger: logger, pvRenamer: func(string) (string, error) { @@ -296,6 +299,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers( volumeSnapshots: req.VolumeSnapshots, podVolumeBackups: req.PodVolumeBackups, resourceTerminatingTimeout: kr.resourceTerminatingTimeout, + resourceTimeout: kr.resourceTimeout, resourceClients: make(map[resourceClientKey]client.Dynamic), restoredItems: req.RestoredItems, renamedPVs: make(map[string]string), @@ -339,6 +343,7 @@ type restoreContext struct { volumeSnapshots []*volume.Snapshot podVolumeBackups []*velerov1api.PodVolumeBackup resourceTerminatingTimeout time.Duration + resourceTimeout time.Duration resourceClients map[resourceClientKey]client.Dynamic restoredItems map[itemKey]string renamedPVs map[string]string @@ -842,9 +847,8 @@ func (ctx *restoreContext) crdAvailable(name string, crdClient client.Dynamic) ( crdLogger := ctx.log.WithField("crdName", name) var available bool - // Wait 1 minute rather than the standard resource timeout, since each CRD - // will transition fairly quickly. - err := wait.PollImmediate(time.Second, time.Minute*1, func() (bool, error) { + + err := wait.PollImmediate(time.Second, ctx.resourceTimeout, func() (bool, error) { unstructuredCRD, err := crdClient.Get(name, metav1.GetOptions{}) if err != nil { return true, err