Skip to content

Commit

Permalink
Merge pull request #5926 from eemcmullan/server-timeouts
Browse files Browse the repository at this point in the history
Add server setting for resource timeouts
  • Loading branch information
blackpiglet authored Mar 13, 2023
2 parents 54042c3 + ec4a707 commit 36163c9
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 13 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/5926-eemcmullan
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add configurable server setting for default timeouts
10 changes: 8 additions & 2 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ const (
defaultCSISnapshotTimeout = 10 * time.Minute
defaultItemOperationTimeout = 60 * time.Minute

resourceTimeout = 10 * time.Minute

// defaultCredentialsDirectory is the path on disk where credential
// files will be written to
defaultCredentialsDirectory = "/tmp/credentials"
Expand All @@ -113,7 +115,7 @@ type serverConfig struct {
pluginDir, metricsAddress, defaultBackupLocation string
backupSyncPeriod, podVolumeOperationTimeout, resourceTerminatingTimeout time.Duration
defaultBackupTTL, storeValidationFrequency, defaultCSISnapshotTimeout time.Duration
defaultItemOperationTimeout time.Duration
defaultItemOperationTimeout, resourceTimeout time.Duration
restoreResourcePriorities restore.Priorities
defaultVolumeSnapshotLocations map[string]string
restoreOnly bool
Expand Down Expand Up @@ -148,6 +150,7 @@ func NewCommand(f client.Factory) *cobra.Command {
defaultBackupTTL: defaultBackupTTL,
defaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
defaultItemOperationTimeout: defaultItemOperationTimeout,
resourceTimeout: resourceTimeout,
storeValidationFrequency: defaultStoreValidationFrequency,
podVolumeOperationTimeout: defaultPodVolumeOperationTimeout,
restoreResourcePriorities: defaultRestorePriorities,
Expand Down Expand Up @@ -227,6 +230,7 @@ func NewCommand(f client.Factory) *cobra.Command {
command.Flags().BoolVar(&config.defaultVolumesToFsBackup, "default-volumes-to-fs-backup", config.defaultVolumesToFsBackup, "Backup all volumes with pod volume file system backup by default.")
command.Flags().StringVar(&config.uploaderType, "uploader-type", config.uploaderType, "Type of uploader to handle the transfer of data of pod volumes")
command.Flags().DurationVar(&config.defaultItemOperationTimeout, "default-item-operation-timeout", config.defaultItemOperationTimeout, "How long to wait on asynchronous BackupItemActions and RestoreItemActions to complete before timing out.")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")

return command
}
Expand Down Expand Up @@ -568,7 +572,7 @@ func (s *server) initRepoManager() error {
}

s.repoLocker = repository.NewRepoLocker()
s.repoEnsurer = repository.NewRepositoryEnsurer(s.mgr.GetClient(), s.logger)
s.repoEnsurer = repository.NewRepositoryEnsurer(s.mgr.GetClient(), s.logger, s.config.resourceTimeout)

s.repoManager = repository.NewManager(s.namespace, s.mgr.GetClient(), s.repoLocker, s.repoEnsurer, s.credentialFileStore, s.credentialSecretStore, s.logger)

Expand Down Expand Up @@ -736,6 +740,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.config.defaultVolumesToFsBackup,
s.config.defaultBackupTTL,
s.config.defaultCSISnapshotTimeout,
s.config.resourceTimeout,
s.config.defaultItemOperationTimeout,
defaultVolumeSnapshotLocations,
s.metrics,
Expand Down Expand Up @@ -863,6 +868,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
),
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.config.resourceTimeout,
s.logger,
podexec.NewPodCommandExecutor(s.kubeClientConfig, s.kubeClient.CoreV1().RESTClient()),
s.kubeClient.CoreV1().RESTClient(),
Expand Down
5 changes: 4 additions & 1 deletion pkg/controller/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ type backupReconciler struct {
defaultVolumesToFsBackup bool
defaultBackupTTL time.Duration
defaultCSISnapshotTimeout time.Duration
resourceTimeout time.Duration
defaultItemOperationTimeout time.Duration
defaultSnapshotLocations map[string]string
metrics *metrics.ServerMetrics
Expand All @@ -107,6 +108,7 @@ func NewBackupReconciler(
defaultVolumesToFsBackup bool,
defaultBackupTTL time.Duration,
defaultCSISnapshotTimeout time.Duration,
resourceTimeout time.Duration,
defaultItemOperationTimeout time.Duration,
defaultSnapshotLocations map[string]string,
metrics *metrics.ServerMetrics,
Expand All @@ -131,6 +133,7 @@ func NewBackupReconciler(
defaultVolumesToFsBackup: defaultVolumesToFsBackup,
defaultBackupTTL: defaultBackupTTL,
defaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
resourceTimeout: resourceTimeout,
defaultItemOperationTimeout: defaultItemOperationTimeout,
defaultSnapshotLocations: defaultSnapshotLocations,
metrics: metrics,
Expand Down Expand Up @@ -1057,7 +1060,7 @@ func (b *backupReconciler) deleteVolumeSnapshot(volumeSnapshots []snapshotv1api.
// Setting the VolumeSnapshotRef's UID to nil lets the csi-controller find out that the related VS is gone,
// so that the VSC can then be deleted.
func (b *backupReconciler) recreateVolumeSnapshotContent(vsc snapshotv1api.VolumeSnapshotContent) error {
timeout := 1 * time.Minute
timeout := b.resourceTimeout
interval := 1 * time.Second

err := b.kbClient.Delete(context.TODO(), &vsc)
Expand Down
16 changes: 9 additions & 7 deletions pkg/repository/ensurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ type RepositoryEnsurer struct {

// repoLocksMu synchronizes reads/writes to the repoLocks map itself
// since maps are not threadsafe.
repoLocksMu sync.Mutex
repoLocks map[BackupRepositoryKey]*sync.Mutex
repoLocksMu sync.Mutex
repoLocks map[BackupRepositoryKey]*sync.Mutex
resourceTimeout time.Duration
}

func NewRepositoryEnsurer(repoClient client.Client, log logrus.FieldLogger) *RepositoryEnsurer {
func NewRepositoryEnsurer(repoClient client.Client, log logrus.FieldLogger, resourceTimeout time.Duration) *RepositoryEnsurer {
return &RepositoryEnsurer{
log: log,
repoClient: repoClient,
repoLocks: make(map[BackupRepositoryKey]*sync.Mutex),
log: log,
repoClient: repoClient,
repoLocks: make(map[BackupRepositoryKey]*sync.Mutex),
resourceTimeout: resourceTimeout,
}
}

Expand Down Expand Up @@ -124,7 +126,7 @@ func (r *RepositoryEnsurer) createBackupRepositoryAndWait(ctx context.Context, n
}
}

err := wait.PollWithContext(ctx, time.Millisecond*500, time.Minute*5, checkFunc)
err := wait.PollWithContext(ctx, time.Millisecond*500, r.resourceTimeout, checkFunc)
if err != nil {
return nil, errors.Wrap(err, "failed to wait BackupRepository")
} else {
Expand Down
10 changes: 7 additions & 3 deletions pkg/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type kubernetesRestorer struct {
podVolumeRestorerFactory podvolume.RestorerFactory
podVolumeTimeout time.Duration
resourceTerminatingTimeout time.Duration
resourceTimeout time.Duration
resourcePriorities Priorities
fileSystem filesystem.Interface
pvRenamer func(string) (string, error)
Expand All @@ -115,6 +116,7 @@ func NewKubernetesRestorer(
podVolumeRestorerFactory podvolume.RestorerFactory,
podVolumeTimeout time.Duration,
resourceTerminatingTimeout time.Duration,
resourceTimeout time.Duration,
logger logrus.FieldLogger,
podCommandExecutor podexec.PodCommandExecutor,
podGetter cache.Getter,
Expand All @@ -128,6 +130,7 @@ func NewKubernetesRestorer(
podVolumeRestorerFactory: podVolumeRestorerFactory,
podVolumeTimeout: podVolumeTimeout,
resourceTerminatingTimeout: resourceTerminatingTimeout,
resourceTimeout: resourceTimeout,
resourcePriorities: resourcePriorities,
logger: logger,
pvRenamer: func(string) (string, error) {
Expand Down Expand Up @@ -296,6 +299,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
volumeSnapshots: req.VolumeSnapshots,
podVolumeBackups: req.PodVolumeBackups,
resourceTerminatingTimeout: kr.resourceTerminatingTimeout,
resourceTimeout: kr.resourceTimeout,
resourceClients: make(map[resourceClientKey]client.Dynamic),
restoredItems: req.RestoredItems,
renamedPVs: make(map[string]string),
Expand Down Expand Up @@ -339,6 +343,7 @@ type restoreContext struct {
volumeSnapshots []*volume.Snapshot
podVolumeBackups []*velerov1api.PodVolumeBackup
resourceTerminatingTimeout time.Duration
resourceTimeout time.Duration
resourceClients map[resourceClientKey]client.Dynamic
restoredItems map[itemKey]string
renamedPVs map[string]string
Expand Down Expand Up @@ -842,9 +847,8 @@ func (ctx *restoreContext) crdAvailable(name string, crdClient client.Dynamic) (
crdLogger := ctx.log.WithField("crdName", name)

var available bool
// Wait 1 minute rather than the standard resource timeout, since each CRD
// will transition fairly quickly.
err := wait.PollImmediate(time.Second, time.Minute*1, func() (bool, error) {

err := wait.PollImmediate(time.Second, ctx.resourceTimeout, func() (bool, error) {
unstructuredCRD, err := crdClient.Get(name, metav1.GetOptions{})
if err != nil {
return true, err
Expand Down

0 comments on commit 36163c9

Please sign in to comment.