diff --git a/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_deploy.md b/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_deploy.md index 36961236b3..cf2ac00723 100644 --- a/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_deploy.md +++ b/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_deploy.md @@ -14,6 +14,7 @@ zarf dev deploy [flags] ## Options ``` + --adopt-existing-resources Adopts any pre-existing K8s resources into the Helm charts managed by Zarf. ONLY use when you have existing deployments you want Zarf to takeover. --components string Comma-separated list of components to deploy. Adding this flag will skip the prompts for selected components. Globbing component names with '*' and deselecting 'default' components with a leading '-' are also supported. --create-set stringToString Specify package variables to set on the command line (KEY=value) (default []) --deploy-set stringToString Specify deployment variables to set on the command line (KEY=value) (default []) @@ -21,6 +22,9 @@ zarf dev deploy [flags] -h, --help help for deploy --no-yolo Disable the YOLO mode default override and create / deploy the package as-defined --registry-override stringToString Specify a map of domains to override on package create when pulling images (e.g. --registry-override docker.io=dockerio-reg.enterprise.intranet) (default []) + --retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3) + --skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed + --timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s) ``` ## Options inherited from parent commands diff --git a/docs/2-the-zarf-cli/100-cli-commands/zarf_init.md b/docs/2-the-zarf-cli/100-cli-commands/zarf_init.md index 6b353aecdd..b16bbd34f6 100644 --- a/docs/2-the-zarf-cli/100-cli-commands/zarf_init.md +++ b/docs/2-the-zarf-cli/100-cli-commands/zarf_init.md @@ -70,6 +70,7 @@ $ zarf init --artifact-push-password={PASSWORD} --artifact-push-username={USERNA --registry-push-username string Username to access to the registry Zarf is configured to use (default "zarf-push") --registry-secret string Registry secret value --registry-url string External registry url address to use for this Zarf cluster + --retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3) --set stringToString Specify deployment variables to set on the command line (KEY=value) (default []) --skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed --storage-class string Specify the storage class to use for the registry and git server. E.g. --storage-class=standard diff --git a/docs/2-the-zarf-cli/100-cli-commands/zarf_package_create.md b/docs/2-the-zarf-cli/100-cli-commands/zarf_package_create.md index 79f8fbe7f7..81044a3acc 100644 --- a/docs/2-the-zarf-cli/100-cli-commands/zarf_package_create.md +++ b/docs/2-the-zarf-cli/100-cli-commands/zarf_package_create.md @@ -23,6 +23,7 @@ zarf package create [ DIRECTORY ] [flags] -m, --max-package-size int Specify the maximum size of the package in megabytes, packages larger than this will be split into multiple parts to be loaded onto smaller media (i.e. DVDs). Use 0 to disable splitting. -o, --output string Specify the output (either a directory or an oci:// URL) for the created Zarf package --registry-override stringToString Specify a map of domains to override on package create when pulling images (e.g. --registry-override docker.io=dockerio-reg.enterprise.intranet) (default []) + --retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3) -s, --sbom View SBOM contents after creating the package --sbom-out string Specify an output directory for the SBOMs from the created Zarf package --set stringToString Specify package variables to set on the command line (KEY=value) (default []) diff --git a/docs/2-the-zarf-cli/100-cli-commands/zarf_package_deploy.md b/docs/2-the-zarf-cli/100-cli-commands/zarf_package_deploy.md index 960898294b..8b9a1721a3 100644 --- a/docs/2-the-zarf-cli/100-cli-commands/zarf_package_deploy.md +++ b/docs/2-the-zarf-cli/100-cli-commands/zarf_package_deploy.md @@ -19,6 +19,7 @@ zarf package deploy [ PACKAGE_SOURCE ] [flags] --components string Comma-separated list of components to deploy. Adding this flag will skip the prompts for selected components. Globbing component names with '*' and deselecting 'default' components with a leading '-' are also supported. --confirm Confirms package deployment without prompting. ONLY use with packages you trust. Skips prompts to review SBOM, configure variables, select optional components and review potential breaking changes. -h, --help help for deploy + --retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3) --set stringToString Specify deployment variables to set on the command line (KEY=value) (default []) --shasum string Shasum of the package to deploy. Required if deploying a remote package and "--insecure" is not provided --skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed diff --git a/docs/2-the-zarf-cli/100-cli-commands/zarf_package_mirror-resources.md b/docs/2-the-zarf-cli/100-cli-commands/zarf_package_mirror-resources.md index 2493836807..fd658197d4 100644 --- a/docs/2-the-zarf-cli/100-cli-commands/zarf_package_mirror-resources.md +++ b/docs/2-the-zarf-cli/100-cli-commands/zarf_package_mirror-resources.md @@ -49,6 +49,7 @@ $ zarf package mirror-resources \ --registry-push-password string Password for the push-user to connect to the registry --registry-push-username string Username to access to the registry Zarf is configured to use (default "zarf-push") --registry-url string External registry url address to use for this Zarf cluster + --retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3) ``` ## Options inherited from parent commands diff --git a/src/cmd/common/viper.go b/src/cmd/common/viper.go index 34d6d0888b..b852800611 100644 --- a/src/cmd/common/viper.go +++ b/src/cmd/common/viper.go @@ -84,6 +84,7 @@ const ( VPkgDeploySget = "package.deploy.sget" VPkgDeploySkipWebhooks = "package.deploy.skip_webhooks" VPkgDeployTimeout = "package.deploy.timeout" + VPkgRetries = "package.deploy.retries" // Package publish config keys @@ -184,7 +185,8 @@ func setDefaults() { // Package defaults that are non-zero values v.SetDefault(VPkgOCIConcurrency, 3) + v.SetDefault(VPkgRetries, config.ZarfDefaultRetries) // Deploy opts that are non-zero values - v.SetDefault(VPkgDeployTimeout, config.ZarfDefaultHelmTimeout) + v.SetDefault(VPkgDeployTimeout, config.ZarfDefaultTimeout) } diff --git a/src/cmd/dev.go b/src/cmd/dev.go index bccac8b781..cc53358e5b 100644 --- a/src/cmd/dev.go +++ b/src/cmd/dev.go @@ -282,6 +282,12 @@ func bindDevDeployFlags(v *viper.Viper) { devDeployFlags.StringToStringVar(&pkgConfig.PkgOpts.SetVariables, "deploy-set", v.GetStringMapString(common.VPkgDeploySet), lang.CmdPackageDeployFlagSet) + // Always require adopt-existing-resources flag (no viper) + devDeployFlags.BoolVar(&pkgConfig.DeployOpts.AdoptExistingResources, "adopt-existing-resources", false, lang.CmdPackageDeployFlagAdoptExistingResources) + devDeployFlags.BoolVar(&pkgConfig.DeployOpts.SkipWebhooks, "skip-webhooks", v.GetBool(common.VPkgDeploySkipWebhooks), lang.CmdPackageDeployFlagSkipWebhooks) + devDeployFlags.DurationVar(&pkgConfig.DeployOpts.Timeout, "timeout", v.GetDuration(common.VPkgDeployTimeout), lang.CmdPackageDeployFlagTimeout) + + devDeployFlags.IntVar(&pkgConfig.PkgOpts.Retries, "retries", v.GetInt(common.VPkgRetries), lang.CmdPackageFlagRetries) devDeployFlags.StringVar(&pkgConfig.PkgOpts.OptionalComponents, "components", v.GetString(common.VPkgDeployComponents), lang.CmdPackageDeployFlagComponents) devDeployFlags.BoolVar(&pkgConfig.CreateOpts.NoYOLO, "no-yolo", v.GetBool(common.VDevDeployNoYolo), lang.CmdDevDeployFlagNoYolo) diff --git a/src/cmd/initialize.go b/src/cmd/initialize.go index eea079af54..a51041ec5f 100644 --- a/src/cmd/initialize.go +++ b/src/cmd/initialize.go @@ -216,11 +216,10 @@ func init() { // Flags that control how a deployment proceeds // Always require adopt-existing-resources flag (no viper) initCmd.Flags().BoolVar(&pkgConfig.DeployOpts.AdoptExistingResources, "adopt-existing-resources", false, lang.CmdPackageDeployFlagAdoptExistingResources) - initCmd.Flags().BoolVar(&pkgConfig.DeployOpts.SkipWebhooks, "skip-webhooks", v.GetBool(common.VPkgDeploySkipWebhooks), lang.CmdPackageDeployFlagSkipWebhooks) - initCmd.Flags().DurationVar(&pkgConfig.DeployOpts.Timeout, "timeout", v.GetDuration(common.VPkgDeployTimeout), lang.CmdPackageDeployFlagTimeout) + initCmd.Flags().IntVar(&pkgConfig.PkgOpts.Retries, "retries", v.GetInt(common.VPkgRetries), lang.CmdPackageFlagRetries) initCmd.Flags().StringVarP(&pkgConfig.PkgOpts.PublicKeyPath, "key", "k", v.GetString(common.VPkgPublicKey), lang.CmdPackageFlagFlagPublicKey) initCmd.Flags().SortFlags = true diff --git a/src/cmd/package.go b/src/cmd/package.go index 3e29b74b64..ba0951f1b8 100644 --- a/src/cmd/package.go +++ b/src/cmd/package.go @@ -364,6 +364,8 @@ func bindCreateFlags(v *viper.Viper) { createFlags.StringVarP(&pkgConfig.CreateOpts.SigningKeyPath, "key", "k", v.GetString(common.VPkgCreateSigningKey), lang.CmdPackageCreateFlagDeprecatedKey) createFlags.StringVar(&pkgConfig.CreateOpts.SigningKeyPassword, "key-pass", v.GetString(common.VPkgCreateSigningKeyPassword), lang.CmdPackageCreateFlagDeprecatedKeyPassword) + createFlags.IntVar(&pkgConfig.PkgOpts.Retries, "retries", v.GetInt(common.VPkgRetries), lang.CmdPackageFlagRetries) + createFlags.MarkHidden("output-directory") createFlags.MarkHidden("key") createFlags.MarkHidden("key-pass") @@ -377,11 +379,10 @@ func bindDeployFlags(v *viper.Viper) { // Always require adopt-existing-resources flag (no viper) deployFlags.BoolVar(&pkgConfig.DeployOpts.AdoptExistingResources, "adopt-existing-resources", false, lang.CmdPackageDeployFlagAdoptExistingResources) - deployFlags.BoolVar(&pkgConfig.DeployOpts.SkipWebhooks, "skip-webhooks", v.GetBool(common.VPkgDeploySkipWebhooks), lang.CmdPackageDeployFlagSkipWebhooks) - deployFlags.DurationVar(&pkgConfig.DeployOpts.Timeout, "timeout", v.GetDuration(common.VPkgDeployTimeout), lang.CmdPackageDeployFlagTimeout) + deployFlags.IntVar(&pkgConfig.PkgOpts.Retries, "retries", v.GetInt(common.VPkgRetries), lang.CmdPackageFlagRetries) deployFlags.StringToStringVar(&pkgConfig.PkgOpts.SetVariables, "set", v.GetStringMapString(common.VPkgDeploySet), lang.CmdPackageDeployFlagSet) deployFlags.StringVar(&pkgConfig.PkgOpts.OptionalComponents, "components", v.GetString(common.VPkgDeployComponents), lang.CmdPackageDeployFlagComponents) deployFlags.StringVar(&pkgConfig.PkgOpts.Shasum, "shasum", v.GetString(common.VPkgDeployShasum), lang.CmdPackageDeployFlagShasum) @@ -403,6 +404,7 @@ func bindMirrorFlags(v *viper.Viper) { mirrorFlags.BoolVar(&pkgConfig.MirrorOpts.NoImgChecksum, "no-img-checksum", false, lang.CmdPackageMirrorFlagNoChecksum) + mirrorFlags.IntVar(&pkgConfig.PkgOpts.Retries, "retries", v.GetInt(common.VPkgRetries), lang.CmdPackageFlagRetries) mirrorFlags.StringVar(&pkgConfig.PkgOpts.OptionalComponents, "components", v.GetString(common.VPkgDeployComponents), lang.CmdPackageMirrorFlagComponents) // Flags for using an external Git server diff --git a/src/config/config.go b/src/config/config.go index df4deb9030..309e21bcda 100644 --- a/src/config/config.go +++ b/src/config/config.go @@ -99,8 +99,11 @@ var ( operationStartTime = time.Now().Unix() dataInjectionMarker = ".zarf-injection-%d" - ZarfDefaultCachePath = filepath.Join("~", ".zarf-cache") - ZarfDefaultHelmTimeout = 15 * time.Minute + ZarfDefaultCachePath = filepath.Join("~", ".zarf-cache") + + // Default Time Vars + ZarfDefaultTimeout = 15 * time.Minute + ZarfDefaultRetries = 3 ) // GetArch returns the arch based on a priority list with options for overriding. diff --git a/src/config/lang/english.go b/src/config/lang/english.go index 76487fb44e..1ad58ed92b 100644 --- a/src/config/lang/english.go +++ b/src/config/lang/english.go @@ -232,6 +232,7 @@ $ zarf init --artifact-push-password={PASSWORD} --artifact-push-username={USERNA CmdPackageShort = "Zarf package commands for creating, deploying, and inspecting packages" CmdPackageFlagConcurrency = "Number of concurrent layer operations to perform when interacting with a remote package." CmdPackageFlagFlagPublicKey = "Path to public key file for validating signed packages" + CmdPackageFlagRetries = "Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs" CmdPackageCreateShort = "Creates a Zarf package from a given directory or the current directory" CmdPackageCreateLong = "Builds an archive of resources and dependencies defined by the 'zarf.yaml' in the specified directory.\n" + diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go index a52c51967f..7b82e8b39c 100644 --- a/src/internal/packager/helm/chart.go +++ b/src/internal/packager/helm/chart.go @@ -5,9 +5,12 @@ package helm import ( + "errors" "fmt" "time" + "github.com/defenseunicorns/zarf/src/pkg/utils/helpers" + "github.com/Masterminds/semver/v3" "github.com/defenseunicorns/zarf/src/config" "github.com/defenseunicorns/zarf/src/types" @@ -24,9 +27,6 @@ import ( "helm.sh/helm/v3/pkg/storage/driver" ) -// Set the default number of Helm install/upgrade attempts to 3 -const defaultHelmAttempts = 3 - // InstallOrUpgradeChart performs a helm install of the given chart. func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { fromMessage := h.chart.URL @@ -63,49 +63,14 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { return nil, "", fmt.Errorf("unable to create helm renderer: %w", err) } - attempt := 0 - for { - attempt++ - - histClient := action.NewHistory(h.actionConfig) + histClient := action.NewHistory(h.actionConfig) + tryHelm := func() error { + var err error releases, histErr := histClient.Run(h.chart.ReleaseName) - if attempt > 3 { - previouslyDeployedVersion := 0 - - // Check for previous releases that successfully deployed - for _, release := range releases { - if release.Info.Status == "deployed" { - previouslyDeployedVersion = release.Version - } - } - - // On total failure try to rollback (if there was a previously deployed version) or uninstall. - if previouslyDeployedVersion > 0 { - spinner.Updatef("Performing chart rollback") - - err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) - if err != nil { - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w", defaultHelmAttempts, err) - } - - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts", defaultHelmAttempts) - } - - spinner.Updatef("Performing chart uninstall") - _, err = h.uninstallChart(h.chart.ReleaseName) - if err != nil { - return nil, "", fmt.Errorf("unable to install chart after %d attempts and unable to uninstall: %w", defaultHelmAttempts, err) - } - - return nil, "", fmt.Errorf("unable to install chart after %d attempts", defaultHelmAttempts) - } - - spinner.Updatef("Attempt %d of %d to install chart", attempt, defaultHelmAttempts) - spinner.Updatef("Checking for existing helm deployment") - if histErr == driver.ErrReleaseNotFound { + if errors.Is(histErr, driver.ErrReleaseNotFound) { // No prior release, try to install it. spinner.Updatef("Attempting chart installation") @@ -119,19 +84,50 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { output, err = h.upgradeChart(lastRelease, postRender) } else { // 😭 things aren't working - return nil, "", fmt.Errorf("unable to verify the chart installation status: %w", histErr) + return fmt.Errorf("unable to verify the chart installation status: %w", histErr) } if err != nil { - message.Warnf("Unable to complete helm chart install/upgrade, waiting 10 seconds and trying again: %s", err.Error()) + return fmt.Errorf("unable to complete the helm chart install/upgrade: %w", err) + } - // Simply wait for dust to settle and try again. - time.Sleep(10 * time.Second) - } else { - message.Debug(output.Info.Description) - spinner.Success() - break + message.Debug(output.Info.Description) + spinner.Success() + return nil + } + + err = helpers.Retry(tryHelm, h.retries, 5*time.Second, message.Warnf) + if err != nil { + // Try to rollback any deployed releases + releases, _ := histClient.Run(h.chart.ReleaseName) + previouslyDeployedVersion := 0 + + // Check for previous releases that successfully deployed + for _, release := range releases { + if release.Info.Status == "deployed" { + previouslyDeployedVersion = release.Version + } } + + // On total failure try to rollback (if there was a previously deployed version) or uninstall. + if previouslyDeployedVersion > 0 { + spinner.Updatef("Performing chart rollback") + + err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) + if err != nil { + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w", h.retries, err) + } + + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts", h.retries) + } + + spinner.Updatef("Performing chart uninstall") + _, err = h.uninstallChart(h.chart.ReleaseName) + if err != nil { + return nil, "", fmt.Errorf("unable to install chart after %d attempts and unable to uninstall: %w", h.retries, err) + } + + return nil, "", fmt.Errorf("unable to install chart after %d attempts", h.retries) } // return any collected connect strings for zarf connect. diff --git a/src/internal/packager/helm/common.go b/src/internal/packager/helm/common.go index 9b2dd729a2..cde9bd6992 100644 --- a/src/internal/packager/helm/common.go +++ b/src/internal/packager/helm/common.go @@ -33,6 +33,7 @@ type Helm struct { component types.ZarfComponent cluster *cluster.Cluster timeout time.Duration + retries int kubeVersion string @@ -52,7 +53,7 @@ func New(chart types.ZarfChart, chartPath string, valuesPath string, mods ...Mod chart: chart, chartPath: chartPath, valuesPath: valuesPath, - timeout: config.ZarfDefaultHelmTimeout, + timeout: config.ZarfDefaultTimeout, } for _, mod := range mods { @@ -67,7 +68,8 @@ func NewClusterOnly(cfg *types.PackagerConfig, cluster *cluster.Cluster) *Helm { return &Helm{ cfg: cfg, cluster: cluster, - timeout: config.ZarfDefaultHelmTimeout, + timeout: config.ZarfDefaultTimeout, + retries: config.ZarfDefaultRetries, } } @@ -118,7 +120,7 @@ func NewFromZarfManifest(manifest types.ZarfManifest, manifestPath, packageName, NoWait: manifest.NoWait, }, chartOverride: tmpChart, - timeout: config.ZarfDefaultHelmTimeout, + timeout: config.ZarfDefaultTimeout, } for _, mod := range mods { @@ -131,13 +133,14 @@ func NewFromZarfManifest(manifest types.ZarfManifest, manifestPath, packageName, } // WithDeployInfo adds the necessary information to deploy a given chart -func WithDeployInfo(component types.ZarfComponent, cfg *types.PackagerConfig, cluster *cluster.Cluster, valuesOverrides map[string]any, timeout time.Duration) Modifier { +func WithDeployInfo(component types.ZarfComponent, cfg *types.PackagerConfig, cluster *cluster.Cluster, valuesOverrides map[string]any, timeout time.Duration, retries int) Modifier { return func(h *Helm) { h.component = component h.cfg = cfg h.cluster = cluster h.valuesOverrides = valuesOverrides h.timeout = timeout + h.retries = retries } } diff --git a/src/pkg/packager/create_stages.go b/src/pkg/packager/create_stages.go index 5ea66cc3fb..45ef127dd9 100644 --- a/src/pkg/packager/create_stages.go +++ b/src/pkg/packager/create_stages.go @@ -163,8 +163,8 @@ func (p *Packager) assemble() error { return err } - if err := helpers.Retry(doPull, 3, 5*time.Second, message.Warnf); err != nil { - return fmt.Errorf("unable to pull images after 3 attempts: %w", err) + if err := helpers.Retry(doPull, p.cfg.PkgOpts.Retries, 5*time.Second, message.Warnf); err != nil { + return fmt.Errorf("unable to pull images after %d attempts: %w", p.cfg.PkgOpts.Retries, err) } for _, imgInfo := range pulled { diff --git a/src/pkg/packager/deploy.go b/src/pkg/packager/deploy.go index 585b50de14..2d27174cdd 100644 --- a/src/pkg/packager/deploy.go +++ b/src/pkg/packager/deploy.go @@ -470,7 +470,7 @@ func (p *Packager) pushImagesToRegistry(componentImages []string, noImgChecksum return helpers.Retry(func() error { return imgConfig.PushToZarfRegistry() - }, 3, 5*time.Second, message.Warnf) + }, p.cfg.PkgOpts.Retries, 5*time.Second, message.Warnf) } // Push all of the components git repos to the configured git server. @@ -511,8 +511,8 @@ func (p *Packager) pushReposToRepository(reposPath string, repos []string) error return gitClient.PushRepo(repoURL, reposPath) } - // Try repo push up to 3 times - if err := helpers.Retry(tryPush, 3, 5*time.Second, message.Warnf); err != nil { + // Try repo push up to retry limit + if err := helpers.Retry(tryPush, p.cfg.PkgOpts.Retries, 5*time.Second, message.Warnf); err != nil { return fmt.Errorf("unable to push repo %s to the Git Server: %w", repoURL, err) } } @@ -549,7 +549,8 @@ func (p *Packager) installChartAndManifests(componentPaths *layout.ComponentPath p.cfg, p.cluster, valuesOverrides, - p.cfg.DeployOpts.Timeout), + p.cfg.DeployOpts.Timeout, + p.cfg.PkgOpts.Retries), ) addedConnectStrings, installedChartName, err := helmCfg.InstallOrUpgradeChart() @@ -596,7 +597,8 @@ func (p *Packager) installChartAndManifests(componentPaths *layout.ComponentPath p.cfg, p.cluster, nil, - p.cfg.DeployOpts.Timeout), + p.cfg.DeployOpts.Timeout, + p.cfg.PkgOpts.Retries), ) if err != nil { return installedCharts, err diff --git a/src/pkg/utils/helpers/misc.go b/src/pkg/utils/helpers/misc.go index 9b6fefbbd8..f0129c710f 100644 --- a/src/pkg/utils/helpers/misc.go +++ b/src/pkg/utils/helpers/misc.go @@ -6,22 +6,27 @@ package helpers import ( "fmt" + "math" "reflect" "regexp" "time" ) -// Retry will retry a function until it succeeds or the timeout is reached, timeout == retries * delay. -func Retry(fn func() error, retries int, delay time.Duration, logger func(format string, args ...any)) (err error) { +// Retry will retry a function until it succeeds or the timeout is reached. timeout == 2^attempt * delay. +func Retry(fn func() error, retries int, delay time.Duration, logger func(format string, args ...any)) error { + var err error for r := 0; r < retries; r++ { err = fn() if err == nil { break } - logger("Retrying (%d/%d): %s", r+1, retries, err.Error()) + pow := math.Pow(2, float64(r)) + backoff := delay * time.Duration(pow) - time.Sleep(delay) + logger("Retrying (%d/%d) in %s: %s", r+1, retries, backoff, err.Error()) + + time.Sleep(backoff) } return err diff --git a/src/test/e2e/36_custom_retries_test.go b/src/test/e2e/36_custom_retries_test.go new file mode 100644 index 0000000000..e66a6ff435 --- /dev/null +++ b/src/test/e2e/36_custom_retries_test.go @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2021-Present The Zarf Authors + +// Package test provides e2e tests for Zarf. +package test + +import ( + "fmt" + "path" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRetries(t *testing.T) { + t.Log("E2E: Custom Retries") + e2e.SetupWithCluster(t) + + tmpDir := t.TempDir() + + buildPath := filepath.Join("src", "test", "packages", "25-evil-dos-games") + pkgName := fmt.Sprintf("zarf-package-dos-games-%s.tar.zst", e2e.Arch) + + stdOut, stdErr, err := e2e.Zarf("package", "create", buildPath, "--tmpdir", tmpDir, "--output", tmpDir, "--confirm") + require.NoError(t, err, stdOut, stdErr) + + stdOut, stdErr, err = e2e.Zarf("package", "deploy", path.Join(tmpDir, pkgName), "--retries", "2", "--timeout", "3s", "--tmpdir", tmpDir, "--confirm") + require.Error(t, err, stdOut, stdErr) + require.Contains(t, stdErr, "Retrying (1/2) in 5s:") + require.Contains(t, stdErr, "Retrying (2/2) in 10s:") + require.Contains(t, stdErr, "unable to install chart after 2 attempts") +} diff --git a/src/types/runtime.go b/src/types/runtime.go index 3878bb0aef..becf8ec56c 100644 --- a/src/types/runtime.go +++ b/src/types/runtime.go @@ -43,6 +43,7 @@ type ZarfPackageOptions struct { SGetKeyPath string `json:"sGetKeyPath" jsonschema:"description=Location where the public key component of a cosign key-pair can be found"` SetVariables map[string]string `json:"setVariables" jsonschema:"description=Key-Value map of variable names and their corresponding values that will be used to template manifests and files in the Zarf package"` PublicKeyPath string `json:"publicKeyPath" jsonschema:"description=Location where the public key component of a cosign key-pair can be found"` + Retries int `json:"retries" jsonschema:"description=The number of retries to perform for Zarf deploy operations like image pushes or Helm installs"` } // ZarfInspectOptions tracks the user-defined preferences during a package inspection.