From d76f3a355b89b3ae576d9c758b1649f6adb2e41b Mon Sep 17 00:00:00 2001 From: Hidde Beydals Date: Wed, 9 Aug 2023 16:19:03 +0200 Subject: [PATCH] controller: jitter requeue interval This adds a `--interval-jitter-percentage` flag to the controller to add a +/- percentage jitter to the interval defined in a HelmRelease (defaults to 5%). Effectively, this results in a reconciliation every 9.5 - 10.5 minutes for a resource with an interval of 10 minutes. The main reason for this change is to mitigate spikes in memory and CPU usage caused by many resources being configured with the same interval. Signed-off-by: Hidde Beydals --- api/v2beta1/helmrelease_types.go | 2 ++ config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml | 4 +++- docs/api/v2beta1/helm.md | 8 ++++++-- docs/spec/v2beta1/helmreleases.md | 7 +++++++ internal/controller/helmrelease_controller.go | 7 ++++--- main.go | 8 ++++++++ 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/api/v2beta1/helmrelease_types.go b/api/v2beta1/helmrelease_types.go index 6cc80bb6b..4678a35cc 100644 --- a/api/v2beta1/helmrelease_types.go +++ b/api/v2beta1/helmrelease_types.go @@ -70,6 +70,8 @@ type HelmReleaseSpec struct { Chart HelmChartTemplate `json:"chart"` // Interval at which to reconcile the Helm release. + // This interval is approximate and may be subject to jitter to ensure + // efficient use of resources. // +kubebuilder:validation:Type=string // +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$" // +required diff --git a/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml b/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml index 2fb4cb060..d60c61267 100644 --- a/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml +++ b/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml @@ -282,7 +282,9 @@ spec: type: string type: object interval: - description: Interval at which to reconcile the Helm release. 
This + interval is approximate and may be subject to jitter to ensure efficient + use of resources. pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$ type: string kubeConfig: diff --git a/docs/api/v2beta1/helm.md b/docs/api/v2beta1/helm.md index 1079bdc99..4076569d8 100644 --- a/docs/api/v2beta1/helm.md +++ b/docs/api/v2beta1/helm.md @@ -92,7 +92,9 @@ Kubernetes meta/v1.Duration -

Interval at which to reconcile the Helm release.

+

Interval at which to reconcile the Helm release. +This interval is approximate and may be subject to jitter to ensure +efficient use of resources.

@@ -901,7 +903,9 @@ Kubernetes meta/v1.Duration -

Interval at which to reconcile the Helm release.

+

Interval at which to reconcile the Helm release. +This interval is approximate and may be subject to jitter to ensure +efficient use of resources.

diff --git a/docs/spec/v2beta1/helmreleases.md b/docs/spec/v2beta1/helmreleases.md index 34881b0c1..6f9561295 100644 --- a/docs/spec/v2beta1/helmreleases.md +++ b/docs/spec/v2beta1/helmreleases.md @@ -20,6 +20,8 @@ type HelmReleaseSpec struct { Chart HelmChartTemplate `json:"chart"` // Interval at which to reconcile the Helm release. + // This interval is approximate and may be subject to jitter to ensure + // efficient use of resources. // +required Interval metav1.Duration `json:"interval"` @@ -822,6 +824,11 @@ desired state, so an upgrade is made in this case as well. The `spec.interval` tells the reconciler at which interval to reconcile the release. The interval time units are `s`, `m` and `h` e.g. `interval: 5m`, the minimum value should be 60 seconds. +**Note:** The controller can be configured to apply a jitter to the interval in +order to distribute the load more evenly when multiple HelmRelease objects are +set up with the same interval. For more information, please refer to the +[helm-controller configuration options](https://fluxcd.io/flux/components/helm/options/). + The reconciler can be told to reconcile the `HelmRelease` outside of the specified interval by annotating the object with a `reconcile.fluxcd.io/requestedAt` annotation. 
For example: diff --git a/internal/controller/helmrelease_controller.go b/internal/controller/helmrelease_controller.go index b6369f803..fbc0ab12a 100644 --- a/internal/controller/helmrelease_controller.go +++ b/internal/controller/helmrelease_controller.go @@ -54,6 +54,7 @@ import ( "github.com/fluxcd/pkg/apis/meta" "github.com/fluxcd/pkg/runtime/acl" runtimeClient "github.com/fluxcd/pkg/runtime/client" + "github.com/fluxcd/pkg/runtime/jitter" "github.com/fluxcd/pkg/runtime/metrics" "github.com/fluxcd/pkg/runtime/predicates" "github.com/fluxcd/pkg/runtime/transform" @@ -233,7 +234,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease log.Error(reconcileErr, "access denied to cross-namespace source") r.event(ctx, hr, hr.Status.LastAttemptedRevision, eventv1.EventSeverityError, reconcileErr.Error()) return v2.HelmReleaseNotReady(hr, apiacl.AccessDeniedReason, reconcileErr.Error()), - ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, nil + jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil } msg := fmt.Sprintf("chart reconciliation failed: %s", reconcileErr.Error()) @@ -248,7 +249,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease log.Info(msg) // Do not requeue immediately, when the artifact is created // the watcher should trigger a reconciliation. 
- return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), ctrl.Result{RequeueAfter: hc.Spec.Interval.Duration}, nil + return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil } // Check dependencies @@ -287,7 +288,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease r.event(ctx, hr, hc.GetArtifact().Revision, eventv1.EventSeverityError, fmt.Sprintf("reconciliation failed: %s", reconcileErr.Error())) } - return reconciledHr, ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, reconcileErr + return reconciledHr, jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), reconcileErr } type HelmReleaseReconcilerOptions struct { diff --git a/main.go b/main.go index 7e43a8419..e89ca3403 100644 --- a/main.go +++ b/main.go @@ -41,6 +41,7 @@ import ( helper "github.com/fluxcd/pkg/runtime/controller" "github.com/fluxcd/pkg/runtime/events" feathelper "github.com/fluxcd/pkg/runtime/features" + "github.com/fluxcd/pkg/runtime/jitter" "github.com/fluxcd/pkg/runtime/leaderelection" "github.com/fluxcd/pkg/runtime/logger" "github.com/fluxcd/pkg/runtime/metrics" @@ -89,6 +90,7 @@ func main() { leaderElectionOptions leaderelection.Options rateLimiterOptions helper.RateLimiterOptions watchOptions helper.WatchOptions + intervalJitterOptions jitter.IntervalOptions oomWatchInterval time.Duration oomWatchMemoryThreshold uint8 oomWatchMaxMemoryPath string @@ -128,6 +130,7 @@ func main() { kubeConfigOpts.BindFlags(flag.CommandLine) featureGates.BindFlags(flag.CommandLine) watchOptions.BindFlags(flag.CommandLine) + intervalJitterOptions.BindFlags(flag.CommandLine) flag.Parse() @@ -143,6 +146,11 @@ func main() { metricsRecorder := metrics.NewRecorder() crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...) 
+ if err := intervalJitterOptions.SetGlobalJitter(nil); err != nil { + setupLog.Error(err, "unable to set global jitter") + os.Exit(1) + } + watchNamespace := "" if !watchOptions.AllNamespaces { watchNamespace = os.Getenv("RUNTIME_NAMESPACE")