From ade5e0814e7313a18058dbaedf37e311d6587b49 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 20 Sep 2023 11:20:35 +0200 Subject: [PATCH] fix incomplete startup of informers Previously, SharedInformerFactory.Start was called before core.NewAutoscaler. That had the effect that any new informer created as part of core.NewAutoscaler, in particular in kubernetes.NewListerRegistryWithDefaultListers, never got started. One of them was the DaemonSet informer. This had the effect that the DaemonSet lister had an empty cache and scale down failed with: I0920 11:06:36.046889 31805 cluster.go:164] node gke-cluster-pohly-default-pool-c9f60a43-5rvz cannot be removed: daemonset for kube-system/pdcsi-node-7hnmc is not present, err: daemonset.apps "pdcsi-node" not found This was on a GKE cluster with cluster-autoscaler running outside of the cluster on a development machine. --- cluster-autoscaler/main.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 61caae6f5f52..ab21e8f0fae6 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -498,16 +498,23 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter Comparator: nodeInfoComparator, } - stop := make(chan struct{}) - informerFactory.Start(stop) - // These metrics should be published only once. metrics.UpdateNapEnabled(autoscalingOptions.NodeAutoprovisioningEnabled) metrics.UpdateCPULimitsCores(autoscalingOptions.MinCoresTotal, autoscalingOptions.MaxCoresTotal) metrics.UpdateMemoryLimitsBytes(autoscalingOptions.MinMemoryTotal, autoscalingOptions.MaxMemoryTotal) // Create autoscaler. - return core.NewAutoscaler(opts) + autoscaler, err := core.NewAutoscaler(opts) + if err != nil { + return nil, err + } + + // Start informers. This must come after fully constructing the autoscaler because + // additional informers might have been registered in the factory during NewAutoscaler. + stop := make(chan struct{}) + informerFactory.Start(stop) + + return autoscaler, nil } func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) {