From e1b5599b88a680b6bc71f05ceeeba2fc66736037 Mon Sep 17 00:00:00 2001 From: David Shay Date: Thu, 10 Mar 2022 16:01:24 -0500 Subject: [PATCH] Fix for buggy ingress sync with retries --- cmd/nginx/flags.go | 63 ++++++++++++----------- internal/ingress/controller/controller.go | 15 ++++-- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/cmd/nginx/flags.go b/cmd/nginx/flags.go index 3029305511..337a1f8276 100644 --- a/cmd/nginx/flags.go +++ b/cmd/nginx/flags.go @@ -65,7 +65,7 @@ The parameter --controller-class has precedence over this.`) ingressClassController = flags.String("controller-class", ingressclass.DefaultControllerName, `Ingress Class Controller value this Ingress satisfies. -The class of an Ingress object is set using the field IngressClassName in Kubernetes clusters version v1.19.0 or higher. The .spec.controller value of the IngressClass +The class of an Ingress object is set using the field IngressClassName in Kubernetes clusters version v1.19.0 or higher. The .spec.controller value of the IngressClass referenced in an Ingress Object should be the same value specified here to make this object be watched.`) watchWithoutClass = flags.Bool("watch-ingress-without-class", false, @@ -203,6 +203,8 @@ Takes the form ":port". If not provided, no admission controller is starte postShutdownGracePeriod = flags.Int("post-shutdown-grace-period", 10, "Seconds to wait after the nginx process has stopped before controller exits.") deepInspector = flags.Bool("deep-inspect", true, "Enables ingress object security deep inspector") + + dynamicConfigurationRetries = flags.Int("dynamic-configuration-retries", 15, "Number of times to retry failed dynamic configuration before failing to sync an ingress.") ) flags.StringVar(&nginx.MaxmindMirror, "maxmind-mirror", "", `Maxmind mirror url (example: http://geoip.local/databases`) @@ -303,35 +305,36 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g ngx_config.EnableSSLChainCompletion = *enableSSLChainCompletion config := &controller.Configuration{ - APIServerHost: *apiserverHost, - KubeConfigFile: *kubeConfigFile, - UpdateStatus: *updateStatus, - ElectionID: *electionID, - EnableProfiling: *profiling, - EnableMetrics: *enableMetrics, - MetricsPerHost: *metricsPerHost, - MetricsBuckets: histogramBuckets, - MonitorMaxBatchSize: *monitorMaxBatchSize, - DisableServiceExternalName: *disableServiceExternalName, - EnableSSLPassthrough: *enableSSLPassthrough, - ResyncPeriod: *resyncPeriod, - DefaultService: *defaultSvc, - Namespace: *watchNamespace, - WatchNamespaceSelector: namespaceSelector, - ConfigMapName: *configMap, - TCPConfigMapName: *tcpConfigMapName, - UDPConfigMapName: *udpConfigMapName, - DisableFullValidationTest: *disableFullValidationTest, - DefaultSSLCertificate: *defSSLCertificate, - DeepInspector: *deepInspector, - PublishService: *publishSvc, - PublishStatusAddress: *publishStatusAddress, - UpdateStatusOnShutdown: *updateStatusOnShutdown, - ShutdownGracePeriod: *shutdownGracePeriod, - PostShutdownGracePeriod: *postShutdownGracePeriod, - UseNodeInternalIP: *useNodeInternalIP, - SyncRateLimit: *syncRateLimit, - HealthCheckHost: *healthzHost, + APIServerHost: *apiserverHost, + KubeConfigFile: *kubeConfigFile, + UpdateStatus: *updateStatus, + ElectionID: *electionID, + EnableProfiling: *profiling, + EnableMetrics: *enableMetrics, + MetricsPerHost: *metricsPerHost, + MetricsBuckets: histogramBuckets, + MonitorMaxBatchSize: *monitorMaxBatchSize, + DisableServiceExternalName: *disableServiceExternalName, + EnableSSLPassthrough: *enableSSLPassthrough, + ResyncPeriod: *resyncPeriod, + DefaultService: *defaultSvc, + Namespace: *watchNamespace, + WatchNamespaceSelector: namespaceSelector, + ConfigMapName: *configMap, + TCPConfigMapName: *tcpConfigMapName, + UDPConfigMapName: *udpConfigMapName, + DisableFullValidationTest: *disableFullValidationTest, + DefaultSSLCertificate: *defSSLCertificate, + DeepInspector: *deepInspector, + PublishService: *publishSvc, + PublishStatusAddress: *publishStatusAddress, + UpdateStatusOnShutdown: *updateStatusOnShutdown, + ShutdownGracePeriod: *shutdownGracePeriod, + PostShutdownGracePeriod: *postShutdownGracePeriod, + UseNodeInternalIP: *useNodeInternalIP, + SyncRateLimit: *syncRateLimit, + HealthCheckHost: *healthzHost, + DynamicConfigurationRetries: *dynamicConfigurationRetries, ListenPorts: &ngx_config.ListenPorts{ Default: *defServerPort, Health: *healthzPort, diff --git a/internal/ingress/controller/controller.go b/internal/ingress/controller/controller.go index 778dfa03d3..f43c72d921 100644 --- a/internal/ingress/controller/controller.go +++ b/internal/ingress/controller/controller.go @@ -125,6 +125,8 @@ type Configuration struct { InternalLoggerAddress string IsChroot bool DeepInspector bool + + DynamicConfigurationRetries int } // GetPublishService returns the Service used to set the load-balancer status of Ingresses. @@ -194,19 +196,24 @@ func (n *NGINXController) syncIngress(interface{}) error { } retry := wait.Backoff{ - Steps: 15, - Duration: 1 * time.Second, - Factor: 0.8, + Steps: 1 + n.cfg.DynamicConfigurationRetries, + Duration: time.Second, + Factor: 1.3, Jitter: 0.1, } + retriesRemaining := retry.Steps err := wait.ExponentialBackoff(retry, func() (bool, error) { err := n.configureDynamically(pcfg) if err == nil { klog.V(2).Infof("Dynamic reconfiguration succeeded.") return true, nil } - + retriesRemaining-- + if retriesRemaining > 0 { + klog.Warningf("Dynamic reconfiguration failed (retrying; %d retries left): %v", retriesRemaining, err) + return false, nil + } klog.Warningf("Dynamic reconfiguration failed: %v", err) return false, err })