Allow configuring nginx worker reload behaviour, to prevent multiple …

…concurrent worker reloads which can lead to high resource usage and OOMKill (#10884) * feat: allow configuring nginx worker reload behaviour, to prevent multiple concurrent worker reloads Signed-off-by: Rafael da Fonseca <[email protected]> * appease linter, remove unnecessary log line Signed-off-by: Rafael da Fonseca <[email protected]> * Flip to using a positive behaviour flag instead of negative Signed-off-by: Rafael da Fonseca <[email protected]> * Update helm-docs Signed-off-by: Rafael da Fonseca <[email protected]> * Avoid calling GetBackendConfiguration() twice, use clearer name for helm chart option Signed-off-by: Rafael da Fonseca <[email protected]> * Fix helm-docs ordering Signed-off-by: Rafael da Fonseca <[email protected]> --------- Signed-off-by: Rafael da Fonseca <[email protected]>
kubernetes · May 14, 2024 · 4e11074 · 4e11074
1 parent 689b993
commit 4e11074
Show file tree

Hide file tree

Showing 7 changed files with 69 additions and 3 deletions.
diff --git a/charts/ingress-nginx/README.md b/charts/ingress-nginx/README.md
@@ -301,6 +301,7 @@ As of version `1.26.0` of this chart, by simply not providing any clusterIP valu
 | controller.enableAnnotationValidations | bool | `false` |  |
 | controller.enableMimalloc | bool | `true` | Enable mimalloc as a drop-in replacement for malloc. # ref: https://github.com/microsoft/mimalloc # |
 | controller.enableTopologyAwareRouting | bool | `false` | This configuration enables Topology Aware Routing feature, used together with service annotation service.kubernetes.io/topology-mode="auto" Defaults to false |
+| controller.enableWorkerSerialReloads | bool | `false` | This configuration defines if NGINX workers should reload serially instead of concurrently when multiple changes that require reloads are queued |
 | controller.existingPsp | string | `""` | Use an existing PSP instead of creating one |
 | controller.extraArgs | object | `{}` | Additional command line arguments to pass to Ingress-Nginx Controller E.g. to specify the default SSL certificate you can use |
 | controller.extraContainers | list | `[]` | Additional containers to be added to the controller pod. See https://github.com/lemonldap-ng-controller/lemonldap-ng-controller as example. |

diff --git a/charts/ingress-nginx/templates/controller-configmap.yaml b/charts/ingress-nginx/templates/controller-configmap.yaml
@@ -14,6 +14,7 @@ metadata:
   namespace: {{ include "ingress-nginx.namespace" . }}
 data:
   allow-snippet-annotations: "{{ .Values.controller.allowSnippetAnnotations }}"
+  enable-serial-reloads: "{{ .Values.controller.enableWorkerSerialReloads }}"
 {{- if .Values.controller.addHeaders }}
   add-headers: {{ include "ingress-nginx.namespace" . }}/{{ include "ingress-nginx.fullname" . }}-custom-add-headers
 {{- end }}

diff --git a/charts/ingress-nginx/values.yaml b/charts/ingress-nginx/values.yaml
@@ -93,6 +93,8 @@ controller:
   # when users add those annotations.
   # Global snippets in ConfigMap are still respected
   allowSnippetAnnotations: false
+  # -- This configuration defines if NGINX workers should reload serially instead of concurrently when multiple changes that require reloads are queued
+  enableWorkerSerialReloads: false
   # -- Required for use with CNI based kubernetes installations (such as ones set up by kubeadm),
   # since CNI and hostport don't mix yet. Can be deprecated once https://github.com/kubernetes/kubernetes/issues/23920
   # is merged

diff --git a/docs/user-guide/nginx-configuration/configmap.md b/docs/user-guide/nginx-configuration/configmap.md
@@ -114,6 +114,7 @@ The following table shows a configuration option's name, type, and the default v
 |[worker-processes](#worker-processes)| string       | `<Number of CPUs>`                                                                                                                                                                                                                                                                                                                                           ||
 |[worker-cpu-affinity](#worker-cpu-affinity)| string       | ""                                                                                                                                                                                                                                                                                                                                                           ||
 |[worker-shutdown-timeout](#worker-shutdown-timeout)| string       | "240s"                                                                                                                                                                                                                                                                                                                                                       ||
+|[enable-serial-reloads](#enable-serial-reloads)|bool|"false"||
 |[load-balance](#load-balance)| string       | "round_robin"                                                                                                                                                                                                                                                                                                                                                ||
 |[variables-hash-bucket-size](#variables-hash-bucket-size)| int          | 128                                                                                                                                                                                                                                                                                                                                                          ||
 |[variables-hash-max-size](#variables-hash-max-size)| int          | 2048                                                                                                                                                                                                                                                                                                                                                         ||

diff --git a/internal/ingress/controller/config/config.go b/internal/ingress/controller/config/config.go
@@ -477,6 +477,13 @@ type Configuration struct {
 	// http://nginx.org/en/docs/ngx_core_module.html#worker_processes
 	WorkerProcesses string `json:"worker-processes,omitempty"`
 
+	// Defines whether multiple concurrent reloads of worker processes should occur.
+	// Set this to false to prevent more than n x 2 workers to exist at any time, to avoid potential OOM situations and high CPU load
+	// With this setting on false, configuration changes in the queue will be re-queued with an exponential backoff, until the number of worker process is the expected value.
+	// By default new worker processes are spawned every time there's a change that cannot be applied dynamically with no upper limit to the number of running workers
+	// http://nginx.org/en/docs/ngx_core_module.html#worker_processes
+	WorkerSerialReloads bool `json:"enable-serial-reloads,omitempty"`
+
 	// Defines a timeout for a graceful shutdown of worker processes
 	// http://nginx.org/en/docs/ngx_core_module.html#worker_shutdown_timeout
 	WorkerShutdownTimeout string `json:"worker-shutdown-timeout,omitempty"`
@@ -851,6 +858,7 @@ func NewDefault() Configuration {
 		UseGeoIP2:                        false,
 		GeoIP2AutoReloadMinutes:          0,
 		WorkerProcesses:                  strconv.Itoa(runtime.NumCPU()),
+		WorkerSerialReloads:              false,
 		WorkerShutdownTimeout:            "240s",
 		VariablesHashBucketSize:          256,
 		VariablesHashMaxSize:             2048,

diff --git a/internal/ingress/controller/nginx.go b/internal/ingress/controller/nginx.go
@@ -35,6 +35,7 @@ import (
 	"syscall"
 	"text/template"
 	"time"
+	"unicode"
 
 	proxyproto "github.com/armon/go-proxyproto"
 	"github.com/eapache/channels"
@@ -87,9 +88,10 @@ func NewNGINXController(config *Configuration, mc metric.Collector) *NGINXContro
 	n := &NGINXController{
 		isIPV6Enabled: ing_net.IsIPv6Enabled(),
 
-		resolver:        h,
-		cfg:             config,
-		syncRateLimiter: flowcontrol.NewTokenBucketRateLimiter(config.SyncRateLimit, 1),
+		resolver:         h,
+		cfg:              config,
+		syncRateLimiter:  flowcontrol.NewTokenBucketRateLimiter(config.SyncRateLimit, 1),
+		workersReloading: false,
 
 		recorder: eventBroadcaster.NewRecorder(scheme.Scheme, apiv1.EventSource{
 			Component: "nginx-ingress-controller",
@@ -229,6 +231,8 @@ type NGINXController struct {
 
 	syncRateLimiter flowcontrol.RateLimiter
 
+	workersReloading bool
+
 	// stopLock is used to enforce that only a single call to Stop send at
 	// a given time. We allow stopping through an HTTP endpoint and
 	// allowing concurrent stoppers leads to stack traces.
@@ -676,6 +680,11 @@ func (n *NGINXController) OnUpdate(ingressCfg ingress.Configuration) error {
 	cfg := n.store.GetBackendConfiguration()
 	cfg.Resolver = n.resolver
 
+	workerSerialReloads := cfg.WorkerSerialReloads
+	if workerSerialReloads && n.workersReloading {
+		return errors.New("worker reload already in progress, requeuing reload")
+	}
+
 	content, err := n.generateTemplate(cfg, ingressCfg)
 	if err != nil {
 		return err
@@ -738,9 +747,41 @@ func (n *NGINXController) OnUpdate(ingressCfg ingress.Configuration) error {
 		return fmt.Errorf("%v\n%v", err, string(o))
 	}
 
+	// Reload status checking runs in a separate goroutine to avoid blocking the sync queue
+	if workerSerialReloads {
+		go n.awaitWorkersReload()
+	}
+
 	return nil
 }
 
+// awaitWorkersReload checks if the number of workers has returned to the expected count
+func (n *NGINXController) awaitWorkersReload() {
+	n.workersReloading = true
+	defer func() { n.workersReloading = false }()
+
+	expectedWorkers := n.store.GetBackendConfiguration().WorkerProcesses
+	var numWorkers string
+	klog.V(3).Infof("waiting for worker count to be equal to %s", expectedWorkers)
+	for numWorkers != expectedWorkers {
+		time.Sleep(time.Second)
+		o, err := exec.Command("/bin/sh", "-c", "pgrep worker | wc -l").Output()
+		if err != nil {
+			klog.ErrorS(err, numWorkers)
+			return
+		}
+		// cleanup any non-printable chars from shell output
+		numWorkers = strings.Map(func(r rune) rune {
+			if unicode.IsPrint(r) {
+				return r
+			}
+			return -1
+		}, string(o))
+
+		klog.V(3).Infof("Currently running nginx worker processes: %s, expected %s", numWorkers, expectedWorkers)
+	}
+}
+
 // nginxHashBucketSize computes the correct NGINX hash_bucket_size for a hash
 // with the given longest key.
 func nginxHashBucketSize(longestString int) int {

diff --git a/internal/ingress/controller/template/configmap.go b/internal/ingress/controller/template/configmap.go
@@ -69,6 +69,7 @@ const (
 	luaSharedDictsKey             = "lua-shared-dicts"
 	plugins                       = "plugins"
 	debugConnections              = "debug-connections"
+	workerSerialReloads           = "enable-serial-reloads"
 )
 
 var (
@@ -404,6 +405,17 @@ func ReadConfig(src map[string]string) config.Configuration {
 		delete(conf, workerProcesses)
 	}
 
+	if val, ok := conf[workerSerialReloads]; ok {
+		boolVal, err := strconv.ParseBool(val)
+		if err != nil {
+			to.WorkerSerialReloads = false
+			klog.Warningf("failed to parse enable-serial-reloads setting, valid values are true or false, found %s", val)
+		} else {
+			to.WorkerSerialReloads = boolVal
+		}
+		delete(conf, workerSerialReloads)
+	}
+
 	if val, ok := conf[plugins]; ok {
 		to.Plugins = splitAndTrimSpace(val, ",")
 		delete(conf, plugins)