From 1d8055a505ca5ee56174f836baf54458d0acd93c Mon Sep 17 00:00:00 2001 From: Ciara Stacke Date: Fri, 12 Aug 2022 13:21:42 +0100 Subject: [PATCH 1/5] Extend batch reloads to cover multiple items in the queue during runtime --- internal/configs/configurator.go | 5 + internal/k8s/controller.go | 20 ++ tests/data/smoke/smoke-batch-ingress.yaml | 202 ++++++++++++++++++++ tests/suite/test_app_protect_integration.py | 1 + tests/suite/test_smoke.py | 28 ++- tests/suite/yaml_utils.py | 11 ++ 6 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 tests/data/smoke/smoke-batch-ingress.yaml diff --git a/internal/configs/configurator.go b/internal/configs/configurator.go index d33c6627d3..8ce08941a1 100644 --- a/internal/configs/configurator.go +++ b/internal/configs/configurator.go @@ -1039,6 +1039,11 @@ func (cnf *Configurator) EnableReloads() { cnf.isReloadsEnabled = true } +// DisableReloads disables NGINX reloads meaning that configuration changes will not be followed by a reload. +func (cnf *Configurator) DisableReloads() { + cnf.isReloadsEnabled = false +} + func (cnf *Configurator) reload(isEndpointsUpdate bool) error { if !cnf.isReloadsEnabled { return nil diff --git a/internal/k8s/controller.go b/internal/k8s/controller.go index f46cb523c5..f8a1dc533d 100644 --- a/internal/k8s/controller.go +++ b/internal/k8s/controller.go @@ -166,6 +166,7 @@ type LoadBalancerController struct { configMap *api_v1.ConfigMap certManagerController *cm_controller.CmController externalDNSController *ed_controller.ExtDNSController + batchSyncEnabled bool } var keyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc @@ -764,6 +765,11 @@ func (lbc *LoadBalancerController) syncConfigMap(task task) { return } + if lbc.batchSyncEnabled { + glog.V(3).Infof("Skipping ConfigMap update because batch sync is on") + return + } + lbc.updateAllConfigs() } @@ -836,6 +842,12 @@ func (lbc *LoadBalancerController) preSyncSecrets() { } func (lbc *LoadBalancerController) sync(task task) { + if lbc.isNginxReady && lbc.syncQueue.Len() > 1 && !lbc.batchSyncEnabled { + lbc.configurator.DisableReloads() + lbc.batchSyncEnabled = true + + glog.V(3).Infof("Batch processing %v items", lbc.syncQueue.Len()) + } glog.V(3).Infof("Syncing %v", task.Key) if lbc.spiffeCertFetcher != nil { lbc.syncLock.Lock() @@ -892,6 +904,14 @@ func (lbc *LoadBalancerController) sync(task task) { lbc.isNginxReady = true glog.V(3).Infof("NGINX is ready") } + + if lbc.batchSyncEnabled && lbc.syncQueue.Len() == 0 { + lbc.batchSyncEnabled = false + lbc.configurator.EnableReloads() + lbc.updateAllConfigs() + + glog.V(3).Infof("Batch sync completed") + } } func (lbc *LoadBalancerController) syncIngressLink(task task) { diff --git a/tests/data/smoke/smoke-batch-ingress.yaml b/tests/data/smoke/smoke-batch-ingress.yaml new file mode 100644 index 0000000000..1abbd06adc --- /dev/null +++ b/tests/data/smoke/smoke-batch-ingress.yaml @@ -0,0 +1,202 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress1 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch1.example.com + secretName: smoke-secret + rules: + - host: smoke-batch1.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress2 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch2.example.com + secretName: smoke-secret + rules: + - host: smoke-batch2.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress3 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch3.example.com + secretName: smoke-secret + rules: + - host: smoke-batch3.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress4 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch4.example.com + secretName: smoke-secret + rules: + - host: smoke-batch4.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress5 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch5.example.com + secretName: smoke-secret + rules: + - host: smoke-batch5.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress6 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch6.example.com + secretName: smoke-secret + rules: + - host: smoke-batch6.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: smoke-batch-ingress7 +spec: + ingressClassName: nginx + tls: + - hosts: + - smoke-batch7.example.com + secretName: smoke-secret + rules: + - host: smoke-batch7.example.com + http: + paths: + - path: /backend2 + pathType: Prefix + backend: + service: + name: backend2-svc + port: + number: 80 + - path: /backend1 + pathType: Prefix + backend: + service: + name: backend1-svc + port: + number: 80 diff --git a/tests/suite/test_app_protect_integration.py b/tests/suite/test_app_protect_integration.py index a0b3457de8..abc05f2b0a 100644 --- a/tests/suite/test_app_protect_integration.py +++ b/tests/suite/test_app_protect_integration.py @@ -228,6 +228,7 @@ def test_ap_enable_false_policy_correct( ap_crd_info = read_ap_custom_resource(kube_apis.custom_objects, test_namespace, "appolicies", ap_policy) assert_ap_crd_info(ap_crd_info, ap_policy) ensure_response_from_backend(appprotect_setup.req_url, ingress_host, check404=True) + wait_before_test(5) print("----------------------- Send request ----------------------") response = requests.get(appprotect_setup.req_url + "/", + headers={"host": virtual_server_setup.vs_host}, + ) + print(response1.status_code) + + print("----------------------- Send request with blocked keyword in UDS----------------------") + response2 = requests.get( + virtual_server_setup.backend_1_url, + headers={"host": virtual_server_setup.vs_host}, + data="kic", + ) + + total_vs = int(request.config.getoption("--batch-resources")) + count_before = get_reload_count(virtual_server_setup.metrics_url) + print(response2.status_code) + with open(waf_spec_vs_src) as f: + doc = yaml.safe_load(f) + with tempfile.NamedTemporaryFile(mode="w+", suffix=".yml", delete=False) as temp: + for i in range(1, total_vs + 1): + doc["metadata"]["name"] = f"virtual-server-{i}" + doc["spec"]["host"] = f"virtual-server-{i}.example.com" + temp.write(yaml.safe_dump(doc) + "---\n") + create_custom_items_from_yaml(kube_apis.custom_objects, temp.name, test_namespace) + os.remove(temp.name) + + print(f"Total resources deployed is {total_vs}") + wait_before_test(5) + count_after = get_reload_count(virtual_server_setup.metrics_url) + new_reloads = count_after - count_before + assert get_last_reload_status(virtual_server_setup.metrics_url, "nginx") == "1" and new_reloads <= int( + request.config.getoption("--batch-reload-number") + ) + reload_ms = get_last_reload_time(virtual_server_setup.metrics_url, "nginx") + print(f"last reload duration: {reload_ms} ms") + + for i in range(1, total_vs + 1): + delete_virtual_server(kube_apis.custom_objects, f"virtual-server-{i}", test_namespace) + delete_policy(kube_apis.custom_objects, "waf-policy", test_namespace) + + +############################################################################################################## + + +@pytest.mark.batch_start +@pytest.mark.parametrize( + "crd_ingress_controller", + [ + pytest.param( + { + "type": "complete", + "extra_args": [ + "-enable-custom-resources", + "-enable-prometheus-metrics", + "-enable-leader-election=false", + ], + }, + ) + ], + indirect=True, +) +class TestSingleVSMultipleVSRs: + def test_startup_time( + self, + request, + kube_apis, + test_namespace, + ingress_controller_prerequisites, + crd_ingress_controller, + ingress_controller_endpoint, + ): + """ + Pod startup time with 1 VS and multiple VSRs. + """ + total_vsr = int(request.config.getoption("--batch-resources")) + + vsr_source = f"{TEST_DATA}/startup/virtual-server-routes/route.yaml" + + with open(vsr_source) as f: + vsr = yaml.safe_load(f) + with tempfile.NamedTemporaryFile(mode="w+", suffix=".yml", delete=False) as temp: + for i in range(1, total_vsr + 1): + vsr["metadata"]["name"] = f"route-{i}" + vsr["spec"]["subroutes"][0]["path"] = f"/route-{i}" + temp.write(yaml.safe_dump(vsr) + "---\n") + create_custom_items_from_yaml(kube_apis.custom_objects, temp.name, test_namespace) + os.remove(temp.name) + + vs_source = f"{TEST_DATA}/startup/virtual-server-routes/virtual-server.yaml" + + with open(vs_source) as f: + vs = yaml.safe_load(f) + + routes = [] + for i in range(1, total_vsr + 1): + route = {"path": f"/route-{i}", "route": f"route-{i}"} + routes.append(route) + + vs["spec"]["routes"] = routes + create_virtual_server(kube_apis.custom_objects, vs, test_namespace) + + metrics_url = ( + f"http://{ingress_controller_endpoint.public_ip}:{ingress_controller_endpoint.metrics_port}/metrics" + ) + count_before = get_reload_count(metrics_url) + wait_before_test(5) + count_after = get_reload_count(metrics_url) + new_reloads = count_after - count_before + + assert get_last_reload_status(metrics_url, "nginx") == "1" and new_reloads <= int( + request.config.getoption("--batch-reload-number") + ) + reload_ms = get_last_reload_time(metrics_url, "nginx") + print(f"last reload duration: {reload_ms} ms") diff --git a/tests/suite/test_batch_startup_times.py b/tests/suite/test_batch_startup_times.py index 0cb107bea1..a9945bb4ee 100644 --- a/tests/suite/test_batch_startup_times.py +++ b/tests/suite/test_batch_startup_times.py @@ -267,6 +267,7 @@ def test_ap_ingress_batch_start( print(f"Number of replicas not 0, retrying...") wait_before_test() num = scale_deployment(kube_apis.v1, kube_apis.apps_v1_api, "nginx-ingress", ic_ns, 1) + wait_before_test(30) assert ( get_total_ingresses(ap_ingress_setup.metrics_url, "nginx") == str(total_ing + 1) diff --git a/tests/suite/test_smoke.py b/tests/suite/test_smoke.py index ee02ca3363..07a59321ad 100644 --- a/tests/suite/test_smoke.py +++ b/tests/suite/test_smoke.py @@ -1,7 +1,9 @@ -import json +import os +import tempfile import pytest import requests +import yaml from settings import TEST_DATA from suite.fixtures import PublicEndpoint from suite.resources_utils import ( @@ -22,7 +24,7 @@ wait_until_all_pods_are_ready, write_to_json, ) -from suite.yaml_utils import get_first_ingress_host_from_yaml, get_num_resources_from_yaml +from suite.yaml_utils import get_first_ingress_host_from_yaml paths = ["backend1", "backend2"] reload_times = {} @@ -127,21 +129,30 @@ def test_reload_count_after_start(self, kube_apis, smoke_setup, ingress_controll ], indirect=True, ) - def test_batch_create_reload_count( - self, kube_apis, smoke_setup, ingress_controller_prerequisites, test_namespace - ): - metrics_url = f"http://{smoke_setup.public_endpoint.public_ip}:{smoke_setup.public_endpoint.metrics_port}/metrics" + def test_batch_create_reload_count(self, kube_apis, smoke_setup, ingress_controller_prerequisites, test_namespace): + metrics_url = ( + f"http://{smoke_setup.public_endpoint.public_ip}:{smoke_setup.public_endpoint.metrics_port}/metrics" + ) count_before = get_reload_count(metrics_url) + num_res = 10 + manifest = f"{TEST_DATA}/smoke/standard/smoke-ingress.yaml" + with open(manifest) as f: + doc = yaml.safe_load(f) + with tempfile.NamedTemporaryFile(mode="w+", suffix=".yml", delete=False) as temp: + for i in range(1, num_res + 1): + doc["metadata"]["name"] = f"smoke-ingress-{i}" + doc["spec"]["rules"][0]["host"] = f"smoke-{i}.example.com" + temp.write(yaml.safe_dump(doc) + "---\n") + create_items_from_yaml(kube_apis, temp.name, test_namespace) - create_items_from_yaml(kube_apis, f"{TEST_DATA}/smoke/smoke-batch-ingress.yaml", test_namespace) wait_before_test(5) - num_res = get_num_resources_from_yaml(f"{TEST_DATA}/smoke/smoke-batch-ingress.yaml") count_after = get_reload_count(metrics_url) new_reloads = count_after - count_before print(f"Counted {new_reloads} reloads for {num_res} new config objects") - delete_items_from_yaml(kube_apis, f"{TEST_DATA}/smoke/smoke-batch-ingress.yaml", test_namespace) + delete_items_from_yaml(kube_apis, temp.name, test_namespace) + os.remove(temp.name) - assert new_reloads == 2 + assert new_reloads <= (int(num_res / 2) + 1) diff --git a/tests/suite/vs_vsr_resources_utils.py b/tests/suite/vs_vsr_resources_utils.py index cda34430b0..8fe698ec0e 100644 --- a/tests/suite/vs_vsr_resources_utils.py +++ b/tests/suite/vs_vsr_resources_utils.py @@ -39,6 +39,31 @@ def create_virtual_server_from_yaml(custom_objects: CustomObjectsApi, yaml_manif return create_virtual_server(custom_objects, dep, namespace) +def create_custom_items_from_yaml(custom_objects, yaml_manifest, namespace) -> {}: + """ + Apply yaml manifest with multiple items. + + :param kube_apis: KubeApis + :param yaml_manifest: an absolute path to a file + :param namespace: + :return: + """ + res = {} + print("Load yaml:") + with open(yaml_manifest) as f: + docs = yaml.safe_load_all(f) + try: + for doc in docs: + if doc["kind"] == "VirtualServer": + res["VirtualServer"] = create_virtual_server(custom_objects, doc, namespace) + elif doc["kind"] == "VirtualServerRoute": + res["VirtualServerRoute"] = create_v_s_route(custom_objects, doc, namespace) + except Exception: + pass + + return res + + def create_virtual_server(custom_objects: CustomObjectsApi, vs, namespace) -> str: """ Create a VirtualServer. diff --git a/tests/suite/yaml_utils.py b/tests/suite/yaml_utils.py index 5b0e8d4d9e..c7c8c43d62 100644 --- a/tests/suite/yaml_utils.py +++ b/tests/suite/yaml_utils.py @@ -136,14 +136,3 @@ def get_secret_name_from_vs_yaml(file) -> str: for dep in docs: return dep["spec"]["tls"]["secret"] return res - -def get_num_resources_from_yaml(file) -> int: - """ - Parse yaml file and count the number of discrete resources it defines - - :param file: an absolute path to file - :return: int - """ - with open(file) as f: - docs = yaml.safe_load_all(f) - return len(list(docs)) From e96ef81228ec1bbc23b0de0f70b2de4cbd55df79 Mon Sep 17 00:00:00 2001 From: Ciara Stacke Date: Wed, 31 Aug 2022 10:08:23 +0100 Subject: [PATCH 3/5] Update reloads doc --- docs/content/intro/how-nginx-ingress-controller-works.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/content/intro/how-nginx-ingress-controller-works.md b/docs/content/intro/how-nginx-ingress-controller-works.md index 4245422da5..74e8fa94f7 100644 --- a/docs/content/intro/how-nginx-ingress-controller-works.md +++ b/docs/content/intro/how-nginx-ingress-controller-works.md @@ -315,6 +315,7 @@ A reload involves multiple steps: The Ingress Controller reloads NGINX every time the Control Loop processes a change that affects the generated NGINX configuration. In general, every time a resource is changed, the Ingress Controller will regenerate the configuration and reload NGINX. A resource could be of any type the Ingress Controller monitors -- see [The Ingress Controller is a Kubernetes Controller](#the-ingress-controller-is-a-kubernetes-controller) section. -There are two special cases: +There are three special cases: - *Start*. When the Ingress Controller starts, it processes all resources in the cluster and only then reloads NGINX. This avoids creating a "reload storm" by reloading only once. +- *Batch updates*. When the Ingress Controller receives a number of concurrent requests from the Kubernetes API, it will pause NGINX reloads untill the task queue is empty. This reduces the number of reloads to minimize the impact of batch updates and reduce the risk of OOM errors. - *NGINX Plus*. If the Ingress Controller uses NGINX Plus, it will not reload NGINX Plus for changes to the Endpoints resources. In this case, the Ingress Controller will use the NGINX Plus API to update the corresponding upstreams and skip reloading. From ede74f89c18e1fbd5ee9f5247611c176ce35d8ab Mon Sep 17 00:00:00 2001 From: Ciara Stacke Date: Thu, 1 Sep 2022 10:38:53 +0100 Subject: [PATCH 4/5] Minor test updates --- tests/suite/resources_utils.py | 12 ++++-------- tests/suite/test_app_protect_integration.py | 1 + 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/suite/resources_utils.py b/tests/suite/resources_utils.py index a63ec47556..16bda1a5e3 100644 --- a/tests/suite/resources_utils.py +++ b/tests/suite/resources_utils.py @@ -1209,8 +1209,8 @@ def create_items_from_yaml(kube_apis, yaml_manifest, namespace) -> {}: print("Load yaml:") with open(yaml_manifest) as f: docs = yaml.safe_load_all(f) - try: - for doc in docs: + for doc in docs: + if doc: if doc["kind"] == "Secret": res["Secret"] = create_secret(kube_apis.v1, namespace, doc) elif doc["kind"] == "ConfigMap": @@ -1225,8 +1225,6 @@ def create_items_from_yaml(kube_apis, yaml_manifest, namespace) -> {}: res["DaemonSet"] = create_daemon_set(kube_apis.apps_v1_api, namespace, doc) elif doc["kind"] == "Namespace": res["Namespace"] = create_namespace(kube_apis.v1, doc) - except Exception: - pass return res @@ -1325,8 +1323,8 @@ def delete_items_from_yaml(kube_apis, yaml_manifest, namespace) -> None: print("Load yaml:") with open(yaml_manifest) as f: docs = yaml.safe_load_all(f) - try: - for doc in docs: + for doc in docs: + if doc: if doc["kind"] == "Namespace": delete_namespace(kube_apis.v1, doc["metadata"]["name"]) elif doc["kind"] == "Secret": @@ -1341,8 +1339,6 @@ def delete_items_from_yaml(kube_apis, yaml_manifest, namespace) -> None: delete_daemon_set(kube_apis.apps_v1_api, doc["metadata"]["name"], namespace) elif doc["kind"] == "ConfigMap": delete_configmap(kube_apis.v1, doc["metadata"]["name"], namespace) - except Exception: - pass def ensure_connection(request_url, expected_code=404, headers={}) -> None: diff --git a/tests/suite/test_app_protect_integration.py b/tests/suite/test_app_protect_integration.py index abc05f2b0a..71aa754e95 100644 --- a/tests/suite/test_app_protect_integration.py +++ b/tests/suite/test_app_protect_integration.py @@ -285,6 +285,7 @@ def test_ap_enable_false_policy_incorrect( ensure_response_from_backend(appprotect_setup.req_url, ingress_host, check404=True) print("----------------------- Send request ----------------------") + wait_before_test(5) response = requests.get(appprotect_setup.req_url + "/