From f4e327443bef486aa391aac99b0b41e906446e9a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 7 Sep 2023 08:59:24 -0500 Subject: [PATCH 1/2] ovnkube: set northd backoff-interval to save CPU northd has an option to sleep for a short amount of time after processing changes from NB/SB that allows it to trade off a bit of latency for a lot of CPU savings. Since events from NB come frequently during scale tests northd doesn't have a lot of time to sleep. Until we have more incremental processing, most of that CPU time is burned just recalculating things that haven't changed, so it's mostly wasted. Letting northd sleep has been shown in density-light and density-cni 120 node scale tests to have almost no adverse effect on P99 PodReady times, but a huge improvement in CPU utilization. --- .../managed/multi-zone-interconnect/ovnkube-node.yaml | 5 +++++ .../managed/single-zone-interconnect/ovnkube-master.yaml | 5 +++++ .../self-hosted/multi-zone-interconnect/ovnkube-node.yaml | 5 +++++ .../self-hosted/single-zone-interconnect/ovnkube-master.yaml | 5 +++++ pkg/network/ovn_kubernetes.go | 2 ++ 5 files changed, 22 insertions(+) diff --git a/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml index 36bf8c964f..fbdff1bb2d 100644 --- a/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml +++ b/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml @@ -506,6 +506,11 @@ spec: if ! retry 20 "ipsec" "${OVN_NB_CTL} set nb_global . ipsec=${ipsec}"; then exit 1 fi + + # Tell northd to sleep a bit so it takes less CPU + if ! retry 20 "northd-backoff" "${OVN_NB_CTL} set nb_global . 
options:northd-backoff-interval-ms={{.OVN_NORTHD_BACKOFF_MS}}"; then + exit 1 + fi preStop: exec: command: diff --git a/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml b/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml index a4da2eea91..d47e9e9805 100644 --- a/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml +++ b/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml @@ -462,6 +462,11 @@ spec: if ! retry 20 "ipsec" "${OVN_NB_CTL} set nb_global . ipsec=${ipsec}"; then exit 1 fi + + # Tell northd to sleep a bit so it takes less CPU + if ! retry 20 "northd-backoff" "${OVN_NB_CTL} set nb_global . options:northd-backoff-interval-ms={{.OVN_NORTHD_BACKOFF_MS}}"; then + exit 1 + fi preStop: exec: command: diff --git a/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml index 7643f1b5d0..e2bb10191e 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml @@ -516,6 +516,11 @@ spec: if ! retry 20 "ipsec" "${OVN_NB_CTL} set nb_global . ipsec=${ipsec} options:ipsec_encapsulation=${ipsec_encapsulation}"; then exit 1 fi + + # Tell northd to sleep a bit so it takes less CPU + if ! retry 20 "northd-backoff" "${OVN_NB_CTL} set nb_global . 
options:northd-backoff-interval-ms={{.OVN_NORTHD_BACKOFF_MS}}"; then + exit 1 + fi preStop: exec: command: diff --git a/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml index 06318d4f0f..44acd28acf 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml @@ -408,6 +408,11 @@ spec: if ! retry 20 "ipsec" "${OVN_NB_CTL} set nb_global . ipsec=${ipsec} options:ipsec_encapsulation=${ipsec_encapsulation}"; then exit 1 fi + + # Tell northd to sleep a bit so it takes less CPU + if ! retry 20 "northd-backoff" "${OVN_NB_CTL} set nb_global . options:northd-backoff-interval-ms={{.OVN_NORTHD_BACKOFF_MS}}"; then + exit 1 + fi preStop: exec: command: diff --git a/pkg/network/ovn_kubernetes.go b/pkg/network/ovn_kubernetes.go index f0b07fae19..5d433edc8e 100644 --- a/pkg/network/ovn_kubernetes.go +++ b/pkg/network/ovn_kubernetes.go @@ -191,6 +191,8 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo nb_inactivity_probe = "60000" klog.Infof("OVN_NB_INACTIVITY_PROBE env var is not defined. Using: %s", nb_inactivity_probe) } + // Tell northd to sleep a bit to save CPU + data.Data["OVN_NORTHD_BACKOFF_MS"] = "300" // Hypershift data.Data["ManagementClusterName"] = names.ManagementClusterName From 703ccb516dc5eef5a5d108f2458e31c54f258428 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 7 Sep 2023 15:11:00 -0500 Subject: [PATCH 2/2] ovnkube: drop northd to single-threading rather than 4 threads Northd threading parallelizes the logical flow (lflow) building part of the northd processing loop. While this speeds up northd processing it does have a slight CPU cost (~20%) to map/reduce the work. 
Threading improved latency when northd processed large numbers of logical flows in centralized OVN clusters. With IC each northd only handles a single node in the cluster and thus processes fewer lflows. Scale testing indicates that the threading tradeoff is no longer worth it; we achieve the same P99 PodReadyLatency across multiple scenarios with 1 or 4 threads. We might as well save the CPU if there is no longer any latency benefit. --- pkg/network/ovn_kubernetes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/network/ovn_kubernetes.go b/pkg/network/ovn_kubernetes.go index 5d433edc8e..dd513483d6 100644 --- a/pkg/network/ovn_kubernetes.go +++ b/pkg/network/ovn_kubernetes.go @@ -391,7 +391,7 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo // Less resource constrained clusters can use multiple threads // in northd to improve network operation latency at the cost // of a bit of CPU. - data.Data["NorthdThreads"] = 4 + data.Data["NorthdThreads"] = 1 } data.Data["OVN_MULTI_NETWORK_ENABLE"] = true