From d5d0d3aa875890a3b21da18b9fcb2e1f5b95b40d Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 31 Jul 2023 12:51:53 +0200 Subject: [PATCH] cnf-tests: dynamically created KubeletConfig Make NUMA/SR-IOV integration tests creating their own KubeletConfig that sets single-numa-node policy and reserve an enitre NUMA node to system. Add functions to manipulate `node-role.kubernetes.io/x` labels on node to apply the performacen profile to arbitrary nodes. Signed-off-by: Andrea Panattoni --- .../e2esuite/dpdk/numa_node_sriov.go | 54 +++++-- .../machineconfigpool/machineconfigpool.go | 144 +++++++++++++++++- cnf-tests/testsuites/pkg/nodes/nodes.go | 57 +++++++ 3 files changed, 242 insertions(+), 13 deletions(-) diff --git a/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go b/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go index b340533f3a..4d9bba4e0c 100644 --- a/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go +++ b/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go @@ -2,6 +2,7 @@ package dpdk import ( "context" + "encoding/json" "fmt" "path/filepath" "strings" @@ -14,17 +15,24 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/client" "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/discovery" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/machineconfigpool" "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/namespaces" "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/networks" - utilNodes "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/nodes" - "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/performanceprofile" + utilnodes "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/nodes" "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/pods" + "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components" "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" + + mcv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" + kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" ) var _ = Describe("[sriov] NUMA node alignment", Ordered, func() { @@ -39,18 +47,12 @@ var _ = Describe("[sriov] NUMA node alignment", Ordered, func() { Skip("Discovery mode not supported") } - isSNO, err := utilNodes.IsSingleNodeCluster() + isSNO, err := utilnodes.IsSingleNodeCluster() Expect(err).ToNot(HaveOccurred()) if isSNO { Skip("Single Node openshift not yet supported") } - perfProfile, err := performanceprofile.FindDefaultPerformanceProfile(performanceProfileName) - Expect(err).ToNot(HaveOccurred()) - if !performanceprofile.IsSingleNUMANode(perfProfile) { - Skip("SR-IOV NUMA test suite expects a performance profile with 'single-numa-node' to be present") - } - err = namespaces.Create(sriovnamespaces.Test, client.Client) Expect(err).ToNot(HaveOccurred()) @@ -118,6 +120,13 @@ var _ = Describe("[sriov] NUMA node alignment", Ordered, func() { By("Waiting for SRIOV devices to get configured") networks.WaitStable(sriovclient) + + cleanupFn, err := machineconfigpool.ApplyKubeletConfigToNode( + testingNode, "test-sriov-numa", makeKubeletConfigWithReservedNUMA0(testingNode)) + Expect(err).ToNot(HaveOccurred()) + DeferCleanup(cleanupFn) + + By("KubeletConfig test-sriov-numa applied to " + testingNode.Name) }) BeforeEach(func() { @@ -401,3 +410,30 @@ func expectPodCPUsAreOnNUMANode(pod *corev1.Pod, expectedCPUsNUMA int) { ExpectWithOffset(1, numaNode).To(Equal(expectedCPUsNUMA)) } + +// makeKubeletConfigWithReservedNUMA0 creates a KubeletConfig.Spec that sets all NUMA0 CPUs as systemReservedCPUs +// and topology manager to "single-numa-node". +func makeKubeletConfigWithReservedNUMA0(node *corev1.Node) *mcv1.KubeletConfigSpec { + numaToCpu, err := nodes.GetNumaNodes(node) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, len(numaToCpu)). + To(BeNumerically(">=", 2), + fmt.Sprintf("Node %s has only one NUMA node[%v]. At least two expected", node.Name, numaToCpu)) + + kubeletConfig := &kubeletconfigv1beta1.KubeletConfiguration{} + kubeletConfig.CPUManagerPolicy = "static" + kubeletConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 5 * time.Second} + kubeletConfig.TopologyManagerPolicy = kubeletconfigv1beta1.SingleNumaNodeTopologyManagerPolicy + kubeletConfig.ReservedSystemCPUs = components.ListToString(numaToCpu[0]) + + raw, err := json.Marshal(kubeletConfig) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + + ret := &mcv1.KubeletConfigSpec{ + KubeletConfig: &runtime.RawExtension{ + Raw: raw, + }, + } + + return ret +} diff --git a/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go b/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go index 3fc49a9a00..234790a715 100644 --- a/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go +++ b/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go @@ -6,12 +6,15 @@ import ( "time" testclient "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/client" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/nodes" mcov1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" mcoScheme "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned/scheme" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog" ) // WaitForCondition waits until the machine config pool will have specified condition type with the expected status @@ -97,7 +100,9 @@ func FindMCPByMCLabel(mcLabel string) (mcov1.MachineConfigPool, error) { return mcov1.MachineConfigPool{}, fmt.Errorf("cannot find MCP that targets MC with label: %s", mcLabel) } -// WaitForMCPStable waits until the mcp is stable +// WaitForMCPStable waits until the mcp is updating and then waits +// for mcp to be stable again. Former wait is useful to avoid returning +// from this function before the operator is working. func WaitForMCPStable(mcp mcov1.MachineConfigPool) error { err := WaitForCondition( testclient.Client, @@ -110,15 +115,18 @@ func WaitForMCPStable(mcp mcov1.MachineConfigPool) error { return err } + return WaitForMCPUpdated(mcp) +} + +// WaitForMCPUpdated waits for the MCP to be in the updated state. +func WaitForMCPUpdated(mcp mcov1.MachineConfigPool) error { // We need to wait a long time here for the nodes to reboot - err = WaitForCondition( + return WaitForCondition( testclient.Client, &mcp, mcov1.MachineConfigPoolUpdated, corev1.ConditionTrue, time.Duration(30*mcp.Status.MachineCount)*time.Minute) - - return err } // DecodeMCYaml decodes a MachineConfig YAML to a MachineConfig struct @@ -135,3 +143,131 @@ func DecodeMCYaml(mcyaml string) (*mcov1.MachineConfig, error) { return mc, err } + +// ApplyKubeletConfigToNode creates a KubeletConfig, a MachineConfigPool and a +// `node-role.kubernetes.io/` label in order to target a single node in the cluster. +// The role label is applied to the target node after removing any provious `node-role.kubernetes.io/` label, +// as MachineConfigOperator doesn't support multiple roles. +// Return value is a function that can be used to revert the node labeling. +func ApplyKubeletConfigToNode(node *corev1.Node, name string, spec *mcov1.KubeletConfigSpec) (func(), error) { + nilFn := func() {} + + newNodeRole := name + newNodeRoleSelector := map[string]string{ + "node-role.kubernetes.io/" + newNodeRole: "", + } + + mcp := mcov1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "machineconfiguration.openshift.io/role": name, + }, + }, + + Spec: mcov1.MachineConfigPoolSpec{ + MachineConfigSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "machineconfiguration.openshift.io/role", + Operator: metav1.LabelSelectorOpIn, + Values: []string{name, "worker"}, + }}, + }, + Paused: false, + NodeSelector: &metav1.LabelSelector{ + MatchLabels: newNodeRoleSelector, + }, + }, + } + + kubeletConfig := &mcov1.KubeletConfig{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: *spec.DeepCopy(), + } + + // Link the KubeletConfig to the MCP + kubeletConfig.Spec.MachineConfigPoolSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "machineconfiguration.openshift.io/role": name}, + } + + // Create the KubeletConfig + _, err := controllerutil.CreateOrUpdate(context.Background(), testclient.Client, kubeletConfig, func() error { return nil }) + if err != nil { + return nilFn, err + } + klog.Infof("Created KubeletConfig %s", kubeletConfig.Name) + + // Create MCP + _, err = controllerutil.CreateOrUpdate(context.Background(), testclient.Client, &mcp, func() error { return nil }) + if err != nil { + return nilFn, err + } + klog.Infof("Created MachineConfigPool %s", mcp.Name) + + // Following wait ensure the node is rebooted only once, as if we apply the MCP to + // the node before the KubeletConfig has been rendered, the node will reboot twice. + klog.Infof("Waiting for KubeletConfig to be rendered to MCP") + err = waitUntilKubeletConfigHasUpdatedTheMCP(name) + if err != nil { + return nilFn, err + } + + // Move the node role to the new one + previousNodeRole := nodes.FindRoleLabel(node) + if previousNodeRole != "" { + err = nodes.RemoveRoleFrom(node.Name, previousNodeRole) + if err != nil { + return nilFn, err + } + klog.Infof("Removed role[%s] from node %s", previousNodeRole, node.Name) + } + + err = nodes.AddRoleTo(node.Name, newNodeRole) + if err != nil { + return func() { + nodes.AddRoleTo(node.Name, previousNodeRole) + klog.Infof("Restored role[%s] on node %s", previousNodeRole, node.Name) + }, err + } + klog.Infof("Added role[%s] to node %s", newNodeRole, node.Name) + + err = WaitForMCPStable(mcp) + if err != nil { + return func() { + nodes.RemoveRoleFrom(node.Name, newNodeRole) + nodes.AddRoleTo(node.Name, previousNodeRole) + + klog.Infof("Moved back node role from [%s] to [%s] on %s", newNodeRole, previousNodeRole, node.Name) + }, err + } + + return func() { + nodes.RemoveRoleFrom(node.Name, newNodeRole) + nodes.AddRoleTo(node.Name, previousNodeRole) + klog.Infof("Moved back node role from [%s] to [%s] on %s", newNodeRole, previousNodeRole, node.Name) + + WaitForMCPStable(mcp) + }, nil +} + +func waitUntilKubeletConfigHasUpdatedTheMCP(name string) error { + return wait.Poll(10*time.Second, 3*time.Minute, func() (bool, error) { + + mcp, err := testclient.Client.MachineConfigPools().Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + klog.Warningf("Error while waiting for MachineConfigPool[%s] to be updated: %v", name, err) + return false, nil + } + + expectedSource := fmt.Sprintf("99-%s-generated-kubelet", name) + + for _, source := range mcp.Spec.Configuration.Source { + if source.Name == expectedSource { + return true, nil + } + } + + return false, nil + }) +} diff --git a/cnf-tests/testsuites/pkg/nodes/nodes.go b/cnf-tests/testsuites/pkg/nodes/nodes.go index dee301f1a2..38f0538ab1 100644 --- a/cnf-tests/testsuites/pkg/nodes/nodes.go +++ b/cnf-tests/testsuites/pkg/nodes/nodes.go @@ -2,6 +2,7 @@ package nodes import ( "context" + "encoding/json" "errors" "fmt" "os" @@ -17,6 +18,7 @@ import ( ptpv1 "github.com/openshift/ptp-operator/api/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/exec" kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" ) @@ -391,3 +393,58 @@ func IsSingleNodeCluster() (bool, error) { } return len(nodes.Items) == 1, nil } + +// FindRoleLabel loops over node labels and return the first with key like +// "node-role.kubernetest.io/*", except "node-role.kubernetest.io/worker". +// +// Consider that a node is suppose to have only one "custom role" (role != "worker"). If a node +// has two or more custom roles, MachineConfigOperato stops managing that node. +func FindRoleLabel(node *corev1.Node) string { + for label := range node.Labels { + if !strings.HasPrefix(label, "node-role.kubernetes.io/") { + continue + } + + if label == "node-role.kubernetes.io/worker" { + continue + } + + return strings.TrimPrefix(label, "node-role.kubernetes.io/") + } + + return "" +} + +// AddRoleTo adds the "node-role.kubernetes.io/" to the given node +func AddRoleTo(nodeName, role string) error { + return setLabel(nodeName, "node-role.kubernetes.io/"+role, "") +} + +// RemoveRoleFrom removes the "node-role.kubernetes.io/" from the given node +func RemoveRoleFrom(nodeName, role string) error { + return setLabel(nodeName, "node-role.kubernetes.io/"+role, nil) +} + +func setLabel(nodeName, label string, value any) error { + patch := struct { + Metadata map[string]any `json:"metadata"` + }{ + Metadata: map[string]any{ + "labels": map[string]any{ + label: value, + }, + }, + } + + patchData, err := json.Marshal(&patch) + if err != nil { + return fmt.Errorf("can't marshal patch data[%v] to label node[%s]: %w", patch, nodeName, err) + } + + _, err = client.Client.Nodes().Patch(context.Background(), nodeName, types.MergePatchType, patchData, metav1.PatchOptions{}) + if err != nil { + return fmt.Errorf("can't patch labels[%s] of node[%s]: %w", string(patchData), nodeName, err) + } + + return nil +}