diff --git a/cnf-tests/TESTLIST.md b/cnf-tests/TESTLIST.md index e88b4e1a50..8155f8962d 100644 --- a/cnf-tests/TESTLIST.md +++ b/cnf-tests/TESTLIST.md @@ -80,6 +80,13 @@ The cnf tests instrument each different feature required by CNF. Following, a de | Test Name | Description | | -- | ----------- | | [sriov] Bond CNI integration bond cni over sriov pod with sysctl's on bond over sriov interfaces should start | Verfies a pod with bond over sriov interfaces starts correctly | +| [sriov] NUMA node alignment Utilize all available VFs then create a pod with guaranteed CPU and excludeTopology set to True | Verifies all Virtual Functions of an SriovNetworkNodePolicy with excludeTopology set to true can be used by pods | +| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod | Verifies that excludeTopology set to false still enforces that every resource is on the same NUMA node | +| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and each interface is in the different NUMA as the pod | Verifies that excludeTopology set to false still enforces NUMA alignment when multiple interfaces are located on a different NUMA node | +| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a different NUMA node than the pod | Verifies that excludeTopology set to true allows an SRIOV resource to be on a different NUMA node than the pod | +| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a same NUMA node than the pod | Verifies that excludeTopology set to true still works when every resource is on the same NUMA node as the pod | +| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and multiple SRIOV interfaces located in different NUMA nodes than the pod | Verifies that excludeTopology set to true allows multiple interfaces to be on a different NUMA node than the pod | +| [sriov] NUMA node alignment Validate the creation of a pod with two sriovnetworknodepolicies one with excludeTopology False and the second true each interface is in different NUMA as the pod | Verifies that combining two policies, one with excludeTopology set to true and one set to false, still enforces NUMA alignment when all resources are on a different NUMA node | | [sriov] SCTP integration Test Connectivity Connectivity between client and server Should work over a SR-IOV device | SCTP connectivity test over SR-IOV vfs. 
| | [sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's on bond over sriov interfaces should start | Pod with tuning-cni on bond over sriov should start | | [sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's over sriov interface should start | Pod with tuning-cni over sriov should start | diff --git a/cnf-tests/docgen/e2e.json b/cnf-tests/docgen/e2e.json index 2f74025563..57cce88a9b 100755 --- a/cnf-tests/docgen/e2e.json +++ b/cnf-tests/docgen/e2e.json @@ -148,6 +148,13 @@ "[sctp] Test Connectivity Connectivity between client and server connect a client pod to a server pod via Service Node Port Custom namespace": "Pod to pod connectivity via service nodeport, custom namespace", "[sctp] Test Connectivity Connectivity between client and server connect a client pod to a server pod via Service Node Port Default namespace": "Pod to pod connectivity via service nodeport, default namespace", "[sriov] Bond CNI integration bond cni over sriov pod with sysctl's on bond over sriov interfaces should start": "Verfies a pod with bond over sriov interfaces starts correctly", + "[sriov] NUMA node alignment Utilize all available VFs then create a pod with guaranteed CPU and excludeTopology set to True": "Verifies all Virtual Functions of an SriovNetworkNodePolicy with excludeTopology set to true can be used by pods", + "[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod": "Verifies that excludeTopology set to false still enforces that every resource is on the same NUMA node", + "[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and each interface is in the different NUMA as the pod": "Verifies that excludeTopology set to false still enforces NUMA alignment when multiple interfaces are located on a different NUMA node", + "[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a different NUMA node than the pod": "Verifies that excludeTopology set to true allows an SRIOV resource to be on a different NUMA node than the pod", + "[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a same NUMA node than the pod": "Verifies that excludeTopology set to true still works when every resource is on the same NUMA node as the pod", + "[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and multiple SRIOV interfaces located in different NUMA nodes than the pod": "Verifies that excludeTopology set to true allows multiple interfaces to be on a different NUMA node than the pod", + "[sriov] NUMA node alignment Validate the creation of a pod with two sriovnetworknodepolicies one with excludeTopology False and the second true each interface is in different NUMA as the pod": "Verifies that combining two policies, one with excludeTopology set to true and one set to false, still enforces NUMA alignment when all resources are on a different NUMA node", "[sriov] SCTP integration Test Connectivity Connectivity between client and server Should work over a SR-IOV device": "SCTP connectivity test over SR-IOV vfs.", "[sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's on bond over sriov interfaces should start": "Pod with tuning-cni on bond over sriov should start", "[sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's over sriov interface should start": "Pod with tuning-cni over sriov should start", diff --git a/cnf-tests/testsuites/e2esuite/dpdk/dpdk.go 
b/cnf-tests/testsuites/e2esuite/dpdk/dpdk.go index 96702e2aa8..f16dfdefbc 100644 --- a/cnf-tests/testsuites/e2esuite/dpdk/dpdk.go +++ b/cnf-tests/testsuites/e2esuite/dpdk/dpdk.go @@ -911,7 +911,7 @@ func findNUMAForCPUs(pod *corev1.Pod, cpuList []string) (int, error) { } if !findCPUOnSameNuma { - return numaNode, fmt.Errorf("not all the cpus are in the same numa node") + return numaNode, fmt.Errorf("not all the cpus are in the same numa node. cpuList[%v] lscpu[%s]", cpuList, buff.String()) } return numaNode, nil diff --git a/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go b/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go new file mode 100644 index 0000000000..4d9bba4e0c --- /dev/null +++ b/cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go @@ -0,0 +1,439 @@ +package dpdk + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "strings" + "time" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + sriovcluster "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" + sriovnamespaces "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + sriovnetwork "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/client" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/discovery" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/machineconfigpool" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/namespaces" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/networks" + utilnodes "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/nodes" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/pods" + "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" + + mcv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" + kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" +) + +var _ = Describe("[sriov] NUMA node alignment", Ordered, func() { + + var ( + numa0DeviceList []*sriovv1.InterfaceExt + numa1DeviceList []*sriovv1.InterfaceExt + ) + + BeforeAll(func() { + if discovery.Enabled() { + Skip("Discovery mode not supported") + } + + isSNO, err := utilnodes.IsSingleNodeCluster() + Expect(err).ToNot(HaveOccurred()) + if isSNO { + Skip("Single Node openshift not yet supported") + } + + err = namespaces.Create(sriovnamespaces.Test, client.Client) + Expect(err).ToNot(HaveOccurred()) + + By("Clean SRIOV policies and networks") + networks.CleanSriov(sriovclient) + + By("Discover SRIOV devices") + sriovCapableNodes, err := sriovcluster.DiscoverSriov(sriovclient, namespaces.SRIOVOperator) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovCapableNodes.Nodes)).To(BeNumerically(">", 0)) + testingNode, err := nodes.GetByName(sriovCapableNodes.Nodes[0]) + Expect(err).ToNot(HaveOccurred()) + By("Using node " + testingNode.Name) + + sriovDevices, err := 
sriovCapableNodes.FindSriovDevices(testingNode.Name) + Expect(err).ToNot(HaveOccurred()) + + numa0DeviceList = findDevicesOnNUMANode(testingNode, sriovDevices, "0") + Expect(len(numa0DeviceList)).To(BeNumerically(">=", 1)) + By("Using NUMA0 device1 " + numa0DeviceList[0].Name) + + numa1DeviceList = findDevicesOnNUMANode(testingNode, sriovDevices, "1") + Expect(len(numa1DeviceList)).To(BeNumerically(">=", 1)) + By("Using NUMA1 device1 " + numa1DeviceList[0].Name) + + // SriovNetworkNodePolicy + // NUMA node0 device1 excludeTopology = false + // NUMA node0 device1 excludeTopology = true + // NUMA node0 device2 excludeTopology = false + // NUMA node0 device2 excludeTopology = true + // NUMA node1 device3 excludeTopology = false + // NUMA node1 device3 excludeTopology = true + + By("Create SRIOV policies and networks") + + ipam := `{ "type": "host-local", "subnet": "192.0.2.0/24" }` + + createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[0], "#0-3", + "test-numa-0-nic1-exclude-topology-false-", testingNode.Name, + "testNuma0NIC1ExcludeTopoplogyFalse", ipam, false) + + createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[0], "#4-7", + "test-numa-0-nic1-exclude-topology-true-", testingNode.Name, + "testNuma0NIC1ExcludeTopoplogyTrue", ipam, true) + + if len(numa0DeviceList) > 1 { + By("Using NUMA0 device2 " + numa0DeviceList[1].Name) + + createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[1], "#0-3", + "test-numa-0-nic2-exclude-topology-false-", testingNode.Name, + "testNuma1NIC2ExcludeTopoplogyFalse", ipam, false) + + createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[1], "#4-7", + "test-numa-0-nic2-exclude-topology-true-", testingNode.Name, + "testNuma1NIC2ExcludeTopoplogyTrue", ipam, true) + } + + createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1DeviceList[0], "#0-3", + "test-numa-1-nic1-exclude-topology-false-", testingNode.Name, + "testNuma1NIC1ExcludeTopoplogyFalse", ipam, false) + + createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1DeviceList[0], "#4-7", + "test-numa-1-nic1-exclude-topology-true-", testingNode.Name, + "testNuma1NIC1ExcludeTopoplogyTrue", ipam, true) + + By("Waiting for SRIOV devices to get configured") + networks.WaitStable(sriovclient) + + cleanupFn, err := machineconfigpool.ApplyKubeletConfigToNode( + testingNode, "test-sriov-numa", makeKubeletConfigWithReservedNUMA0(testingNode)) + Expect(err).ToNot(HaveOccurred()) + DeferCleanup(cleanupFn) + + By("KubeletConfig test-sriov-numa applied to " + testingNode.Name) + }) + + BeforeEach(func() { + By("Clean any pods in " + sriovnamespaces.Test + " namespace") + namespaces.CleanPods(sriovnamespaces.Test, sriovclient) + }) + + It("Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod", func() { + pod := pods.DefinePod(sriovnamespaces.Test) + pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi") + pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-false-network") + + pod, err := client.Client.Pods(sriovnamespaces.Test). 
+ Create(context.Background(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodFailed)) + g.Expect(actualPod.Status.Reason).To(Equal("TopologyAffinityError")) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + }) + + It("Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a same NUMA node "+ + "than the pod", func() { + pod := pods.DefinePod(sriovnamespaces.Test) + pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi") + pod = pods.RedefinePodWithNetwork(pod, "test-numa-1-nic1-exclude-topology-true-network") + + pod, err := client.Client.Pods(sriovnamespaces.Test). + Create(context.Background(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning)) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + + By("Validate Pod NUMA Node") + expectPodCPUsAreOnNUMANode(pod, 1) + + By("Create server Pod and run E2E ICMP validation") + validateE2EICMPTraffic(pod, `[{"name": "test-numa-1-nic1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`) + }) + + It("Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a different NUMA node "+ + "than the pod", func() { + pod := pods.DefinePod(sriovnamespaces.Test) + pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi") + pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-true-network") + + pod, err := client.Client.Pods(sriovnamespaces.Test). + Create(context.Background(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning)) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + + By("Validate Pod NUMA Node") + expectPodCPUsAreOnNUMANode(pod, 1) + + By("Create server Pod and run E2E ICMP validation") + validateE2EICMPTraffic(pod, `[{"name": "test-numa-0-nic1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`) + }) + + It("Validate the creation of a pod with two sriovnetworknodepolicies one with excludeTopology False and the "+ + "second true each interface is in different NUMA as the pod", func() { + + if len(numa0DeviceList) < 2 { + testSkip := "There are not enough Interfaces in NUMA Node 0 to complete this test" + Skip(testSkip) + } + + pod := pods.DefinePod(sriovnamespaces.Test) + pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi") + pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-true-network, "+ + "test-numa-0-nic2-exclude-topology-false-network") + + pod, err := client.Client.Pods(sriovnamespaces.Test). 
+ Create(context.Background(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodFailed)) + g.Expect(actualPod.Status.Reason).To(Equal("TopologyAffinityError")) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + }) + + It("Validate the creation of a pod with excludeTopology set to True and multiple SRIOV interfaces located in "+ + "different NUMA nodes than the pod", func() { + + if len(numa0DeviceList) < 2 { + testSkip := "There are not enough Interfaces in NUMA Node 0 to complete this test" + Skip(testSkip) + } + + pod := pods.DefinePod(sriovnamespaces.Test) + pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi") + pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-true-network, "+ + "test-numa-0-nic2-exclude-topology-true-network") + + pod, err := client.Client.Pods(sriovnamespaces.Test). + Create(context.Background(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning)) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + + By("Validate Pod NUMA Node") + expectPodCPUsAreOnNUMANode(pod, 1) + + By("Create server Pod and run E2E ICMP validation") + validateE2EICMPTraffic(pod, `[{"name": "test-numa-0-nic1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`) + }) + + It("Validate the creation of a pod with excludeTopology set to False and each interface is "+ + "in the different NUMA as the pod", func() { + + if len(numa0DeviceList) < 2 { + testSkip := "There are not enough Interfaces in NUMA Node 0 to complete this test" + Skip(testSkip) + } + + pod := pods.DefinePod(sriovnamespaces.Test) + pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi") + pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-false-network, "+ + "test-numa-0-nic2-exclude-topology-false-network") + + pod, err := client.Client.Pods(sriovnamespaces.Test). 
+ Create(context.Background(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodFailed)) + g.Expect(actualPod.Status.Message).To(ContainSubstring("Resources cannot be allocated with Topology locality")) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + }) + + It("Utilize all available VFs then create a pod with guaranteed CPU and excludeTopology set to True", func() { + barePod := pods.DefinePod(sriovnamespaces.Test) + podWithQos := pods.RedefineWithGuaranteedQoS(barePod, "2", "500Mi") + + numVFs := 4 + + By("Verifies a pod can consume all the available VFs") + useAllVFsNetworkSpec := []string{} + for vf := 0; vf < numVFs; vf++ { + useAllVFsNetworkSpec = append(useAllVFsNetworkSpec, "test-numa-0-nic1-exclude-topology-true-network") + } + podWithAllVfs := pods.RedefinePodWithNetwork(podWithQos.DeepCopy(), strings.Join(useAllVFsNetworkSpec, ",")) + + podWithAllVfs, err := client.Client.Pods(sriovnamespaces.Test). + Create(context.Background(), podWithAllVfs, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), podWithAllVfs.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning)) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + + By("A pod that uses a VF should not go to Running state") + podWithOneVf := pods.RedefinePodWithNetwork(podWithQos.DeepCopy(), "test-numa-0-nic1-exclude-topology-true-network") + podWithOneVf, err = client.Client.Pods(sriovnamespaces.Test). + Create(context.Background(), podWithOneVf, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(pods.GetStringEventsForPodFn(client.Client, podWithOneVf), 30*time.Second, 1*time.Second). + Should(ContainSubstring("Insufficient openshift.io/testNuma0NIC1ExcludeTopoplogyTrue")) + + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), podWithOneVf.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodPending)) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + + By("Release all VFs by deleting the running pod") + err = client.Client.Pods(sriovnamespaces.Test). 
+ Delete(context.Background(), podWithAllVfs.Name, metav1.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + + By("The pod with one VF should start") + Eventually(func(g Gomega) { + actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), podWithOneVf.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning)) + g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed)) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + }) + +}) + +func withExcludeTopology(excludeTopology bool) func(*sriovv1.SriovNetworkNodePolicy) { + return func(p *sriovv1.SriovNetworkNodePolicy) { + p.Spec.ExcludeTopology = excludeTopology + } +} + +func createSriovNetworkAndPolicyForNumaAffinityTest(numVFs int, intf *sriovv1.InterfaceExt, vfSelector, policyGeneratedName, nodeName, resourceName, ipam string, excludeTopology bool) { + _, err := sriovnetwork.CreateSriovPolicy( + sriovclient, policyGeneratedName, namespaces.SRIOVOperator, + intf.Name+vfSelector, nodeName, numVFs, + resourceName, "netdevice", + withExcludeTopology(excludeTopology), + ) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + + err = sriovnetwork.CreateSriovNetwork(sriovclient, intf, policyGeneratedName+"network", + sriovnamespaces.Test, namespaces.SRIOVOperator, resourceName, ipam) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + +} + +func validateE2EICMPTraffic(pod *corev1.Pod, annotation string) { + serverPod := pods.DefinePod(sriovnamespaces.Test) + serverPod = pods.RedefinePodWithNetwork(serverPod, annotation) + command := []string{"bash", "-c", "ping -I net1 192.0.2.250 -c 5"} + _, err := client.Client.Pods(sriovnamespaces.Test). + Create(context.Background(), serverPod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) error { + _, err = pods.ExecCommand(client.Client, *pod, command) + return err + }, 30*time.Second, 1*time.Second).Should(Succeed(), "ICMP traffic failed over SRIOV interface pod interface") +} + +func findDevicesOnNUMANode(node *corev1.Node, devices []*sriovv1.InterfaceExt, numaNode string) []*sriovv1.InterfaceExt { + listOfDevices := []*sriovv1.InterfaceExt{} + + for _, device := range devices { + out, err := nodes.ExecCommandOnNode([]string{ + "cat", + filepath.Clean(filepath.Join("/sys/class/net/", device.Name, "/device/numa_node")), + }, node) + if err != nil { + klog.Warningf("can't get device [%s] NUMA node: out(%s) err(%s)", device.Name, string(out), err.Error()) + continue + } + + if out == numaNode { + listOfDevices = append(listOfDevices, device) + } + } + + return listOfDevices +} + +func expectPodCPUsAreOnNUMANode(pod *corev1.Pod, expectedCPUsNUMA int) { + // Guaranteed workload pod can be in a different cgroup + // if on the node there have ever been applied a PerformanceProfile, no matter if it's not active at the moment. 
+ // + // https://github.com/openshift/cluster-node-tuning-operator/blob/a4c70abb71036341dfaf0cac30dab0d166e55cbd/assets/performanceprofile/scripts/cpuset-configure.sh#L9 + buff, err := pods.ExecCommand(client.Client, *pod, []string{"sh", "-c", + "cat /sys/fs/cgroup/cpuset/cpuset.cpus 2>/dev/null || cat /sys/fs/cgroup/cpuset.cpus 2>/dev/null"}) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + + cpuList, err := getCpuSet(buff.String()) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + + numaNode, err := findNUMAForCPUs(pod, cpuList) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + + ExpectWithOffset(1, numaNode).To(Equal(expectedCPUsNUMA)) +} + +// makeKubeletConfigWithReservedNUMA0 creates a KubeletConfig.Spec that sets all NUMA0 CPUs as systemReservedCPUs +// and topology manager to "single-numa-node". +func makeKubeletConfigWithReservedNUMA0(node *corev1.Node) *mcv1.KubeletConfigSpec { + numaToCpu, err := nodes.GetNumaNodes(node) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, len(numaToCpu)). + To(BeNumerically(">=", 2), + fmt.Sprintf("Node %s has only one NUMA node[%v]. At least two expected", node.Name, numaToCpu)) + + kubeletConfig := &kubeletconfigv1beta1.KubeletConfiguration{} + kubeletConfig.CPUManagerPolicy = "static" + kubeletConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 5 * time.Second} + kubeletConfig.TopologyManagerPolicy = kubeletconfigv1beta1.SingleNumaNodeTopologyManagerPolicy + kubeletConfig.ReservedSystemCPUs = components.ListToString(numaToCpu[0]) + + raw, err := json.Marshal(kubeletConfig) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + + ret := &mcv1.KubeletConfigSpec{ + KubeletConfig: &runtime.RawExtension{ + Raw: raw, + }, + } + + return ret +} diff --git a/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go b/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go index 3fc49a9a00..234790a715 100644 --- a/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go +++ b/cnf-tests/testsuites/pkg/machineconfigpool/machineconfigpool.go @@ -6,12 +6,15 @@ import ( "time" testclient "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/client" + "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/nodes" mcov1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" mcoScheme "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned/scheme" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog" ) // WaitForCondition waits until the machine config pool will have specified condition type with the expected status @@ -97,7 +100,9 @@ func FindMCPByMCLabel(mcLabel string) (mcov1.MachineConfigPool, error) { return mcov1.MachineConfigPool{}, fmt.Errorf("cannot find MCP that targets MC with label: %s", mcLabel) } -// WaitForMCPStable waits until the mcp is stable +// WaitForMCPStable waits until the mcp is updating and then waits +// for mcp to be stable again. Former wait is useful to avoid returning +// from this function before the operator is working. 
func WaitForMCPStable(mcp mcov1.MachineConfigPool) error { err := WaitForCondition( testclient.Client, @@ -110,15 +115,18 @@ func WaitForMCPStable(mcp mcov1.MachineConfigPool) error { return err } + return WaitForMCPUpdated(mcp) +} + +// WaitForMCPUpdated waits for the MCP to be in the updated state. +func WaitForMCPUpdated(mcp mcov1.MachineConfigPool) error { // We need to wait a long time here for the nodes to reboot - err = WaitForCondition( + return WaitForCondition( testclient.Client, &mcp, mcov1.MachineConfigPoolUpdated, corev1.ConditionTrue, time.Duration(30*mcp.Status.MachineCount)*time.Minute) - - return err } // DecodeMCYaml decodes a MachineConfig YAML to a MachineConfig struct @@ -135,3 +143,131 @@ func DecodeMCYaml(mcyaml string) (*mcov1.MachineConfig, error) { return mc, err } + +// ApplyKubeletConfigToNode creates a KubeletConfig, a MachineConfigPool and a +// `node-role.kubernetes.io/` label in order to target a single node in the cluster. +// The role label is applied to the target node after removing any provious `node-role.kubernetes.io/` label, +// as MachineConfigOperator doesn't support multiple roles. +// Return value is a function that can be used to revert the node labeling. +func ApplyKubeletConfigToNode(node *corev1.Node, name string, spec *mcov1.KubeletConfigSpec) (func(), error) { + nilFn := func() {} + + newNodeRole := name + newNodeRoleSelector := map[string]string{ + "node-role.kubernetes.io/" + newNodeRole: "", + } + + mcp := mcov1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "machineconfiguration.openshift.io/role": name, + }, + }, + + Spec: mcov1.MachineConfigPoolSpec{ + MachineConfigSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "machineconfiguration.openshift.io/role", + Operator: metav1.LabelSelectorOpIn, + Values: []string{name, "worker"}, + }}, + }, + Paused: false, + NodeSelector: &metav1.LabelSelector{ + MatchLabels: newNodeRoleSelector, + }, + }, + } + + kubeletConfig := &mcov1.KubeletConfig{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: *spec.DeepCopy(), + } + + // Link the KubeletConfig to the MCP + kubeletConfig.Spec.MachineConfigPoolSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "machineconfiguration.openshift.io/role": name}, + } + + // Create the KubeletConfig + _, err := controllerutil.CreateOrUpdate(context.Background(), testclient.Client, kubeletConfig, func() error { return nil }) + if err != nil { + return nilFn, err + } + klog.Infof("Created KubeletConfig %s", kubeletConfig.Name) + + // Create MCP + _, err = controllerutil.CreateOrUpdate(context.Background(), testclient.Client, &mcp, func() error { return nil }) + if err != nil { + return nilFn, err + } + klog.Infof("Created MachineConfigPool %s", mcp.Name) + + // Following wait ensure the node is rebooted only once, as if we apply the MCP to + // the node before the KubeletConfig has been rendered, the node will reboot twice. 
+ klog.Infof("Waiting for KubeletConfig to be rendered to MCP") + err = waitUntilKubeletConfigHasUpdatedTheMCP(name) + if err != nil { + return nilFn, err + } + + // Move the node role to the new one + previousNodeRole := nodes.FindRoleLabel(node) + if previousNodeRole != "" { + err = nodes.RemoveRoleFrom(node.Name, previousNodeRole) + if err != nil { + return nilFn, err + } + klog.Infof("Removed role[%s] from node %s", previousNodeRole, node.Name) + } + + err = nodes.AddRoleTo(node.Name, newNodeRole) + if err != nil { + return func() { + nodes.AddRoleTo(node.Name, previousNodeRole) + klog.Infof("Restored role[%s] on node %s", previousNodeRole, node.Name) + }, err + } + klog.Infof("Added role[%s] to node %s", newNodeRole, node.Name) + + err = WaitForMCPStable(mcp) + if err != nil { + return func() { + nodes.RemoveRoleFrom(node.Name, newNodeRole) + nodes.AddRoleTo(node.Name, previousNodeRole) + + klog.Infof("Moved back node role from [%s] to [%s] on %s", newNodeRole, previousNodeRole, node.Name) + }, err + } + + return func() { + nodes.RemoveRoleFrom(node.Name, newNodeRole) + nodes.AddRoleTo(node.Name, previousNodeRole) + klog.Infof("Moved back node role from [%s] to [%s] on %s", newNodeRole, previousNodeRole, node.Name) + + WaitForMCPStable(mcp) + }, nil +} + +func waitUntilKubeletConfigHasUpdatedTheMCP(name string) error { + return wait.Poll(10*time.Second, 3*time.Minute, func() (bool, error) { + + mcp, err := testclient.Client.MachineConfigPools().Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + klog.Warningf("Error while waiting for MachineConfigPool[%s] to be updated: %v", name, err) + return false, nil + } + + expectedSource := fmt.Sprintf("99-%s-generated-kubelet", name) + + for _, source := range mcp.Spec.Configuration.Source { + if source.Name == expectedSource { + return true, nil + } + } + + return false, nil + }) +} diff --git a/cnf-tests/testsuites/pkg/nodes/nodes.go b/cnf-tests/testsuites/pkg/nodes/nodes.go index dee301f1a2..38f0538ab1 100644 --- a/cnf-tests/testsuites/pkg/nodes/nodes.go +++ b/cnf-tests/testsuites/pkg/nodes/nodes.go @@ -2,6 +2,7 @@ package nodes import ( "context" + "encoding/json" "errors" "fmt" "os" @@ -17,6 +18,7 @@ import ( ptpv1 "github.com/openshift/ptp-operator/api/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/exec" kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" ) @@ -391,3 +393,58 @@ func IsSingleNodeCluster() (bool, error) { } return len(nodes.Items) == 1, nil } + +// FindRoleLabel loops over node labels and return the first with key like +// "node-role.kubernetest.io/*", except "node-role.kubernetest.io/worker". +// +// Consider that a node is suppose to have only one "custom role" (role != "worker"). If a node +// has two or more custom roles, MachineConfigOperato stops managing that node. 
+func FindRoleLabel(node *corev1.Node) string { + for label := range node.Labels { + if !strings.HasPrefix(label, "node-role.kubernetes.io/") { + continue + } + + if label == "node-role.kubernetes.io/worker" { + continue + } + + return strings.TrimPrefix(label, "node-role.kubernetes.io/") + } + + return "" +} + +// AddRoleTo adds the "node-role.kubernetes.io/" to the given node +func AddRoleTo(nodeName, role string) error { + return setLabel(nodeName, "node-role.kubernetes.io/"+role, "") +} + +// RemoveRoleFrom removes the "node-role.kubernetes.io/" from the given node +func RemoveRoleFrom(nodeName, role string) error { + return setLabel(nodeName, "node-role.kubernetes.io/"+role, nil) +} + +func setLabel(nodeName, label string, value any) error { + patch := struct { + Metadata map[string]any `json:"metadata"` + }{ + Metadata: map[string]any{ + "labels": map[string]any{ + label: value, + }, + }, + } + + patchData, err := json.Marshal(&patch) + if err != nil { + return fmt.Errorf("can't marshal patch data[%v] to label node[%s]: %w", patch, nodeName, err) + } + + _, err = client.Client.Nodes().Patch(context.Background(), nodeName, types.MergePatchType, patchData, metav1.PatchOptions{}) + if err != nil { + return fmt.Errorf("can't patch labels[%s] of node[%s]: %w", string(patchData), nodeName, err) + } + + return nil +} diff --git a/cnf-tests/testsuites/pkg/performanceprofile/performanceprofile.go b/cnf-tests/testsuites/pkg/performanceprofile/performanceprofile.go index c7d7dfa553..17bfbccb82 100644 --- a/cnf-tests/testsuites/pkg/performanceprofile/performanceprofile.go +++ b/cnf-tests/testsuites/pkg/performanceprofile/performanceprofile.go @@ -242,3 +242,15 @@ func RestorePerformanceProfile(machineConfigPoolName string) error { err = machineconfigpool.WaitForMCPStable(*mcp) return err } + +func IsSingleNUMANode(perfProfile *performancev2.PerformanceProfile) bool { + if perfProfile.Spec.NUMA == nil { + return false + } + + if perfProfile.Spec.NUMA.TopologyPolicy == nil { + return false + } + + return *perfProfile.Spec.NUMA.TopologyPolicy == "single-numa-node" +} diff --git a/cnf-tests/testsuites/pkg/pods/pods.go b/cnf-tests/testsuites/pkg/pods/pods.go index e217166be5..c8d473f615 100644 --- a/cnf-tests/testsuites/pkg/pods/pods.go +++ b/cnf-tests/testsuites/pkg/pods/pods.go @@ -158,6 +158,21 @@ func RedefineAsPrivileged(pod *corev1.Pod, containerName string) (*corev1.Pod, e return pod, nil } +// RedefineWithGuaranteedQoS updates the pod definition by adding resource limits and request +// to the specified values. As requests and limits are equal, the pod will work with a Guarantted +// quality of service (QoS). 
Resource specification are added to the first container +func RedefineWithGuaranteedQoS(pod *corev1.Pod, cpu, memory string) *corev1.Pod { + resources := map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse(memory), + corev1.ResourceCPU: resource.MustParse(cpu), + } + + pod.Spec.Containers[0].Resources.Requests = resources + pod.Spec.Containers[0].Resources.Limits = resources + + return pod +} + // DefinePodOnHostNetwork updates the pod defintion with a host network flag func DefinePodOnHostNetwork(namespace string, nodeName string) *corev1.Pod { pod := DefinePodOnNode(namespace, nodeName) diff --git a/go.mod b/go.mod index 3b5dd366ad..65606da4f1 100644 --- a/go.mod +++ b/go.mod @@ -186,7 +186,7 @@ replace ( // Test deps replace ( - github.com/k8snetworkplumbingwg/sriov-network-operator => github.com/openshift/sriov-network-operator v0.0.0-20230330150324-84715294edb9 // release-4.13 + github.com/k8snetworkplumbingwg/sriov-network-operator => github.com/openshift/sriov-network-operator v0.0.0-20230901022743-ea8f391a5e9f // release-4.13 github.com/k8stopologyawareschedwg/resource-topology-exporter => github.com/k8stopologyawareschedwg/resource-topology-exporter v0.8.0 github.com/metallb/metallb-operator => github.com/openshift/metallb-operator v0.0.0-20230807120428-6267b32eaa61 // release-4.13 github.com/openshift-psap/special-resource-operator => github.com/openshift/special-resource-operator v0.0.0-20211202035230-4c86f99c426b // release-4.10 diff --git a/go.sum b/go.sum index 9b847b72bf..ad682846c1 100644 --- a/go.sum +++ b/go.sum @@ -2396,8 +2396,8 @@ github.com/openshift/ptp-operator v0.0.0-20230206122400-e0231ea64d3a/go.mod h1:x github.com/openshift/runtime-utils v0.0.0-20220926190846-5c488b20a19f/go.mod h1:l9/qeKZuAmYUMl0yicJlbkPGDsIycGhwxOvOAWyaP0E= github.com/openshift/special-resource-operator v0.0.0-20211202035230-4c86f99c426b h1:NlOsWwZI4tYu6XbqG1/9jtg2I20+zs+8vy7d4X7ieZs= github.com/openshift/special-resource-operator v0.0.0-20211202035230-4c86f99c426b/go.mod h1:ESuS9sfrzo0EpEHaHNEvjo1oThseBnGU5s+RT1psTRA= -github.com/openshift/sriov-network-operator v0.0.0-20230330150324-84715294edb9 h1:FHIYzOus9fNPaJC4DAsOhYakwXob9I9KwHYBs9hOKt0= -github.com/openshift/sriov-network-operator v0.0.0-20230330150324-84715294edb9/go.mod h1:qcsO9FbnUCtdnm8wrsx/EyfWtVhZYWHKFDdjThu6HLs= +github.com/openshift/sriov-network-operator v0.0.0-20230901022743-ea8f391a5e9f h1:AAug9YAe6cKoUIxcWX4KQpEmgZEmFThoiRQpacZfKAY= +github.com/openshift/sriov-network-operator v0.0.0-20230901022743-ea8f391a5e9f/go.mod h1:qcsO9FbnUCtdnm8wrsx/EyfWtVhZYWHKFDdjThu6HLs= github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= diff --git a/vendor/github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1/sriovnetworknodepolicy_types.go b/vendor/github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1/sriovnetworknodepolicy_types.go index 3646ec719f..8955c300da 100644 --- a/vendor/github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1/sriovnetworknodepolicy_types.go +++ 
b/vendor/github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1/sriovnetworknodepolicy_types.go @@ -57,6 +57,8 @@ type SriovNetworkNodePolicySpec struct { // +kubebuilder:validation:Enum=virtio // VDPA device type. Allowed value "virtio" VdpaType string `json:"vdpaType,omitempty"` + // Exclude device's NUMA node when advertising this resource by SRIOV network device plugin. Default to false. + ExcludeTopology bool `json:"excludeTopology,omitempty"` } type SriovNetworkNicSelector struct { diff --git a/vendor/modules.txt b/vendor/modules.txt index 818dfdf2bd..7ebe470df3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -225,7 +225,7 @@ github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/clientset/version ## explicit; go 1.17 github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1 -# github.com/k8snetworkplumbingwg/sriov-network-operator v0.0.0-00010101000000-000000000000 => github.com/openshift/sriov-network-operator v0.0.0-20230330150324-84715294edb9 +# github.com/k8snetworkplumbingwg/sriov-network-operator v0.0.0-00010101000000-000000000000 => github.com/openshift/sriov-network-operator v0.0.0-20230901022743-ea8f391a5e9f ## explicit; go 1.19 github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1 github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/scheme @@ -1266,7 +1266,7 @@ sigs.k8s.io/yaml # github.com/openshift/library-go => github.com/openshift/library-go v0.0.0-20230130232623-47904dd9ff5a # github.com/openshift/machine-config-operator => github.com/openshift/machine-config-operator v0.0.1-0.20230419202402-70aa0a560c0b # github.com/test-network-function/l2discovery-lib => github.com/test-network-function/l2discovery-lib v0.0.5 -# github.com/k8snetworkplumbingwg/sriov-network-operator => github.com/openshift/sriov-network-operator v0.0.0-20230330150324-84715294edb9 +# github.com/k8snetworkplumbingwg/sriov-network-operator => github.com/openshift/sriov-network-operator v0.0.0-20230901022743-ea8f391a5e9f # github.com/k8stopologyawareschedwg/resource-topology-exporter => github.com/k8stopologyawareschedwg/resource-topology-exporter v0.8.0 # github.com/metallb/metallb-operator => github.com/openshift/metallb-operator v0.0.0-20230807120428-6267b32eaa61 # github.com/openshift-psap/special-resource-operator => github.com/openshift/special-resource-operator v0.0.0-20211202035230-4c86f99c426b
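The functional core of this change is the new `ExcludeTopology` field on `SriovNetworkNodePolicySpec`, which the test suite drives through the `withExcludeTopology` option passed to `sriovnetwork.CreateSriovPolicy`. For reference, below is a minimal sketch (not part of the patch) of the kind of policy object those helpers end up creating. The PF name, node name, namespace and object name are hypothetical placeholders; only the resource name and VF range mirror the test setup above.

```go
// Package example: illustrative sketch of an SriovNetworkNodePolicy that opts
// out of NUMA alignment via the ExcludeTopology field added in this change.
package example

import (
	sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func exampleExcludeTopologyPolicy() *sriovv1.SriovNetworkNodePolicy {
	return &sriovv1.SriovNetworkNodePolicy{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-numa-0-nic1-exclude-topology-true", // hypothetical object name
			Namespace: "openshift-sriov-network-operator",       // assumed operator namespace
		},
		Spec: sriovv1.SriovNetworkNodePolicySpec{
			ResourceName: "testNuma0NIC1ExcludeTopoplogyTrue", // resource name used by the tests
			NumVfs:       8,
			DeviceType:   "netdevice",
			NicSelector: sriovv1.SriovNetworkNicSelector{
				// "#4-7" restricts the resource to VFs 4..7 of the PF, as in the test setup;
				// "ens1f0" is a placeholder PF name.
				PfNames: []string{"ens1f0#4-7"},
			},
			NodeSelector: map[string]string{
				"kubernetes.io/hostname": "worker-0", // placeholder node name
			},
			// New field introduced by this change: the SR-IOV device plugin advertises
			// these VFs without their NUMA node, so the Topology Manager does not
			// require them to be aligned with the pod's CPUs.
			ExcludeTopology: true,
		},
	}
}
```

With `ExcludeTopology: true`, a guaranteed-QoS pod whose CPUs land on a different NUMA node than the VF is still admitted (the "excludeTopology set to True" tests above); with the default `false`, the same placement is rejected with a TopologyAffinityError under the single-numa-node Topology Manager policy configured by `makeKubeletConfigWithReservedNUMA0`.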