Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cnf-tests: Add numa tests with multiple interfaces #1590

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cnf-tests/TESTLIST.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ The cnf tests instrument each different feature required by CNF. Following, a de
| -- | ----------- |
| [sriov] Bond CNI integration bond cni over sriov pod with sysctl's on bond over sriov interfaces should start | Verifies a pod with bond over sriov interfaces starts correctly |
| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod | Verifies excludeTopology field set to false still ensure every resource is on the same NUMA node |
| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and each interface is in the different NUMA as the pod | Verifies excludeTopology field set to false and multiple interfaces are located on a different NUMA node |
| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a different NUMA node than the pod | Verifies excludeTopology field set true still ensure every resource is on a different NUMA node |
| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a same NUMA node than the pod | Verifies excludeTopology field set to true still ensure every resource is on the same NUMA node |
| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and multiple SRIOV interfaces located in different NUMA nodes than the pod | Verifies excludeTopology field set to true still allows multiple interfaces in a different NUMA node |
| [sriov] NUMA node alignment Validate the creation of a pod with two sriovnetworknodepolicies one with excludeTopology False and the second true each interface is in different NUMA as the pod | Verifies the use of two excludeTopology one set to true and the second to false ensure all resource are in a different NUMA node |
| [sriov] SCTP integration Test Connectivity Connectivity between client and server Should work over a SR-IOV device | SCTP connectivity test over SR-IOV vfs. |
| [sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's on bond over sriov interfaces should start | Pod with tuning-cni on bond over sriov should start |
| [sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's over sriov interface should start | Pod with tuning-cni over sriov should start |
Expand Down
4 changes: 4 additions & 0 deletions cnf-tests/docgen/e2e.json
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,11 @@
"[sctp] Test Connectivity Connectivity between client and server connect a client pod to a server pod via Service Node Port Default namespace": "Pod to pod connectivity via service nodeport, default namespace",
"[sriov] Bond CNI integration bond cni over sriov pod with sysctl's on bond over sriov interfaces should start": "Verifies a pod with bond over sriov interfaces starts correctly",
"[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod": "Verifies excludeTopology field set to false still ensure every resource is on the same NUMA node",
"[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and each interface is in the different NUMA as the pod": "Verifies excludeTopology field set to false and multiple interfaces are located on a different NUMA node",
"[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a different NUMA node than the pod": "Verifies excludeTopology field set true still ensure every resource is on a different NUMA node",
"[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a same NUMA node than the pod": "Verifies excludeTopology field set to true still ensure every resource is on the same NUMA node",
"[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to True and multiple SRIOV interfaces located in different NUMA nodes than the pod": "Verifies excludeTopology field set to true still allows multiple interfaces in a different NUMA node",
"[sriov] NUMA node alignment Validate the creation of a pod with two sriovnetworknodepolicies one with excludeTopology False and the second true each interface is in different NUMA as the pod": "Verifies the use of two excludeTopology one set to true and the second to false ensure all resource are in a different NUMA node",
"[sriov] SCTP integration Test Connectivity Connectivity between client and server Should work over a SR-IOV device": "SCTP connectivity test over SR-IOV vfs.",
"[sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's on bond over sriov interfaces should start": "Pod with tuning-cni on bond over sriov should start",
"[sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's over sriov interface should start": "Pod with tuning-cni over sriov should start",
Expand Down
225 changes: 185 additions & 40 deletions cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ import (

var _ = Describe("[sriov] NUMA node alignment", Ordered, func() {

var (
numa0DeviceList []*sriovv1.InterfaceExt
numa1DeviceList []*sriovv1.InterfaceExt
)

BeforeAll(func() {
if discovery.Enabled() {
Skip("Discovery mode not supported")
Expand Down Expand Up @@ -62,39 +67,53 @@ var _ = Describe("[sriov] NUMA node alignment", Ordered, func() {
sriovDevices, err := sriovCapableNodes.FindSriovDevices(testingNode.Name)
Expect(err).ToNot(HaveOccurred())

numa0Device, err := findDeviceOnNUMANode(testingNode, sriovDevices, "0")
Expect(err).ToNot(HaveOccurred())
By("Using NUMA0 device " + numa0Device.Name)
numa0DeviceList, err = findDevicesOnNUMANode(testingNode, sriovDevices, "0")
Expect(len(numa0DeviceList)).To(BeNumerically(">=", 1))
By("Using NUMA0 device1 " + numa0DeviceList[0].Name)

numa1Device, err := findDeviceOnNUMANode(testingNode, sriovDevices, "1")
Expect(err).ToNot(HaveOccurred())
By("Using NUMA1 device " + numa1Device.Name)
numa1DeviceList, err = findDevicesOnNUMANode(testingNode, sriovDevices, "1")
Expect(len(numa1DeviceList)).To(BeNumerically(">=", 1))
By("Using NUMA1 device1 " + numa1DeviceList[0].Name)

// SriovNetworkNodePolicy
// NUMA node0 device excludeTopology = true
// NUMA node0 device excludeTopology = false
// NUMA node1 device excludeTopology = true
// NUMA node1 device excludeTopology = false
// NUMA node0 device1 excludeTopology = false
// NUMA node0 device1 excludeTopology = true
// NUMA node0 device2 excludeTopology = false
// NUMA node0 device2 excludeTopology = true
// NUMA node1 device3 excludeTopology = false
// NUMA node1 device3 excludeTopology = true

By("Create SRIOV policies and networks")

ipam := `{ "type": "host-local", "subnet": "192.0.2.0/24" }`

createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0Device, "#0-3",
"test-numa-0-exclude-topology-false-", testingNode.Name,
"testNuma0ExcludeTopoplogyFalse", ipam, false)
createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[0], "#0-3",
"test-numa-0-nic1-exclude-topology-false-", testingNode.Name,
"testNuma0NIC1ExcludeTopoplogyFalse", ipam, false)

createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0Device, "#4-7",
"test-numa-0-exclude-topology-true-", testingNode.Name,
"testNuma0ExcludeTopoplogyTrue", ipam, true)
createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[0], "#4-7",
"test-numa-0-nic1-exclude-topology-true-", testingNode.Name,
"testNuma0NIC1ExcludeTopoplogyTrue", ipam, true)

if len(numa0DeviceList) > 1 {
By("Using NUMA0 device2 " + numa0DeviceList[1].Name)

createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[1], "#0-3",
"test-numa-0-nic2-exclude-topology-false-", testingNode.Name,
"testNuma1NIC2ExcludeTopoplogyFalse", ipam, false)

createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0DeviceList[1], "#4-7",
"test-numa-0-nic2-exclude-topology-true-", testingNode.Name,
"testNuma1NIC2ExcludeTopoplogyTrue", ipam, true)
}

createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1Device, "#0-3",
"test-numa-1-exclude-topology-false-", testingNode.Name,
"testNuma1ExcludeTopoplogyFalse", ipam, false)
createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1DeviceList[0], "#0-3",
"test-numa-1-nic1-exclude-topology-false-", testingNode.Name,
"testNuma1NIC1ExcludeTopoplogyFalse", ipam, false)

createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1Device, "#4-7",
"test-numa-1-exclude-topology-true-", testingNode.Name,
"testNuma1ExcludeTopoplogyTrue", ipam, true)
createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1DeviceList[0], "#4-7",
"test-numa-1-nic1-exclude-topology-true-", testingNode.Name,
"testNuma1NIC1ExcludeTopoplogyTrue", ipam, true)

By("Waiting for SRIOV devices to get configured")
networks.WaitStable(sriovclient)
Expand All @@ -108,7 +127,7 @@ var _ = Describe("[sriov] NUMA node alignment", Ordered, func() {
It("Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod", func() {
pod := pods.DefinePod(sriovnamespaces.Test)
pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-exclude-topology-false-network")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-false-network")

pod, err := client.Client.Pods(sriovnamespaces.Test).
Create(context.Background(), pod, metav1.CreateOptions{})
Expand All @@ -126,7 +145,7 @@ var _ = Describe("[sriov] NUMA node alignment", Ordered, func() {
"than the pod", func() {
pod := pods.DefinePod(sriovnamespaces.Test)
pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-1-exclude-topology-true-network")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-1-nic1-exclude-topology-true-network")

pod, err := client.Client.Pods(sriovnamespaces.Test).
Create(context.Background(), pod, metav1.CreateOptions{})
Expand All @@ -139,29 +158,120 @@ var _ = Describe("[sriov] NUMA node alignment", Ordered, func() {
g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed))
}, 30*time.Second, 1*time.Second).Should(Succeed())

By("Validate Pod NUMA Node")
expectPodCPUsAreOnNUMANode(pod, 1)
SchSeba marked this conversation as resolved.
Show resolved Hide resolved

By("Create server Pod and run E2E ICMP validation")
validateE2EICMPTraffic(pod, fmt.Sprintf(`[{"name": "test-numa-1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`))
validateE2EICMPTraffic(pod, fmt.Sprintf(`[{"name": "test-numa-1-nic1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`))
})
})

func findDeviceOnNUMANode(node *corev1.Node, devices []*sriovv1.InterfaceExt, numaNode string) (*sriovv1.InterfaceExt, error) {
for _, device := range devices {
out, err := nodes.ExecCommandOnNode([]string{
"cat",
filepath.Clean(filepath.Join("/sys/class/net/", device.Name, "/device/numa_node")),
}, node)
if err != nil {
klog.Warningf("can't get device [%s] NUMA node: out(%s) err(%s)", device.Name, string(out), err.Error())
continue
It("Validate the creation of a pod with excludeTopology set to True and an SRIOV interface in a different NUMA node "+
SchSeba marked this conversation as resolved.
Show resolved Hide resolved
"than the pod", func() {
pod := pods.DefinePod(sriovnamespaces.Test)
pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-true-network")

pod, err := client.Client.Pods(sriovnamespaces.Test).
Create(context.Background(), pod, metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())

Eventually(func(g Gomega) {
actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{})
g.Expect(err).ToNot(HaveOccurred())
g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning))
g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed))
}, 30*time.Second, 1*time.Second).Should(Succeed())

By("Validate Pod NUMA Node")
expectPodCPUsAreOnNUMANode(pod, 1)

By("Create server Pod and run E2E ICMP validation")
validateE2EICMPTraffic(pod, fmt.Sprintf(`[{"name": "test-numa-0-nic1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`))
})

It("Validate the creation of a pod with two sriovnetworknodepolicies one with excludeTopology False and the "+
"second true each interface is in different NUMA as the pod", func() {

if len(numa0DeviceList) < 2 {
testSkip := "There are not enough Interfaces in NUMA Node 0 to complete this test"
Skip(testSkip)
}

if out == numaNode {
return device, nil
pod := pods.DefinePod(sriovnamespaces.Test)
pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-true-network, "+
"test-numa-0-nic2-exclude-topology-false-network")

pod, err := client.Client.Pods(sriovnamespaces.Test).
Create(context.Background(), pod, metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())

Eventually(func(g Gomega) {
actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{})
g.Expect(err).ToNot(HaveOccurred())
g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed))
g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodFailed))
g.Expect(actualPod.Status.Reason).To(Equal("TopologyAffinityError"))
}, 30*time.Second, 1*time.Second).Should(Succeed())
})

It("Validate the creation of a pod with excludeTopology set to True and multiple SRIOV interfaces located in "+
"different NUMA nodes than the pod", func() {

if len(numa0DeviceList) < 2 {
testSkip := "There are not enough Interfaces in NUMA Node 0 to complete this test"
Skip(testSkip)
}
}

return nil, fmt.Errorf("can't find any SR-IOV device on NUMA [%s] for node [%s]. Available devices: %+v", numaNode, node.Name, devices)
}
pod := pods.DefinePod(sriovnamespaces.Test)
pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-true-network, "+
"test-numa-0-nic2-exclude-topology-true-network")

pod, err := client.Client.Pods(sriovnamespaces.Test).
Create(context.Background(), pod, metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())

Eventually(func(g Gomega) {
actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{})
g.Expect(err).ToNot(HaveOccurred())
g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodRunning))
g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed))
}, 30*time.Second, 1*time.Second).Should(Succeed())

By("Validate Pod NUMA Node")
expectPodCPUsAreOnNUMANode(pod, 1)

By("Create server Pod and run E2E ICMP validation")
validateE2EICMPTraffic(pod, fmt.Sprintf(`[{"name": "test-numa-0-nic1-exclude-topology-true-network","ips":["192.0.2.250/24"]}]`))
})

It("Validate the creation of a pod with excludeTopology set to False and each interface is "+
"in the different NUMA as the pod", func() {

if len(numa0DeviceList) < 2 {
testSkip := "There are not enough Interfaces in NUMA Node 0 to complete this test"
Skip(testSkip)
}

pod := pods.DefinePod(sriovnamespaces.Test)
pods.RedefineWithGuaranteedQoS(pod, "2", "500Mi")
pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-nic1-exclude-topology-false-network, "+
"test-numa-0-nic2-exclude-topology-false-network")

pod, err := client.Client.Pods(sriovnamespaces.Test).
Create(context.Background(), pod, metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())

Eventually(func(g Gomega) {
actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{})
g.Expect(err).ToNot(HaveOccurred())
g.Expect(actualPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed))
g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodFailed))
g.Expect(actualPod.Status.Message).To(ContainSubstring("Resources cannot be allocated with Topology locality"))
}, 30*time.Second, 1*time.Second).Should(Succeed())
})
})

func withExcludeTopology(excludeTopology bool) func(*sriovv1.SriovNetworkNodePolicy) {
return func(p *sriovv1.SriovNetworkNodePolicy) {
Expand Down Expand Up @@ -197,3 +307,38 @@ func validateE2EICMPTraffic(pod *corev1.Pod, annotation string) {
return err
}, 30*time.Second, 1*time.Second).Should(Succeed(), "ICMP traffic failed over SRIOV interface pod interface")
}

// findDevicesOnNUMANode collects the interfaces from devices whose
// /sys/class/net/<name>/device/numa_node content, obtained by running `cat`
// through nodes.ExecCommandOnNode against node, is equal to numaNode.
//
// The comparison is an exact string match against the command output.
// Interfaces for which the command fails are reported with a klog warning and
// left out of the result. The returned slice is non-nil and empty when no
// interface matches, and the returned error is always nil.
func findDevicesOnNUMANode(node *corev1.Node, devices []*sriovv1.InterfaceExt, numaNode string) ([]*sriovv1.InterfaceExt, error) {
	matching := make([]*sriovv1.InterfaceExt, 0)

	for idx := range devices {
		candidate := devices[idx]
		numaFile := filepath.Clean(filepath.Join("/sys/class/net/", candidate.Name, "/device/numa_node"))

		reported, err := nodes.ExecCommandOnNode([]string{"cat", numaFile}, node)

		switch {
		case err != nil:
			// Best effort: an unreadable interface is skipped, not fatal.
			klog.Warningf("can't get device [%s] NUMA node: out(%s) err(%s)", candidate.Name, string(reported), err.Error())
		case reported == numaNode:
			matching = append(matching, candidate)
		}
	}

	return matching, nil
}

// expectPodCPUsAreOnNUMANode asserts that the NUMA node reported by
// findNUMAForCPUs for the pod's cpuset equals expectedCPUsNUMA.
//
// The cpuset is read by running `cat /sys/fs/cgroup/cpuset.cpus` inside the
// pod and parsing the output with getCpuSet. Every assertion goes through
// ExpectWithOffset(1, ...), so a failure is attributed to the caller of this
// helper rather than to a line inside it.
func expectPodCPUsAreOnNUMANode(pod *corev1.Pod, expectedCPUsNUMA int) {
	readCpusetCmd := []string{"cat", "/sys/fs/cgroup/cpuset.cpus"}

	cpusetOutput, err := pods.ExecCommand(client.Client, *pod, readCpusetCmd)
	ExpectWithOffset(1, err).ToNot(HaveOccurred())

	podCPUs, err := getCpuSet(cpusetOutput.String())
	ExpectWithOffset(1, err).ToNot(HaveOccurred())

	actualNUMA, err := findNUMAForCPUs(pod, podCPUs)
	ExpectWithOffset(1, err).ToNot(HaveOccurred())

	ExpectWithOffset(1, actualNUMA).To(Equal(expectedCPUsNUMA))
}