Skip to content

Commit

Permalink
cnf-tests: SRIOV ExcludeTopology tests
Browse files Browse the repository at this point in the history
Create a helper function to create `single-numa-node` performance
profiles.

Test cases use a set of SriovNetworkNodePolicies that target at least
two NICs, placed on two different NUMA nodes. By playing with the
`excludeTopology` field, it is possible to create workload pods that
use multiple NUMA nodes or a single one.

Signed-off-by: Andrea Panattoni <[email protected]>
  • Loading branch information
zeeke committed Jul 26, 2023
1 parent d88f80f commit b11131f
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 6 deletions.
1 change: 1 addition & 0 deletions cnf-tests/TESTLIST.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ The cnf tests instrument each different feature required by CNF. Following, a de
| Test Name | Description |
| -- | ----------- |
| [sriov] Bond CNI integration bond cni over sriov pod with sysctl's on bond over sriov interfaces should start | Verfies a pod with bond over sriov interfaces starts correctly |
| [sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod | Verifies excludeTopology field set to false still ensure every resource is on the same NUMA node |
| [sriov] SCTP integration Test Connectivity Connectivity between client and server Should work over a SR-IOV device | SCTP connectivity test over SR-IOV vfs. |
| [sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's on bond over sriov interfaces should start | Pod with tuning-cni on bond over sriov should start |
| [sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's over sriov interface should start | Pod with tuning-cni over sriov should start |
Expand Down
1 change: 1 addition & 0 deletions cnf-tests/docgen/e2e.json
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@
"[sctp] Test Connectivity Connectivity between client and server connect a client pod to a server pod via Service Node Port Custom namespace": "Pod to pod connectivity via service nodeport, custom namespace",
"[sctp] Test Connectivity Connectivity between client and server connect a client pod to a server pod via Service Node Port Default namespace": "Pod to pod connectivity via service nodeport, default namespace",
"[sriov] Bond CNI integration bond cni over sriov pod with sysctl's on bond over sriov interfaces should start": "Verfies a pod with bond over sriov interfaces starts correctly",
"[sriov] NUMA node alignment Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod": "Verifies excludeTopology field set to false still ensure every resource is on the same NUMA node",
"[sriov] SCTP integration Test Connectivity Connectivity between client and server Should work over a SR-IOV device": "SCTP connectivity test over SR-IOV vfs.",
"[sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's on bond over sriov interfaces should start": "Pod with tuning-cni on bond over sriov should start",
"[sriov] Tuning CNI integration tuning cni over sriov pods with sysctl's over sriov interface should start": "Pod with tuning-cni over sriov should start",
Expand Down
163 changes: 163 additions & 0 deletions cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package dpdk

import (
"context"
"fmt"
"path/filepath"

sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
sriovcluster "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster"
sriovnamespaces "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces"
sriovnetwork "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/client"
"github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/discovery"
"github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/namespaces"
"github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/networks"
utilNodes "github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/nodes"
"github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/performanceprofile"
"github.com/openshift-kni/cnf-features-deploy/cnf-tests/testsuites/pkg/pods"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
)

// SR-IOV NUMA alignment suite.
//
// BeforeAll picks the first SR-IOV capable node, finds one NIC reporting NUMA
// node 0 and one reporting NUMA node 1, and creates four
// SriovNetworkNodePolicy/SriovNetwork pairs (each NIC with excludeTopology
// false and true). The test cases then create workload pods attached to those
// networks and check the resulting pod status.
var _ = Describe("[sriov] NUMA node alignment", Ordered, func() {

	BeforeAll(func() {
		if discovery.Enabled() {
			Skip("Discovery mode not supported")
		}

		isSNO, err := utilNodes.IsSingleNodeCluster()
		Expect(err).ToNot(HaveOccurred())
		if isSNO {
			Skip("Single Node openshift not yet supported")
		}

		perfProfile, err := performanceprofile.FindDefaultPerformanceProfile(performanceProfileName)
		Expect(err).ToNot(HaveOccurred())
		if !performanceprofile.IsSingleNUMANode(perfProfile) {
			Skip("SR-IOV NUMA test suite expects a performance profile with 'single-numa-node' to be present")
		}

		err = namespaces.Create(sriovnamespaces.Test, client.Client)
		Expect(err).ToNot(HaveOccurred())

		By("Clean SRIOV policies and networks")
		networks.CleanSriov(sriovclient)

		By("Discover SRIOV devices")
		sriovCapableNodes, err := sriovcluster.DiscoverSriov(sriovclient, namespaces.SRIOVOperator)
		Expect(err).ToNot(HaveOccurred())
		Expect(len(sriovCapableNodes.Nodes)).To(BeNumerically(">", 0))
		testingNode, err := nodes.GetByName(sriovCapableNodes.Nodes[0])
		Expect(err).ToNot(HaveOccurred())
		By("Using node " + testingNode.Name)

		sriovDevices, err := sriovCapableNodes.FindSriovDevices(testingNode.Name)
		Expect(err).ToNot(HaveOccurred())

		numa0Device, err := findDeviceOnNUMANode(testingNode, sriovDevices, "0")
		Expect(err).ToNot(HaveOccurred())
		By("Using NUMA0 device " + numa0Device.Name)

		numa1Device, err := findDeviceOnNUMANode(testingNode, sriovDevices, "1")
		Expect(err).ToNot(HaveOccurred())
		By("Using NUMA1 device " + numa1Device.Name)

		// SriovNetworkNodePolicy
		// NUMA node0 device excludeTopology = false (VFs 0-3)
		// NUMA node0 device excludeTopology = true  (VFs 4-7)
		// NUMA node1 device excludeTopology = false (VFs 0-3)
		// NUMA node1 device excludeTopology = true  (VFs 4-7)
		//
		// Each policy name prefix must be unique: the helper derives the
		// SriovNetwork name from it by dropping the trailing "-".

		By("Create SRIOV policies and networks")

		ipam := `{ "type": "host-local", "subnet": "192.0.2.0/24" }`

		createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0Device, "#0-3",
			"test-numa-0-exclude-topology-false-", testingNode.Name,
			"testNuma0ExcludeTopoplogyFalse", ipam, false)

		createSriovNetworkAndPolicyForNumaAffinityTest(8, numa0Device, "#4-7",
			"test-numa-0-exclude-topology-true-", testingNode.Name,
			"testNuma0ExcludeTopoplogyTrue", ipam, true)

		createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1Device, "#0-3",
			"test-numa-1-exclude-topology-false-", testingNode.Name,
			"testNuma1ExcludeTopoplogyFalse", ipam, false)

		createSriovNetworkAndPolicyForNumaAffinityTest(8, numa1Device, "#4-7",
			"test-numa-1-exclude-topology-true-", testingNode.Name,
			"testNuma1ExcludeTopoplogyTrue", ipam, true)

		By("Waiting for SRIOV devices to get configured")
		networks.WaitStable(sriovclient)
	})

	BeforeEach(func() {
		By("Clean any pods in " + sriovnamespaces.Test + " namespace")
		namespaces.CleanPods(sriovnamespaces.Test, sriovclient)
	})

	// The test name must match the entry registered in the test documentation
	// (TESTLIST.md / docgen e2e.json), so it carries no debugging prefix.
	It("Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod", func() {
		pod := pods.DefinePod(sriovnamespaces.Test)
		pods.RedefineWithGuaranteedQoS(pod, "1", "100m")
		pod = pods.RedefinePodWithNetwork(pod, "test-numa-0-exclude-topology-false")

		pod, err := client.Client.Pods(sriovnamespaces.Test).
			Create(context.Background(), pod, metav1.CreateOptions{})
		Expect(err).ToNot(HaveOccurred())

		// NOTE(review): no explicit timeout is given, so Gomega's default
		// Eventually timeout applies unless the suite overrides it - confirm it
		// is long enough for the pod to reach the Failed phase.
		Eventually(func(g Gomega) {
			actualPod, err := client.Client.Pods(sriovnamespaces.Test).Get(context.Background(), pod.Name, metav1.GetOptions{})
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(actualPod.Status.Phase).To(Equal(corev1.PodFailed))
			g.Expect(actualPod.Status.Reason).To(Equal("TopologyAffinityError"))
		}).Should(Succeed())
	})
})

// findDeviceOnNUMANode returns the first device in devices whose
// /sys/class/net/<name>/device/numa_node file, read on the given node, equals
// numaNode. Devices whose NUMA node cannot be read are logged and skipped. An
// error is returned when no device matches.
func findDeviceOnNUMANode(node *corev1.Node, devices []*sriovv1.InterfaceExt, numaNode string) (*sriovv1.InterfaceExt, error) {
	for _, device := range devices {
		// filepath.Join already returns a cleaned path.
		numaNodePath := filepath.Join("/sys/class/net/", device.Name, "/device/numa_node")

		out, err := nodes.ExecCommandOnNode([]string{"cat", numaNodePath}, node)
		if err != nil {
			klog.Warningf("can't get device [%s] NUMA node: out(%s) err(%s)", device.Name, out, err.Error())
			continue
		}

		if out == numaNode {
			return device, nil
		}
	}

	return nil, fmt.Errorf("can't find any SR-IOV device on NUMA [%s] for node [%s]. Available devices: %+v", numaNode, node.Name, devices)
}

// withExcludeTopology returns a policy option that sets Spec.ExcludeTopology
// to the given value.
func withExcludeTopology(excludeTopology bool) func(*sriovv1.SriovNetworkNodePolicy) {
	return func(p *sriovv1.SriovNetworkNodePolicy) {
		p.Spec.ExcludeTopology = excludeTopology
	}
}

// createSriovNetworkAndPolicyForNumaAffinityTest creates a "netdevice"
// SriovNetworkNodePolicy and a matching SriovNetwork for the given interface.
//
//   - numVFs, vfSelector: number of VFs on the PF and the VF range selector
//     appended to the interface name (e.g. "#0-3").
//   - policyGeneratedName: name prefix passed to CreateSriovPolicy. The
//     SriovNetwork is named after it with one trailing "-" removed, so that
//     pods can reference the network by a stable name.
//   - nodeName: node targeted by the policy.
//   - resourceName: resource name shared by the policy and the network.
//   - ipam: IPAM configuration of the network.
//   - excludeTopology: value of the policy's Spec.ExcludeTopology field.
//
// Any creation error fails the calling test.
func createSriovNetworkAndPolicyForNumaAffinityTest(numVFs int, intf *sriovv1.InterfaceExt, vfSelector, policyGeneratedName, nodeName, resourceName, ipam string, excludeTopology bool) {
	_, err := sriovnetwork.CreateSriovPolicy(
		sriovclient, policyGeneratedName, namespaces.SRIOVOperator,
		intf.Name+vfSelector, nodeName, numVFs,
		resourceName, "netdevice",
		withExcludeTopology(excludeTopology),
	)
	ExpectWithOffset(1, err).ToNot(HaveOccurred())

	// Drop the trailing "-" of the name prefix to obtain the network name.
	networkName := policyGeneratedName
	if n := len(networkName); n > 0 && networkName[n-1] == '-' {
		networkName = networkName[:n-1]
	}

	err = sriovnetwork.CreateSriovNetwork(sriovclient, intf, networkName,
		sriovnamespaces.Test, namespaces.SRIOVOperator, resourceName, ipam)
	ExpectWithOffset(1, err).ToNot(HaveOccurred())
}
93 changes: 87 additions & 6 deletions cnf-tests/testsuites/pkg/performanceprofile/performanceprofile.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
mcv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/utils/pointer"

"k8s.io/apimachinery/pkg/api/errors"
goclient "sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -64,7 +65,7 @@ func FindOrOverridePerformanceProfile(performanceProfileName, machineConfigPoolN
}
}

err = CreatePerformanceProfile(performanceProfileName, machineConfigPoolName)
err = client.Client.Create(context.TODO(), DefinePerfomanceProfile(performanceProfileName, machineConfigPoolName))
if err != nil {
return err
}
Expand All @@ -78,6 +79,50 @@ func FindOrOverridePerformanceProfile(performanceProfileName, machineConfigPoolN
return nil
}

// OverridePerformanceProfile installs newPerformanceProfile on the cluster.
//
// If a profile is found through FindDefaultPerformanceProfile, a deep copy of
// it is stored in OriginalPerformanceProfile, CleanPerformanceProfiles is
// called and the machine config pool named machineConfigPoolName is awaited
// before the new profile is created. A "not found" lookup error is treated as
// "nothing to replace". After creating the new profile the function waits for
// the machine config pool again. The first error encountered is returned.
func OverridePerformanceProfile(performanceProfileName, machineConfigPoolName string, newPerformanceProfile *performancev2.PerformanceProfile) error {
	existing, err := FindDefaultPerformanceProfile(performanceProfileName)
	switch {
	case err == nil:
		// A profile was found: it is backed up and removed below.
	case errors.IsNotFound(err):
		existing = nil
	default:
		return err
	}

	// The pool is fetched before any change is made to the profiles.
	pool := &mcv1.MachineConfigPool{}
	if err := client.Client.Get(context.TODO(), goclient.ObjectKey{Name: machineConfigPoolName}, pool); err != nil {
		return err
	}

	if existing != nil {
		OriginalPerformanceProfile = existing.DeepCopy()

		// Remove the current profiles and wait for the pool before creating
		// the replacement.
		if err := CleanPerformanceProfiles(); err != nil {
			return err
		}
		if err := machineconfigpool.WaitForMCPStable(*pool); err != nil {
			return err
		}
	}

	if err := client.Client.Create(context.TODO(), newPerformanceProfile); err != nil {
		return err
	}

	return machineconfigpool.WaitForMCPStable(*pool)
}

func ValidatePerformanceProfile(performanceProfile *performancev2.PerformanceProfile) (bool, error) {

// Check we have more than two isolated CPU
Expand Down Expand Up @@ -154,7 +199,30 @@ func DiscoverPerformanceProfiles(enforcedPerformanceProfileName string) (bool, s
return false, fmt.Sprintf("Can not run tests in discovery mode. Failed to find a valid perfomance profile. %s", err), nil
}

func CreatePerformanceProfile(performanceProfileName, machineConfigPoolName string) error {
// DefineSingleNUMANode builds, without creating it on the cluster, a
// PerformanceProfile named performanceProfileName with the
// "single-numa-node" topology policy. CPUs "0-1" are reserved, CPUs "2-3" are
// isolated, and the node selector is the
// "node-role.kubernetes.io/<machineConfigPoolName>" label with an empty value.
func DefineSingleNUMANode(performanceProfileName, machineConfigPoolName string) *performancev2.PerformanceProfile {
	reserved := performancev2.CPUSet("0-1")
	isolated := performancev2.CPUSet("2-3")
	nodeRoleLabel := fmt.Sprintf("node-role.kubernetes.io/%s", machineConfigPoolName)

	profile := &performancev2.PerformanceProfile{}
	profile.ObjectMeta = metav1.ObjectMeta{Name: performanceProfileName}
	profile.Spec = performancev2.PerformanceProfileSpec{
		CPU:          &performancev2.CPU{Isolated: &isolated, Reserved: &reserved},
		NodeSelector: map[string]string{nodeRoleLabel: ""},
		NUMA:         &performancev2.NUMA{TopologyPolicy: pointer.String("single-numa-node")},
	}

	return profile
}

func DefinePerfomanceProfile(performanceProfileName, machineConfigPoolName string) *performancev2.PerformanceProfile {
isolatedCPUSet := performancev2.CPUSet("8-15")
reservedCPUSet := performancev2.CPUSet("0-7")
hugepageSize := performancev2.HugePageSize("1G")
Expand Down Expand Up @@ -191,7 +259,7 @@ func CreatePerformanceProfile(performanceProfileName, machineConfigPoolName stri
}
}

return client.Client.Create(context.TODO(), performanceProfile)
return performanceProfile
}

func CleanPerformanceProfiles() error {
Expand All @@ -212,9 +280,6 @@ func CleanPerformanceProfiles() error {
}

func RestorePerformanceProfile(machineConfigPoolName string) error {
if OriginalPerformanceProfile == nil {
return nil
}

err := CleanPerformanceProfiles()
if err != nil {
Expand All @@ -232,6 +297,10 @@ func RestorePerformanceProfile(machineConfigPoolName string) error {
return err
}

if OriginalPerformanceProfile == nil {
return nil
}

name := OriginalPerformanceProfile.Name
OriginalPerformanceProfile.ObjectMeta = metav1.ObjectMeta{Name: name}
err = client.Client.Create(context.TODO(), OriginalPerformanceProfile)
Expand All @@ -242,3 +311,15 @@ func RestorePerformanceProfile(machineConfigPoolName string) error {
err = machineconfigpool.WaitForMCPStable(*mcp)
return err
}

// IsSingleNUMANode reports whether perfProfile sets the NUMA topology policy
// to "single-numa-node". It returns false when the NUMA section or its
// topology policy is unset.
func IsSingleNUMANode(perfProfile *performancev2.PerformanceProfile) bool {
	numa := perfProfile.Spec.NUMA

	return numa != nil &&
		numa.TopologyPolicy != nil &&
		*numa.TopologyPolicy == "single-numa-node"
}
15 changes: 15 additions & 0 deletions cnf-tests/testsuites/pkg/pods/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,21 @@ func RedefineAsPrivileged(pod *corev1.Pod, containerName string) (*corev1.Pod, e
return pod, nil
}

// RedefineWithGuaranteedQoS updates the pod definition by setting the resource
// requests and limits of the first container to the given cpu and memory
// quantities. As requests and limits are equal, the pod will run with the
// Guaranteed quality of service (QoS) class.
//
// cpu and memory are parsed with resource.MustParse, which panics on an
// invalid quantity. The pod must have at least one container. The same pod is
// returned to allow call chaining.
func RedefineWithGuaranteedQoS(pod *corev1.Pod, cpu, memory string) *corev1.Pod {
	// Requests and Limits get their own map each, so that a later change to
	// one of them does not silently alter the other.
	newResources := func() corev1.ResourceList {
		return corev1.ResourceList{
			corev1.ResourceMemory: resource.MustParse(memory),
			corev1.ResourceCPU:    resource.MustParse(cpu),
		}
	}

	pod.Spec.Containers[0].Resources.Requests = newResources()
	pod.Spec.Containers[0].Resources.Limits = newResources()

	return pod
}

// DefinePodOnHostNetwork updates the pod definition with a host network flag
func DefinePodOnHostNetwork(namespace string, nodeName string) *corev1.Pod {
pod := DefinePodOnNode(namespace, nodeName)
Expand Down

0 comments on commit b11131f

Please sign in to comment.