diff --git a/bundle/manifests/sriov-network-operator.clusterserviceversion.yaml b/bundle/manifests/sriov-network-operator.clusterserviceversion.yaml index fc91ceb29..fdcc13923 100644 --- a/bundle/manifests/sriov-network-operator.clusterserviceversion.yaml +++ b/bundle/manifests/sriov-network-operator.clusterserviceversion.yaml @@ -100,7 +100,7 @@ metadata: categories: Networking certified: "false" containerImage: quay.io/openshift/origin-sriov-network-operator:4.18 - createdAt: "2024-09-10T23:50:28Z" + createdAt: "2024-09-18T23:50:48Z" description: An operator for configuring SR-IOV components and initializing SRIOV network devices in Openshift cluster. features.operators.openshift.io/cnf: "false" diff --git a/controllers/generic_network_controller.go b/controllers/generic_network_controller.go index de3d78070..e6b84d3aa 100644 --- a/controllers/generic_network_controller.go +++ b/controllers/generic_network_controller.go @@ -36,6 +36,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -168,9 +169,9 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque reqLogger.Error(err, "Couldn't create NetworkAttachmentDefinition CR", "Namespace", netAttDef.Namespace, "Name", netAttDef.Name) return reconcile.Result{}, err } - anno := map[string]string{sriovnetworkv1.LASTNETWORKNAMESPACE: netAttDef.Namespace} - instance.SetAnnotations(anno) - if err := r.Update(ctx, instance); err != nil { + + err = utils.AnnotateObject(ctx, instance, sriovnetworkv1.LASTNETWORKNAMESPACE, netAttDef.Namespace, r.Client) + if err != nil { return reconcile.Result{}, err } } else { diff --git a/controllers/sriovnetwork_controller_test.go b/controllers/sriovnetwork_controller_test.go index 2eceb2133..f2f7d2a07 100644 --- a/controllers/sriovnetwork_controller_test.go +++ b/controllers/sriovnetwork_controller_test.go @@ -314,6 +314,34 @@ var _ = Describe("SriovNetwork Controller", Ordered, func() { }) }) + It("should preserve user defined annotations", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-annotations", + Namespace: testNamespace, + Annotations: map[string]string{"foo": "bar"}, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(k8sClient.Delete, ctx, &cr) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err = k8sClient.Get(ctx, types.NamespacedName{Name: cr.GetName(), Namespace: testNamespace}, network) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(network.Annotations).To(HaveKeyWithValue("foo", "bar")) + g.Expect(network.Annotations).To(HaveKeyWithValue("operator.sriovnetwork.openshift.io/last-network-namespace", "default")) + }). + WithPolling(100 * time.Millisecond). + WithTimeout(5 * time.Second). + MustPassRepeatedly(10). + Should(Succeed()) + }) }) }) diff --git a/manifests/stable/sriov-network-operator.clusterserviceversion.yaml b/manifests/stable/sriov-network-operator.clusterserviceversion.yaml index fc91ceb29..fdcc13923 100644 --- a/manifests/stable/sriov-network-operator.clusterserviceversion.yaml +++ b/manifests/stable/sriov-network-operator.clusterserviceversion.yaml @@ -100,7 +100,7 @@ metadata: categories: Networking certified: "false" containerImage: quay.io/openshift/origin-sriov-network-operator:4.18 - createdAt: "2024-09-10T23:50:28Z" + createdAt: "2024-09-18T23:50:48Z" description: An operator for configuring SR-IOV components and initializing SRIOV network devices in Openshift cluster. features.operators.openshift.io/cnf: "false" diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index 586b4438f..379cf6a70 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -898,9 +898,14 @@ func (s *sriov) SetNicSriovMode(pciAddress string, mode string) error { dev, err := s.netlinkLib.DevLinkGetDeviceByName("pci", pciAddress) if err != nil { - return err + return fmt.Errorf("can't get devlink device [%s] to set eSwitch to [%s]: %w", pciAddress, mode, err) + } + + err = s.netlinkLib.DevLinkSetEswitchMode(dev, mode) + if err != nil { + return fmt.Errorf("can't set eSwitch mode to [%s] on device [%s]: %w", mode, pciAddress, err) } - return s.netlinkLib.DevLinkSetEswitchMode(dev, mode) + return nil } func (s *sriov) GetLinkType(name string) string { @@ -989,22 +994,41 @@ func (s *sriov) createVFs(iface *sriovnetworkv1.Interface) error { return s.setEswitchModeAndNumVFs(iface.PciAddress, expectedEswitchMode, iface.NumVfs) } -func (s *sriov) setEswitchMode(pciAddr, eswitchMode string) error { - log.Log.V(2).Info("setEswitchMode(): set eswitch mode", "device", pciAddr, "mode", eswitchMode) - if err := s.unbindAllVFsOnPF(pciAddr); err != nil { - log.Log.Error(err, "setEswitchMode(): failed to unbind VFs", "device", pciAddr, "mode", eswitchMode) +type setEswitchModeAndNumVFsFn func(string, string, int) error + +func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error { + pfDriverName, err := s.dputilsLib.GetDriverName(pciAddr) + if err != nil { return err } - if err := s.SetNicSriovMode(pciAddr, eswitchMode); err != nil { - err = fmt.Errorf("failed to switch NIC to SRIOV %s mode: %v", eswitchMode, err) - log.Log.Error(err, "setEswitchMode(): failed to set mode", "device", pciAddr, "mode", eswitchMode) - return err + + log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device", + "device", pciAddr, "count", numVFs, "mode", desiredEswitchMode, "driver", pfDriverName) + + setEswitchModeAndNumVFsByDriverName := map[string]setEswitchModeAndNumVFsFn{ + "ice": s.setEswitchModeAndNumVFsIce, + "mlx5_core": s.setEswitchModeAndNumVFsMlx, } - return nil + + fn, ok := setEswitchModeAndNumVFsByDriverName[pfDriverName] + if !ok { + log.Log.V(2).Info("setEswitchModeAndNumVFs(): driver not found in the support list. Using fallback implementation", + "device", pciAddr, "driver", pfDriverName) + + // Fallback to mlx5 driver + fn = s.setEswitchModeAndNumVFsMlx + } + + return fn(pciAddr, desiredEswitchMode, numVFs) } -func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error { - log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device", +// setEswitchModeAndNumVFsMlx configures PF eSwitch and sriov_numvfs in the following order: +// a. set eSwitchMode to legacy +// b. set the desired number of Virtual Functions +// c. unbind driver of all VFs +// d. set eSwitchMode to `switchdev` if requested +func (s *sriov) setEswitchModeAndNumVFsMlx(pciAddr string, desiredEswitchMode string, numVFs int) error { + log.Log.V(2).Info("setEswitchModeAndNumVFsMlx(): configure VFs for device", "device", pciAddr, "count", numVFs, "mode", desiredEswitchMode) // always switch NIC to the legacy mode before creating VFs. This is required because some drivers @@ -1015,7 +1039,11 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin if err := s.detachPFFromBridge(pciAddr); err != nil { return err } - if err := s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil { + if err := s.unbindAllVFsOnPF(pciAddr); err != nil { + log.Log.Error(err, "setEswitchModeAndNumVFsMlx(): failed to unbind VFs", "device", pciAddr, "mode", desiredEswitchMode) + return err + } + if err := s.SetNicSriovMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil { return err } } @@ -1024,8 +1052,39 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin } if desiredEswitchMode == sriovnetworkv1.ESwithModeSwitchDev { - return s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeSwitchDev) + if err := s.unbindAllVFsOnPF(pciAddr); err != nil { + log.Log.Error(err, "setEswitchModeAndNumVFsMlx(): failed to unbind VFs", "device", pciAddr, "mode", desiredEswitchMode) + return err + } + if err := s.SetNicSriovMode(pciAddr, desiredEswitchMode); err != nil { + return err + } + } + return nil +} + +// setEswitchModeAndNumVFsIce configures PF eSwitch and sriov_numvfs in the following order: +// a. set eSwitchMode to the desired mode if needed +// a1. set sriov_numvfs to 0 before updating the eSwitchMode +// b. set sriov_numvfs to the desired number of VFs +func (s *sriov) setEswitchModeAndNumVFsIce(pciAddr string, desiredEswitchMode string, numVFs int) error { + log.Log.V(2).Info("setEswitchModeAndNumVFsIce(): configure VFs for device", + "device", pciAddr, "count", numVFs, "mode", desiredEswitchMode) + + if s.GetNicSriovMode(pciAddr) != desiredEswitchMode { + if err := s.SetSriovNumVfs(pciAddr, 0); err != nil { + return err + } + + if err := s.SetNicSriovMode(pciAddr, desiredEswitchMode); err != nil { + return err + } } + + if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil { + return err + } + return nil } @@ -1047,11 +1106,11 @@ func (s *sriov) unbindAllVFsOnPF(addr string) error { log.Log.V(2).Info("unbindAllVFsOnPF(): unbind all VFs on PF", "device", addr) vfAddrs, err := s.dputilsLib.GetVFList(addr) if err != nil { - return fmt.Errorf("failed to read VF list: %v", err) + return fmt.Errorf("failed to read VF list for pci[%s]: %w", addr, err) } for _, vfAddr := range vfAddrs { if err := s.kernelHelper.Unbind(vfAddr); err != nil { - return fmt.Errorf("failed to unbind VF from the driver: %v", err) + return fmt.Errorf("failed to unbind VF from the driver PF[%s], PF[%s]: %w", addr, vfAddr, err) } } return nil diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index 6db7d9617..f30e93773 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -214,6 +214,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{ Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil) hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) @@ -282,6 +283,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{ Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil) hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) @@ -333,6 +335,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil) @@ -393,6 +396,74 @@ var _ = Describe("SRIOV", func() { helpers.GinkgoAssertFileContentsEquals("/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs", "1") }) + It("should configure switchdev on ice driver", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"}, + Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}}, + }) + + dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1) + dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("ice", nil) + hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil) + hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil) + hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", syscall.EINVAL) + dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(1) + pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl) + netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2) + netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false) + netlinkLibMock.EXPECT().LinkSetUp(pfLinkMock).Return(nil) + netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{ + Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil).Times(2) + netlinkLibMock.EXPECT().DevLinkSetEswitchMode(gomock.Any(), "switchdev").Return(nil) + + dputilsLibMock.EXPECT().GetVFID("0000:d8:00.2").Return(0, nil).Times(2) + hostMock.EXPECT().HasDriver("0000:d8:00.2").Return(false, "") + hostMock.EXPECT().BindDefaultDriver("0000:d8:00.2").Return(nil) + hostMock.EXPECT().HasDriver("0000:d8:00.2").Return(true, "test") + hostMock.EXPECT().UnbindDriverIfNeeded("0000:d8:00.2", true).Return(nil) + hostMock.EXPECT().BindDefaultDriver("0000:d8:00.2").Return(nil) + hostMock.EXPECT().SetNetdevMTU("0000:d8:00.2", 2000).Return(nil) + hostMock.EXPECT().GetInterfaceIndex("0000:d8:00.2").Return(42, nil).AnyTimes() + vf0LinkMock := netlinkMockPkg.NewMockLink(testCtrl) + vf0Mac, _ := net.ParseMAC("02:42:19:51:2f:af") + vf0LinkMock.EXPECT().Attrs().Return(&netlink.LinkAttrs{Name: "enp216s0f0_0", HardwareAddr: vf0Mac}) + netlinkLibMock.EXPECT().LinkByIndex(42).Return(vf0LinkMock, nil).AnyTimes() + netlinkLibMock.EXPECT().LinkSetVfHardwareAddr(vf0LinkMock, 0, vf0Mac).Return(nil) + hostMock.EXPECT().GetPhysPortName("enp216s0f0np0").Return("p0", nil) + hostMock.EXPECT().GetPhysSwitchID("enp216s0f0np0").Return("7cfe90ff2cc0", nil) + hostMock.EXPECT().AddVfRepresentorUdevRule("0000:d8:00.0", "enp216s0f0np0", "7cfe90ff2cc0", "p0").Return(nil) + hostMock.EXPECT().CreateVDPADevice("0000:d8:00.2", "vhost_vdpa") + hostMock.EXPECT().LoadUdevRules().Return(nil) + + storeManagerMode.EXPECT().SaveLastPfAppliedStatus(gomock.Any()).Return(nil) + + Expect(s.ConfigSriovInterfaces(storeManagerMode, + []sriovnetworkv1.Interface{{ + Name: "enp216s0f0np0", + PciAddress: "0000:d8:00.0", + NumVfs: 1, + LinkType: "ETH", + EswitchMode: "switchdev", + VfGroups: []sriovnetworkv1.VfGroup{ + { + VfRange: "0-0", + ResourceName: "test-resource0", + PolicyName: "test-policy0", + Mtu: 2000, + IsRdma: true, + VdpaType: "vhost_vdpa", + }}, + }}, + []sriovnetworkv1.InterfaceExt{{PciAddress: "0000:d8:00.0"}}, + false)).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs", "1") + }) + It("externally managed - wrong VF count", func() { dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) @@ -459,6 +530,7 @@ var _ = Describe("SRIOV", func() { hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) hostMock.EXPECT().SetNetdevMTU("0000:d8:00.0", 1500).Return(nil) Expect(s.ConfigSriovInterfaces(storeManagerMode, @@ -499,6 +571,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return( &netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index b665c99f0..3c3cc4e84 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -1822,6 +1822,7 @@ func findMainSriovDevice(executorPod *corev1.Pod, sriovDevices []*sriovv1.Interf stdout, _, err = pod.ExecCommand(clients, executorPod, "ip", "link", "show", device.Name) Expect(err).ToNot(HaveOccurred()) Expect(len(stdout)).Should(Not(Equal(0)), "Unable to query link state") + if strings.Contains(stdout, "state DOWN") { continue // The interface is not active } @@ -1830,6 +1831,7 @@ func findMainSriovDevice(executorPod *corev1.Pod, sriovDevices []*sriovv1.Interf return device } } + return nil } @@ -2124,7 +2126,7 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n }))) } -func runCommandOnConfigDaemon(nodeName string, command ...string) (string, string, error) { +func getConfigDaemonPod(nodeName string) *corev1.Pod { pods := &corev1.PodList{} label, err := labels.Parse("app=sriov-network-config-daemon") Expect(err).ToNot(HaveOccurred()) @@ -2133,8 +2135,11 @@ func runCommandOnConfigDaemon(nodeName string, command ...string) (string, strin err = clients.List(context.Background(), pods, &runtimeclient.ListOptions{Namespace: operatorNamespace, LabelSelector: label, FieldSelector: field}) Expect(err).ToNot(HaveOccurred()) Expect(len(pods.Items)).To(Equal(1)) + return &pods.Items[0] +} - output, errOutput, err := pod.ExecCommand(clients, &pods.Items[0], command...) +func runCommandOnConfigDaemon(nodeName string, command ...string) (string, string, error) { + output, errOutput, err := pod.ExecCommand(clients, getConfigDaemonPod(nodeName), command...) return output, errOutput, err } diff --git a/test/conformance/tests/test_switchdev.go b/test/conformance/tests/test_switchdev.go new file mode 100644 index 000000000..62e340b1b --- /dev/null +++ b/test/conformance/tests/test_switchdev.go @@ -0,0 +1,107 @@ +package tests + +import ( + "context" + "fmt" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("[sriov] Switchdev", Ordered, func() { + + BeforeAll(func() { + if cluster.VirtualCluster() { + Skip("IGB driver does not support switchdev driver model") + } + + err := namespaces.Create(namespaces.Test, clients) + Expect(err).ToNot(HaveOccurred()) + + err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + WaitForSRIOVStable() + }) + + It("create switchdev policies on supported devices", func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + testNode, interfaces, err := sriovInfos.FindSriovDevicesAndNode() + Expect(err).ToNot(HaveOccurred()) + + // Avoid testing against primary NIC + interfaces, err = findUnusedSriovDevices(testNode, interfaces) + Expect(err).ToNot(HaveOccurred()) + + // Avoid testing the same NIC model more than once + interfaces = filterDuplicateNICModels(interfaces) + + By(fmt.Sprintf("Testing on node %s, %d devices found", testNode, len(interfaces))) + + for _, intf := range interfaces { + if !doesInterfaceSupportSwitchdev(intf) { + continue + } + + By("Testing device " + nameVendorDeviceID(intf) + " on node " + testNode) + resourceName := "swtichdev" + intf.Name + _, err = network.CreateSriovPolicy(clients, "test-switchdev-policy-", operatorNamespace, intf.Name, testNode, 8, resourceName, "netdevice", func(snnp *sriovv1.SriovNetworkNodePolicy) { + snnp.Spec.EswitchMode = "switchdev" + }) + Expect(err).ToNot(HaveOccurred()) + + WaitForSRIOVStable() + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), testNode, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + capacity, _ := resNum.AsInt64() + return capacity + }, 10*time.Minute, time.Second).Should(Equal(int64(8))) + } + }) +}) + +func doesInterfaceSupportSwitchdev(intf *sriovv1.InterfaceExt) bool { + if intf.Driver == "mlx5_core" { + return true + } + + if intf.Driver == "ice" { + return true + } + + return false +} + +func filterDuplicateNICModels(devices []*sriovv1.InterfaceExt) []*sriovv1.InterfaceExt { + foundVendorAndModels := map[string]bool{} + ret := []*sriovv1.InterfaceExt{} + + for _, device := range devices { + vendorAndDevice := device.Vendor + "_" + device.DeviceID + if _, value := foundVendorAndModels[vendorAndDevice]; !value { + ret = append(ret, device) + foundVendorAndModels[vendorAndDevice] = true + } + } + return ret +} + +func nameVendorDeviceID(intf *sriovv1.InterfaceExt) string { + return fmt.Sprintf("%s (%s:%s)", intf.Name, intf.Vendor, intf.DeviceID) +} diff --git a/test/util/cluster/cluster.go b/test/util/cluster/cluster.go index b79e61ad2..2d34c327a 100644 --- a/test/util/cluster/cluster.go +++ b/test/util/cluster/cluster.go @@ -153,6 +153,33 @@ func (n *EnabledNodes) FindSriovDevices(node string) ([]*sriovv1.InterfaceExt, e return filteredDevices, nil } +// FindSriovDevicesAndNode retrieves the node with the most number of SRIOV devices after filtering by `SRIOV_NODE_AND_DEVICE_NAME_FILTER` environment variable. +func (n *EnabledNodes) FindSriovDevicesAndNode() (string, []*sriovv1.InterfaceExt, error) { + errs := []error{} + + retNode := "" + retDevices := []*sriovv1.InterfaceExt{} + + for _, node := range n.Nodes { + devices, err := n.FindSriovDevices(node) + if err != nil { + errs = append(errs, err) + continue + } + + if len(devices) > len(retDevices) { + retNode = node + retDevices = devices + } + } + + if len(retDevices) == 0 { + return "", nil, fmt.Errorf("can't find any SR-IOV devices in cluster's nodes: %w", errors.Join(errs...)) + } + + return retNode, retDevices, nil +} + // FindSriovDevicesIgnoreFilters retrieves all valid sriov devices for the given node. func (n *EnabledNodes) FindSriovDevicesIgnoreFilters(node string) ([]*sriovv1.InterfaceExt, error) { devices := []*sriovv1.InterfaceExt{}