From 9a35de5b5d892418983674f5f4aa5d7f872df5c2 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Sun, 21 Aug 2022 19:09:05 +0300 Subject: [PATCH] Add support for BF2 in connectX mode Fix the issue introduced in https://github.com/k8snetworkplumbingwg/sriov-network-operator/pull/240/files#r808624002 Allow to use BF in connectX mode also for OCP platform. This is needed until we support the systemd configuration Signed-off-by: Sebastian Sch --- pkg/plugins/generic/generic_plugin.go | 21 ++++- pkg/plugins/mellanox/mellanox_plugin.go | 31 +------ pkg/utils/switchdev.go | 6 +- pkg/utils/utils.go | 35 +++++--- pkg/utils/utils_mlx.go | 102 ++++++++++++++++++++++++ 5 files changed, 155 insertions(+), 40 deletions(-) create mode 100644 pkg/utils/utils_mlx.go diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 3443ff6ddb..838b790249 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -87,12 +87,31 @@ func (p *GenericPlugin) Apply() error { return nil } } + + // Create a map with all the PFs we will need to configure + // we need to create it here before we access the host file system using the chroot function + // because the SkipConfigVf needs the mstconfig package that exist only inside the sriov-config-daemon file system + vfsToSkip := map[string]bool{} + for _, ifaceStatus := range p.DesireState.Status.Interfaces { + for _, iface := range p.DesireState.Spec.Interfaces { + if iface.PciAddress == ifaceStatus.PciAddress { + skip, err := utils.SkipConfigVf(iface, ifaceStatus) + if err != nil { + glog.Errorf("generic-plugin Apply(): fail to check for skip VFs %s: %v.", iface.PciAddress, err) + return err + } + vfsToSkip[iface.PciAddress] = skip + break + } + } + } + exit, err := utils.Chroot("/host") if err != nil { return err } defer exit() - if err := utils.SyncNodeState(p.DesireState); err != nil { + if err := utils.SyncNodeState(p.DesireState, vfsToSkip); err != nil { return err } 
p.LastState = &sriovnetworkv1.SriovNetworkNodeState{} diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index cd2e2f0106..7970a0dd90 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -2,7 +2,6 @@ package mellanox import ( "fmt" - "regexp" "strconv" "strings" @@ -216,24 +215,17 @@ func configFW() error { return nil } -func mstConfigReadData(pciAddress string) (string, error) { - glog.Infof("mellanox-plugin mstConfigReadData(): device %s", pciAddress) - args := []string{"-e", "-d", pciAddress, "q"} - out, err := utils.RunCommand("mstconfig", args...) - return out, err -} - func getMlnxNicFwData(pciAddress string) (current, next *mlnxNic, err error) { glog.Infof("mellanox-plugin getMlnxNicFwData(): device %s", pciAddress) err = nil attrs := []string{TotalVfs, EnableSriov, LinkTypeP1, LinkTypeP2} - out, err := mstConfigReadData(pciAddress) + out, err := utils.MstConfigReadData(pciAddress) if err != nil { glog.Errorf("mellanox-plugin getMlnxNicFwData(): failed %v", err) return } - mstCurrentData, mstNextData := parseMstconfigOutput(out, attrs) + mstCurrentData, mstNextData := utils.ParseMstconfigOutput(out, attrs) current, err = mlnxNicFromMap(mstCurrentData) if err != nil { glog.Errorf("mellanox-plugin getMlnxNicFwData(): %v", err) @@ -266,25 +258,6 @@ func mlnxNicFromMap(mstData map[string]string) (*mlnxNic, error) { return fwData, nil } -func parseMstconfigOutput(mstOutput string, attributes []string) (fwCurrent, fwNext map[string]string) { - glog.Infof("mellanox-plugin parseMstconfigOutput(): Attributes %v", attributes) - fwCurrent = map[string]string{} - fwNext = map[string]string{} - formatRegex := regexp.MustCompile(`(?P<Attribute>\w+)\s+(?P<Default>\S+)\s+(?P<Current>\S+)\s+(?P<Next>\S+)`) - mstOutputLines := strings.Split(mstOutput, "\n") - for _, attr := range attributes { - for _, line := range mstOutputLines { - if strings.Contains(line, attr) { - regexResult := 
formatRegex.FindStringSubmatch(line) - fwCurrent[attr] = regexResult[3] - fwNext[attr] = regexResult[4] - break - } - } - } - return -} - func getPciAddressPrefix(pciAddress string) string { return pciAddress[:len(pciAddress)-1] } diff --git a/pkg/utils/switchdev.go b/pkg/utils/switchdev.go index b93dbeb108..2699c8505b 100644 --- a/pkg/utils/switchdev.go +++ b/pkg/utils/switchdev.go @@ -35,7 +35,11 @@ func WriteSwitchdevConfFile(newState *sriovnetworkv1.SriovNetworkNodeState) (upd if iface.PciAddress != ifaceStatus.PciAddress { continue } - if !SkipConfigVf(iface, ifaceStatus) { + skip, err := SkipConfigVf(iface, ifaceStatus) + if err != nil { + return false, err + } + if !skip { continue } i := sriovnetworkv1.Interface{} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index e83dbc6de8..5d93aa4ed0 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -146,7 +146,7 @@ func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, // SyncNodeState Attempt to update the node state to match the desired state // -func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error { +func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, vfsToConfig map[string]bool) error { if IsKernelLockdownMode(true) && hasMellanoxInterfacesInSpec(newState) { glog.Warningf("cannot use mellanox devices when in kernel lockdown mode") return fmt.Errorf("cannot use mellanox devices when in kernel lockdown mode") @@ -157,10 +157,11 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error { for _, iface := range newState.Spec.Interfaces { if iface.PciAddress == ifaceStatus.PciAddress { configured = true - if SkipConfigVf(iface, ifaceStatus) { - glog.V(2).Infof("syncNodeState(): skip config VF in config daemon for %s, it shall be done by switchdev-configuration.service", iface.PciAddress) + + if skip := vfsToConfig[iface.PciAddress]; skip { break } + if !needUpdate(&iface, &ifaceStatus) { glog.V(2).Infof("syncNodeState(): no 
need update interface %s", iface.PciAddress) break @@ -175,7 +176,11 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error { break } } - if !configured && ifaceStatus.NumVfs > 0 && !SkipConfigVf(sriovnetworkv1.Interface{}, ifaceStatus) { + if !configured && ifaceStatus.NumVfs > 0 { + if skip := vfsToConfig[ifaceStatus.PciAddress]; skip { + continue + } + if err = resetSriovDevice(ifaceStatus); err != nil { return err } @@ -185,17 +190,29 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error { } // SkipConfigVf Use systemd service to configure switchdev mode or BF-2 NICs in OpenShift -func SkipConfigVf(ifSpec sriovnetworkv1.Interface, ifStatus sriovnetworkv1.InterfaceExt) bool { +func SkipConfigVf(ifSpec sriovnetworkv1.Interface, ifStatus sriovnetworkv1.InterfaceExt) (bool, error) { if ifSpec.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { glog.V(2).Infof("SkipConfigVf(): skip config VF for switchdev device") - return true + return true, nil } + // Nvidia_mlx5_MT42822_BlueField-2_integrated_ConnectX-6_Dx in OpenShift if ClusterType == ClusterTypeOpenshift && ifStatus.Vendor == VendorMellanox && ifStatus.DeviceID == DeviceBF2 { - glog.V(2).Infof("SkipConfigVf(): skip config VF for BF2 device") - return true + // TODO: remove this when switch to the systemd configuration support. 
+ mode, err := mellanoxBlueFieldMode(ifStatus.PciAddress) + if err != nil { + return false, fmt.Errorf("failed to read Mellanox Bluefield card mode for %s,%v", ifStatus.PciAddress, err) + } + + if mode == bluefield2Conntexd { + return false, nil + } + + glog.V(2).Infof("SkipConfigVf(): skip config VF for BF2 device on DPU mode") + return true, nil } - return false + + return false, nil } func needUpdate(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.InterfaceExt) bool { diff --git a/pkg/utils/utils_mlx.go b/pkg/utils/utils_mlx.go new file mode 100644 index 0000000000..202da5c8ae --- /dev/null +++ b/pkg/utils/utils_mlx.go @@ -0,0 +1,102 @@ +package utils + +import ( + "fmt" + "regexp" + "strings" + + "github.com/golang/glog" +) + +//BlueField2 mode representation +type BlueField2Mode int + +const ( + bluefield2Dpu BlueField2Mode = iota + bluefield2Conntexd + + internalCPUPageSupplier = "INTERNAL_CPU_PAGE_SUPPLIER" + internalCPUEswitchManager = "INTERNAL_CPU_ESWITCH_MANAGER" + internalCPUIbVporto = "INTERNAL_CPU_IB_VPORT0" + internalCPUOffloadEngine = "INTERNAL_CPU_OFFLOAD_ENGINE" + + ecpf = "ECPF" + extHostPf = "EXT_HOST_PF" + + disabled = "DISABLED" + enabled = "ENABLED" +) + +func MstConfigReadData(pciAddress string) (string, error) { + glog.Infof("MstConfigReadData(): device %s", pciAddress) + args := []string{"-e", "-d", pciAddress, "q"} + out, err := RunCommand("mstconfig", args...) 
+ return out, err +} + +func ParseMstconfigOutput(mstOutput string, attributes []string) (fwCurrent, fwNext map[string]string) { + glog.Infof("ParseMstconfigOutput(): Attributes %v", attributes) + fwCurrent = map[string]string{} + fwNext = map[string]string{} + formatRegex := regexp.MustCompile(`(?P<Attribute>\w+)\s+(?P<Default>\S+)\s+(?P<Current>\S+)\s+(?P<Next>\S+)`) + mstOutputLines := strings.Split(mstOutput, "\n") + for _, attr := range attributes { + for _, line := range mstOutputLines { + if strings.Contains(line, attr) { + regexResult := formatRegex.FindStringSubmatch(line) + fwCurrent[attr] = regexResult[3] + fwNext[attr] = regexResult[4] + break + } + } + } + return +} + +func mellanoxBlueFieldMode(PciAddress string) (BlueField2Mode, error) { + glog.V(2).Infof("MellanoxBlueFieldMode():checking mode for card %s", PciAddress) + out, err := MstConfigReadData(PciAddress) + if err != nil { + glog.Errorf("MellanoxBlueFieldMode(): failed to get mlx nic fw data %v", err) + return -1, fmt.Errorf("failed to get mlx nic fw data %v", err) + } + + attrs := []string{internalCPUPageSupplier, internalCPUEswitchManager, internalCPUIbVporto, internalCPUOffloadEngine} + mstCurrentData, _ := ParseMstconfigOutput(out, attrs) + + internalCPUPageSupplierstatus, exist := mstCurrentData[internalCPUPageSupplier] + if !exist { + return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUPageSupplier) + } + + internalCPUEswitchManagerStatus, exist := mstCurrentData[internalCPUEswitchManager] + if !exist { + return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUEswitchManager) + } + + internalCPUIbVportoStatus, exist := mstCurrentData[internalCPUIbVporto] + if !exist { + return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUIbVporto) + } + internalCPUOffloadEngineStatus, exist := mstCurrentData[internalCPUOffloadEngine] + if !exist { + return 0, fmt.Errorf("failed to find %s in the mstconfig output command", 
internalCPUOffloadEngine) + } + + // check for DPU + if strings.Contains(internalCPUPageSupplierstatus, ecpf) && + strings.Contains(internalCPUEswitchManagerStatus, ecpf) && + strings.Contains(internalCPUIbVportoStatus, ecpf) && + strings.Contains(internalCPUOffloadEngineStatus, enabled) { + glog.V(2).Infof("MellanoxBlueFieldMode():card %s in DPU mode", PciAddress) + return bluefield2Dpu, nil + } else if strings.Contains(internalCPUPageSupplierstatus, extHostPf) && + strings.Contains(internalCPUEswitchManagerStatus, extHostPf) && + strings.Contains(internalCPUIbVportoStatus, extHostPf) && + strings.Contains(internalCPUOffloadEngineStatus, disabled) { + glog.V(2).Infof("MellanoxBlueFieldMode():card %s in ConnectX mode", PciAddress) + return bluefield2Conntexd, nil + } + + return -1, fmt.Errorf("MellanoxBlueFieldMode(): unknown card status for %s", PciAddress) +}