Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for BF2 in connectX mode #353

Merged
merged 2 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pkg/plugins/generic/generic_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,21 @@ func (p *GenericPlugin) Apply() error {
return nil
}
}

// Build a map recording, for each PF, whether its VF configuration must be skipped here.
// It has to be created before we access the host file system using the chroot function,
SchSeba marked this conversation as resolved.
Show resolved Hide resolved
// because skipConfigVf needs the mstconfig package, which exists only inside the sriov-config-daemon file system
pfsToSkip, err := utils.GetPfsToSkip(p.DesireState)
if err != nil {
return err
}

exit, err := utils.Chroot("/host")
if err != nil {
return err
}
defer exit()
if err := utils.SyncNodeState(p.DesireState); err != nil {
if err := utils.SyncNodeState(p.DesireState, pfsToSkip); err != nil {
return err
}
p.LastState = &sriovnetworkv1.SriovNetworkNodeState{}
Expand Down
31 changes: 2 additions & 29 deletions pkg/plugins/mellanox/mellanox_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package mellanox

import (
"fmt"
"regexp"
"strconv"
"strings"

Expand Down Expand Up @@ -215,24 +214,17 @@ func configFW() error {
return nil
}

// mstConfigReadData queries the firmware configuration of the device at
// pciAddress by running `mstconfig -e -d <pciAddress> q`. It returns the
// command output together with whatever error utils.RunCommand reports.
func mstConfigReadData(pciAddress string) (string, error) {
	glog.Infof("mellanox-plugin mstConfigReadData(): device %s", pciAddress)
	return utils.RunCommand("mstconfig", "-e", "-d", pciAddress, "q")
}

func getMlnxNicFwData(pciAddress string) (current, next *mlnxNic, err error) {
glog.Infof("mellanox-plugin getMlnxNicFwData(): device %s", pciAddress)
err = nil
attrs := []string{TotalVfs, EnableSriov, LinkTypeP1, LinkTypeP2}

out, err := mstConfigReadData(pciAddress)
out, err := utils.MstConfigReadData(pciAddress)
if err != nil {
glog.Errorf("mellanox-plugin getMlnxNicFwData(): failed %v", err)
return
}
mstCurrentData, mstNextData := parseMstconfigOutput(out, attrs)
mstCurrentData, mstNextData := utils.ParseMstconfigOutput(out, attrs)
current, err = mlnxNicFromMap(mstCurrentData)
if err != nil {
glog.Errorf("mellanox-plugin getMlnxNicFwData(): %v", err)
Expand Down Expand Up @@ -265,25 +257,6 @@ func mlnxNicFromMap(mstData map[string]string) (*mlnxNic, error) {
return fwData, nil
}

// parseMstconfigOutput extracts firmware values for the requested attributes
// from the text output of an mstconfig query.
//
// Each row of interest is expected to have four whitespace-separated columns:
// attribute name, default value, current value and next-boot value. For every
// entry in attributes, the first line that contains the attribute name and
// matches that layout is used; its third column is stored in fwCurrent and its
// fourth column in fwNext, both keyed by the attribute name.
//
// Attributes that never appear in a well-formed row are simply absent from
// both maps, so callers must check for key presence.
func parseMstconfigOutput(mstOutput string, attributes []string) (fwCurrent, fwNext map[string]string) {
	glog.Infof("mellanox-plugin parseMstconfigOutput(): Attributes %v", attributes)
	fwCurrent = map[string]string{}
	fwNext = map[string]string{}
	formatRegex := regexp.MustCompile(`(?P<Attribute>\w+)\s+(?P<Default>\S+)\s+(?P<Current>\S+)\s+(?P<Next>\S+)`)
	mstOutputLines := strings.Split(mstOutput, "\n")
	for _, attr := range attributes {
		for _, line := range mstOutputLines {
			if !strings.Contains(line, attr) {
				continue
			}
			regexResult := formatRegex.FindStringSubmatch(line)
			if regexResult == nil {
				// The attribute name shows up on a line that is not a
				// four-column row (e.g. a header or free-form text).
				// Indexing the nil result would panic, so keep scanning.
				continue
			}
			fwCurrent[attr] = regexResult[3]
			fwNext[attr] = regexResult[4]
			break
		}
	}
	return fwCurrent, fwNext
}

SchSeba marked this conversation as resolved.
Show resolved Hide resolved
// getPciAddressPrefix returns pciAddress with its final byte removed, e.g.
// "0000:3b:00.0" becomes "0000:3b:00.".
//
// An empty input yields an empty string instead of panicking on an
// out-of-range slice.
func getPciAddressPrefix(pciAddress string) string {
	if pciAddress == "" {
		return ""
	}
	return pciAddress[:len(pciAddress)-1]
}
Expand Down
10 changes: 9 additions & 1 deletion pkg/utils/switchdev.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,23 @@ func IsSwitchdevModeSpec(spec sriovnetworkv1.SriovNetworkNodeStateSpec) bool {
}

func WriteSwitchdevConfFile(newState *sriovnetworkv1.SriovNetworkNodeState) (update bool, err error) {
// Create a map with all the PFs we will need to SKIP for systemd configuration
pfsToSkip, err := GetPfsToSkip(newState)
if err != nil {
return false, err
}

cfg := config{}
for _, iface := range newState.Spec.Interfaces {
for _, ifaceStatus := range newState.Status.Interfaces {
if iface.PciAddress != ifaceStatus.PciAddress {
continue
}
if !SkipConfigVf(iface, ifaceStatus) {

if skip := pfsToSkip[iface.PciAddress]; !skip {
continue
}

i := sriovnetworkv1.Interface{}
if iface.NumVfs > 0 {
i = sriovnetworkv1.Interface{
Expand Down
60 changes: 49 additions & 11 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt,

// SyncNodeState Attempt to update the node state to match the desired state
//
func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error {
func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig map[string]bool) error {
if IsKernelLockdownMode(true) && hasMellanoxInterfacesInSpec(newState) {
glog.Warningf("cannot use mellanox devices when in kernel lockdown mode")
return fmt.Errorf("cannot use mellanox devices when in kernel lockdown mode")
Expand All @@ -158,10 +158,11 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error {
for _, iface := range newState.Spec.Interfaces {
if iface.PciAddress == ifaceStatus.PciAddress {
configured = true
if SkipConfigVf(iface, ifaceStatus) {
glog.V(2).Infof("syncNodeState(): skip config VF in config daemon for %s, it shall be done by switchdev-configuration.service", iface.PciAddress)

if skip := pfsToConfig[iface.PciAddress]; skip {
break
}

if !NeedUpdate(&iface, &ifaceStatus) {
glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress)
break
Expand All @@ -176,7 +177,11 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error {
break
}
}
if !configured && ifaceStatus.NumVfs > 0 && !SkipConfigVf(sriovnetworkv1.Interface{}, ifaceStatus) {
if !configured && ifaceStatus.NumVfs > 0 {
if skip := pfsToConfig[ifaceStatus.PciAddress]; skip {
continue
}

if err = resetSriovDevice(ifaceStatus); err != nil {
return err
}
Expand All @@ -185,18 +190,51 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState) error {
return nil
}

// SkipConfigVf Use systemd service to configure switchdev mode or BF-2 NICs in OpenShift
func SkipConfigVf(ifSpec sriovnetworkv1.Interface, ifStatus sriovnetworkv1.InterfaceExt) bool {
// skipConfigVf Use systemd service to configure switchdev mode or BF-2 NICs in OpenShift
func skipConfigVf(ifSpec sriovnetworkv1.Interface, ifStatus sriovnetworkv1.InterfaceExt) (bool, error) {
if ifSpec.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
glog.V(2).Infof("SkipConfigVf(): skip config VF for switchdev device")
return true
glog.V(2).Infof("skipConfigVf(): skip config VF for switchdev device")
return true, nil
}

// Nvidia_mlx5_MT42822_BlueField-2_integrated_ConnectX-6_Dx in OpenShift
if ClusterType == ClusterTypeOpenshift && ifStatus.Vendor == VendorMellanox && ifStatus.DeviceID == DeviceBF2 {
glog.V(2).Infof("SkipConfigVf(): skip config VF for BF2 device")
return true
// TODO: remove this when switch to the systemd configuration support.
mode, err := mellanoxBlueFieldMode(ifStatus.PciAddress)
if err != nil {
return false, fmt.Errorf("failed to read Mellanox Bluefield card mode for %s,%v", ifStatus.PciAddress, err)
}

if mode == bluefieldConnectXMode {
return false, nil
}

glog.V(2).Infof("skipConfigVf(): skip config VF for Bluefiled card on DPU mode")
return true, nil
}
return false

return false, nil
}

// GetPfsToSkip returns a map keyed by PF PCI address telling whether the VF
// configuration of that device must be skipped by the legacy (in-daemon) path
// because it is handled via systemd instead.
//
// Only interfaces present in both the spec and the status of ns get an entry;
// the decision for each is delegated to skipConfigVf, which skips switchdev
// devices and, on OpenShift, BlueField-2 cards that are not in ConnectX mode.
// If skipConfigVf fails, the error is logged and returned together with the
// entries collected so far.
func GetPfsToSkip(ns *sriovnetworkv1.SriovNetworkNodeState) (map[string]bool, error) {
	result := map[string]bool{}
	for _, status := range ns.Status.Interfaces {
		for _, spec := range ns.Spec.Interfaces {
			if spec.PciAddress != status.PciAddress {
				continue
			}

			shouldSkip, err := skipConfigVf(spec, status)
			if err != nil {
				glog.Errorf("GetPfsToSkip(): fail to check for skip VFs %s: %v.", spec.PciAddress, err)
				return result, err
			}
			result[spec.PciAddress] = shouldSkip
			// Only the first spec entry matching this status is considered.
			break
		}
	}

	return result, nil
}

func NeedUpdate(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.InterfaceExt) bool {
Expand Down
117 changes: 117 additions & 0 deletions pkg/utils/utils_mlx.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package utils

import (
"fmt"
"regexp"
"strings"

"github.com/golang/glog"
)

// BlueFieldMode identifies the operating mode detected for a BlueField card.
type BlueFieldMode int

const (
// Modes reported by mellanoxBlueFieldMode; bluefieldDpu is the zero value.
bluefieldDpu BlueFieldMode = iota
bluefieldConnectXMode

// Names of the mstconfig attributes that mellanoxBlueFieldMode reads to
// decide which mode the card is in.
internalCPUPageSupplier = "INTERNAL_CPU_PAGE_SUPPLIER"
internalCPUEswitchManager = "INTERNAL_CPU_ESWITCH_MANAGER"
internalCPUIbVporto = "INTERNAL_CPU_IB_VPORT0"
internalCPUOffloadEngine = "INTERNAL_CPU_OFFLOAD_ENGINE"
internalCPUModel = "INTERNAL_CPU_MODEL"

// Substrings looked for in the current values of the page supplier,
// eswitch manager, IB vport0 and CPU model attributes.
ecpf = "ECPF"
extHostPf = "EXT_HOST_PF"
embeddedCPU = "EMBEDDED_CPU"

// Substrings looked for in the current value of the offload engine attribute.
disabled = "DISABLED"
enabled = "ENABLED"
)

// MstConfigReadData queries the firmware configuration of the device at
// pciAddress by running `mstconfig -e -d <pciAddress> q`. It returns the
// command output together with whatever error RunCommand reports.
func MstConfigReadData(pciAddress string) (string, error) {
	glog.Infof("MstConfigReadData(): device %s", pciAddress)
	return RunCommand("mstconfig", "-e", "-d", pciAddress, "q")
}

// ParseMstconfigOutput extracts firmware values for the requested attributes
// from the text output of an mstconfig query.
//
// Each row of interest is expected to have four whitespace-separated columns:
// attribute name, default value, current value and next-boot value. For every
// entry in attributes, the first line that contains the attribute name and
// matches that layout is used; its third column is stored in fwCurrent and its
// fourth column in fwNext, both keyed by the attribute name.
//
// Attributes that never appear in a well-formed row are simply absent from
// both maps, so callers must check for key presence.
func ParseMstconfigOutput(mstOutput string, attributes []string) (fwCurrent, fwNext map[string]string) {
	glog.Infof("ParseMstconfigOutput(): Attributes %v", attributes)
	fwCurrent = map[string]string{}
	fwNext = map[string]string{}
	formatRegex := regexp.MustCompile(`(?P<Attribute>\w+)\s+(?P<Default>\S+)\s+(?P<Current>\S+)\s+(?P<Next>\S+)`)
	mstOutputLines := strings.Split(mstOutput, "\n")
	for _, attr := range attributes {
		for _, line := range mstOutputLines {
			if !strings.Contains(line, attr) {
				continue
			}
			regexResult := formatRegex.FindStringSubmatch(line)
			if regexResult == nil {
				// The attribute name shows up on a line that is not a
				// four-column row (e.g. a header or free-form text).
				// Indexing the nil result would panic, so keep scanning.
				continue
			}
			fwCurrent[attr] = regexResult[3]
			fwNext[attr] = regexResult[4]
			break
		}
	}
	return fwCurrent, fwNext
}

// mellanoxBlueFieldMode reports whether the BlueField card at PciAddress is
// in DPU mode or ConnectX mode, based on the current values of the
// INTERNAL_CPU_* attributes in the mstconfig query output.
//
// It returns an error when the mstconfig query fails, when any of the five
// attributes is missing from the parsed output, or when the values match
// neither known combination. Note that the missing-attribute paths return 0,
// which is numerically bluefieldDpu, so the mode must only be used when the
// returned error is nil.
func mellanoxBlueFieldMode(PciAddress string) (BlueFieldMode, error) {
glog.V(2).Infof("MellanoxBlueFieldMode():checking mode for card %s", PciAddress)
out, err := MstConfigReadData(PciAddress)
if err != nil {
glog.Errorf("MellanoxBlueFieldMode(): failed to get mlx nic fw data %v", err)
return -1, fmt.Errorf("failed to get mlx nic fw data %v", err)
}

// Only the current values are needed; the next-boot values are discarded.
attrs := []string{internalCPUPageSupplier,
internalCPUEswitchManager,
internalCPUIbVporto,
internalCPUOffloadEngine,
internalCPUModel}
mstCurrentData, _ := ParseMstconfigOutput(out, attrs)

// Every attribute must be present in the parsed output; a missing one is
// reported as an error rather than guessed.
internalCPUPageSupplierstatus, exist := mstCurrentData[internalCPUPageSupplier]
if !exist {
return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUPageSupplier)
}

internalCPUEswitchManagerStatus, exist := mstCurrentData[internalCPUEswitchManager]
if !exist {
return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUEswitchManager)
}

internalCPUIbVportoStatus, exist := mstCurrentData[internalCPUIbVporto]
if !exist {
return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUIbVporto)
}

internalCPUOffloadEngineStatus, exist := mstCurrentData[internalCPUOffloadEngine]
if !exist {
return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUOffloadEngine)
}

internalCPUModelStatus, exist := mstCurrentData[internalCPUModel]
if !exist {
return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUModel)
}

// DPU mode: page supplier, eswitch manager and IB vport0 contain ECPF, the
// offload engine contains ENABLED and the CPU model contains EMBEDDED_CPU.
// ConnectX mode: the first three contain EXT_HOST_PF, the offload engine
// contains DISABLED and the CPU model still contains EMBEDDED_CPU.
// check for DPU
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a comment: These registers aren't available with some versions of mstflint.

INTERNAL_CPU_MODEL should also be 1 as well. This is true for both DPU mode and NIC mode. Maybe we should add a check here just to be safe.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wizhaoredhat if for both it will be 1 why do we need to check it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible that the NIC was incorrectly set to Separated Host Mode which will have a value of zero.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good done

if strings.Contains(internalCPUPageSupplierstatus, ecpf) &&
strings.Contains(internalCPUEswitchManagerStatus, ecpf) &&
strings.Contains(internalCPUIbVportoStatus, ecpf) &&
strings.Contains(internalCPUOffloadEngineStatus, enabled) &&
strings.Contains(internalCPUModelStatus, embeddedCPU) {
glog.V(2).Infof("MellanoxBlueFieldMode():card %s in DPU mode", PciAddress)
return bluefieldDpu, nil
} else if strings.Contains(internalCPUPageSupplierstatus, extHostPf) &&
strings.Contains(internalCPUEswitchManagerStatus, extHostPf) &&
strings.Contains(internalCPUIbVportoStatus, extHostPf) &&
strings.Contains(internalCPUOffloadEngineStatus, disabled) &&
strings.Contains(internalCPUModelStatus, embeddedCPU) {
glog.V(2).Infof("MellanoxBlueFieldMode():card %s in ConnectX mode", PciAddress)
bn222 marked this conversation as resolved.
Show resolved Hide resolved
return bluefieldConnectXMode, nil
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a suggestion. I believe it would help with debugging if who have some logs here (in the case of failure) what the actual mstconfig is.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// Neither combination matched: log the raw mstconfig output to help
// debugging and report the card state as unknown.
glog.Errorf("MellanoxBlueFieldMode(): unknown card status for %s mstconfig output \n %s", PciAddress, out)
return -1, fmt.Errorf("MellanoxBlueFieldMode(): unknown card status for %s", PciAddress)
}