From 2c52beb8ba6fd751b0eb52dfd0e494e2c54eeea6 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Wed, 2 Mar 2022 13:44:10 +0200 Subject: [PATCH 1/3] Improve the virtual plugin support This commit add the support for virtio interfaces like vhostuser for openstack virtual workers. implementation details: * on first run (after a reboot) we get all the information we need when the devices are visible to the kernel * we match the mac address to the openstack network ID * if the sriov-network-config-daemon gets reboot it will use the initial file on the node so even if the nics are in vfio the node state will be right move the operator initial state file to /tmp on the host so it will get deleted on every reboot to support nic changes both for virtual and BM environments Signed-off-by: Sebastian Sch --- cmd/sriov-network-config-daemon/start.go | 10 +- pkg/daemon/writer.go | 85 +++++++++++---- pkg/utils/utils.go | 2 +- pkg/utils/utils_virtual.go | 133 ++++++++++++++++++++--- 4 files changed, 187 insertions(+), 43 deletions(-) diff --git a/cmd/sriov-network-config-daemon/start.go b/cmd/sriov-network-config-daemon/start.go index cd01d57ef..e6f1c5d64 100644 --- a/cmd/sriov-network-config-daemon/start.go +++ b/cmd/sriov-network-config-daemon/start.go @@ -143,7 +143,7 @@ func runStartCmd(cmd *cobra.Command, args []string) { destdir := os.Getenv("DEST_DIR") if destdir == "" { - destdir = "/host/etc" + destdir = "/host/tmp" } platformType := utils.Baremetal @@ -161,8 +161,12 @@ func runStartCmd(cmd *cobra.Command, args []string) { glog.V(0).Infof("Running on platform: %s", platformType.String()) // block the deamon process until nodeWriter finish first its run - nodeWriter.Run(stopCh, refreshCh, syncCh, destdir, true, platformType) - go nodeWriter.Run(stopCh, refreshCh, syncCh, "", false, platformType) + err = nodeWriter.RunOnce(destdir, platformType) + if err != nil { + glog.Errorf("failed to run writer: %v", err) + panic(err.Error()) + } + go nodeWriter.Run(stopCh, refreshCh, syncCh, platformType) glog.V(0).Info("Starting SriovNetworkConfigDaemon") err = daemon.New( diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index a4cc0df95..b3ad2d393 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -9,13 +9,14 @@ import ( "time" "github.com/golang/glog" - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" ) const ( @@ -27,8 +28,7 @@ type NodeStateStatusWriter struct { node string status sriovnetworkv1.SriovNetworkNodeStateStatus OnHeartbeatFailure func() - metaData *utils.OSPMetaData - networkData *utils.OSPNetworkData + openStackDevicesInfo utils.OSPDevicesInfo withUnsupportedDevices bool } @@ -42,35 +42,56 @@ func NewNodeStateStatusWriter(c snclientset.Interface, n string, f func(), devMo } } -// Run reads from the writer channel and sets the interface status. It will -// return if the stop channel is closed. Intended to be run via a goroutine. -func (writer *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message, syncCh chan<- struct{}, destDir string, runonce bool, platformType utils.PlatformType) { - glog.V(0).Infof("Run(): start writer") +// RunOnce initial the interface status for both baremetal and virtual environments +func (writer *NodeStateStatusWriter) RunOnce(destDir string, platformType utils.PlatformType) error { + glog.V(0).Infof("RunOnce(): start writer") msg := Message{} - var err error - if platformType == utils.VirtualOpenStack { - writer.metaData, writer.networkData, err = utils.GetOpenstackData() + ns, err := writer.getCheckPointNodeState(destDir) if err != nil { - glog.Errorf("Run(): failed to get OpenStack data: %v", err) + return err + } + + metaData, networkData, err := utils.GetOpenstackData() + if err != nil { + glog.Errorf("RunOnce(): failed to read OpenStack data: %v", err) } - } - if runonce { - glog.V(0).Info("Run(): once") - if err := writer.pollNicStatus(platformType); err != nil { - glog.Errorf("Run(): first poll failed: %v", err) + if ns == nil { + writer.openStackDevicesInfo, err = utils.CreateOpenstackDevicesInfo(metaData, networkData) + if err != nil { + return err + } + } else { + devicesInfo := make(utils.OSPDevicesInfo) + for _, iface := range ns.Status.Interfaces { + devicesInfo[iface.PciAddress] = &utils.OSPDeviceInfo{MacAddress: iface.Mac, NetworkID: iface.NetFilter} + } + writer.openStackDevicesInfo = devicesInfo } - ns, _ := writer.setNodeStateStatus(msg) - writer.writeCheckpointFile(ns, destDir) - return } + + glog.V(0).Info("RunOnce(): once") + if err := writer.pollNicStatus(platformType); err != nil { + glog.Errorf("RunOnce(): first poll failed: %v", err) + } + + ns, _ := writer.setNodeStateStatus(msg) + return writer.writeCheckpointFile(ns, destDir) +} + +// Run reads from the writer channel and sets the interface status. It will +// return if the stop channel is closed. Intended to be run via a goroutine. +func (writer *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message, syncCh chan<- struct{}, platformType utils.PlatformType) error { + glog.V(0).Infof("Run(): start writer") + msg := Message{} + for { select { case <-stop: glog.V(0).Info("Run(): stop writer") - return + return nil case msg = <-refresh: glog.V(0).Info("Run(): refresh trigger") if err := writer.pollNicStatus(platformType); err != nil { @@ -96,7 +117,7 @@ func (writer *NodeStateStatusWriter) pollNicStatus(platformType utils.PlatformTy var err error if platformType == utils.VirtualOpenStack { - iface, err = utils.DiscoverSriovDevicesVirtual(platformType, writer.metaData, writer.networkData) + iface, err = utils.DiscoverSriovDevicesVirtual(writer.openStackDevicesInfo) } else { iface, err = utils.DiscoverSriovDevices(writer.withUnsupportedDevices) } @@ -198,3 +219,21 @@ func (w *NodeStateStatusWriter) writeCheckpointFile(ns *sriovnetworkv1.SriovNetw } return nil } + +func (w *NodeStateStatusWriter) getCheckPointNodeState(destDir string) (*sriovnetworkv1.SriovNetworkNodeState, error) { + glog.Infof("getCheckPointNodeState()") + configdir := filepath.Join(destDir, CheckpointFileName) + file, err := os.OpenFile(configdir, os.O_RDONLY, 0644) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + defer file.Close() + if err = json.NewDecoder(file).Decode(&utils.InitialState); err != nil { + return nil, err + } + + return &utils.InitialState, nil +} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 3edd0e638..f0376ae50 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -459,7 +459,7 @@ func getNetdevMTU(pciAddr string) int { } func getNetDevMac(ifaceName string) string { - glog.V(2).Infof("getNetDevMac(): get Mac for device %s", ifaceName) + glog.Infof("getNetDevMac(): get Mac for device %s", ifaceName) macFilePath := filepath.Join(sysClassNet, ifaceName, "address") data, err := ioutil.ReadFile(macFilePath) if err != nil { diff --git a/pkg/utils/utils_virtual.go b/pkg/utils/utils_virtual.go index c54dfccdf..6a1eaa5bb 100644 --- a/pkg/utils/utils_virtual.go +++ b/pkg/utils/utils_virtual.go @@ -5,13 +5,16 @@ import ( "errors" "fmt" "io" + "io/ioutil" "os" + "path/filepath" "strconv" "github.com/golang/glog" "github.com/hashicorp/go-retryablehttp" - dputils "github.com/intel/sriov-network-device-plugin/pkg/utils" "github.com/jaypipes/ghw" + + dputils "github.com/intel/sriov-network-device-plugin/pkg/utils" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" ) @@ -98,6 +101,13 @@ type OSPNetworkData struct { // Omit Services } +type OSPDevicesInfo map[string]*OSPDeviceInfo + +type OSPDeviceInfo struct { + MacAddress string + NetworkID string +} + // GetOpenstackData gets the metadata and network_data func GetOpenstackData() (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { metaData, networkData, err = getOpenstackDataFromConfigDrive() @@ -183,32 +193,84 @@ func getOpenstackDataFromMetadataService() (metaData *OSPMetaData, networkData * return metaData, networkData, nil } -func parseOpenstackMetaData(pciAddr string, metaData *OSPMetaData, networkData *OSPNetworkData) (networkID string, macAddress string) { +// CreateOpenstackDevicesInfo create the openstack device info map +func CreateOpenstackDevicesInfo(metaData *OSPMetaData, networkData *OSPNetworkData) (OSPDevicesInfo, error) { + glog.Infof("CreateOpenstackDevicesInfo()") + devicesInfo := make(OSPDevicesInfo) if metaData == nil || networkData == nil { - return + return nil, nil } + // use this for hw pass throw interfaces for _, device := range metaData.Devices { - if pciAddr == device.Address { - for _, link := range networkData.Links { - if device.Mac == link.EthernetMac { - for _, network := range networkData.Networks { - if network.Link == link.ID { - networkID = sriovnetworkv1.OpenstackNetworkID.String() + ":" + network.NetworkID - macAddress = device.Mac - } + for _, link := range networkData.Links { + if device.Mac == link.EthernetMac { + for _, network := range networkData.Networks { + if network.Link == link.ID { + networkID := sriovnetworkv1.OpenstackNetworkID.String() + ":" + network.NetworkID + devicesInfo[device.Address] = &OSPDeviceInfo{MacAddress: device.Mac, NetworkID: networkID} } } } } } - return + // for vhostuser interface type we check the interfaces on the node + pci, err := ghw.PCI() + if err != nil { + return nil, fmt.Errorf("CreateOpenstackDevicesInfo(): error getting PCI info: %v", err) + } + + devices := pci.ListDevices() + if len(devices) == 0 { + return nil, fmt.Errorf("CreateOpenstackDevicesInfo(): could not retrieve PCI devices") + } + + for _, device := range devices { + if _, exist := devicesInfo[device.Address]; exist { + //we already discover the device via openstack metadata + continue + } + + devClass, err := strconv.ParseInt(device.Class.ID, 16, 64) + if err != nil { + glog.Warningf("CreateOpenstackDevicesInfo(): unable to parse device class for device %+v %q", device, err) + continue + } + if devClass != netClass { + // Not network device + continue + } + + macAddress := "" + if name := tryToGetVirtualInterfaceName(device.Address); name != "" { + if mac := getNetDevMac(name); mac != "" { + macAddress = mac + } + } + if macAddress == "" { + // we didn't manage to find a mac address for the nic skipping + continue + } + + for _, link := range networkData.Links { + if macAddress == link.EthernetMac { + for _, network := range networkData.Networks { + if network.Link == link.ID { + networkID := sriovnetworkv1.OpenstackNetworkID.String() + ":" + network.NetworkID + devicesInfo[device.Address] = &OSPDeviceInfo{MacAddress: macAddress, NetworkID: networkID} + } + } + } + } + } + + return devicesInfo, err } // DiscoverSriovDevicesVirtual discovers VFs on a virtual platform -func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaData, networkData *OSPNetworkData) ([]sriovnetworkv1.InterfaceExt, error) { - glog.V(2).Info("DiscoverSriovDevicesVirtual") +func DiscoverSriovDevicesVirtual(devicesInfo OSPDevicesInfo) ([]sriovnetworkv1.InterfaceExt, error) { + glog.V(2).Info("DiscoverSriovDevicesVirtual()") pfList := []sriovnetworkv1.InterfaceExt{} pci, err := ghw.PCI() @@ -232,7 +294,13 @@ func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaDat continue } - netFilter, metaMac := parseOpenstackMetaData(device.Address, metaData, networkData) + deviceInfo, exist := devicesInfo[device.Address] + if !exist { + glog.Warningf("DiscoverSriovDevicesVirtual(): unable to find device in devicesInfo list for pci %s", device.Address) + continue + } + netFilter := deviceInfo.NetworkID + metaMac := deviceInfo.MacAddress driver, err := dputils.GetDriverName(device.Address) if err != nil { @@ -249,7 +317,7 @@ func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaDat if mtu := getNetdevMTU(device.Address); mtu > 0 { iface.Mtu = mtu } - if name := tryGetInterfaceName(device.Address); name != "" { + if name := tryToGetVirtualInterfaceName(device.Address); name != "" { iface.Name = name if iface.Mac = getNetDevMac(name); iface.Mac == "" { iface.Mac = metaMac @@ -277,6 +345,39 @@ func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaDat return pfList, nil } +// tryToGetVirtualInterfaceName get the interface name of a virtio interface +func tryToGetVirtualInterfaceName(pciAddr string) string { + glog.Infof("tryToGetVirtualInterfaceName() get interface name for device %s", pciAddr) + + // To support different driver that is not virtio-pci like mlx + name := tryGetInterfaceName(pciAddr) + if name != "" { + return name + } + + netDir, err := filepath.Glob(filepath.Join(sysBusPciDevices, pciAddr, "virtio*", "net")) + if err != nil || len(netDir) < 1 { + return "" + } + + fInfos, err := ioutil.ReadDir(netDir[0]) + if err != nil { + glog.Warningf("tryToGetVirtualInterfaceName(): failed to read net directory %s: %q", netDir, err) + return "" + } + + names := make([]string, 0) + for _, f := range fInfos { + names = append(names, f.Name()) + } + + if len(names) < 1 { + return "" + } + + return names[0] +} + // SyncNodeStateVirtual attempt to update the node state to match the desired state // in virtual platforms func SyncNodeStateVirtual(newState *sriovnetworkv1.SriovNetworkNodeState) error { From 04a3f698003cfea381af9a3def9c0c495fcde0a2 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 31 Mar 2022 13:41:53 +0300 Subject: [PATCH 2/3] Fix the validation webhook for virtual environments Signed-off-by: Sebastian Sch --- deploy/configmap.yaml | 2 +- deployment/sriov-network-operator/templates/configmap.yaml | 2 +- pkg/webhook/validate.go | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index 8a32c77b1..3e447432b 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -21,4 +21,4 @@ data: Broadcom_bnxt_BCM57414_2x25G: "14e4 16d7 16dc" Broadcom_bnxt_BCM75508_2x100G: "14e4 1750 1806" Qlogic_qede_QL45000_50G: "1077 1654 1664" - + Red_Hat_Virtio_network_device: "1af4 1000 1000" diff --git a/deployment/sriov-network-operator/templates/configmap.yaml b/deployment/sriov-network-operator/templates/configmap.yaml index 8a32c77b1..3e447432b 100644 --- a/deployment/sriov-network-operator/templates/configmap.yaml +++ b/deployment/sriov-network-operator/templates/configmap.yaml @@ -21,4 +21,4 @@ data: Broadcom_bnxt_BCM57414_2x25G: "14e4 16d7 16dc" Broadcom_bnxt_BCM75508_2x100G: "14e4 1750 1806" Qlogic_qede_QL45000_50G: "1077 1654 1664" - + Red_Hat_Virtio_network_device: "1af4 1000 1000" diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 09119cbf5..4c86fd4e7 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -297,8 +297,11 @@ func validateNicModel(selector *sriovnetworkv1.SriovNetworkNicSelector, iface *s return true } + // Check the vendor and device ID of the VF only if we are on a virtual environment for key := range utils.PlatformMap { - if strings.Contains(strings.ToLower(node.Spec.ProviderID), strings.ToLower(key)) && sriovnetworkv1.IsVfSupportedModel(iface.Vendor, iface.DeviceID) { + if strings.Contains(strings.ToLower(node.Spec.ProviderID), strings.ToLower(key)) && + selector.NetFilter != "" && selector.NetFilter == iface.NetFilter && + sriovnetworkv1.IsVfSupportedModel(iface.Vendor, iface.DeviceID) { return true } } From 346de677b14696b3495291b922b868dba205d79c Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 31 Mar 2022 17:40:47 +0300 Subject: [PATCH 3/3] Don't run the networkManager udev rule on virtual environments we can't run the udev rule on virtual environments because all the nics in that type of deployment will be presented as VFs. this means that we are going to disable nics on the machine itself Signed-off-by: Sebastian Sch --- pkg/daemon/daemon.go | 11 +++++++---- pkg/daemon/writer.go | 20 ++++++++------------ pkg/utils/utils.go | 2 +- pkg/utils/utils_virtual.go | 9 +++++++++ 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 628b1ee7e..966da4bc3 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -214,10 +214,13 @@ func (dn *Daemon) tryCreateUdevRuleWrapper() error { } } - // update udev rule and if update succeeds - err := tryCreateNMUdevRule() - if err != nil { - return err + // update udev rule only if we are on a BM environment + // for virtual environments we don't disable the vfs as they may be used by the platform/host + if dn.platform != utils.VirtualOpenStack { + err := tryCreateNMUdevRule() + if err != nil { + return err + } } return nil diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index b3ad2d393..cc40e329e 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -44,7 +44,7 @@ func NewNodeStateStatusWriter(c snclientset.Interface, n string, f func(), devMo // RunOnce initial the interface status for both baremetal and virtual environments func (writer *NodeStateStatusWriter) RunOnce(destDir string, platformType utils.PlatformType) error { - glog.V(0).Infof("RunOnce(): start writer") + glog.V(0).Infof("RunOnce()") msg := Message{} if platformType == utils.VirtualOpenStack { @@ -53,26 +53,22 @@ func (writer *NodeStateStatusWriter) RunOnce(destDir string, platformType utils. return err } - metaData, networkData, err := utils.GetOpenstackData() - if err != nil { - glog.Errorf("RunOnce(): failed to read OpenStack data: %v", err) - } - if ns == nil { + metaData, networkData, err := utils.GetOpenstackData() + if err != nil { + glog.Errorf("RunOnce(): failed to read OpenStack data: %v", err) + } + writer.openStackDevicesInfo, err = utils.CreateOpenstackDevicesInfo(metaData, networkData) if err != nil { return err } } else { - devicesInfo := make(utils.OSPDevicesInfo) - for _, iface := range ns.Status.Interfaces { - devicesInfo[iface.PciAddress] = &utils.OSPDeviceInfo{MacAddress: iface.Mac, NetworkID: iface.NetFilter} - } - writer.openStackDevicesInfo = devicesInfo + writer.openStackDevicesInfo = utils.CreateOpenstackDevicesInfoFromNodeStatus(ns) } } - glog.V(0).Info("RunOnce(): once") + glog.V(0).Info("RunOnce(): first poll for nic status") if err := writer.pollNicStatus(platformType); err != nil { glog.Errorf("RunOnce(): first poll failed: %v", err) } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index f0376ae50..3edd0e638 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -459,7 +459,7 @@ func getNetdevMTU(pciAddr string) int { } func getNetDevMac(ifaceName string) string { - glog.Infof("getNetDevMac(): get Mac for device %s", ifaceName) + glog.V(2).Infof("getNetDevMac(): get Mac for device %s", ifaceName) macFilePath := filepath.Join(sysClassNet, ifaceName, "address") data, err := ioutil.ReadFile(macFilePath) if err != nil { diff --git a/pkg/utils/utils_virtual.go b/pkg/utils/utils_virtual.go index 6a1eaa5bb..ecf1085f1 100644 --- a/pkg/utils/utils_virtual.go +++ b/pkg/utils/utils_virtual.go @@ -345,6 +345,15 @@ func DiscoverSriovDevicesVirtual(devicesInfo OSPDevicesInfo) ([]sriovnetworkv1.I return pfList, nil } +func CreateOpenstackDevicesInfoFromNodeStatus(networkState *sriovnetworkv1.SriovNetworkNodeState) OSPDevicesInfo { + devicesInfo := make(OSPDevicesInfo) + for _, iface := range networkState.Status.Interfaces { + devicesInfo[iface.PciAddress] = &OSPDeviceInfo{MacAddress: iface.Mac, NetworkID: iface.NetFilter} + } + + return devicesInfo +} + // tryToGetVirtualInterfaceName get the interface name of a virtio interface func tryToGetVirtualInterfaceName(pciAddr string) string { glog.Infof("tryToGetVirtualInterfaceName() get interface name for device %s", pciAddr)