Skip to content

Commit

Permalink
Improve the virtual plugin support
Browse files Browse the repository at this point in the history
This commit adds support for virtio interfaces, such as vhostuser, on OpenStack virtual workers.

implementation details:
* on first run (after a reboot) we get all the information we need when the devices are visible to the kernel
* we match the mac address to the openstack network ID
* if the sriov-network-config-daemon is restarted, it uses the initial state file on the node, so the node state is correct even if the NICs are bound to vfio
* introduce a new systemd service that removes the initial state file after a reboot, to support NIC changes in both virtual and bare-metal (BM) environments

Signed-off-by: Sebastian Sch <[email protected]>
  • Loading branch information
SchSeba committed Mar 2, 2022
1 parent 9bd5b39 commit 315b4be
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 42 deletions.
17 changes: 17 additions & 0 deletions bindata/manifests/config-units/initial-config-reset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
contents: |
  [Unit]
  Description=Remove the initial SR-IOV state file
  # Removal of this file signals firstboot completion
  ConditionPathExists=!/etc/ignition-machine-config-encapsulated.json
  Before=kubelet.service
  [Service]
  Type=oneshot
  # systemd does not run ExecStart through a shell, so a "| true" suffix would
  # be handed to rm as extra file operands. The "-" prefix makes systemd treat
  # a non-zero exit status as success instead.
  ExecStart=-/bin/rm -f /etc/sno-initial-node-state.json
  StandardOutput=journal+console
  StandardError=journal+console
  [Install]
  WantedBy=default.target
enabled: true
name: sriov-operator-initial-configuration-reset.service
6 changes: 5 additions & 1 deletion cmd/sriov-network-config-daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,11 @@ func runStartCmd(cmd *cobra.Command, args []string) {
glog.V(0).Infof("Running on platform: %s", platformType.String())

// block the daemon process until nodeWriter finishes its first run
nodeWriter.Run(stopCh, refreshCh, syncCh, destdir, true, platformType)
err = nodeWriter.Run(stopCh, refreshCh, syncCh, destdir, true, platformType)
if err != nil {
glog.Errorf("failed to run writer: %v", err)
panic(err.Error())
}
go nodeWriter.Run(stopCh, refreshCh, syncCh, "", false, platformType)

glog.V(0).Info("Starting SriovNetworkConfigDaemon")
Expand Down
118 changes: 93 additions & 25 deletions pkg/daemon/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,28 @@ import (
"time"

"github.com/golang/glog"
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/service"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
)

const (
CheckpointFileName = "sno-initial-node-state.json"
CheckpointFileName = "sno-initial-node-state.json"
InitialConfigurationResetUnitFile = "bindata/manifests/config-units/initial-config-reset.yaml"
)

type NodeStateStatusWriter struct {
client snclientset.Interface
node string
status sriovnetworkv1.SriovNetworkNodeStateStatus
OnHeartbeatFailure func()
metaData *utils.OSPMetaData
networkData *utils.OSPNetworkData
client snclientset.Interface
node string
status sriovnetworkv1.SriovNetworkNodeStateStatus
OnHeartbeatFailure func()
openStackDevicesInfo utils.OSPDevicesInfo
}

// NewNodeStateStatusWriter Create a new NodeStateStatusWriter
Expand All @@ -42,37 +44,58 @@ func NewNodeStateStatusWriter(c snclientset.Interface, n string, f func()) *Node

// Run reads from the writer channel and sets the interface status. It will
// return if the stop channel is closed. Intended to be run via a goroutine.
func (writer *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message, syncCh chan<- struct{}, destDir string, runonce bool, platformType utils.PlatformType) {
func (writer *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message, syncCh chan<- struct{}, destDir string, runonce bool, platformType utils.PlatformType) error {
glog.V(0).Infof("Run(): start writer")
msg := Message{}

var err error

if platformType == utils.VirtualOpenStack {
writer.metaData, err = utils.ReadOpenstackMetaData()
if runonce {
err := writer.createCleanUpServiceIfNeeded()
if err != nil {
glog.Errorf("Run(): failed to read OpenStack meta_data: %v", err)
return err
}
writer.networkData, err = utils.ReadOpenstackNetworkData()
if err != nil {
glog.Errorf("Run(): failed to read OpenStack network_data: %v", err)

if platformType == utils.VirtualOpenStack {
ns, err := writer.getCheckPointNodeState(destDir)
if err != nil {
return err
}

metaData, err := utils.ReadOpenstackMetaData()
if err != nil {
glog.Errorf("Run(): failed to read OpenStack meta_data: %v", err)
}
networkData, err := utils.ReadOpenstackNetworkData()
if err != nil {
glog.Errorf("Run(): failed to read OpenStack network_data: %v", err)
}

if ns == nil {
writer.openStackDevicesInfo, err = utils.CreateOpenstackDevicesInfo(metaData, networkData)
if err != nil {
return err
}
} else {
devicesInfo := make(utils.OSPDevicesInfo)
for _, iface := range ns.Status.Interfaces {
devicesInfo[iface.PciAddress] = &utils.OSPDeviceInfo{MacAddress: iface.Mac, NetworkID: iface.NetFilter}
}
writer.openStackDevicesInfo = devicesInfo
}
}
}

if runonce {
glog.V(0).Info("Run(): once")
if err := writer.pollNicStatus(platformType); err != nil {
glog.Errorf("Run(): first poll failed: %v", err)
}
ns, _ := writer.setNodeStateStatus(msg)
writer.writeCheckpointFile(ns, destDir)
return
return writer.writeCheckpointFile(ns, destDir)
}

for {
select {
case <-stop:
glog.V(0).Info("Run(): stop writer")
return
return nil
case msg = <-refresh:
glog.V(0).Info("Run(): refresh trigger")
if err := writer.pollNicStatus(platformType); err != nil {
Expand All @@ -98,7 +121,7 @@ func (writer *NodeStateStatusWriter) pollNicStatus(platformType utils.PlatformTy
var err error

if platformType == utils.VirtualOpenStack {
iface, err = utils.DiscoverSriovDevicesVirtual(platformType, writer.metaData, writer.networkData)
iface, err = utils.DiscoverSriovDevicesVirtual(writer.openStackDevicesInfo)
} else {
iface, err = utils.DiscoverSriovDevices()
}
Expand Down Expand Up @@ -200,3 +223,48 @@ func (w *NodeStateStatusWriter) writeCheckpointFile(ns *sriovnetworkv1.SriovNetw
}
return nil
}

// getCheckPointNodeState loads the checkpointed node state stored under
// destDir in the file named by CheckpointFileName.
//
// It returns (nil, nil) when the checkpoint file does not exist. On success
// the JSON content is decoded into utils.InitialState and a pointer to that
// variable is returned. Any other open or decode failure is returned as is.
func (w *NodeStateStatusWriter) getCheckPointNodeState(destDir string) (*sriovnetworkv1.SriovNetworkNodeState, error) {
	glog.Infof("getCheckPointNodeState()")

	checkpointPath := filepath.Join(destDir, CheckpointFileName)
	f, err := os.Open(checkpointPath)
	switch {
	case err == nil:
		// file opened, fall through to decoding
	case os.IsNotExist(err):
		// no checkpoint yet: not an error for the caller
		return nil, nil
	default:
		return nil, err
	}
	defer f.Close()

	decoder := json.NewDecoder(f)
	if decodeErr := decoder.Decode(&utils.InitialState); decodeErr != nil {
		return nil, decodeErr
	}
	return &utils.InitialState, nil
}

// createCleanUpServiceIfNeeded makes sure the
// sriov-operator-initial-configuration-reset service is enabled on the node.
//
// It reads the unit manifest from InitialConfigurationResetUnitFile, asks a
// service manager created with "/host" whether the unit already exists, and
// enables the service only when it does not. Every failure is logged and
// returned to the caller.
func (w *NodeStateStatusWriter) createCleanUpServiceIfNeeded() error {
	glog.Infof("createCleanUpServiceIfNeeded()")

	unit, err := service.ReadServiceManifestFile(InitialConfigurationResetUnitFile)
	if err != nil {
		glog.Errorf("createCleanUpServiceIfNeeded(): failed to read service sriov-operator-initial-configuration-reset manifest: %v", err)
		return err
	}

	manager := service.NewServiceManager("/host")
	present, err := manager.IsServiceExist(unit.Path)
	if err != nil {
		glog.Errorf("createCleanUpServiceIfNeeded(): failed to check if service sriov-operator-initial-configuration-reset exist: %v", err)
		return err
	}
	if present {
		// nothing to do, the unit was installed by a previous run
		glog.Infof("createCleanUpServiceIfNeeded(): service sriov-operator-initial-configuration-reset already exist on the node")
		return nil
	}

	if err = manager.EnableService(unit); err != nil {
		glog.Errorf("createCleanUpServiceIfNeeded(): failed to enable service sriov-operator-initial-configuration-reset: %v", err)
		return err
	}
	return nil
}
126 changes: 110 additions & 16 deletions pkg/utils/utils_virtual.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"

"github.com/golang/glog"
dputils "github.com/intel/sriov-network-device-plugin/pkg/utils"
"github.com/jaypipes/ghw"

dputils "github.com/intel/sriov-network-device-plugin/pkg/utils"
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
)

Expand Down Expand Up @@ -93,6 +96,13 @@ type OSPNetworkData struct {
// Omit Services
}

// OSPDevicesInfo maps a device PCI address to the OpenStack information
// collected for that device.
type OSPDevicesInfo map[string]*OSPDeviceInfo

// OSPDeviceInfo holds the OpenStack details recorded for a single device.
type OSPDeviceInfo struct {
// MacAddress is the MAC address associated with the device.
MacAddress string
// NetworkID is the network identifier matched to the device through its MAC
// address. CreateOpenstackDevicesInfo stores it as
// sriovnetworkv1.OpenstackNetworkID.String() + ":" + the OpenStack network ID.
NetworkID string
}

// ReadOpenstackMetaData reads the meta data from the openstack metadata file
func ReadOpenstackMetaData() (metaData *OSPMetaData, err error) {
glog.Infof("ReadOpenstackMetaData(): read OpenStack meta_data")
Expand Down Expand Up @@ -132,32 +142,84 @@ func ReadOpenstackNetworkData() (networkData *OSPNetworkData, err error) {
return networkData, err
}

func parseOpenstackMetaData(pciAddr string, metaData *OSPMetaData, networkData *OSPNetworkData) (networkID string, macAddress string) {
// CreateOpenstackDevicesInfo create the openstack device info map
func CreateOpenstackDevicesInfo(metaData *OSPMetaData, networkData *OSPNetworkData) (OSPDevicesInfo, error) {
glog.Infof("CreateOpenstackDevicesInfo()")
devicesInfo := make(OSPDevicesInfo)
if metaData == nil || networkData == nil {
return
return nil, nil
}

// use this for hardware passthrough interfaces
for _, device := range metaData.Devices {
if pciAddr == device.Address {
for _, link := range networkData.Links {
if device.Mac == link.EthernetMac {
for _, network := range networkData.Networks {
if network.Link == link.ID {
networkID = sriovnetworkv1.OpenstackNetworkID.String() + ":" + network.NetworkID
macAddress = device.Mac
}
for _, link := range networkData.Links {
if device.Mac == link.EthernetMac {
for _, network := range networkData.Networks {
if network.Link == link.ID {
networkID := sriovnetworkv1.OpenstackNetworkID.String() + ":" + network.NetworkID
devicesInfo[device.Address] = &OSPDeviceInfo{MacAddress: device.Mac, NetworkID: networkID}
}
}
}
}
}

return
// for vhostuser interface type we check the interfaces on the node
pci, err := ghw.PCI()
if err != nil {
return nil, fmt.Errorf("CreateOpenstackDevicesInfo(): error getting PCI info: %v", err)
}

devices := pci.ListDevices()
if len(devices) == 0 {
return nil, fmt.Errorf("CreateOpenstackDevicesInfo(): could not retrieve PCI devices")
}

for _, device := range devices {
if _, exist := devicesInfo[device.Address]; exist {
//we already discover the device via openstack metadata
continue
}

devClass, err := strconv.ParseInt(device.Class.ID, 16, 64)
if err != nil {
glog.Warningf("CreateOpenstackDevicesInfo(): unable to parse device class for device %+v %q", device, err)
continue
}
if devClass != netClass {
// Not network device
continue
}

macAddress := ""
if name := tryToGetVirtualInterfaceName(device.Address); name != "" {
if mac := getNetDevMac(name); mac != "" {
macAddress = mac
}
}
if macAddress == "" {
// we didn't manage to find a mac address for the nic skipping
continue
}

for _, link := range networkData.Links {
if macAddress == link.EthernetMac {
for _, network := range networkData.Networks {
if network.Link == link.ID {
networkID := sriovnetworkv1.OpenstackNetworkID.String() + ":" + network.NetworkID
devicesInfo[device.Address] = &OSPDeviceInfo{MacAddress: macAddress, NetworkID: networkID}
}
}
}
}
}

return devicesInfo, err
}

// DiscoverSriovDevicesVirtual discovers VFs on a virtual platform
func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaData, networkData *OSPNetworkData) ([]sriovnetworkv1.InterfaceExt, error) {
glog.V(2).Info("DiscoverSriovDevicesVirtual")
func DiscoverSriovDevicesVirtual(devicesInfo OSPDevicesInfo) ([]sriovnetworkv1.InterfaceExt, error) {
glog.V(2).Info("DiscoverSriovDevicesVirtual()")
pfList := []sriovnetworkv1.InterfaceExt{}

pci, err := ghw.PCI()
Expand All @@ -181,7 +243,13 @@ func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaDat
continue
}

netFilter, metaMac := parseOpenstackMetaData(device.Address, metaData, networkData)
deviceInfo, exist := devicesInfo[device.Address]
if !exist {
glog.Warningf("DiscoverSriovDevicesVirtual(): unable to find device in devicesInfo list for pci %s", device.Address)
continue
}
netFilter := deviceInfo.NetworkID
metaMac := deviceInfo.MacAddress

driver, err := dputils.GetDriverName(device.Address)
if err != nil {
Expand All @@ -198,7 +266,7 @@ func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaDat
if mtu := getNetdevMTU(device.Address); mtu > 0 {
iface.Mtu = mtu
}
if name := tryGetInterfaceName(device.Address); name != "" {
if name := tryToGetVirtualInterfaceName(device.Address); name != "" {
iface.Name = name
if iface.Mac = getNetDevMac(name); iface.Mac == "" {
iface.Mac = metaMac
Expand Down Expand Up @@ -226,6 +294,32 @@ func DiscoverSriovDevicesVirtual(platformType PlatformType, metaData *OSPMetaDat
return pfList, nil
}

// tryToGetVirtualInterfaceName get the interface name of a virtio interface
// tryToGetVirtualInterfaceName returns the network interface name of the
// virtio device at pciAddr, or "" when no name can be determined.
//
// It globs <sysBusPciDevices>/<pciAddr>/virtio*/net and returns the name of
// the first entry of the first matching directory. Glob failures, an absent
// or unreadable directory, and an empty directory all yield "".
func tryToGetVirtualInterfaceName(pciAddr string) string {
	glog.Infof("tryToGetVirtualInterfaceName()")
	netDirs, err := filepath.Glob(filepath.Join(sysBusPciDevices, pciAddr, "virtio*", "net"))
	if err != nil || len(netDirs) < 1 {
		return ""
	}

	// Only the first match is inspected; log that path (not the whole slice)
	// on failure so the message names the directory that could not be read.
	netDir := netDirs[0]
	fInfos, err := ioutil.ReadDir(netDir)
	if err != nil {
		glog.Warningf("tryToGetVirtualInterfaceName(): failed to read net directory %s: %q", netDir, err)
		return ""
	}
	if len(fInfos) < 1 {
		return ""
	}

	// ioutil.ReadDir returns entries sorted by filename, so this is the
	// lexically first interface name.
	return fInfos[0].Name()
}

// SyncNodeStateVirtual attempt to update the node state to match the desired state
// in virtual platforms
func SyncNodeStateVirtual(newState *sriovnetworkv1.SriovNetworkNodeState) error {
Expand Down

0 comments on commit 315b4be

Please sign in to comment.