From 6b4e75cea887e113e2103ad369f156aafdca80f6 Mon Sep 17 00:00:00 2001
From: Sebastian Sch
Date: Mon, 8 May 2023 17:27:11 +0300
Subject: [PATCH] Create the needed functions to save info on the host

this commit adds the functionality needed to save the PF information
into the host. we need this in case the user removes a policy that was
externallyCreated. we must NOT delete or reset the VFs in that case and
the only way to know that the device was externallyCreated after the
user removes the policy is to check this file on the host.

Signed-off-by: Sebastian Sch
---
 pkg/daemon/daemon.go                  |   6 ++
 pkg/plugins/generic/generic_plugin.go |  21 ++++
 pkg/utils/host.go                     | 132 ++++++++++++++++++++++++++
 pkg/utils/utils.go                    |  65 ++++++++++++-
 4 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 pkg/utils/host.go

diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go
index 727ffe6420..3803f3de80 100644
--- a/pkg/daemon/daemon.go
+++ b/pkg/daemon/daemon.go
@@ -436,6 +436,12 @@ func (dn *Daemon) nodeStateSyncHandler() error {
 	}
 
 	if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 {
+		err = utils.ClearPCIAddressFolder()
+		if err != nil {
+			glog.Errorf("failed to clear the PCI address configuration: %v", err)
+			return err
+		}
+
 		glog.V(0).Infof("nodeStateSyncHandler(): Name: %s, Interface policy spec not yet set by controller", latestState.Name)
 		if latestState.Status.SyncStatus != "Succeeded" {
 			dn.refreshCh <- Message{
diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go
index a2a7e1c00b..1f3cdd397f 100644
--- a/pkg/plugins/generic/generic_plugin.go
+++ b/pkg/plugins/generic/generic_plugin.go
@@ -198,6 +198,27 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int
 			}
 		}
 		if !configured && ifaceStatus.NumVfs > 0 {
+			// load the PF info
+			pfStatus, exist, err := utils.LoadPfsStatus(ifaceStatus.PciAddress, true)
+			if err != nil {
+				glog.Errorf("generic-plugin needDrainNode(): failed to load info about PF status for pci address %s: %v", ifaceStatus.PciAddress, err)
+				continue
+			}
+
+			if !exist {
+				glog.Infof("generic-plugin needDrainNode(): PF name %s with pci address %s has VFs configured but they weren't created by the sriov operator. Skipping drain",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
+			if pfStatus.ExternallyCreated {
+				glog.Infof("generic-plugin needDrainNode()(): PF name %s with pci address %s was externally created. Skipping drain",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
 			glog.V(2).Infof("generic-plugin needDrainNode(): need drain, %v needs to be reset", ifaceStatus)
 			needDrain = true
 			return
diff --git a/pkg/utils/host.go b/pkg/utils/host.go
new file mode 100644
index 0000000000..7032b818f8
--- /dev/null
+++ b/pkg/utils/host.go
@@ -0,0 +1,132 @@
+package utils
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/golang/glog"
+
+	sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
+)
+
+// Locations of the operator configuration folder and of its pci sub-folder.
+// The Host* variants address the same folders through the /host prefix, for
+// code that does not chroot into /host first.
+const (
+	SriovConfBasePath     = "/etc/sriov-operator"
+	PfAppliedConfig       = SriovConfBasePath + "/pci"
+	HostSriovConfBasePath = "/host" + SriovConfBasePath
+	HostPfAppliedConfig   = HostSriovConfBasePath + "/pci"
+)
+
+// PfStatus is the per-PF record stored as a json file under the pci folder.
+type PfStatus struct {
+	NumVfs            int    `json:"numVfs"`
+	Mtu               int    `json:"mtu"`
+	LinkType          string `json:"linkType"`
+	EswitchMode       string `json:"eSwitchMode"`
+	ExternallyCreated bool   `json:"externallyCreated"`
+}
+
+// CreateOperatorConfigFolderIfNeeded creates the operator base folder together
+// with the pci folder used to save the PF status objects as json files.
+// The paths carry no /host prefix, so the caller is expected to have chrooted
+// into the host already.
+func CreateOperatorConfigFolderIfNeeded() error {
+	// MkdirAll also creates the missing parent and succeeds on existing folders
+	if err := os.MkdirAll(PfAppliedConfig, 0755); err != nil {
+		return fmt.Errorf("failed to create the pci folder on host in path %s: %v", PfAppliedConfig, err)
+	}
+
+	return nil
+}
+
+// ClearPCIAddressFolder drops every saved PF status by removing and then
+// re-creating the pci folder, addressed through the /host prefix.
+// It does nothing when the folder does not exist.
+func ClearPCIAddressFolder() error {
+	_, err := os.Stat(HostPfAppliedConfig)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return fmt.Errorf("failed to check the pci address folder path %s: %v", HostPfAppliedConfig, err)
+	}
+
+	err = os.RemoveAll(HostPfAppliedConfig)
+	if err != nil {
+		return fmt.Errorf("failed to remove the PCI address folder on path %s: %v", HostPfAppliedConfig, err)
+	}
+
+	// os.ModeDir carries no permission bits, use explicit ones
+	err = os.Mkdir(HostPfAppliedConfig, 0755)
+	if err != nil {
+		return fmt.Errorf("failed to create the pci folder on host in path %s: %v", HostPfAppliedConfig, err)
+	}
+
+	return nil
+}
+
+// CreatePfAppliedStatusFromSpec builds the PfStatus to save from the fields of
+// the given interface spec.
+func CreatePfAppliedStatusFromSpec(p *sriovnetworkv1.Interface) *PfStatus {
+	return &PfStatus{
+		ExternallyCreated: p.ExternallyCreated,
+		NumVfs:            p.NumVfs,
+		EswitchMode:       p.EswitchMode,
+		Mtu:               p.Mtu,
+		LinkType:          p.LinkType,
+	}
+}
+
+// SaveLastPfAppliedStatus saves the PF status as json into a file named after
+// the pci address under /etc/sriov-operator/pci/.
+// This function must be called after running the chroot function.
+func SaveLastPfAppliedStatus(pciAddress string, pfStatus *PfStatus) error {
+	if err := CreateOperatorConfigFolderIfNeeded(); err != nil {
+		return err
+	}
+
+	data, err := json.Marshal(pfStatus)
+	if err != nil {
+		glog.Errorf("failed to marshal PF status %+v: %v", *pfStatus, err)
+		return err
+	}
+
+	path := filepath.Join(PfAppliedConfig, pciAddress)
+	return os.WriteFile(path, data, 0644)
+}
+
+// LoadPfsStatus converts the json file saved for the pci address under
+// /etc/sriov-operator/pci/ to a PfStatus. When chroot is true it first calls
+// Chroot("/host") and defers the returned exit function.
+// It returns false with a nil error if the file doesn't exist. Any other read
+// or decode failure is returned as an error, so that callers never mistake an
+// unreadable file for a PF that was configured by the operator.
+func LoadPfsStatus(pciAddress string, chroot bool) (*PfStatus, bool, error) {
+	if chroot {
+		exit, err := Chroot("/host")
+		if err != nil {
+			return nil, false, err
+		}
+		defer exit()
+	}
+
+	path := filepath.Join(PfAppliedConfig, pciAddress)
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, false, nil
+		}
+		return nil, false, fmt.Errorf("failed to read PF status from path %s: %w", path, err)
+	}
+
+	pfStatus := &PfStatus{}
+	if err = json.Unmarshal(data, pfStatus); err != nil {
+		return nil, false, fmt.Errorf("failed to unmarshal PF status %s: %w", data, err)
+	}
+
+	return pfStatus, true, nil
+}
diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go
index 30601847c5..dd2173cd85 100644
--- a/pkg/utils/utils.go
+++ b/pkg/utils/utils.go
@@ -121,6 +121,15 @@ func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt,
 		}
 		iface.LinkType = getLinkType(iface)
 
+		pfStatus, exist, err := LoadPfsStatus(iface.PciAddress, true)
+		if err != nil {
+			glog.Warningf("DiscoverSriovDevices(): failed to load PF status from disk: %v", err)
+		}
+
+		if exist {
+			iface.ExternallyCreated = pfStatus.ExternallyCreated
+		}
+
 		if dputils.IsSriovPF(device.Address) {
 			iface.TotalVfs = dputils.GetSriovVFcapacity(device.Address)
 			iface.NumVfs = dputils.GetVFconfigured(device.Address)
@@ -164,15 +173,34 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig m
 
 				if !NeedUpdate(&iface, &ifaceStatus) {
 					glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress)
+
+					// Save the PF status to the host
+					err = SaveLastPfAppliedStatus(iface.PciAddress, CreatePfAppliedStatusFromSpec(&iface))
+					if err != nil {
+						glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err)
+						return err
+					}
+
 					break
 				}
 				if err = configSriovDevice(&iface, &ifaceStatus); err != nil {
 					glog.Errorf("SyncNodeState(): fail to configure sriov interface %s: %v. resetting interface.", iface.PciAddress, err)
-					if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil {
-						glog.Errorf("SyncNodeState(): fail to reset on error SR-IOV interface: %s", resetErr)
+					if iface.ExternallyCreated {
+						glog.Infof("SyncNodeState(): skipping device reset as the nic is marked as externally created")
+					} else {
+						if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil {
+							glog.Errorf("SyncNodeState(): failed to reset on error SR-IOV interface: %s", resetErr)
+						}
 					}
 					return err
 				}
+
+				// Save the PF status to the host
+				err = SaveLastPfAppliedStatus(iface.PciAddress, CreatePfAppliedStatusFromSpec(&iface))
+				if err != nil {
+					glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err)
+					return err
+				}
 				break
 			}
 		}
@@ -181,6 +209,27 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig m
 				continue
 			}
 
+			// load the PF info
+			pfStatus, exist, err := LoadPfsStatus(ifaceStatus.PciAddress, false)
+			if err != nil {
+				glog.Errorf("SyncNodeState(): failed to load info about PF status for pci address %s: %v", ifaceStatus.PciAddress, err)
+				return err
+			}
+
+			if !exist {
+				glog.Infof("SyncNodeState(): PF name %s with pci address %s has VFs configured but they weren't created by the sriov operator. Skipping the device reset",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
+			if pfStatus.ExternallyCreated {
+				glog.Infof("SyncNodeState(): PF name %s with pci address %s was externally created skipping the device reset",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
 			if err = resetSriovDevice(ifaceStatus); err != nil {
 				return err
 			}
@@ -269,6 +318,12 @@ func NeedUpdate(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.Int
 							glog.V(2).Infof("NeedUpdate(): VF %d MTU needs update, desired=%d, current=%d", vf.VfID, group.Mtu, vf.Mtu)
 							return true
 						}
+
+						// this is needed to be sure the admin mac address is configured as expected
+						if iface.ExternallyCreated {
+							glog.V(2).Infof("NeedUpdate(): need to update the device as it's externally manage for pci address %s", ifaceStatus.PciAddress)
+							return true
+						}
 					}
 					break
 				}
@@ -292,6 +347,12 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor
 	}
 	// set numVFs
 	if iface.NumVfs != ifaceStatus.NumVfs {
+		if iface.ExternallyCreated {
+			errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyCreated for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress)
+			glog.Error(errMsg)
+			return fmt.Errorf("%s", errMsg)
+		}
+
 		err = setSriovNumVfs(iface.PciAddress, iface.NumVfs)
 		if err != nil {
 			glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress)