From 21d919c487828759a3236e36534703c7e476d2ef Mon Sep 17 00:00:00 2001 From: Marcelo Guerrero Date: Mon, 22 Jan 2024 18:55:58 +0100 Subject: [PATCH] Abstract logic to check and load missing KArgs Logic to check missing kernel arguments is placed in a method to be used by both OnNodeStateChange and CheckStatusChanges. --- pkg/daemon/daemon.go | 9 +- pkg/plugins/fake/fake_plugin.go | 4 +- pkg/plugins/generic/generic_plugin.go | 104 +++++++++++++-------- pkg/plugins/generic/generic_plugin_test.go | 65 ++++++++++++- pkg/plugins/intel/intel_plugin.go | 4 +- pkg/plugins/k8s/k8s_plugin.go | 4 +- pkg/plugins/mellanox/mellanox_plugin.go | 4 +- pkg/plugins/mock/mock_plugin.go | 5 +- pkg/plugins/plugin.go | 2 +- pkg/plugins/virtual/virtual_plugin.go | 4 +- 10 files changed, 147 insertions(+), 58 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 65964d7bc..950287a16 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -565,10 +565,13 @@ func (dn *Daemon) shouldSkipReconciliation(latestState *sriovnetworkv1.SriovNetw // Verify changes in the spec of the SriovNetworkNodeState CR. if dn.currentNodeState.GetGeneration() == latestState.GetGeneration() && !dn.isDrainCompleted() { - genericPlugin, ok := dn.loadedPlugins[GenericPluginName] - if ok { + for _, p := range dn.loadedPlugins { // Verify changes in the status of the SriovNetworkNodeState CR. - if genericPlugin.CheckStatusChanges(latestState) { + changed, err := p.CheckStatusChanges(latestState) + if err != nil { + return false, err + } + if changed { return false, nil } } diff --git a/pkg/plugins/fake/fake_plugin.go b/pkg/plugins/fake/fake_plugin.go index 11c30d6ea..0aa218f28 100644 --- a/pkg/plugins/fake/fake_plugin.go +++ b/pkg/plugins/fake/fake_plugin.go @@ -21,8 +21,8 @@ func (f *FakePlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState return false, false, nil } -func (f *FakePlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (f *FakePlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } func (f *FakePlugin) Apply() error { diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 20ef4a49c..5e0827c69 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -140,28 +140,34 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt } // CheckStatusChanges verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *GenericPlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) bool { +func (p *GenericPlugin) CheckStatusChanges(current *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { log.Log.Info("generic-plugin CheckStatusChanges()") - changed := false - for _, iface := range new.Spec.Interfaces { + for _, iface := range current.Spec.Interfaces { found := false - for _, ifaceStatus := range new.Status.Interfaces { + for _, ifaceStatus := range current.Status.Interfaces { + // TODO: remove the check for ExternallyManaged - https://github.com/k8snetworkplumbingwg/sriov-network-operator/issues/632 if iface.PciAddress == ifaceStatus.PciAddress && !iface.ExternallyManaged { found = true if sriovnetworkv1.NeedToUpdateSriov(&iface, &ifaceStatus) { - changed = true log.Log.Info("CheckStatusChanges(): status changed for interface", "address", iface.PciAddress) + return true, nil } break } } - if !found { log.Log.Info("CheckStatusChanges(): no status found for interface", "address", iface.PciAddress) } } - return changed + + missingKernelArgs, err := p.getMissingKernelArgs() + if err != nil { + log.Log.Error(err, "generic-plugin CheckStatusChanges(): failed to verify missing kernel arguments") + return false, err + } + + return len(missingKernelArgs) != 0, nil } func (p *GenericPlugin) syncDriverState() error { @@ -266,37 +272,48 @@ func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { } } -// syncDesiredKernelArgs Should be called to set all the kernel arguments. Returns bool if node update is needed. -func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { - needReboot := false +// getMissingKernelArgs gets Kernel arguments that have not been set. +func (p *GenericPlugin) getMissingKernelArgs() ([]string, error) { + missingArgs := make([]string, 0, len(p.DesiredKernelArgs)) if len(p.DesiredKernelArgs) == 0 { - return false, nil + return nil, nil } + kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { - return false, err + return nil, err } - for desiredKarg, attempted := range p.DesiredKernelArgs { - set := p.helpers.IsKernelArgsSet(kargs, desiredKarg) - if !set { - if attempted { - log.Log.V(2).Info("generic plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", - "karg", desiredKarg) - } - // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because - // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel - // argument is set once the daemon goes through node state sync again. - update, err := setKernelArg(desiredKarg) - if err != nil { - log.Log.Error(err, "generic plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", desiredKarg) - return false, err - } - if update { - needReboot = true - log.Log.V(2).Info("generic plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", desiredKarg) - } - p.DesiredKernelArgs[desiredKarg] = true + + for desiredKarg := range p.DesiredKernelArgs { + if !p.helpers.IsKernelArgsSet(kargs, desiredKarg) { + missingArgs = append(missingArgs, desiredKarg) + } + } + return missingArgs, nil +} + +// syncDesiredKernelArgs should be called to set all the kernel arguments. Returns bool if node update is needed. +func (p *GenericPlugin) syncDesiredKernelArgs(kargs []string) (bool, error) { + needReboot := false + + for _, karg := range kargs { + if p.DesiredKernelArgs[karg] { + log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", + "karg", karg) + } + // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because + // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel + // argument is set once the daemon goes through node state sync again. + update, err := setKernelArg(karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) + return false, err + } + if update { + needReboot = true + log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", karg) } + p.DesiredKernelArgs[karg] = true } return needReboot, nil } @@ -365,19 +382,28 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo } } -func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (needReboot bool, err error) { - needReboot = false +func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + needReboot := false + p.addVfioDesiredKernelArg(state) - updateNode, err := p.syncDesiredKernelArgs() + missingKernelArgs, err := p.getMissingKernelArgs() if err != nil { - log.Log.Error(err, "generic plugin needRebootNode(): failed to set the desired kernel arguments") + log.Log.Error(err, "generic-plugin needRebootNode(): failed to verify missing kernel arguments") return false, err } - if updateNode { - log.Log.V(2).Info("generic plugin needRebootNode(): need reboot for updating kernel arguments") - needReboot = true + + if len(missingKernelArgs) != 0 { + needReboot, err = p.syncDesiredKernelArgs(missingKernelArgs) + if err != nil { + log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if needReboot { + log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") + } } + return needReboot, nil } diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 6b75535bb..d2e9c1bbe 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -8,6 +8,7 @@ import ( . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" ) @@ -178,12 +179,12 @@ var _ = Describe("Generic plugin", func() { }, } - changed := genericPlugin.CheckStatusChanges(networkNodeState) + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) Expect(err).ToNot(HaveOccurred()) Expect(changed).To(BeFalse()) }) - It("should detect changes on status", func() { + It("should detect changes on status due to spec mismatch", func() { networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ Interfaces: sriovnetworkv1.Interfaces{{ @@ -232,7 +233,65 @@ var _ = Describe("Generic plugin", func() { }, } - changed := genericPlugin.CheckStatusChanges(networkNodeState) + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + + It("should detect changes on status due to missing kernel args", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "vfio-pci", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "159b", + Vendor: "8086", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "ice", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1889", + Vendor: "8086", + VfID: 0, + Driver: "vfio-pci", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1889", + Vendor: "8086", + VfID: 1, + Driver: "vfio-pci", + }}, + }}, + }, + } + + // Load required kernel args. + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(networkNodeState) + + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) Expect(err).ToNot(HaveOccurred()) Expect(changed).To(BeTrue()) }) diff --git a/pkg/plugins/intel/intel_plugin.go b/pkg/plugins/intel/intel_plugin.go index a9174bdba..e88a186c9 100644 --- a/pkg/plugins/intel/intel_plugin.go +++ b/pkg/plugins/intel/intel_plugin.go @@ -41,8 +41,8 @@ func (p *IntelPlugin) OnNodeStateChange(*sriovnetworkv1.SriovNetworkNodeState) ( } // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *IntelPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *IntelPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go index aa28c0aa6..1a0336049 100644 --- a/pkg/plugins/k8s/k8s_plugin.go +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -156,8 +156,8 @@ func (p *K8sPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) // TODO: implement - https://github.com/k8snetworkplumbingwg/sriov-network-operator/issues/630 // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *K8sPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *K8sPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index 1aa1f6434..87363464b 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -173,8 +173,8 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS // TODO: implement - https://github.com/k8snetworkplumbingwg/sriov-network-operator/issues/631 // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *MellanoxPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *MellanoxPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change diff --git a/pkg/plugins/mock/mock_plugin.go b/pkg/plugins/mock/mock_plugin.go index 044d30f0e..a6ae38202 100644 --- a/pkg/plugins/mock/mock_plugin.go +++ b/pkg/plugins/mock/mock_plugin.go @@ -49,11 +49,12 @@ func (mr *MockVendorPluginMockRecorder) Apply() *gomock.Call { } // CheckStatusChanges mocks base method. -func (m *MockVendorPlugin) CheckStatusChanges(arg0 *v1.SriovNetworkNodeState) bool { +func (m *MockVendorPlugin) CheckStatusChanges(arg0 *v1.SriovNetworkNodeState) (bool, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "CheckStatusChanges", arg0) ret0, _ := ret[0].(bool) - return ret0 + ret1, _ := ret[1].(error) + return ret0, ret1 } // CheckStatusChanges indicates an expected call of CheckStatusChanges. diff --git a/pkg/plugins/plugin.go b/pkg/plugins/plugin.go index 3136dfd46..830f7dd68 100644 --- a/pkg/plugins/plugin.go +++ b/pkg/plugins/plugin.go @@ -15,5 +15,5 @@ type VendorPlugin interface { // Apply config change Apply() error // CheckStatusChanges checks status changes on the SriovNetworkNodeState CR for configured VFs. - CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool + CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) } diff --git a/pkg/plugins/virtual/virtual_plugin.go b/pkg/plugins/virtual/virtual_plugin.go index cd43b120b..211cbee2c 100644 --- a/pkg/plugins/virtual/virtual_plugin.go +++ b/pkg/plugins/virtual/virtual_plugin.go @@ -68,8 +68,8 @@ func (p *VirtualPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt } // OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. -func (p *VirtualPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) bool { - return false +func (p *VirtualPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil } // Apply config change