From 2be5408e2106d39334484d3eeb21d9dedbc4f5a2 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Thu, 26 Oct 2023 12:23:45 +0300 Subject: [PATCH 1/2] Add check extra check for systemd service in the daemon When systemd configuration mode is used we need to make sure that the systemd service in not only exist but also is in enabled state. Signed-off-by: Yury Kulazhenkov --- pkg/daemon/daemon.go | 8 ++++---- pkg/plugins/k8s/k8s_plugin.go | 6 +++--- pkg/service/service_manager.go | 21 ++++++++++++++++++++- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 66f35c2b3..df0c66323 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -466,7 +466,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { if dn.nodeState.GetGeneration() == latest { if dn.useSystemdService { - serviceExist, err := dn.serviceManager.IsServiceExist(systemd.SriovServicePath) + serviceEnabled, err := dn.serviceManager.IsServiceEnabled(systemd.SriovServicePath) if err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to check if sriov-config service exist on host") return err @@ -475,8 +475,9 @@ func (dn *Daemon) nodeStateSyncHandler() error { // if the service doesn't exist we should continue to let the k8s plugin to create the service files // this is only for k8s base environments, for openshift the sriov-operator creates a machine config to will apply // the system service and reboot the node the config-daemon doesn't need to do anything. - if !serviceExist { - sriovResult = &systemd.SriovResult{SyncStatus: syncStatusFailed, LastSyncError: "sriov-config systemd service doesn't exist on node"} + if !serviceEnabled { + sriovResult = &systemd.SriovResult{SyncStatus: syncStatusFailed, + LastSyncError: "sriov-config systemd service is not available on node"} } else { sriovResult, err = systemd.ReadSriovResult() if err != nil { @@ -495,7 +496,6 @@ func (dn *Daemon) nodeStateSyncHandler() error { <-dn.syncCh return nil } - return nil } log.Log.V(0).Info("nodeStateSyncHandler(): Interface not changed") if latestState.Status.LastSyncError != "" || diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go index 0f0bc1bcd..4cbcc818e 100644 --- a/pkg/plugins/k8s/k8s_plugin.go +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -322,13 +322,13 @@ func (p *K8sPlugin) switchdevServiceStateUpdate() error { func (p *K8sPlugin) sriovServiceStateUpdate() error { log.Log.Info("sriovServiceStateUpdate()") - exist, err := p.serviceManager.IsServiceExist(p.sriovService.Path) + isServiceEnabled, err := p.serviceManager.IsServiceEnabled(p.sriovService.Path) if err != nil { return err } - // create the service if it doesn't exist - if !exist { + // create and enable the service if it doesn't exist or is not enabled + if !isServiceEnabled { p.updateTarget.sriovScript = true } else { p.updateTarget.sriovScript = p.isSystemServiceNeedUpdate(p.sriovService) diff --git a/pkg/service/service_manager.go b/pkg/service/service_manager.go index 6fef6fe50..29653fe10 100644 --- a/pkg/service/service_manager.go +++ b/pkg/service/service_manager.go @@ -11,6 +11,7 @@ import ( type ServiceManager interface { IsServiceExist(string) (bool, error) + IsServiceEnabled(string) (bool, error) ReadService(string) (*Service, error) EnableService(service *Service) error } @@ -27,7 +28,7 @@ func NewServiceManager(chroot string) ServiceManager { return &serviceManager{root} } -// ReadService read service from given path +// IsServiceExist check if service unit exist func (sm *serviceManager) IsServiceExist(servicePath string) (bool, error) { _, err := os.Stat(path.Join(sm.chroot, servicePath)) if err != nil { @@ -40,6 +41,24 @@ func (sm *serviceManager) IsServiceExist(servicePath string) (bool, error) { return true, nil } +// IsServiceEnabled check if service exist and enabled +func (sm *serviceManager) IsServiceEnabled(servicePath string) (bool, error) { + exist, err := sm.IsServiceExist(servicePath) + if err != nil || !exist { + return false, err + } + serviceName := filepath.Base(servicePath) + // Change root dir + exit, err := utils.Chroot(sm.chroot) + if err != nil { + return false, err + } + defer exit() + + cmd := exec.Command("systemctl", "is-enabled", serviceName) + return cmd.Run() == nil, nil +} + // ReadService read service from given path func (sm *serviceManager) ReadService(servicePath string) (*Service, error) { data, err := os.ReadFile(path.Join(sm.chroot, servicePath)) From f1afab5b5c7c0c829a1c5b6739a1822a99714a93 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Thu, 26 Oct 2023 12:28:08 +0300 Subject: [PATCH 2/2] Remove systemd service result file after configuration update This is required to ensure that we will not read outdated result in edge case scenarios, e.g. when systemd service exist but for some reason was disabled by the user. Signed-off-by: Yury Kulazhenkov --- pkg/daemon/daemon.go | 15 ++++++++++++--- pkg/systemd/systemd.go | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index df0c66323..3ec571924 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -583,13 +583,22 @@ func (dn *Daemon) nodeStateSyncHandler() error { // or there is a new config we need to apply // When using systemd configuration we write the file if dn.useSystemdService { - r, err := systemd.WriteConfFile(latestState, dn.devMode, dn.platform) + systemdConfModified, err := systemd.WriteConfFile(latestState, dn.devMode, dn.platform) if err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to write configuration file for systemd mode") return err } - reqDrain = reqDrain || r - reqReboot = reqReboot || r + if systemdConfModified { + // remove existing result file to make sure that we will not use outdated result, e.g. in case if + // systemd service was not triggered for some reason + err = systemd.RemoveSriovResult() + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to remove result file for systemd mode") + return err + } + } + reqDrain = reqDrain || systemdConfModified + reqReboot = reqReboot || systemdConfModified log.Log.V(0).Info("nodeStateSyncHandler(): systemd mode WriteConfFile results", "drain-required", reqDrain, "reboot-required", reqReboot, "disable-drain", dn.disableDrain) diff --git a/pkg/systemd/systemd.go b/pkg/systemd/systemd.go index f32b95c8d..0bb23b576 100644 --- a/pkg/systemd/systemd.go +++ b/pkg/systemd/systemd.go @@ -209,6 +209,20 @@ func ReadSriovResult() (*SriovResult, error) { return result, err } +func RemoveSriovResult() error { + err := os.Remove(SriovHostSystemdResultPath) + if err != nil { + if os.IsNotExist(err) { + log.Log.V(2).Info("RemoveSriovResult(): result file not found") + return nil + } + log.Log.Error(err, "RemoveSriovResult(): failed to remove sriov result file", "path", SriovHostSystemdResultPath) + return err + } + log.Log.V(2).Info("RemoveSriovResult(): result file removed") + return nil +} + func WriteSriovSupportedNics() error { _, err := os.Stat(sriovHostSystemdSupportedNicPath) if err != nil {