From 07254b5d6032f2650affb88d5c4f2a0e443a427e Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 16 Apr 2024 21:41:03 +0300 Subject: [PATCH] Fix config-daemon not waiting for drain to complete Signed-off-by: Sebastian Sch --- pkg/daemon/daemon.go | 33 ++++++++++++++++++++++----------- pkg/daemon/daemon_test.go | 15 +++++++++------ 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index a389c1758..07eefb4f8 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -484,10 +484,14 @@ func (dn *Daemon) nodeStateSyncHandler() error { if reqDrain || !utils.ObjectHasAnnotation(dn.desiredNodeState, consts.NodeStateDrainAnnotationCurrent, consts.DrainIdle) { - if err := dn.handleDrain(reqReboot); err != nil { + drainInProcess, err := dn.handleDrain(reqReboot) + if err != nil { log.Log.Error(err, "failed to handle drain") return err } + if drainInProcess { + return nil + } } if !reqReboot && !vars.UsingSystemdMode { @@ -560,20 +564,25 @@ func (dn *Daemon) nodeStateSyncHandler() error { return nil } -func (dn *Daemon) handleDrain(reqReboot bool) error { +// handleDrain annotates the node and the nodeState object with 'Drain_Required' or 'Reboot_Required' when a drain has to be requested. +// It returns true when the caller must finish the reconcile loop and wait for a new object. +func (dn *Daemon) handleDrain(reqReboot bool) (bool, error) { + // the drain is done, so we can continue with the configuration if utils.ObjectHasAnnotation(dn.desiredNodeState, consts.NodeStateDrainAnnotationCurrent, consts.DrainComplete) { log.Log.Info("handleDrain(): the node complete the draining") - return nil + return false, nil } + // the operator is still draining the node, so we finish this reconcile loop and wait if utils.ObjectHasAnnotation(dn.desiredNodeState, consts.NodeStateDrainAnnotationCurrent, consts.Draining) { log.Log.Info("handleDrain(): the node is still draining") - return nil + return true, nil } + // drain is disabled, so we continue with the configuration if dn.disableDrain { log.Log.Info("handleDrain(): 
drain is disabled in sriovOperatorConfig") - return nil + return false, nil } if reqReboot { @@ -581,33 +590,35 @@ func (dn *Daemon) handleDrain(reqReboot bool) error { err := utils.AnnotateNode(context.Background(), vars.NodeName, consts.NodeDrainAnnotation, consts.RebootRequired, dn.client) if err != nil { log.Log.Error(err, "applyDrainRequired(): Failed to annotate node") - return err + return false, err } log.Log.Info("handleDrain(): apply 'Reboot_Required' annotation for nodeState") if err := utils.AnnotateObject(context.Background(), dn.desiredNodeState, consts.NodeStateDrainAnnotation, consts.RebootRequired, dn.client); err != nil { - return err + return false, err } - return nil + // the node was annotated, so we need to wait for the operator to finish the drain + return true, nil } log.Log.Info("handleDrain(): apply 'Drain_Required' annotation for node") err := utils.AnnotateNode(context.Background(), vars.NodeName, consts.NodeDrainAnnotation, consts.DrainRequired, dn.client) if err != nil { log.Log.Error(err, "handleDrain(): Failed to annotate node") - return err + return false, err } log.Log.Info("handleDrain(): apply 'Drain_Required' annotation for nodeState") if err := utils.AnnotateObject(context.Background(), dn.desiredNodeState, consts.NodeStateDrainAnnotation, consts.DrainRequired, dn.client); err != nil { - return err + return false, err } - return nil + // the node was annotated, so we need to wait for the operator to finish the drain + return true, nil } func (dn *Daemon) restartDevicePluginPod() error { diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index 54b75e3e8..a1e29dcb2 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -194,8 +194,9 @@ var _ = Describe("Config Daemon", func() { nodeState := &sriovnetworkv1.SriovNetworkNodeState{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-node", - Generation: 123, + Name: "test-node", + Generation: 123, + Annotations: 
map[string]string{consts.NodeStateDrainAnnotationCurrent: consts.DrainIdle}, }, Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{}, Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ @@ -253,8 +254,9 @@ var _ = Describe("Config Daemon", func() { nodeState1 := &sriovnetworkv1.SriovNetworkNodeState{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-node", - Generation: 123, + Name: "test-node", + Generation: 123, + Annotations: map[string]string{consts.NodeStateDrainAnnotationCurrent: consts.DrainIdle}, }, } Expect( @@ -263,8 +265,9 @@ var _ = Describe("Config Daemon", func() { nodeState2 := &sriovnetworkv1.SriovNetworkNodeState{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-node", - Generation: 777, + Name: "test-node", + Generation: 777, + Annotations: map[string]string{consts.NodeStateDrainAnnotationCurrent: consts.DrainIdle}, }, } Expect(