From 398260515010fa1c8123fd14740aabca375300dd Mon Sep 17 00:00:00 2001
From: jakobmoellerdev
Date: Fri, 4 Aug 2023 06:58:42 -0400
Subject: [PATCH] fix: use SNO check to disable node-removal controller

node-removal controller wouldn't be necessary at the end of the lifecycle of
SNO nodes as the entire cluster would go down

Signed-off-by: jakobmoellerdev
---
 main.go                            | 23 ++++++++++++++-------
 pkg/cluster/leaderelection.go      | 31 ++++++----------------------
 pkg/cluster/leaderelection_test.go |  2 +-
 pkg/cluster/sno.go                 | 33 ++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 33 deletions(-)
 create mode 100644 pkg/cluster/sno.go

diff --git a/main.go b/main.go
index 1b59737e9..48d402200 100644
--- a/main.go
+++ b/main.go
@@ -90,7 +90,13 @@ func main() {
 	}
 	setupLog.Info("Watching namespace", "Namespace", operatorNamespace)
-	leaderElectionResolver, err := cluster.NewLeaderElectionResolver(ctrl.GetConfigOrDie(), scheme, enableLeaderElection, operatorNamespace)
+	setupClient, err := client.New(ctrl.GetConfigOrDie(), client.Options{Scheme: scheme})
+	if err != nil {
+		setupLog.Error(err, "unable to initialize setup client for pre-manager startup checks")
+		os.Exit(1)
+	}
+	snoCheck := cluster.NewMasterSNOCheck(setupClient)
+	leaderElectionResolver, err := cluster.NewLeaderElectionResolver(snoCheck, enableLeaderElection, operatorNamespace)
 	if err != nil {
 		setupLog.Error(err, "unable to setup leader election")
 		os.Exit(1)
 	}
@@ -130,12 +136,15 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "LVMCluster")
 		os.Exit(1)
 	}
-	// TODO only run when not in SNO
-	if err = (&controllers.NodeRemovalController{
-		Client: mgr.GetClient(),
-	}).SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "NodeRemovalControlelr")
-		os.Exit(1)
+
+	if !snoCheck.IsSNO(context.Background()) {
+		setupLog.Info("starting node-removal controller to observe node removal in MultiNode")
+		if err = (&controllers.NodeRemovalController{
+			Client: mgr.GetClient(),
+		}).SetupWithManager(mgr); err != nil {
+			setupLog.Error(err, "unable to create controller", "controller", "NodeRemovalControlelr")
+			os.Exit(1)
+		}
 	}
 
 	if err = mgr.GetFieldIndexer().IndexField(context.Background(), &lvmv1alpha1.LVMVolumeGroupNodeStatus{}, "metadata.name", func(object client.Object) []string {
diff --git a/pkg/cluster/leaderelection.go b/pkg/cluster/leaderelection.go
index 943d14ce2..fa41a1d95 100644
--- a/pkg/cluster/leaderelection.go
+++ b/pkg/cluster/leaderelection.go
@@ -2,14 +2,8 @@ package cluster
 
 import (
 	"context"
-	"fmt"
 	configv1 "github.com/openshift/api/config/v1"
 	"github.com/openshift/library-go/pkg/config/leaderelection"
-	corev1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/client-go/rest"
-	"os"
-	"sigs.k8s.io/controller-runtime/pkg/client"
 	log "sigs.k8s.io/controller-runtime/pkg/log"
 )
 
@@ -25,45 +19,32 @@ type LeaderElectionResolver interface {
 // on the amount of master nodes discovered in the cluster. If there is exactly one control-plane/master node,
 // the returned LeaderElectionResolver settings are optimized for SNO deployments.
 func NewLeaderElectionResolver(
-	config *rest.Config,
-	scheme *runtime.Scheme,
+	snoCheck SNOCheck,
 	enableLeaderElection bool,
 	operatorNamespace string,
 ) (LeaderElectionResolver, error) {
-	leaderElectionClient, err := client.New(config, client.Options{Scheme: scheme})
-	if err != nil {
-		return nil, fmt.Errorf("cannot create leader election client: %w", err)
-	}
-
 	defaultElectionConfig := leaderelection.LeaderElectionDefaulting(configv1.LeaderElection{
 		Disable: !enableLeaderElection,
 	}, operatorNamespace, "1136b8a6.topolvm.io")
 
 	return &nodeLookupSNOLeaderElection{
-		clnt:                  leaderElectionClient,
+		snoCheck:              snoCheck,
 		defaultElectionConfig: defaultElectionConfig,
 	}, nil
 }
 
 type nodeLookupSNOLeaderElection struct {
-	clnt                  client.Client
+	snoCheck              SNOCheck
 	defaultElectionConfig configv1.LeaderElection
 }
 
 func (le *nodeLookupSNOLeaderElection) Resolve(ctx context.Context) (configv1.LeaderElection, error) {
 	logger := log.FromContext(ctx)
-	nodes := &corev1.NodeList{}
-	if err := le.clnt.List(context.Background(), nodes, client.MatchingLabels{
-		ControlPlaneIDLabel: "",
-	}); err != nil {
-		logger.Error(err, "unable to retrieve nodes for SNO check with lease configuration")
-		os.Exit(1)
-	}
-	if len(nodes.Items) != 1 {
+	if !le.snoCheck.IsSNO(ctx) {
+		logger.Info("Using default Multi-Node leader election settings optimized for high-availability")
 		return le.defaultElectionConfig, nil
 	}
-	logger.Info("Overwriting defaults with SNO leader election config as only a single node was discovered",
-		"node", nodes.Items[0].GetName())
+	logger.Info("Overwriting defaults with SNO leader election config as only a single node was discovered")
 	config := leaderelection.LeaderElectionSNOConfig(le.defaultElectionConfig)
 	logger.Info("leader election config setup succeeded",
 		"retry-period", config.RetryPeriod,
diff --git a/pkg/cluster/leaderelection_test.go b/pkg/cluster/leaderelection_test.go
index 49d36f1e8..80ec8bddb 100644
--- a/pkg/cluster/leaderelection_test.go
+++ b/pkg/cluster/leaderelection_test.go
@@ -84,7 +84,7 @@ func Test_nodeLookupSNOLeaderElection_Resolve(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			clnt := fake.NewClientBuilder().WithObjects(tt.nodes...).Build()
 			le := &nodeLookupSNOLeaderElection{
-				clnt:                  clnt,
+				snoCheck:              NewMasterSNOCheck(clnt),
 				defaultElectionConfig: leaderelection.LeaderElectionDefaulting(configv1.LeaderElection{},
 					"test", "test-leader-id"),
 			}
diff --git a/pkg/cluster/sno.go b/pkg/cluster/sno.go
new file mode 100644
index 000000000..c67ef674b
--- /dev/null
+++ b/pkg/cluster/sno.go
@@ -0,0 +1,33 @@
+package cluster
+
+import (
+	"context"
+	corev1 "k8s.io/api/core/v1"
+	"os"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+type SNOCheck interface {
+	IsSNO(ctx context.Context) bool
+}
+
+func NewMasterSNOCheck(clnt client.Client) SNOCheck {
+	return &masterSNOCheck{clnt: clnt}
+}
+
+type masterSNOCheck struct {
+	clnt client.Client
+}
+
+func (chk *masterSNOCheck) IsSNO(ctx context.Context) bool {
+	logger := log.FromContext(ctx)
+	nodes := &corev1.NodeList{}
+	if err := chk.clnt.List(context.Background(), nodes, client.MatchingLabels{
+		ControlPlaneIDLabel: "",
+	}); err != nil {
+		logger.Error(err, "unable to retrieve nodes for SNO check with lease configuration")
+		os.Exit(1)
+	}
+	return nodes.Items != nil && len(nodes.Items) == 1
+}