Skip to content

Commit

Permalink
🐛 prevent leader election when shutting down a non-elected manager
Browse files Browse the repository at this point in the history
When leader election is enabled, a non-leader manager would never start the LeaderElection runnable group.
Thus, as the shutdown process calls the sync.Once Start func of the runnableGroup; it will start a new election.
This change ensures `Start` is ineffective during shutdown.

The test ensures the LeaderElection runnableGroup is not started during shutdown.

Signed-off-by: Alexandre Mahdhaoui <[email protected]>
  • Loading branch information
alexandremahdhaoui authored and k8s-infra-cherrypick-robot committed Apr 2, 2024
1 parent 8968da8 commit d39bab8
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/manager/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e

// Stop all the leader election runnables, which includes reconcilers.
cm.logger.Info("Stopping and waiting for leader election runnables")
// Prevent leader election when shutting down a non-elected manager
cm.runnables.LeaderElection.startOnce.Do(func() {})
cm.runnables.LeaderElection.StopAndWait(cm.shutdownCtx)

// Stop the caches before the leader election runnables, this is an important
Expand Down
92 changes: 92 additions & 0 deletions pkg/manager/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,85 @@ var _ = Describe("manger.Manager", func() {

Expect(cm.gracefulShutdownTimeout.Nanoseconds()).To(Equal(int64(0)))
})

It("should prevent leader election when shutting down a non-elected manager", func() {
var rl resourcelock.Interface
m1, err := New(cfg, Options{
LeaderElection: true,
LeaderElectionNamespace: "default",
LeaderElectionID: "test-leader-election-id",
newResourceLock: func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error) {
var err error
rl, err = leaderelection.NewResourceLock(config, recorderProvider, options)
return rl, err
},
HealthProbeBindAddress: "0",
Metrics: metricsserver.Options{BindAddress: "0"},
PprofBindAddress: "0",
})
Expect(err).ToNot(HaveOccurred())
Expect(m1).ToNot(BeNil())
Expect(rl.Describe()).To(Equal("default/test-leader-election-id"))

m1cm, ok := m1.(*controllerManager)
Expect(ok).To(BeTrue())
m1cm.onStoppedLeading = func() {}

m2, err := New(cfg, Options{
LeaderElection: true,
LeaderElectionNamespace: "default",
LeaderElectionID: "test-leader-election-id",
newResourceLock: func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error) {
var err error
rl, err = leaderelection.NewResourceLock(config, recorderProvider, options)
return rl, err
},
HealthProbeBindAddress: "0",
Metrics: metricsserver.Options{BindAddress: "0"},
PprofBindAddress: "0",
})
Expect(err).ToNot(HaveOccurred())
Expect(m2).ToNot(BeNil())
Expect(rl.Describe()).To(Equal("default/test-leader-election-id"))

m1done := make(chan struct{})
Expect(m1.Add(RunnableFunc(func(ctx context.Context) error {
defer GinkgoRecover()
close(m1done)
return nil
}))).To(Succeed())

ctx1, cancel1 := context.WithCancel(context.Background())
defer cancel1()
go func() {
defer GinkgoRecover()
Expect(m1.Elected()).ShouldNot(BeClosed())
Expect(m1.Start(ctx1)).NotTo(HaveOccurred())
}()
<-m1.Elected()
<-m1done

electionRunnable := &needElection{make(chan struct{})}

Expect(m2.Add(electionRunnable)).To(Succeed())

ctx2, cancel2 := context.WithCancel(context.Background())
m2done := make(chan struct{})
go func() {
defer GinkgoRecover()
Expect(m2.Start(ctx2)).NotTo(HaveOccurred())
close(m2done)
}()
Consistently(m2.Elected()).ShouldNot(Receive())

go func() {
defer GinkgoRecover()
Consistently(electionRunnable.ch).ShouldNot(Receive())
}()
cancel2()
<-m2done
})

It("should default ID to controller-runtime if ID is not set", func() {
var rl resourcelock.Interface
m1, err := New(cfg, Options{
Expand Down Expand Up @@ -1929,3 +2008,16 @@ func (f *fakeDeferredLoader) InjectScheme(scheme *runtime.Scheme) error {
type metricsDefaultServer interface {
GetBindAddr() string
}

type needElection struct {
ch chan struct{}
}

func (n *needElection) Start(_ context.Context) error {
n.ch <- struct{}{}
return nil
}

func (n *needElection) NeedLeaderElection() bool {
return true
}

0 comments on commit d39bab8

Please sign in to comment.