Skip to content

Commit

Permalink
fix: shutdown sidero-controller-manager when any component fails
Browse files Browse the repository at this point in the history
Fixes #560

The way it was implemented before this change, `errgroup` waits for all
goroutines to finish before it returns, so if the controller crashes due
to election issues, the container still keeps running as the HTTP API is up.

After this change, the container crashes on the first error.

Also added liveness/readiness checks; they won't help much with this issue,
but provide an additional layer of protection/visibility.

Signed-off-by: Andrey Smirnov <[email protected]>
(cherry picked from commit e52071d)
  • Loading branch information
smira committed Sep 17, 2021
1 parent 3eb7b28 commit b340cee
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 13 deletions.
12 changes: 11 additions & 1 deletion app/sidero-controller-manager/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ spec:
containerPort: 69
protocol: UDP
- name: http
containerPort: 8081
containerPort: ${SIDERO_CONTROLLER_MANAGER_API_PORT:=8081}
protocol: TCP
env:
- name: API_ENDPOINT
Expand All @@ -79,4 +79,14 @@ spec:
requests:
cpu: 100m
memory: 128Mi
readinessProbe:
httpGet:
path: /healthz
port: http
initialDelaySeconds: 15
livenessProbe:
httpGet:
path: /healthz
port: http
initialDelaySeconds: 15
terminationGracePeriodSeconds: 10
19 changes: 19 additions & 0 deletions app/sidero-controller-manager/internal/healthz/healthz.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package healthz

import (
"net/http"
)

// RegisterServer attaches the health check handler to mux under the
// "/healthz" path.
//
// It always returns nil.
func RegisterServer(mux *http.ServeMux) error {
	mux.Handle("/healthz", http.HandlerFunc(healthzHandler))

	return nil
}

// healthzHandler serves the health check endpoint. It never touches the
// response writer or the request: the component is always considered healthy.
func healthzHandler(_ http.ResponseWriter, _ *http.Request) {
	// intentionally empty: nothing is written to the response
}
34 changes: 22 additions & 12 deletions app/sidero-controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
debug "github.com/talos-systems/go-debug"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"golang.org/x/sync/errgroup"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
Expand All @@ -32,6 +31,7 @@ import (
infrav1 "github.com/talos-systems/sidero/app/caps-controller-manager/api/v1alpha3"
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/controllers"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/healthz"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/ipxe"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/metadata"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
Expand Down Expand Up @@ -200,13 +200,16 @@ func main() {
}
// +kubebuilder:scaffold:builder

errCh := make(chan error)

setupLog.Info("starting TFTP server")

go func() {
if err := tftp.ServeTFTP(); err != nil {
setupLog.Error(err, "unable to start TFTP server", "controller", "Environment")
os.Exit(1)
}

errCh <- err
}()

httpMux := http.NewServeMux()
Expand All @@ -225,6 +228,13 @@ func main() {
os.Exit(1)
}

setupLog.Info("starting healthz server")

if err := healthz.RegisterServer(httpMux); err != nil {
setupLog.Error(err, "unable to start healthz server", "controller", "Environment")
os.Exit(1)
}

setupLog.Info("starting internal API server")

apiRecorder := eventBroadcaster.NewRecorder(
Expand All @@ -251,18 +261,16 @@ func main() {

setupLog.Info("starting manager and HTTP server")

var eg errgroup.Group

eg.Go(func() error {
go func() {
err := mgr.Start(ctrl.SetupSignalHandler())
if err != nil {
setupLog.Error(err, "problem running manager")
}

return err
})
errCh <- err
}()

eg.Go(func() error {
go func() {
// Go standard library doesn't support running HTTP/2 on non-TLS HTTP connections.
// Package h2c provides handling for HTTP/2 over plaintext connection.
// gRPC provides its own HTTP/2 server implementation, so that's not an issue for gRPC,
Expand All @@ -288,10 +296,12 @@ func main() {
setupLog.Error(err, "problem running HTTP server")
}

return err
})
errCh <- err
}()

if err := eg.Wait(); err != nil {
os.Exit(1)
for err = range errCh {
if err != nil {
os.Exit(1)
}
}
}

0 comments on commit b340cee

Please sign in to comment.