Skip to content

Commit

Permalink
cmd/contour: refactor shutdown-manager to initiate shutdown using an …
Browse files Browse the repository at this point in the history
…Exec command (projectcontour#2751)

cmd/contour: refactor shutdown-manager to initiate shutdown using an Exec command

This changes the preStop lifecycle hook to use an exec command instead of an HTTP request.
The /shutdown HTTP endpoint now only returns when the pod is ready to be terminated, and
no longer initiates the Envoy shutdown sequence.

The Envoy container will block on its preStop hook until the /shutdown endpoint returns.
That endpoint, implemented in the shutdown-manager, will utilize a file messaging pattern
to know when it is safe for Envoy to terminate by checking for the existence of a file
in the filesystem.

Signed-off-by: Steve Sloka <[email protected]>
  • Loading branch information
stevesloka authored and tthebst committed Aug 6, 2020
1 parent 5e3ee72 commit 51447ea
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 22 deletions.
9 changes: 7 additions & 2 deletions cmd/contour/contour.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ func main() {
app.HelpFlag.Short('h')

envoyCmd := app.Command("envoy", "Sub-command for envoy actions.")
shutdownManager, shutdownManagerCtx := registerShutdownManager(envoyCmd, log)
sdm, shutdownManagerCtx := registerShutdownManager(envoyCmd, log)

// Add a "shutdown" command which initiates an Envoy shutdown sequence.
sdmShutdown := envoyCmd.Command("shutdown", "Initiate an shutdown sequence which configures Envoy to begin draining connections.")

bootstrap, bootstrapCtx := registerBootstrap(app)
certgenApp, certgenConfig := registerCertGen(app)
Expand Down Expand Up @@ -68,8 +71,10 @@ func main() {

args := os.Args[1:]
switch kingpin.MustParse(app.Parse(args)) {
case shutdownManager.FullCommand():
case sdm.FullCommand():
doShutdownManager(shutdownManagerCtx)
case sdmShutdown.FullCommand():
shutdownManagerCtx.shutdownHandler()
case bootstrap.FullCommand():
check(envoy.WriteBootstrap(bootstrapCtx))
case certgenApp.FullCommand():
Expand Down
57 changes: 45 additions & 12 deletions cmd/contour/shutdownmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"io"
"log"
"net/http"
"os"
"time"

"github.com/projectcontour/contour/internal/contour"
Expand All @@ -35,6 +36,9 @@ const (
prometheusStat = "envoy_http_downstream_cx_active"
)

// File path used in the /shutdown endpoint.
const shutdownReadyFile = "/ok"

// prometheusLabels returns the two Envoy listener names, HTTP first and then
// HTTPS, as defined by the contour package constants.
func prometheusLabels() []string {
	labels := make([]string, 0, 2)
	labels = append(labels, contour.ENVOY_HTTP_LISTENER, contour.ENVOY_HTTPS_LISTENER)
	return labels
}
Expand Down Expand Up @@ -66,25 +70,45 @@ func newShutdownManagerContext() *shutdownmanagerContext {
}
}

// handles the /healthz endpoint which is used for the shutdown-manager's liveness probe
// healthzHandler handles the /healthz endpoint which is used for the shutdown-manager's liveness probe.
// It writes the body "OK" and never sets a status explicitly, so net/http sends its
// default 200 on the first Write. A failed write is logged and otherwise ignored.
func (s *shutdownmanagerContext) healthzHandler(w http.ResponseWriter, r *http.Request) {
// NOTE(review): StatusText only returns the string for the code and the result is
// discarded, so this line has no effect on the response — presumably
// w.WriteHeader(http.StatusOK) was intended; confirm.
http.StatusText(http.StatusOK)
if _, err := w.Write([]byte("OK")); err != nil {
// NOTE(review): two logging statements appear here; this view looks like a rendered
// diff that shows both the removed and the added line — confirm against the real file.
s.Error(err)
s.WithField("context", "healthzHandler").Error(err)
}
}

// shutdownHandler handles the /shutdown endpoint which should be called from a pod preStop hook,
// where it will block pod shutdown until envoy is able to drain connections to below the min-open threshold
func (s *shutdownmanagerContext) shutdownHandler(w http.ResponseWriter, r *http.Request) {
// shutdownReadyHandler handles the /shutdown endpoint, which is used by Envoy to determine
// whether it can terminate. Once enough connections have drained based upon configuration,
// the shutdown command writes a file to shutdownReadyFile ("/ok") in the shutdown manager's
// file system. This handler blocks, polling once per second, until that file exists and
// then responds with "OK" (implicit 200 status). The file-based approach is used because
// the process in which the kubelet runs the shutdown command is different from the process
// serving the HTTP request to /shutdown.
//
// Polling stops early, without writing a response, if the request's context is cancelled
// (for example because the client disconnected), so abandoned requests do not leave a
// goroutine polling forever.
func (s *shutdownmanagerContext) shutdownReadyHandler(w http.ResponseWriter, r *http.Request) {
	const pollInterval = 1 * time.Second

	logger := s.WithField("context", "shutdownReadyHandler")
	ctx := r.Context()

	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

	for {
		_, err := os.Stat(shutdownReadyFile)
		switch {
		case err == nil:
			// The first Write sends an implicit 200 OK, so no explicit status is needed.
			if _, err := w.Write([]byte("OK")); err != nil {
				logger.Error(err)
			}
			return
		case os.IsNotExist(err):
			// Expected while connections are still draining; not an error condition.
			logger.Infof("file %s does not exist; checking again in %s", shutdownReadyFile, pollInterval)
		default:
			logger.Errorf("error checking for file: %v", err)
		}

		select {
		case <-ctx.Done():
			logger.Infof("request cancelled before shutdown was ready: %v", ctx.Err())
			return
		case <-ticker.C:
		}
	}
}

// shutdownHandler is called from a pod preStop hook, where it will block pod shutdown
// until envoy is able to drain connections to below the min-open threshold.
func (s *shutdownmanagerContext) shutdownHandler() {

// Send shutdown signal to Envoy to start draining connections
s.Infof("failing envoy healthchecks")
err := shutdownEnvoy()
if err != nil {
s.Errorf("error sending envoy healthcheck fail: %v", err)
if err := shutdownEnvoy(); err != nil {
s.WithField("context", "shutdownHandler").Errorf("error sending envoy healthcheck fail: %v", err)
}

s.Infof("waiting %s before polling for draining connections", s.checkDelay)
s.WithField("context", "shutdownHandler").Infof("waiting %s before polling for draining connections", s.checkDelay)
time.Sleep(s.checkDelay)

for {
Expand All @@ -93,12 +117,19 @@ func (s *shutdownmanagerContext) shutdownHandler(w http.ResponseWriter, r *http.
s.Error(err)
} else {
if openConnections <= s.minOpenConnections {
s.WithField("open_connections", openConnections).
s.WithField("context", "shutdownHandler").
WithField("open_connections", openConnections).
WithField("min_connections", s.minOpenConnections).
Info("min number of open connections found, shutting down")
file, err := os.Create(shutdownReadyFile)
if err != nil {
s.Error(err)
}
defer file.Close()
return
}
s.WithField("open_connections", openConnections).
s.WithField("context", "shutdownHandler").
WithField("open_connections", openConnections).
WithField("min_connections", s.minOpenConnections).
Info("polled open connections")
}
Expand Down Expand Up @@ -170,11 +201,12 @@ func parseOpenConnections(stats io.Reader) (int, error) {
}

// doShutdownManager runs the shutdown-manager HTTP server. It registers the /healthz and
// /shutdown handlers on net/http's default mux and serves on config.httpServePort.
// Whatever error ListenAndServe returns is passed to log.Fatal, which exits the
// process, so this function does not return normally.
func doShutdownManager(config *shutdownmanagerContext) {

config.Info("started envoy shutdown manager")
// NOTE(review): log.Fatal below exits via os.Exit, which does not run deferred calls,
// so this "stopped" message is likely never logged — confirm.
defer config.Info("stopped")

http.HandleFunc("/healthz", config.healthzHandler)
// NOTE(review): /shutdown is registered twice here; this view looks like a rendered diff
// that shows both the removed (shutdownHandler) and the added (shutdownReadyHandler)
// line — confirm against the real file, since a duplicate pattern panics at registration.
http.HandleFunc("/shutdown", config.shutdownHandler)
http.HandleFunc("/shutdown", config.shutdownReadyHandler)
log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", config.httpServePort), nil))
}

Expand All @@ -188,5 +220,6 @@ func registerShutdownManager(cmd *kingpin.CmdClause, log logrus.FieldLogger) (*k
shutdownmgr.Flag("check-delay", "Time wait before polling Envoy for open connections.").Default("60s").DurationVar(&ctx.checkDelay)
shutdownmgr.Flag("min-open-connections", "Min number of open connections when polling Envoy.").IntVar(&ctx.minOpenConnections)
shutdownmgr.Flag("serve-port", "Port to serve the http server on.").IntVar(&ctx.httpServePort)

return shutdownmgr, ctx
}
9 changes: 5 additions & 4 deletions examples/contour/03-envoy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ spec:
imagePullPolicy: Always
lifecycle:
preStop:
httpGet:
path: /shutdown
port: 8090
scheme: HTTP
exec:
command:
- /bin/contour
- envoy
- shutdown
livenessProbe:
httpGet:
path: /healthz
Expand Down
9 changes: 5 additions & 4 deletions examples/render/contour.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1773,10 +1773,11 @@ spec:
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
httpGet:
path: /shutdown
port: 8090
scheme: HTTP
exec:
command:
- /bin/contour
- envoy
- shutdown
livenessProbe:
httpGet:
path: /healthz
Expand Down

0 comments on commit 51447ea

Please sign in to comment.