Skip to content

Commit

Permalink
Implement Liveness probe
Browse files Browse the repository at this point in the history
  • Loading branch information
Yauheni Sliaptsou committed Mar 16, 2021
1 parent 85fd758 commit 3171144
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 0 deletions.
6 changes: 6 additions & 0 deletions cmd/node-termination-handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ func main() {
log.Fatal().Err(err).Msg("Unable to instantiate observability metrics,")
}

err = observability.InitProbes(nthConfig.EnableProbes, nthConfig.ProbesPort, nthConfig.ProbesEndpoint)
if err != nil {
nthConfig.Print()
log.Fatal().Err(err).Msg("Unable to instantiate probes service,")
}

imds := ec2metadata.New(nthConfig.MetadataURL, nthConfig.MetadataTries)

interruptionEventStore := interruptioneventstore.New(nthConfig)
Expand Down
3 changes: 3 additions & 0 deletions config/helm/aws-node-termination-handler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ Parameter | Description | Default
`logLevel` | Sets the log level (INFO, DEBUG, or ERROR) | `INFO`
`enablePrometheusServer` | If true, start an http server exposing `/metrics` endpoint for prometheus. | `false`
`prometheusServerPort` | Replaces the default HTTP port for exposing prometheus metrics. | `9092`
`enableProbesServer` | If true, start an http server exposing the `/healthz` endpoint for probes. | `false`
`probesServerPort` | Replaces the default HTTP port for exposing the probes endpoint. | `8080`
`probesServerEndpoint` | Replaces the default path of the probes endpoint. | `/healthz`
`podMonitor.create` | if `true`, create a PodMonitor | `false`
`podMonitor.interval` | Prometheus scrape interval | `30s`
`podMonitor.sampleLimit` | Number of scraped samples accepted | `5000`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ spec:
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
value: {{ .Values.prometheusServerPort | quote }}
- name: ENABLE_PROBES_SERVER
value: {{ .Values.enableProbesServer | quote }}
- name: PROBES_SERVER_PORT
value: {{ .Values.probesServerPort | quote }}
- name: PROBES_SERVER_ENDPOINT
value: {{ .Values.probesServerEndpoint | quote }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- if .Values.enablePrometheusServer }}
Expand All @@ -175,6 +181,13 @@ spec:
name: http-metrics
protocol: TCP
{{- end }}
{{- if .Values.enableProbesServer }}
ports:
- containerPort: {{ .Values.probesServerPort }}
hostPort: {{ .Values.probesServerPort }}
name: liveness-probe
protocol: TCP
{{- end }}
nodeSelector:
{{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux
{{- with .Values.nodeSelector }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ spec:
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
value: {{ .Values.prometheusServerPort | quote }}
- name: ENABLE_PROBES_SERVER
value: {{ .Values.enableProbesServer | quote }}
- name: PROBES_SERVER_PORT
value: {{ .Values.probesServerPort | quote }}
- name: PROBES_SERVER_ENDPOINT
value: {{ .Values.probesServerEndpoint | quote }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- if .Values.enablePrometheusServer }}
Expand All @@ -149,6 +155,13 @@ spec:
name: http-metrics
protocol: TCP
{{- end }}
{{- if .Values.enableProbesServer }}
ports:
- containerPort: {{ .Values.probesServerPort }}
hostPort: {{ .Values.probesServerPort }}
name: liveness-probe
protocol: TCP
{{- end }}
nodeSelector:
{{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: windows
{{- with .Values.nodeSelector }}
Expand Down
13 changes: 13 additions & 0 deletions config/helm/aws-node-termination-handler/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ spec:
value: {{ .Values.webhookProxy | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: ENABLE_PROBES_SERVER
value: {{ .Values.enableProbesServer | quote }}
- name: ENABLE_SPOT_INTERRUPTION_DRAINING
value: "false"
- name: ENABLE_SCHEDULED_EVENT_DRAINING
Expand All @@ -130,6 +132,10 @@ spec:
value: {{ .Values.queueURL | quote }}
- name: PROMETHEUS_SERVER_PORT
value: {{ .Values.prometheusServerPort | quote }}
- name: PROBES_SERVER_PORT
value: {{ .Values.probesServerPort | quote }}
- name: PROBES_SERVER_ENDPOINT
value: {{ .Values.probesServerEndpoint | quote }}
- name: AWS_REGION
value: {{ .Values.awsRegion | quote }}
- name: AWS_ENDPOINT
Expand All @@ -155,6 +161,13 @@ spec:
name: http-metrics
protocol: TCP
{{- end }}
{{- if .Values.enableProbesServer }}
ports:
- containerPort: {{ .Values.probesServerPort }}
hostPort: {{ .Values.probesServerPort }}
name: liveness-probe
protocol: TCP
{{- end }}
nodeSelector:
{{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux
{{- with .Values.nodeSelector }}
Expand Down
12 changes: 12 additions & 0 deletions config/helm/aws-node-termination-handler/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ podLabels: {}
linuxPodLabels: {}
windowsPodLabels: {}

# liveness probe settings.
probes:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
periodSeconds: 5

resources:
requests:
memory: "64Mi"
Expand Down Expand Up @@ -144,6 +152,10 @@ nodeSelectorTermsArch: ""
enablePrometheusServer: false
prometheusServerPort: 9092

enableProbesServer: false
probesServerPort: 8080
probesServerEndpoint: "/healthz"

tolerations:
- operator: "Exists"

Expand Down
13 changes: 13 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ const (
// https://github.com/prometheus/prometheus/wiki/Default-port-allocations
prometheusPortDefault = 9092
prometheusPortConfigKey = "PROMETHEUS_SERVER_PORT"
// probes
enableProbesDefault = false
enableProbesConfigKey = "ENABLE_PROBES_SERVER"
probesPortDefault = 8080
probesPortConfigKey = "PROBES_SERVER_PORT"
probesEndpointDefault = "/healthz"
probesEndpointConfigKey = "PROBES_SERVER_ENDPOINT"
region = ""
awsRegionConfigKey = "AWS_REGION"
awsEndpointConfigKey = "AWS_ENDPOINT"
Expand Down Expand Up @@ -115,6 +122,9 @@ type Config struct {
UptimeFromFile string
EnablePrometheus bool
PrometheusPort int
EnableProbes bool
ProbesPort int
ProbesEndpoint string
AWSRegion string
AWSEndpoint string
QueueURL string
Expand Down Expand Up @@ -162,6 +172,9 @@ func ParseCliArgs() (config Config, err error) {
flag.StringVar(&config.UptimeFromFile, "uptime-from-file", getEnv(uptimeFromFileConfigKey, uptimeFromFileDefault), "If specified, read system uptime from the file path (useful for testing).")
flag.BoolVar(&config.EnablePrometheus, "enable-prometheus-server", getBoolEnv(enablePrometheusConfigKey, enablePrometheusDefault), "If true, a http server is used for exposing prometheus metrics in /metrics endpoint.")
flag.IntVar(&config.PrometheusPort, "prometheus-server-port", getIntEnv(prometheusPortConfigKey, prometheusPortDefault), "The port for running the prometheus http server.")
flag.BoolVar(&config.EnableProbes, "enable-probes-server", getBoolEnv(enableProbesConfigKey, enableProbesDefault), "If true, a http server is used for exposing probes in /healthz endpoint.")
flag.IntVar(&config.ProbesPort, "probes-server-port", getIntEnv(probesPortConfigKey, probesPortDefault), "The port for running the probes http server.")
flag.StringVar(&config.ProbesEndpoint, "probes-server-endpoint", getEnv(probesEndpointConfigKey, probesEndpointDefault), "If specified, use this endpoint to make liveness probe")
flag.StringVar(&config.AWSRegion, "aws-region", getEnv(awsRegionConfigKey, ""), "If specified, use the AWS region for AWS API calls")
flag.StringVar(&config.AWSEndpoint, "aws-endpoint", getEnv(awsEndpointConfigKey, ""), "[testing] If specified, use the AWS endpoint to make API calls")
flag.StringVar(&config.QueueURL, "queue-url", getEnv(queueURLConfigKey, ""), "Listens for messages on the specified SQS queue URL")
Expand Down
41 changes: 41 additions & 0 deletions pkg/observability/probes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package observability

import (
"net"
"net/http"
"strconv"
"time"

"github.com/rs/zerolog/log"
)

// InitProbes starts the HTTP server that answers liveness probes.
//
// When enabled is false nothing is started and nil is returned. Otherwise
// livenessHandler is registered under endpoint on a mux private to this
// server, and the server listens on every interface at the given port.
//
// The listening socket is opened before InitProbes returns, so a bind failure
// (for example the port already being in use) is returned to the caller
// instead of only being logged later. Requests are then served from a
// background goroutine that runs until the server stops; errors raised while
// serving are logged, not returned.
//
// endpoint must be a valid net/http ServeMux pattern such as "/healthz";
// ServeMux panics when asked to register an invalid pattern.
func InitProbes(enabled bool, port int, endpoint string) error {
	if !enabled {
		return nil
	}

	// A dedicated mux keeps the probe path off http.DefaultServeMux, so it is
	// not exposed by other servers in the process that use the default mux and
	// repeated calls do not panic on a duplicate registration.
	mux := http.NewServeMux()
	mux.HandleFunc(endpoint, livenessHandler)

	probes := &http.Server{
		Addr:         net.JoinHostPort("", strconv.Itoa(port)),
		Handler:      mux,
		ReadTimeout:  1 * time.Second,
		WriteTimeout: 1 * time.Second,
	}

	// Bind synchronously so the caller learns about listen errors.
	listener, err := net.Listen("tcp", probes.Addr)
	if err != nil {
		return err
	}

	// Starts HTTP server exposing the probes path
	go func() {
		log.Info().Msgf("Starting to serve handler %s, port %d", endpoint, port)
		if err := probes.Serve(listener); err != nil && err != http.ErrServerClosed {
			log.Err(err).Msg("Failed to listen and serve http server")
		}
	}()

	return nil
}

// livenessHandler answers every request with HTTP 200 and the fixed JSON
// payload {"health":"OK"}. The request itself is not inspected.
func livenessHandler(w http.ResponseWriter, r *http.Request) {
	// Set replaces any Content-Type already present on the response, so the
	// header is never sent with more than one value.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// The status line has already been sent, so a failed body write cannot be
	// reported to the client; the error is deliberately discarded.
	_, _ = w.Write([]byte(`{"health":"OK"}`))
}
30 changes: 30 additions & 0 deletions pkg/observability/probes_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package observability

import (
"net/http"
"net/http/httptest"
"testing"
)

// TestLivenessHandler checks that livenessHandler replies to a GET request
// with HTTP 200, a JSON content type and the fixed health payload.
func TestLivenessHandler(t *testing.T) {
	const (
		wantContentType = "application/json"
		wantBody        = `{"health":"OK"}`
	)

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rr := httptest.NewRecorder()
	handler := http.HandlerFunc(livenessHandler)

	handler.ServeHTTP(rr, req)

	if contentType := rr.Header().Get("Content-Type"); contentType != wantContentType {
		t.Errorf("handler returned wrong content type: got %v want %v",
			contentType, wantContentType)
	}

	if status := rr.Code; status != http.StatusOK {
		t.Errorf("handler returned wrong status code: got %v want %v",
			status, http.StatusOK)
	}

	// Report the payload that is actually compared, not the status text.
	if body := rr.Body.String(); body != wantBody {
		t.Errorf("handler returned wrong body: got %v want %v",
			body, wantBody)
	}
}

0 comments on commit 3171144

Please sign in to comment.