diff --git a/CHANGELOG.md b/CHANGELOG.md index f8a6fbe8fc2..c4a01e00e07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel request for both Prometheus and sidecar. Single requests now should take constant amount of memory on sidecar, so resource consumption prediction is now straightforward. - [#1358](https://github.com/thanos-io/thanos/pull/1358) Added `part_size` configuration option for HTTP multipart requests minimum part size for S3 storage type - [#1363](https://github.com/thanos-io/thanos/pull/1363) Thanos Receive now exposes `thanos_receive_hashring_nodes` and `thanos_receive_hashring_tenants` metrics to monitor status of hash-rings +- [#1395](https://github.com/thanos-io/thanos/pull/1395) Added `/-/ready` and `/-/healthy` endpoints to Thanos sidecar. ### Changed diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index b6623821949..d4218c1d385 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -68,8 +68,7 @@ func (cs compactionSet) maxLevel() int { } func registerCompact(m map[string]setupFunc, app *kingpin.Application) { - comp := component.Compact - cmd := app.Command(comp.String(), "continuously compacts blocks in an object store bucket") + cmd := app.Command(component.Compact.String(), "continuously compacts blocks in an object store bucket") haltOnError := cmd.Flag("debug.halt-on-error", "Halt the process if a critical compaction error is detected."). Hidden().Default("true").Bool() @@ -110,7 +109,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { compactionConcurrency := cmd.Flag("compact.concurrency", "Number of goroutines to use when compacting groups."). Default("1").Int() - m[comp.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { + m[component.Compact.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { return runCompact(g, logger, reg, *httpAddr, *dataDir, @@ -125,7 +124,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { compact.ResolutionLevel5m: time.Duration(*retention5m), compact.ResolutionLevel1h: time.Duration(*retention1h), }, - comp, + component.Compact, *disableDownsampling, *maxCompactionLevel, *blockSyncConcurrency, @@ -168,9 +167,9 @@ func runCompact( downsampleMetrics := newDownsampleMetrics(reg) - readinessProber := prober.NewProber(component, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) + statusProber := prober.NewProber(component, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) // Initiate default HTTP listener providing metrics endpoint and readiness/liveness probes. - if err := defaultHTTPListener(g, logger, reg, httpBindAddr, readinessProber); err != nil { + if err := defaultHTTPListener(g, logger, reg, httpBindAddr, statusProber); err != nil { return errors.Wrap(err, "create readiness prober") } @@ -325,7 +324,7 @@ func runCompact( }) level.Info(logger).Log("msg", "starting compact node") - readinessProber.SetReady() + statusProber.SetReady() return nil } diff --git a/cmd/thanos/main.go b/cmd/thanos/main.go index 6d88c94ff7d..2e6f2bf413f 100644 --- a/cmd/thanos/main.go +++ b/cmd/thanos/main.go @@ -71,7 +71,7 @@ func main() { tracingConfig := regCommonTracingFlags(app) cmds := map[string]setupFunc{} - registerSidecar(cmds, app, "sidecar") + registerSidecar(cmds, app) registerStore(cmds, app, "store") registerQuery(cmds, app, "query") registerRule(cmds, app, "rule") diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 6527e434880..3e4655c5d14 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -11,7 +11,7 @@ import ( "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/oklog/run" - opentracing "github.com/opentracing/opentracing-go" + "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -19,6 +19,7 @@ import ( "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/objstore/client" + "github.com/thanos-io/thanos/pkg/prober" "github.com/thanos-io/thanos/pkg/promclient" "github.com/thanos-io/thanos/pkg/reloader" "github.com/thanos-io/thanos/pkg/runutil" @@ -26,13 +27,13 @@ import ( "github.com/thanos-io/thanos/pkg/store" "github.com/thanos-io/thanos/pkg/store/storepb" "google.golang.org/grpc" - kingpin "gopkg.in/alecthomas/kingpin.v2" + "gopkg.in/alecthomas/kingpin.v2" ) const waitForExternalLabelsTimeout = 10 * time.Minute -func registerSidecar(m map[string]setupFunc, app *kingpin.Application, name string) { - cmd := app.Command(name, "sidecar for Prometheus server") +func registerSidecar(m map[string]setupFunc, app *kingpin.Application) { + cmd := app.Command(component.Sidecar.String(), "sidecar for Prometheus server") grpcBindAddr, httpBindAddr, cert, key, clientCA := regCommonServerFlags(cmd) @@ -54,7 +55,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application, name stri uploadCompacted := cmd.Flag("shipper.upload-compacted", "[Experimental] If true sidecar will try to upload compacted blocks as well. Useful for migration purposes. Works only if compaction is disabled on Prometheus.").Default("false").Hidden().Bool() - m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { + m[component.Sidecar.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { rl := reloader.New( log.With(logger, "component", "reloader"), reloader.ReloadURLFromBase(*promURL), @@ -77,6 +78,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application, name stri objStoreConfig, rl, *uploadCompacted, + component.Sidecar, ) } } @@ -96,6 +98,7 @@ func runSidecar( objStoreConfig *pathOrContent, reloader *reloader.Reloader, uploadCompacted bool, + comp component.Component, ) error { var m = &promMetadata{ promURL: promURL, @@ -117,6 +120,12 @@ func runSidecar( uploads = false } + statusProber := prober.NewProber(comp, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) + // Initiate default HTTP listener providing metrics endpoint and readiness/liveness probes. + if err := defaultHTTPListener(g, logger, reg, httpBindAddr, statusProber); err != nil { + return errors.Wrap(err, "create readiness prober") + } + // Setup all the concurrent groups. { promUp := prometheus.NewGauge(prometheus.GaugeOpts{ @@ -148,6 +157,7 @@ func runSidecar( "err", err, ) promUp.Set(0) + statusProber.SetNotReady(err) return err } @@ -156,6 +166,7 @@ func runSidecar( "external_labels", m.Labels().String(), ) promUp.Set(1) + statusProber.SetReady() lastHeartbeat.Set(float64(time.Now().UnixNano()) / 1e9) return nil }) @@ -195,9 +206,7 @@ func runSidecar( cancel() }) } - if err := metricHTTPListenGroup(g, logger, reg, httpBindAddr); err != nil { - return err - } + { l, err := net.Listen("tcp", grpcBindAddr) if err != nil { diff --git a/pkg/prober/prober.go b/pkg/prober/prober.go index 1bf6c32f403..36bea35b88b 100644 --- a/pkg/prober/prober.go +++ b/pkg/prober/prober.go @@ -22,6 +22,19 @@ const ( ) // Prober represents health and readiness status of given component. +// +// From Kubernetes documentation https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/ : +// +// liveness: Many applications running for long periods of time eventually transition to broken states, +// (healthy) and cannot recover except by being restarted. +// Kubernetes provides liveness probes to detect and remedy such situations. +// +// readiness: Sometimes, applications are temporarily unable to serve traffic. +// (ready) For example, an application might need to load large data or configuration files during startup, +// or depend on external services after startup. In such cases, you don’t want to kill the application, +// but you don’t want to send it requests either. Kubernetes provides readiness probes to detect +// and mitigate these situations. A pod with containers reporting that they are not ready +// does not receive traffic through Kubernetes Services. type Prober struct { logger log.Logger component component.Component @@ -36,7 +49,6 @@ type Prober struct { // NewProber returns Prober representing readiness and healthiness of given component. func NewProber(component component.Component, logger log.Logger, reg prometheus.Registerer) *Prober { initialErr := fmt.Errorf(initialErrorFmt, component) - p := &Prober{ component: component, logger: logger, @@ -76,7 +88,7 @@ func (p *Prober) writeResponse(w http.ResponseWriter, probeFn func() error, prob http.Error(w, fmt.Sprintf("thanos %v is not %v. Reason: %v", p.component, probeType, err), probeErrorHTTPStatus) return } - if _, err := io.WriteString(w, fmt.Sprintf("thanos %v is %v", p.component, probeType)); err == nil { + if _, err := io.WriteString(w, fmt.Sprintf("thanos %v is %v", p.component, probeType)); err != nil { level.Error(p.logger).Log("msg", "failed to write probe response", "probe type", probeType, "err", err) } } diff --git a/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml b/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml index 02d37d2dc5f..a4362035e21 100644 --- a/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml +++ b/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml @@ -145,6 +145,14 @@ spec: containerPort: 10902 - name: grpc containerPort: 10901 + livenessProbe: + httpGet: + port: 10902 + path: /-/healthy + readinessProbe: + httpGet: + port: 10902 + path: /-/ready volumeMounts: - name: prometheus mountPath: /var/prometheus diff --git a/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml b/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml index 9e30120adec..11b48303607 100644 --- a/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml +++ b/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml @@ -129,6 +129,14 @@ spec: containerPort: 10902 - name: grpc containerPort: 10901 + livenessProbe: + httpGet: + port: 10902 + path: /-/healthy + readinessProbe: + httpGet: + port: 10902 + path: /-/ready volumeMounts: - name: prometheus mountPath: /var/prometheus