From 9ab2207003069471c732452149390df168f856ca Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Fri, 14 Jun 2019 16:30:27 -0500 Subject: [PATCH 1/5] Emit metrics with raft commit and apply index and statestore latest index --- nomad/server.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/nomad/server.go b/nomad/server.go index 612be5da65d..2700ed3fe5e 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -15,6 +15,7 @@ import ( "sync/atomic" "time" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/agent/consul/autopilot" consulapi "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/lib" @@ -410,6 +411,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI) (*Server, error) // Emit metrics go s.heartbeatStats() + // Emit raft and state store metrics + go s.EmitRaftStats(time.Second, s.shutdownCh) + // Start enterprise background workers s.startEnterpriseBackground() @@ -1450,6 +1454,27 @@ func (s *Server) Stats() map[string]map[string]string { return stats } +// EmitRaftStats is used to export metrics about the blocked eval tracker while enabled +func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) { + for { + select { + case <-time.After(period): + commitIndex := s.raft.LastIndex() + metrics.SetGauge([]string{"raft", "commitIndex"}, float32(commitIndex)) + appliedIndex := s.raft.AppliedIndex() + metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(appliedIndex)) + stateStoreSnapshotIndex, err := s.State().LatestIndex() + if err != nil { + s.logger.Warn("Unable to read snapshot index from statestore, metric will not be emitted", "error", err) + } else { + metrics.SetGauge([]string{"state", "snapshotIndex"}, float32(stateStoreSnapshotIndex)) + } + case <-stopCh: + return + } + } +} + // Region returns the region of the server func (s *Server) Region() string { return s.config.Region From 2282fe1ea88eaeb192094d619d7f32ad47c00f88 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 17 Jun 2019 15:51:31 -0500 Subject: [PATCH 2/5] Changed name of metric --- nomad/server.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nomad/server.go b/nomad/server.go index 2700ed3fe5e..21c02f26aa8 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1454,13 +1454,13 @@ func (s *Server) Stats() map[string]map[string]string { return stats } -// EmitRaftStats is used to export metrics about the blocked eval tracker while enabled +// EmitRaftStats is used to export metrics about raft indexes and state store snapshot index func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) { for { select { case <-time.After(period): - commitIndex := s.raft.LastIndex() - metrics.SetGauge([]string{"raft", "commitIndex"}, float32(commitIndex)) + lastIndex := s.raft.LastIndex() + metrics.SetGauge([]string{"raft", "lastIndex"}, float32(lastIndex)) appliedIndex := s.raft.AppliedIndex() metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(appliedIndex)) stateStoreSnapshotIndex, err := s.State().LatestIndex() From 425bd4f540d7ff96436979ebbc31c27fd0be2879 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 17 Jun 2019 15:51:49 -0500 Subject: [PATCH 3/5] docs for new metrics --- website/source/docs/telemetry/index.html.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/website/source/docs/telemetry/index.html.md b/website/source/docs/telemetry/index.html.md index e51f21e10d7..4aa575928f2 100644 --- a/website/source/docs/telemetry/index.html.md +++ b/website/source/docs/telemetry/index.html.md @@ -109,6 +109,18 @@ when retrieving metrics using the above described signals. Raft transactions / `interval` Counter + + `nomad.raft.lastIndex` + Index of the last log + Sequence number + Gauge + + + `nomad.raft.appliedIndex` + Index of the last applied log + Sequence number + Gauge + `nomad.raft.replication.appendEntries` Raft transaction commit time @@ -167,6 +179,12 @@ when retrieving metrics using the above described signals. ms / Plan Evaluation Timer + + `nomad.state.snapshotIndex` + Latest index in the server's in memory state store + Sequence number + Gauge + `nomad.worker.invoke_scheduler.` Time to run the scheduler of the given type From 3adb7510b0938cb4d9554f6cf22ed4fa87507471 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 19 Jun 2019 11:58:46 -0500 Subject: [PATCH 4/5] Change interval of raft stats collection to 10s --- nomad/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/server.go b/nomad/server.go index 21c02f26aa8..2df2dca6a2a 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -412,7 +412,7 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI) (*Server, error) go s.heartbeatStats() // Emit raft and state store metrics - go s.EmitRaftStats(time.Second, s.shutdownCh) + go s.EmitRaftStats(10*time.Second, s.shutdownCh) // Start enterprise background workers s.startEnterpriseBackground() From aba8d4281154b7a1b18817c783865e667601ab7c Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 19 Jun 2019 11:59:05 -0500 Subject: [PATCH 5/5] Add links to godoc for raft related metrics --- website/source/docs/telemetry/index.html.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/source/docs/telemetry/index.html.md b/website/source/docs/telemetry/index.html.md index 4aa575928f2..3d33c27f220 100644 --- a/website/source/docs/telemetry/index.html.md +++ b/website/source/docs/telemetry/index.html.md @@ -111,13 +111,13 @@ when retrieving metrics using the above described signals. `nomad.raft.lastIndex` - Index of the last log + Index of the last log in stable storage Sequence number Gauge `nomad.raft.appliedIndex` - Index of the last applied log + Index of the last applied log Sequence number Gauge