From 896a5e4a2b6aebc03ba052c14e257bc7d6760510 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 11:53:40 -0700 Subject: [PATCH 1/8] etcdserver: add "etcd_server_heartbeat_failures_total" {"level":"warn","ts":1527101858.4149103,"caller":"etcdserver/raft.go:370","msg":"failed to send out heartbeat; took too long, server is overloaded likely from slow disk","heartbeat-interval":0.1,"expected-duration":0.2,"exceeded-duration":0.025771662} {"level":"warn","ts":1527101858.4149644,"caller":"etcdserver/raft.go:370","msg":"failed to send out heartbeat; took too long, server is overloaded likely from slow disk","heartbeat-interval":0.1,"expected-duration":0.2,"exceeded-duration":0.034015766} Signed-off-by: Gyuho Lee --- etcdserver/metrics.go | 7 +++++++ etcdserver/raft.go | 6 ++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/etcdserver/metrics.go b/etcdserver/metrics.go index 6b005bfb94d..4e1d189eb4a 100644 --- a/etcdserver/metrics.go +++ b/etcdserver/metrics.go @@ -42,6 +42,12 @@ var ( Name: "leader_changes_seen_total", Help: "The number of leader changes seen.", }) + heartbeatFailures = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "heartbeat_failures_total", + Help: "The total number of heartbeat send failures (likely overloaded from slow disk).", + }) proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "etcd", Subsystem: "server", @@ -85,6 +91,7 @@ func init() { prometheus.MustRegister(hasLeader) prometheus.MustRegister(isLeader) prometheus.MustRegister(leaderChanges) + prometheus.MustRegister(heartbeatFailures) prometheus.MustRegister(proposalsCommitted) prometheus.MustRegister(proposalsApplied) prometheus.MustRegister(proposalsPending) diff --git a/etcdserver/raft.go b/etcdserver/raft.go index b7260e6c56c..089985c75d0 100644 --- a/etcdserver/raft.go +++ b/etcdserver/raft.go @@ -368,14 +368,16 @@ func (r *raftNode) processMessages(ms []raftpb.Message) []raftpb.Message { // TODO: limit request rate. if r.lg != nil { r.lg.Warn( - "heartbeat took too long to send out; server is overloaded, likely from slow disk", - zap.Duration("exceeded", exceed), + "failed to send out heartbeat; took too long, server is overloaded likely from slow disk", zap.Duration("heartbeat-interval", r.heartbeat), + zap.Duration("expected-duration", 2*r.heartbeat), + zap.Duration("exceeded-duration", exceed), ) } else { plog.Warningf("failed to send out heartbeat on time (exceeded the %v timeout for %v)", r.heartbeat, exceed) plog.Warningf("server is likely overloaded") } + heartbeatFailures.Inc() } } } From dd1baf6e964059a73442b88399574ea96b952187 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 12:01:38 -0700 Subject: [PATCH 2/8] etcdserver: add "etcd_server_slow_apply_total" {"level":"warn","ts":1527101858.6985068,"caller":"etcdserver/util.go:115","msg":"apply request took too long","took":0.114101529,"expected-duration":0.1,"prefix":"","request":"header: put: --- etcdserver/metrics.go | 7 +++++++ etcdserver/util.go | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/etcdserver/metrics.go b/etcdserver/metrics.go index 4e1d189eb4a..6bdc0a7a659 100644 --- a/etcdserver/metrics.go +++ b/etcdserver/metrics.go @@ -48,6 +48,12 @@ var ( Name: "heartbeat_failures_total", Help: "The total number of heartbeat send failures (likely overloaded from slow disk).", }) + slowApplies = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "slow_apply_total", + Help: "The total number of slow apply requests (likely overloaded from slow disk).", + }) proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "etcd", Subsystem: "server", @@ -92,6 +98,7 @@ func init() { prometheus.MustRegister(isLeader) prometheus.MustRegister(leaderChanges) prometheus.MustRegister(heartbeatFailures) + prometheus.MustRegister(slowApplies) prometheus.MustRegister(proposalsCommitted) prometheus.MustRegister(proposalsApplied) prometheus.MustRegister(proposalsPending) diff --git a/etcdserver/util.go b/etcdserver/util.go index 3a12a4be4ea..e05884f5973 100644 --- a/etcdserver/util.go +++ b/etcdserver/util.go @@ -113,7 +113,7 @@ func warnOfExpensiveGenericRequest(lg *zap.Logger, now time.Time, stringer fmt.S if d > warnApplyDuration { if lg != nil { lg.Warn( - "request took too long", + "apply request took too long", zap.Duration("took", d), zap.Duration("expected-duration", warnApplyDuration), zap.String("prefix", prefix), @@ -122,5 +122,6 @@ func warnOfExpensiveGenericRequest(lg *zap.Logger, now time.Time, stringer fmt.S } else { plog.Warningf("%srequest %q took too long (%v) to execute", prefix, stringer.String(), d) } + slowApplies.Inc() } } From 58e3ead21973a1b3b63c997d4bc2ee6f27b664ac Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 12:09:03 -0700 Subject: [PATCH 3/8] mvcc/backend: clean up mutex, logging Signed-off-by: Gyuho Lee --- mvcc/backend/batch_tx.go | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/mvcc/backend/batch_tx.go b/mvcc/backend/batch_tx.go index d2a9e455bc3..dee9938dc7f 100644 --- a/mvcc/backend/batch_tx.go +++ b/mvcc/backend/batch_tx.go @@ -183,15 +183,15 @@ func unsafeForEach(tx *bolt.Tx, bucket []byte, visitor func(k, v []byte) error) // Commit commits a previous tx and begins a new writable one. func (t *batchTx) Commit() { t.Lock() - defer t.Unlock() t.commit(false) + t.Unlock() } // CommitAndStop commits the previous tx and does not create a new one. func (t *batchTx) CommitAndStop() { t.Lock() - defer t.Unlock() t.commit(true) + t.Unlock() } func (t *batchTx) Unlock() { @@ -215,19 +215,18 @@ func (t *batchTx) commit(stop bool) { } start := time.Now() + // gofail: var beforeCommit struct{} err := t.tx.Commit() // gofail: var afterCommit struct{} + commitDurations.Observe(time.Since(start).Seconds()) atomic.AddInt64(&t.backend.commits, 1) t.pending = 0 if err != nil { if t.backend.lg != nil { - t.backend.lg.Fatal( - "failed to commit tx", - zap.Error(err), - ) + t.backend.lg.Fatal("failed to commit tx", zap.Error(err)) } else { plog.Fatalf("cannot commit tx (%s)", err) } @@ -269,31 +268,28 @@ func (t *batchTxBuffered) Unlock() { func (t *batchTxBuffered) Commit() { t.Lock() - defer t.Unlock() t.commit(false) + t.Unlock() } func (t *batchTxBuffered) CommitAndStop() { t.Lock() - defer t.Unlock() t.commit(true) + t.Unlock() } func (t *batchTxBuffered) commit(stop bool) { // all read txs must be closed to acquire boltdb commit rwlock t.backend.readTx.mu.Lock() - defer t.backend.readTx.mu.Unlock() t.unsafeCommit(stop) + t.backend.readTx.mu.Unlock() } func (t *batchTxBuffered) unsafeCommit(stop bool) { if t.backend.readTx.tx != nil { if err := t.backend.readTx.tx.Rollback(); err != nil { if t.backend.lg != nil { - t.backend.lg.Fatal( - "failed to rollback tx", - zap.Error(err), - ) + t.backend.lg.Fatal("failed to rollback tx", zap.Error(err)) } else { plog.Fatalf("cannot rollback tx (%s)", err) } From 60a9ec8a15e8fa3ca0f5f1bc5aeae9f5d159a359 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 12:14:43 -0700 Subject: [PATCH 4/8] mvcc/backend: document metrics ExponentialBuckets Signed-off-by: Gyuho Lee --- mvcc/backend/metrics.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mvcc/backend/metrics.go b/mvcc/backend/metrics.go index 30a38801476..e83602ea980 100644 --- a/mvcc/backend/metrics.go +++ b/mvcc/backend/metrics.go @@ -22,7 +22,10 @@ var ( Subsystem: "disk", Name: "backend_commit_duration_seconds", Help: "The latency distributions of commit called by backend.", - Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), + + // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 + // highest bucket start of 0.001 sec * 2^13 == 8.192 sec + Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), }) snapshotDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ @@ -30,7 +33,9 @@ var ( Subsystem: "disk", Name: "backend_snapshot_duration_seconds", Help: "The latency distribution of backend snapshots.", - // 10 ms -> 655 seconds + + // lowest bucket start of upper bound 0.01 sec (10 ms) with factor 2 + // highest bucket start of 0.01 sec * 2^16 == 655.36 sec Buckets: prometheus.ExponentialBuckets(.01, 2, 17), }) ) From d326b2933ca5791250569ba05bf568ddb71bf3d6 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 12:21:57 -0700 Subject: [PATCH 5/8] mvcc/backend: add "etcd_disk_backend_defrag_duration_seconds" Signed-off-by: Gyuho Lee --- mvcc/backend/backend.go | 5 ++++- mvcc/backend/metrics.go | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index d98ce11a392..d4f25da2bb7 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -415,6 +415,9 @@ func (b *backend) defrag() error { atomic.StoreInt64(&b.size, size) atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize))) + took := time.Since(now) + defragDurations.Observe(took.Seconds()) + size2, sizeInUse2 := b.Size(), b.SizeInUse() if b.lg != nil { b.lg.Info( @@ -426,7 +429,7 @@ func (b *backend) defrag() error { zap.Int64("current-db-size-in-use-bytes-diff", sizeInUse2-sizeInUse1), zap.Int64("current-db-size-in-use-bytes", sizeInUse2), zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse2))), - zap.Duration("took", time.Since(now)), + zap.Duration("took", took), ) } return nil diff --git a/mvcc/backend/metrics.go b/mvcc/backend/metrics.go index e83602ea980..95503940bd5 100644 --- a/mvcc/backend/metrics.go +++ b/mvcc/backend/metrics.go @@ -28,6 +28,18 @@ var ( Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), }) + defragDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "etcd", + Subsystem: "disk", + Name: "backend_defrag_duration_seconds", + Help: "The latency distribution of backend defragmentation.", + + // 100 MB usually takes 1 sec, so start with 10 MB of 100 ms + // lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2 + // highest bucket start of 0.1 sec * 2^12 == 409.6 sec + Buckets: prometheus.ExponentialBuckets(.01, 2, 13), + }) + snapshotDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: "etcd", Subsystem: "disk", @@ -42,5 +54,6 @@ var ( func init() { prometheus.MustRegister(commitDurations) + prometheus.MustRegister(defragDurations) prometheus.MustRegister(snapshotDurations) } From 966ee9323cb067f85477f7f633a035542500d825 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 12:33:58 -0700 Subject: [PATCH 6/8] mvcc/backend: fix defrag duration scale Signed-off-by: Gyuho Lee --- mvcc/backend/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvcc/backend/metrics.go b/mvcc/backend/metrics.go index 95503940bd5..34157080499 100644 --- a/mvcc/backend/metrics.go +++ b/mvcc/backend/metrics.go @@ -37,7 +37,7 @@ var ( // 100 MB usually takes 1 sec, so start with 10 MB of 100 ms // lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2 // highest bucket start of 0.1 sec * 2^12 == 409.6 sec - Buckets: prometheus.ExponentialBuckets(.01, 2, 13), + Buckets: prometheus.ExponentialBuckets(.1, 2, 13), }) snapshotDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ From bc59f7b42f580ea4494c03ad01a0e611dfcc9cad Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 12:34:09 -0700 Subject: [PATCH 7/8] mvcc: add "etcd_mvcc_hash_(rev)_duration_seconds" etcd_mvcc_hash_duration_seconds etcd_mvcc_hash_rev_duration_seconds Signed-off-by: Gyuho Lee --- mvcc/kvstore.go | 11 ++++++++++- mvcc/metrics.go | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/mvcc/kvstore.go b/mvcc/kvstore.go index 592f46d21c4..97e04ad78d8 100644 --- a/mvcc/kvstore.go +++ b/mvcc/kvstore.go @@ -163,12 +163,18 @@ func (s *store) compactBarrier(ctx context.Context, ch chan struct{}) { } func (s *store) Hash() (hash uint32, revision int64, err error) { + start := time.Now() + s.b.ForceCommit() h, err := s.b.Hash(DefaultIgnores) + + hashDurations.Observe(time.Since(start).Seconds()) return h, s.currentRev, err } func (s *store) HashByRev(rev int64) (hash uint32, currentRev int64, compactRev int64, err error) { + start := time.Now() + s.mu.RLock() s.revMu.RLock() compactRev, currentRev = s.compactMainRev, s.currentRev @@ -213,7 +219,10 @@ func (s *store) HashByRev(rev int64) (hash uint32, currentRev int64, compactRev h.Write(v) return nil }) - return h.Sum32(), currentRev, compactRev, err + hash = h.Sum32() + + hashRevDurations.Observe(time.Since(start).Seconds()) + return hash, currentRev, compactRev, err } func (s *store) Compact(rev int64) (<-chan struct{}, error) { diff --git a/mvcc/metrics.go b/mvcc/metrics.go index 4f43ed650cc..419a12e9cc9 100644 --- a/mvcc/metrics.go +++ b/mvcc/metrics.go @@ -170,6 +170,30 @@ var ( // overridden by mvcc initialization reportDbTotalSizeInUseInBytesMu sync.RWMutex reportDbTotalSizeInUseInBytes func() float64 = func() float64 { return 0 } + + hashDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "etcd", + Subsystem: "mvcc", + Name: "hash_duration_seconds", + Help: "The latency distribution of storage hash operation.", + + // 100 MB usually takes 100 ms, so start with 10 MB of 10 ms + // lowest bucket start of upper bound 0.01 sec (10 ms) with factor 2 + // highest bucket start of 0.01 sec * 2^14 == 163.84 sec + Buckets: prometheus.ExponentialBuckets(.01, 2, 15), + }) + + hashRevDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "etcd", + Subsystem: "mvcc", + Name: "hash_rev_duration_seconds", + Help: "The latency distribution of storage hash by revision operation.", + + // 100 MB usually takes 100 ms, so start with 10 MB of 10 ms + // lowest bucket start of upper bound 0.01 sec (10 ms) with factor 2 + // highest bucket start of 0.01 sec * 2^14 == 163.84 sec + Buckets: prometheus.ExponentialBuckets(.01, 2, 15), + }) ) func init() { @@ -189,6 +213,8 @@ func init() { prometheus.MustRegister(dbCompactionKeysCounter) prometheus.MustRegister(dbTotalSize) prometheus.MustRegister(dbTotalSizeInUse) + prometheus.MustRegister(hashDurations) + prometheus.MustRegister(hashRevDurations) } // ReportEventReceived reports that an event is received. From a1aade8c1b06263516c64eb16126f8d3ad4391cd Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 23 May 2018 13:09:31 -0700 Subject: [PATCH 8/8] etcdserver: rename to "heartbeat_send_failures_total" Signed-off-by: Gyuho Lee --- etcdserver/metrics.go | 8 ++++---- etcdserver/raft.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/etcdserver/metrics.go b/etcdserver/metrics.go index 6bdc0a7a659..b1f85e78ff6 100644 --- a/etcdserver/metrics.go +++ b/etcdserver/metrics.go @@ -42,11 +42,11 @@ var ( Name: "leader_changes_seen_total", Help: "The number of leader changes seen.", }) - heartbeatFailures = prometheus.NewCounter(prometheus.CounterOpts{ + heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "etcd", Subsystem: "server", - Name: "heartbeat_failures_total", - Help: "The total number of heartbeat send failures (likely overloaded from slow disk).", + Name: "heartbeat_send_failures_total", + Help: "The total number of leader heartbeat send failures (likely overloaded from slow disk).", }) slowApplies = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "etcd", @@ -97,7 +97,7 @@ func init() { prometheus.MustRegister(hasLeader) prometheus.MustRegister(isLeader) prometheus.MustRegister(leaderChanges) - prometheus.MustRegister(heartbeatFailures) + prometheus.MustRegister(heartbeatSendFailures) prometheus.MustRegister(slowApplies) prometheus.MustRegister(proposalsCommitted) prometheus.MustRegister(proposalsApplied) diff --git a/etcdserver/raft.go b/etcdserver/raft.go index 089985c75d0..d53bf2efd5b 100644 --- a/etcdserver/raft.go +++ b/etcdserver/raft.go @@ -368,7 +368,7 @@ func (r *raftNode) processMessages(ms []raftpb.Message) []raftpb.Message { // TODO: limit request rate. if r.lg != nil { r.lg.Warn( - "failed to send out heartbeat; took too long, server is overloaded likely from slow disk", + "leader failed to send out heartbeat on time; took too long, leader is overloaded likely from slow disk", zap.Duration("heartbeat-interval", r.heartbeat), zap.Duration("expected-duration", 2*r.heartbeat), zap.Duration("exceeded-duration", exceed), @@ -377,7 +377,7 @@ func (r *raftNode) processMessages(ms []raftpb.Message) []raftpb.Message { plog.Warningf("failed to send out heartbeat on time (exceeded the %v timeout for %v)", r.heartbeat, exceed) plog.Warningf("server is likely overloaded") } - heartbeatFailures.Inc() + heartbeatSendFailures.Inc() } } }