From cb74ec306a16c3f39c6f4c692aacb2401953ce15 Mon Sep 17 00:00:00 2001 From: lysu Date: Mon, 12 Aug 2019 11:39:55 +0800 Subject: [PATCH 1/2] metric, tikv: record duration for each backoff type --- metrics/metrics.go | 2 +- metrics/tikvclient.go | 4 ++-- store/tikv/backoff.go | 49 ++++++++++++++++++++++++--------------- store/tikv/coprocessor.go | 3 --- 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index bc1291fb30805..886438b730e2e 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -110,7 +110,7 @@ func RegisterMetrics() { prometheus.MustRegister(DbStmtNodeCounter) prometheus.MustRegister(StoreQueryFeedbackCounter) prometheus.MustRegister(TiKVBackoffCounter) - prometheus.MustRegister(TiKVBackoffHistogram) + prometheus.MustRegister(TiKVBackoffDuration) prometheus.MustRegister(TiKVCoprocessorHistogram) prometheus.MustRegister(TiKVLoadSafepointCounter) prometheus.MustRegister(TiKVLockResolverCounter) diff --git a/metrics/tikvclient.go b/metrics/tikvclient.go index c52c8a33e0dde..b803c34929542 100644 --- a/metrics/tikvclient.go +++ b/metrics/tikvclient.go @@ -58,14 +58,14 @@ var ( Help: "Counter of backoff.", }, []string{LblType}) - TiKVBackoffHistogram = prometheus.NewHistogram( + TiKVBackoffDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: "tidb", Subsystem: "tikvclient", Name: "backoff_seconds", Help: "total backoff seconds of a single backoffer.", Buckets: prometheus.ExponentialBuckets(0.0005, 2, 20), // 0.5ms ~ 524s - }) + }, []string{LblType}) TiKVSendReqHistogram = prometheus.NewHistogramVec( prometheus.HistogramOpts{ diff --git a/store/tikv/backoff.go b/store/tikv/backoff.go index 4f6ae1be22515..46e2a6aa31882 100644 --- a/store/tikv/backoff.go +++ b/store/tikv/backoff.go @@ -45,34 +45,42 @@ const ( ) var ( - tikvBackoffCounterRPC = metrics.TiKVBackoffCounter.WithLabelValues("tikvRPC") - tikvBackoffCounterLock = metrics.TiKVBackoffCounter.WithLabelValues("txnLock") - 
tikvBackoffCounterLockFast = metrics.TiKVBackoffCounter.WithLabelValues("tikvLockFast") - tikvBackoffCounterPD = metrics.TiKVBackoffCounter.WithLabelValues("pdRPC") - tikvBackoffCounterRegionMiss = metrics.TiKVBackoffCounter.WithLabelValues("regionMiss") - tikvBackoffCounterUpdateLeader = metrics.TiKVBackoffCounter.WithLabelValues("updateLeader") - tikvBackoffCounterServerBusy = metrics.TiKVBackoffCounter.WithLabelValues("serverBusy") - tikvBackoffCounterEmpty = metrics.TiKVBackoffCounter.WithLabelValues("") + tikvBackoffCounterRPC = metrics.TiKVBackoffCounter.WithLabelValues("tikvRPC") + tikvBackoffCounterLock = metrics.TiKVBackoffCounter.WithLabelValues("txnLock") + tikvBackoffCounterLockFast = metrics.TiKVBackoffCounter.WithLabelValues("tikvLockFast") + tikvBackoffCounterPD = metrics.TiKVBackoffCounter.WithLabelValues("pdRPC") + tikvBackoffCounterRegionMiss = metrics.TiKVBackoffCounter.WithLabelValues("regionMiss") + tikvBackoffCounterUpdateLeader = metrics.TiKVBackoffCounter.WithLabelValues("updateLeader") + tikvBackoffCounterServerBusy = metrics.TiKVBackoffCounter.WithLabelValues("serverBusy") + tikvBackoffCounterEmpty = metrics.TiKVBackoffCounter.WithLabelValues("") + tikvBackoffDurationRPC = metrics.TiKVBackoffDuration.WithLabelValues("tikvRPC") + tikvBackoffDurationLock = metrics.TiKVBackoffDuration.WithLabelValues("txnLock") + tikvBackoffDurationLockFast = metrics.TiKVBackoffDuration.WithLabelValues("tikvLockFast") + tikvBackoffDurationPD = metrics.TiKVBackoffDuration.WithLabelValues("pdRPC") + tikvBackoffDurationRegionMiss = metrics.TiKVBackoffDuration.WithLabelValues("regionMiss") + tikvBackoffDurationUpdateLeader = metrics.TiKVBackoffDuration.WithLabelValues("updateLeader") + tikvBackoffDurationServerBusy = metrics.TiKVBackoffDuration.WithLabelValues("serverBusy") + tikvBackoffDurationEmpty = metrics.TiKVBackoffDuration.WithLabelValues("") ) -func (t backoffType) Counter() prometheus.Counter { +func (t backoffType) metric() (prometheus.Counter, 
prometheus.Observer) { switch t { case boTiKVRPC: - return tikvBackoffCounterRPC + return tikvBackoffCounterRPC, tikvBackoffDurationRPC case BoTxnLock: - return tikvBackoffCounterLock + return tikvBackoffCounterLock, tikvBackoffDurationLock case boTxnLockFast: - return tikvBackoffCounterLockFast + return tikvBackoffCounterLockFast, tikvBackoffDurationLockFast case BoPDRPC: - return tikvBackoffCounterPD + return tikvBackoffCounterPD, tikvBackoffDurationPD case BoRegionMiss: - return tikvBackoffCounterRegionMiss + return tikvBackoffCounterRegionMiss, tikvBackoffDurationRegionMiss case BoUpdateLeader: - return tikvBackoffCounterUpdateLeader + return tikvBackoffCounterUpdateLeader, tikvBackoffDurationUpdateLeader case boServerBusy: - return tikvBackoffCounterServerBusy + return tikvBackoffCounterServerBusy, tikvBackoffDurationServerBusy } - return tikvBackoffCounterEmpty + return tikvBackoffCounterEmpty, tikvBackoffDurationEmpty } // NewBackoffFn creates a backoff func which implements exponential backoff with @@ -276,7 +284,8 @@ func (b *Backoffer) BackoffWithMaxSleep(typ backoffType, maxSleepMs int, err err default: } - typ.Counter().Inc() + counter, timer := typ.metric() + counter.Inc() // Lazy initialize. 
if b.fn == nil { b.fn = make(map[backoffType]func(context.Context, int) int) @@ -287,7 +296,9 @@ func (b *Backoffer) BackoffWithMaxSleep(typ backoffType, maxSleepMs int, err err b.fn[typ] = f } - b.totalSleep += f(b.ctx, maxSleepMs) + realSleep := f(b.ctx, maxSleepMs) + timer.Observe(float64(realSleep) / 1000) + b.totalSleep += realSleep b.types = append(b.types, typ) var startTs interface{} diff --git a/store/tikv/coprocessor.go b/store/tikv/coprocessor.go index 168cf1b6ce3d3..712de3f1b7b30 100644 --- a/store/tikv/coprocessor.go +++ b/store/tikv/coprocessor.go @@ -473,9 +473,6 @@ func (worker *copIteratorWorker) run(ctx context.Context) { bo := NewBackoffer(ctx, copNextMaxBackoff).WithVars(worker.vars) worker.handleTask(bo, task, respCh) - if bo.totalSleep > 0 { - metrics.TiKVBackoffHistogram.Observe(float64(bo.totalSleep) / 1000) - } close(task.respChan) select { case <-worker.finishCh: From 2e75b58e86bd8ab7b2f8eca518bc25a3fd47e577 Mon Sep 17 00:00:00 2001 From: lysu Date: Tue, 13 Aug 2019 12:39:45 +0800 Subject: [PATCH 2/2] address comment --- metrics/metrics.go | 2 +- metrics/tikvclient.go | 2 +- store/tikv/backoff.go | 54 +++++++++++++++++++++---------------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 886438b730e2e..bc1291fb30805 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -110,7 +110,7 @@ func RegisterMetrics() { prometheus.MustRegister(DbStmtNodeCounter) prometheus.MustRegister(StoreQueryFeedbackCounter) prometheus.MustRegister(TiKVBackoffCounter) - prometheus.MustRegister(TiKVBackoffDuration) + prometheus.MustRegister(TiKVBackoffHistogram) prometheus.MustRegister(TiKVCoprocessorHistogram) prometheus.MustRegister(TiKVLoadSafepointCounter) prometheus.MustRegister(TiKVLockResolverCounter) diff --git a/metrics/tikvclient.go b/metrics/tikvclient.go index b803c34929542..f4cb704f7d38a 100644 --- a/metrics/tikvclient.go +++ b/metrics/tikvclient.go @@ -58,7 +58,7 @@ var ( Help: 
"Counter of backoff.", }, []string{LblType}) - TiKVBackoffDuration = prometheus.NewHistogramVec( + TiKVBackoffHistogram = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: "tidb", Subsystem: "tikvclient", diff --git a/store/tikv/backoff.go b/store/tikv/backoff.go index 46e2a6aa31882..5319e66a770b9 100644 --- a/store/tikv/backoff.go +++ b/store/tikv/backoff.go @@ -45,42 +45,42 @@ const ( ) var ( - tikvBackoffCounterRPC = metrics.TiKVBackoffCounter.WithLabelValues("tikvRPC") - tikvBackoffCounterLock = metrics.TiKVBackoffCounter.WithLabelValues("txnLock") - tikvBackoffCounterLockFast = metrics.TiKVBackoffCounter.WithLabelValues("tikvLockFast") - tikvBackoffCounterPD = metrics.TiKVBackoffCounter.WithLabelValues("pdRPC") - tikvBackoffCounterRegionMiss = metrics.TiKVBackoffCounter.WithLabelValues("regionMiss") - tikvBackoffCounterUpdateLeader = metrics.TiKVBackoffCounter.WithLabelValues("updateLeader") - tikvBackoffCounterServerBusy = metrics.TiKVBackoffCounter.WithLabelValues("serverBusy") - tikvBackoffCounterEmpty = metrics.TiKVBackoffCounter.WithLabelValues("") - tikvBackoffDurationRPC = metrics.TiKVBackoffDuration.WithLabelValues("tikvRPC") - tikvBackoffDurationLock = metrics.TiKVBackoffDuration.WithLabelValues("txnLock") - tikvBackoffDurationLockFast = metrics.TiKVBackoffDuration.WithLabelValues("tikvLockFast") - tikvBackoffDurationPD = metrics.TiKVBackoffDuration.WithLabelValues("pdRPC") - tikvBackoffDurationRegionMiss = metrics.TiKVBackoffDuration.WithLabelValues("regionMiss") - tikvBackoffDurationUpdateLeader = metrics.TiKVBackoffDuration.WithLabelValues("updateLeader") - tikvBackoffDurationServerBusy = metrics.TiKVBackoffDuration.WithLabelValues("serverBusy") - tikvBackoffDurationEmpty = metrics.TiKVBackoffDuration.WithLabelValues("") + tikvBackoffCounterRPC = metrics.TiKVBackoffCounter.WithLabelValues("tikvRPC") + tikvBackoffCounterLock = metrics.TiKVBackoffCounter.WithLabelValues("txnLock") + tikvBackoffCounterLockFast = 
metrics.TiKVBackoffCounter.WithLabelValues("tikvLockFast") + tikvBackoffCounterPD = metrics.TiKVBackoffCounter.WithLabelValues("pdRPC") + tikvBackoffCounterRegionMiss = metrics.TiKVBackoffCounter.WithLabelValues("regionMiss") + tikvBackoffCounterUpdateLeader = metrics.TiKVBackoffCounter.WithLabelValues("updateLeader") + tikvBackoffCounterServerBusy = metrics.TiKVBackoffCounter.WithLabelValues("serverBusy") + tikvBackoffCounterEmpty = metrics.TiKVBackoffCounter.WithLabelValues("") + tikvBackoffHistogramRPC = metrics.TiKVBackoffHistogram.WithLabelValues("tikvRPC") + tikvBackoffHistogramLock = metrics.TiKVBackoffHistogram.WithLabelValues("txnLock") + tikvBackoffHistogramLockFast = metrics.TiKVBackoffHistogram.WithLabelValues("tikvLockFast") + tikvBackoffHistogramPD = metrics.TiKVBackoffHistogram.WithLabelValues("pdRPC") + tikvBackoffHistogramRegionMiss = metrics.TiKVBackoffHistogram.WithLabelValues("regionMiss") + tikvBackoffHistogramUpdateLeader = metrics.TiKVBackoffHistogram.WithLabelValues("updateLeader") + tikvBackoffHistogramServerBusy = metrics.TiKVBackoffHistogram.WithLabelValues("serverBusy") + tikvBackoffHistogramEmpty = metrics.TiKVBackoffHistogram.WithLabelValues("") ) func (t backoffType) metric() (prometheus.Counter, prometheus.Observer) { switch t { case boTiKVRPC: - return tikvBackoffCounterRPC, tikvBackoffDurationRPC + return tikvBackoffCounterRPC, tikvBackoffHistogramRPC case BoTxnLock: - return tikvBackoffCounterLock, tikvBackoffDurationLock + return tikvBackoffCounterLock, tikvBackoffHistogramLock case boTxnLockFast: - return tikvBackoffCounterLockFast, tikvBackoffDurationLockFast + return tikvBackoffCounterLockFast, tikvBackoffHistogramLockFast case BoPDRPC: - return tikvBackoffCounterPD, tikvBackoffDurationPD + return tikvBackoffCounterPD, tikvBackoffHistogramPD case BoRegionMiss: - return tikvBackoffCounterRegionMiss, tikvBackoffDurationRegionMiss + return tikvBackoffCounterRegionMiss, tikvBackoffHistogramRegionMiss case BoUpdateLeader: - return 
tikvBackoffCounterUpdateLeader, tikvBackoffDurationUpdateLeader + return tikvBackoffCounterUpdateLeader, tikvBackoffHistogramUpdateLeader case boServerBusy: - return tikvBackoffCounterServerBusy, tikvBackoffDurationServerBusy + return tikvBackoffCounterServerBusy, tikvBackoffHistogramServerBusy } - return tikvBackoffCounterEmpty, tikvBackoffDurationEmpty + return tikvBackoffCounterEmpty, tikvBackoffHistogramEmpty } // NewBackoffFn creates a backoff func which implements exponential backoff with @@ -284,8 +284,8 @@ func (b *Backoffer) BackoffWithMaxSleep(typ backoffType, maxSleepMs int, err err default: } - counter, timer := typ.metric() - counter.Inc() + backoffCounter, backoffDuration := typ.metric() + backoffCounter.Inc() // Lazy initialize. if b.fn == nil { b.fn = make(map[backoffType]func(context.Context, int) int) @@ -297,7 +297,7 @@ func (b *Backoffer) BackoffWithMaxSleep(typ backoffType, maxSleepMs int, err err } realSleep := f(b.ctx, maxSleepMs) - timer.Observe(float64(realSleep) / 1000) + backoffDuration.Observe(float64(realSleep) / 1000) b.totalSleep += realSleep b.types = append(b.types, typ)