diff --git a/Documentation/op-guide/grafana.json b/Documentation/op-guide/grafana.json index 45bfc651362..c405fdfee53 100644 --- a/Documentation/op-guide/grafana.json +++ b/Documentation/op-guide/grafana.json @@ -341,7 +341,7 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", + "expr": "etcd_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", "hide": false, "interval": "", "intervalFactor": 2, diff --git a/Documentation/op-guide/maintenance.md b/Documentation/op-guide/maintenance.md index 294d8c7d61e..7e85a11cf43 100644 --- a/Documentation/op-guide/maintenance.md +++ b/Documentation/op-guide/maintenance.md @@ -149,7 +149,9 @@ $ ETCDCTL_API=3 etcdctl put newkey 123 OK ``` -The metric `etcd_debugging_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database size including free space waiting for defragmentation. The latter increases only when the former is close to it, meaning when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota. +The metric `etcd_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database size including free space waiting for defragmentation. The latter increases only when the former is close to it, meaning when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota. + +`etcd_debugging_mvcc_db_total_size_in_bytes` is renamed to `etcd_mvcc_db_total_size_in_bytes` from v3.4. 
## Snapshot backup diff --git a/Documentation/upgrades/upgrade_3_4.md b/Documentation/upgrades/upgrade_3_4.md index fc5b3fb292a..130f87ff05f 100644 --- a/Documentation/upgrades/upgrade_3_4.md +++ b/Documentation/upgrades/upgrade_3_4.md @@ -47,6 +47,19 @@ OK +etcd --peer-trusted-ca-file ca-peer.crt ``` +#### Promote `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics + +v3.4 promotes `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring. + +`etcd_debugging_mvcc_db_total_size_in_bytes` is still served in v3.4 for backward compatibility. It will be completely deprecated in v3.5. + +```diff +-etcd_debugging_mvcc_db_total_size_in_bytes ++etcd_mvcc_db_total_size_in_bytes +``` + +Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve monitoring guide, we will promote more metrics. + #### Deprecating `etcd --log-output` flag (now `--log-outputs`) Rename [`etcd --log-output` to `--log-outputs`](https://github.com/coreos/etcd/pull/9624) to support multiple log outputs. **`etcd --logger=capnslog` does not support multiple log outputs.** diff --git a/Documentation/upgrades/upgrade_3_5.md b/Documentation/upgrades/upgrade_3_5.md index 9f0d1eeebd8..6d9a70a15ba 100644 --- a/Documentation/upgrades/upgrade_3_5.md +++ b/Documentation/upgrades/upgrade_3_5.md @@ -14,6 +14,17 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this Highlighted breaking changes in 3.5. +#### Deprecate `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics + +v3.4 promoted `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring. And v3.5 completely deprecates `etcd_debugging_mvcc_db_total_size_in_bytes`. 
+ +```diff +-etcd_debugging_mvcc_db_total_size_in_bytes ++etcd_mvcc_db_total_size_in_bytes +``` + +Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve monitoring guide, we will promote more metrics. + #### Deprecated in `etcd --logger capnslog` v3.4 defaults to `--logger=zap` in order to support multiple log outputs and structured logging. diff --git a/integration/metrics_test.go b/integration/metrics_test.go index 3dccd220af1..ca50ef67e65 100644 --- a/integration/metrics_test.go +++ b/integration/metrics_test.go @@ -40,8 +40,16 @@ func TestMetricDbSizeBoot(t *testing.T) { } } -// TestMetricDbSizeDefrag checks that the db size metric is set after defrag. func TestMetricDbSizeDefrag(t *testing.T) { + testMetricDbSizeDefrag(t, "etcd") +} + +func TestMetricDbSizeDefragDebugging(t *testing.T) { + testMetricDbSizeDefrag(t, "etcd_debugging") +} + +// testMetricDbSizeDefrag checks that the db size metric is set after defrag. +func testMetricDbSizeDefrag(t *testing.T, name string) { defer testutil.AfterTest(t) clus := NewClusterV3(t, &ClusterConfig{Size: 1}) defer clus.Terminate(t) @@ -63,7 +71,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { time.Sleep(500 * time.Millisecond) expected := numPuts * len(putreq.Value) - beforeDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes") + beforeDefrag, err := clus.Members[0].Metric(name + "_mvcc_db_total_size_in_bytes") if err != nil { t.Fatal(err) } @@ -74,7 +82,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { if bv < expected { t.Fatalf("expected db size greater than %d, got %d", expected, bv) } - beforeDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + beforeDefragInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } @@ -98,7 +106,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { } time.Sleep(500 * time.Millisecond) - afterCompactionInUse, err := 
clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + afterCompactionInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } @@ -113,7 +121,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { // defrag should give freed space back to fs mc.Defragment(context.TODO(), &pb.DefragmentRequest{}) - afterDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes") + afterDefrag, err := clus.Members[0].Metric(name + "_mvcc_db_total_size_in_bytes") if err != nil { t.Fatal(err) } @@ -125,7 +133,7 @@ func TestMetricDbSizeDefrag(t *testing.T) { t.Fatalf("expected less than %d, got %d after defrag", bv, av) } - afterDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes") + afterDefragInUse, err := clus.Members[0].Metric("etcd_mvcc_db_total_size_in_use_in_bytes") if err != nil { t.Fatal(err) } diff --git a/mvcc/kvstore.go b/mvcc/kvstore.go index a445f6a4a1a..9c7f5c3ad85 100644 --- a/mvcc/kvstore.go +++ b/mvcc/kvstore.go @@ -323,6 +323,9 @@ func (s *store) restore() error { reportDbTotalSizeInBytesMu.Lock() reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) } reportDbTotalSizeInBytesMu.Unlock() + reportDbTotalSizeInBytesDebuggingMu.Lock() + reportDbTotalSizeInBytesDebugging = func() float64 { return float64(b.Size()) } + reportDbTotalSizeInBytesDebuggingMu.Unlock() reportDbTotalSizeInUseInBytesMu.Lock() reportDbTotalSizeInUseInBytes = func() float64 { return float64(b.SizeInUse()) } reportDbTotalSizeInUseInBytesMu.Unlock() diff --git a/mvcc/metrics.go b/mvcc/metrics.go index f80c70c8563..9163cc7c66d 100644 --- a/mvcc/metrics.go +++ b/mvcc/metrics.go @@ -146,7 +146,7 @@ var ( }) dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Namespace: "etcd_debugging", + Namespace: "etcd", Subsystem: "mvcc", Name: "db_total_size_in_bytes", Help: "Total size of the underlying database physically allocated in bytes.", @@ -159,11 
+159,28 @@ var ( ) // overridden by mvcc initialization reportDbTotalSizeInBytesMu sync.RWMutex - reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 } + reportDbTotalSizeInBytes = func() float64 { return 0 } - dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + // TODO: remove this in v3.5 + dbTotalSizeDebugging = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Namespace: "etcd_debugging", Subsystem: "mvcc", + Name: "db_total_size_in_bytes", + Help: "Total size of the underlying database physically allocated in bytes.", + }, + func() float64 { + reportDbTotalSizeInBytesDebuggingMu.RLock() + defer reportDbTotalSizeInBytesDebuggingMu.RUnlock() + return reportDbTotalSizeInBytesDebugging() + }, + ) + // overridden by mvcc initialization + reportDbTotalSizeInBytesDebuggingMu sync.RWMutex + reportDbTotalSizeInBytesDebugging = func() float64 { return 0 } + + dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "mvcc", Name: "db_total_size_in_use_in_bytes", Help: "Total size of the underlying database logically in use in bytes.", }, @@ -218,6 +235,7 @@ func init() { prometheus.MustRegister(dbCompactionTotalMs) prometheus.MustRegister(dbCompactionKeysCounter) prometheus.MustRegister(dbTotalSize) + prometheus.MustRegister(dbTotalSizeDebugging) prometheus.MustRegister(dbTotalSizeInUse) prometheus.MustRegister(hashSec) prometheus.MustRegister(hashRevSec)