From 6930d87aacdeb5eeb5e2d67733759b9bb2d4a7d5 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 16 Apr 2024 08:28:35 +0800 Subject: [PATCH] gc(ticdc): add min service gc safe point metrics (#10553) (#10751) ref pingcap/tiflow#10463 --- cdc/server/metrics.go | 2 + metrics/grafana/ticdc.json | 111 ++++++++++++++++++++++++++++++++++- pkg/txnutil/gc/gc_manager.go | 2 + pkg/txnutil/gc/metrics.go | 42 +++++++++++++ 4 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 pkg/txnutil/gc/metrics.go diff --git a/cdc/server/metrics.go b/cdc/server/metrics.go index a626c976bbb..1ca0a6e4a11 100644 --- a/cdc/server/metrics.go +++ b/cdc/server/metrics.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tiflow/pkg/orchestrator" "github.com/pingcap/tiflow/pkg/p2p" "github.com/pingcap/tiflow/pkg/sink/observer" + "github.com/pingcap/tiflow/pkg/txnutil/gc" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" tikvmetrics "github.com/tikv/client-go/v2/metrics" @@ -55,6 +56,7 @@ func init() { redo.InitMetrics(registry) scheduler.InitMetrics(registry) observer.InitMetrics(registry) + gc.InitMetrics(registry) // TiKV client metrics, including metrics about resolved and region cache. 
originalRegistry := prometheus.DefaultRegisterer prometheus.DefaultRegisterer = registry diff --git a/metrics/grafana/ticdc.json b/metrics/grafana/ticdc.json index 6b6b518bd02..72774065795 100644 --- a/metrics/grafana/ticdc.json +++ b/metrics/grafana/ticdc.json @@ -3192,7 +3192,7 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 6, "x": 0, "y": 3 }, @@ -3249,8 +3249,8 @@ "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, + "w": 6, + "x": 6, "y": 3 }, "hiddenSeries": false, @@ -3342,6 +3342,111 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "dateTimeAsIso" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 10037, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "max(ticdc_gc_min_service_gc_safepoint{})", + "interval": "", + "legendFormat": "gc time", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "max(ticdc_gc_cdc_gc_safepoint{})", + "hide": false, + "interval": "", + "legendFormat": "cdc service safepoint", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "dateTimeAsIso", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": true, diff --git a/pkg/txnutil/gc/gc_manager.go b/pkg/txnutil/gc/gc_manager.go index 6497e163aca..b9fb79fec89 100644 --- a/pkg/txnutil/gc/gc_manager.go +++ b/pkg/txnutil/gc/gc_manager.go @@ -102,6 +102,8 @@ func (m *gcManager) TryUpdateGCSafePoint( m.isTiCDCBlockGC = actual == checkpointTs m.lastSafePointTs = actual m.lastSucceededTime = time.Now() + minServiceGCSafePointGauge.Set(float64(oracle.ExtractPhysical(actual))) + cdcGCSafePointGauge.Set(float64(oracle.ExtractPhysical(checkpointTs))) return nil } diff --git a/pkg/txnutil/gc/metrics.go b/pkg/txnutil/gc/metrics.go new file mode 100644 index 00000000000..eb29c88a390 --- /dev/null +++ b/pkg/txnutil/gc/metrics.go @@ -0,0 +1,42 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package gc + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +var ( + minServiceGCSafePointGauge = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "ticdc", + Subsystem: "gc", + Name: "min_service_gc_safepoint", + Help: "The min value all of service GC safepoint", + }) + + cdcGCSafePointGauge = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "ticdc", + Subsystem: "gc", + Name: "cdc_gc_safepoint", + Help: "the value of CDC GC safepoint", + }) +) + +// InitMetrics registers the GC safepoint gauges used by the gc manager with the given registry. +func InitMetrics(registry *prometheus.Registry) { + registry.MustRegister(minServiceGCSafePointGauge) + registry.MustRegister(cdcGCSafePointGauge) +}