Skip to content

Commit

Permalink
Merge #104320
Browse files (browse the repository at this point in the history)
104320: kvserver: add metric for Raft leader removals r=erikgrinaker a=erikgrinaker

This patch adds the metric `range.raftleaderremovals` which counts the number of times a Raft leader was removed from a range via a config change. Other removals, such as range merges, are excluded.

Epic: none
Release note: None

Co-authored-by: Erik Grinaker <[email protected]>
  • Loading branch information
craig[bot] and erikgrinaker committed Jun 15, 2023
2 parents 3c55b81 + 71ca22d commit 13b9ad0
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
8 changes: 8 additions & 0 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,12 @@ var (
Measurement: "Leader Transfers",
Unit: metric.Unit_COUNT,
}
// metaRangeRaftLeaderRemovals describes the range.raftleaderremovals counter,
// which counts the number of times a Raft leader was removed from a range via
// a config change. Other removals, such as range merges, are excluded.
metaRangeRaftLeaderRemovals = metric.Metadata{
Name: "range.raftleaderremovals",
Help: "Number of times the current Raft leader was removed from a range",
Measurement: "Raft leader removals",
Unit: metric.Unit_COUNT,
}
metaRangeLossOfQuorumRecoveries = metric.Metadata{
Name: "range.recoveries",
Help: `Count of offline loss of quorum recovery operations performed on ranges.
Expand Down Expand Up @@ -2199,6 +2205,7 @@ type StoreMetrics struct {
RangeAdds *metric.Counter
RangeRemoves *metric.Counter
RangeRaftLeaderTransfers *metric.Counter
RangeRaftLeaderRemovals *metric.Counter
RangeLossOfQuorumRecoveries *metric.Counter

// Range snapshot metrics.
Expand Down Expand Up @@ -2855,6 +2862,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RangeSnapshotSendQueueSize: metric.NewGauge(metaRangeSnapshotSendQueueSize),
RangeSnapshotRecvQueueSize: metric.NewGauge(metaRangeSnapshotRecvQueueSize),
RangeRaftLeaderTransfers: metric.NewCounter(metaRangeRaftLeaderTransfers),
RangeRaftLeaderRemovals: metric.NewCounter(metaRangeRaftLeaderRemovals),
RangeLossOfQuorumRecoveries: metric.NewCounter(metaRangeLossOfQuorumRecoveries),
DelegateSnapshotSendBytes: metric.NewCounter(metaDelegateSnapshotSendBytes),
DelegateSnapshotSuccesses: metric.NewCounter(metaDelegateSnapshotSuccesses),
Expand Down
7 changes: 7 additions & 0 deletions pkg/kv/kvserver/replica_application_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/errors"
"go.etcd.io/raft/v3"
)

// replica_application_*.go files provide concrete implementations of
Expand Down Expand Up @@ -422,6 +423,12 @@ func (r *Replica) handleChangeReplicasResult(
log.Infof(ctx, "removing replica due to ChangeReplicasTrigger: %v", chng)
}

// This is currently executed before the conf change is applied to the Raft
// node, so we still see ourselves as the leader.
if r.raftBasicStatusRLocked().RaftState == raft.StateLeader {
r.store.metrics.RangeRaftLeaderRemovals.Inc(1)
}

if _, err := r.store.removeInitializedReplicaRaftMuLocked(ctx, r, chng.NextReplicaID(), RemoveOptions{
// We destroyed the data when the batch committed so don't destroy it again.
DestroyData: false,
Expand Down

0 comments on commit 13b9ad0

Please sign in to comment.