Skip to content

Commit

Permalink
storage: add replicated locks to MVCCStats
Browse files Browse the repository at this point in the history
Fixes cockroachdb#109645.
Informs cockroachdb#100193.

This commit adds `MVCCStats` for replicated locks.

To do so, it first adds a new field to the stats struct, `LockBytes`.
`LockBytes` is the encoded size of replicated locks with shared or
exclusive strengths, which are stored in the lock table keyspace. The
field includes the size of the locks' keys and their values.

For historical reasons, the field excludes the size of intent metadata
key-values, even though they are also stored in the lock table keyspace.
Intent metadata keys are tracked under KeyBytes and their values are
tracked under ValBytes. This is not to be confused with the provisional
versioned values protected by the intents, which are tracked by the
IntentBytes field (and also by KeyBytes and ValBytes). Hence the vague
"without their meta keys" wording in the existing IntentBytes comment.

The patch then begins accounting for the contributions of replicated
locks to `LockBytes`, `LockCount`, and `LockAge`; the latter two fields
already exist. This accounting is straightforward.

The less straightforward part of the patch is MVCC stats computation.
Scanning the lock table requires the use of an EngineIterator. To this
point, all stats computation has taken place on an MVCCIterator. The
patch addresses this by directly scanning the lock table with an
EngineIterator (wrapped in a LockTableIterator) during stats
computation.

Release note: None
  • Loading branch information
nvanbenschoten authored and Thomas Hardy committed Oct 4, 2023
1 parent 3e30216 commit c44ee5b
Show file tree
Hide file tree
Showing 19 changed files with 525 additions and 109 deletions.
3 changes: 2 additions & 1 deletion pkg/cmd/roachtest/tests/mvcc_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,8 @@ func checkRangesConsistentAndHaveNoData(totals enginepb.MVCCStats, details range
return errors.Errorf("table ranges contain garbage %s", totals.String())
}
if totals.LiveBytes > 0 || totals.LiveCount > 0 ||
totals.IntentBytes > 0 || totals.IntentCount > 0 || totals.LockCount > 0 {
totals.IntentBytes > 0 || totals.IntentCount > 0 ||
totals.LockBytes > 0 || totals.LockCount > 0 {
return errors.Errorf("table ranges contain live data %s", totals.String())
}
if details.status != kvpb.CheckConsistencyResponse_RANGE_CONSISTENT.String() {
Expand Down
2 changes: 2 additions & 0 deletions pkg/kv/kvserver/kvserverpb/state.proto
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ message RangeAppliedState {
// state simply because we have introduced this field.
uint64 raft_applied_index_term = 5 [(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/kv/kvpb.RaftTerm"];
}

// MVCCPersistentStats is convertible to MVCCStats, but uses signed variable
// width encodings for most fields that make it efficient to store positive
// values but inefficient to store negative values. This makes the encodings
Expand All @@ -265,6 +266,7 @@ message MVCCPersistentStats {
int64 val_count = 9;
int64 intent_bytes = 10;
int64 intent_count = 11;
int64 lock_bytes = 21;
int64 lock_count = 16;
int64 range_key_count = 17;
int64 range_key_bytes = 18;
Expand Down
10 changes: 4 additions & 6 deletions pkg/kv/kvserver/rditer/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
func ComputeStatsForRange(
d *roachpb.RangeDescriptor, reader storage.Reader, nowNanos int64,
) (enginepb.MVCCStats, error) {
return ComputeStatsForRangeWithVisitors(d, reader, nowNanos, nil, nil)
return ComputeStatsForRangeWithVisitors(d, reader, nowNanos, storage.ComputeStatsVisitors{})
}

// ComputeStatsForRangeWithVisitors is like ComputeStatsForRange but also
Expand All @@ -30,13 +30,11 @@ func ComputeStatsForRangeWithVisitors(
d *roachpb.RangeDescriptor,
reader storage.Reader,
nowNanos int64,
pointKeyVisitor func(storage.MVCCKey, []byte) error,
rangeKeyVisitor func(storage.MVCCRangeKeyValue) error,
visitors storage.ComputeStatsVisitors,
) (enginepb.MVCCStats, error) {
var ms enginepb.MVCCStats
for _, keySpan := range makeReplicatedKeySpansExceptLockTable(d) {
msDelta, err := storage.ComputeStatsWithVisitors(reader, keySpan.Key, keySpan.EndKey, nowNanos,
pointKeyVisitor, rangeKeyVisitor)
for _, keySpan := range MakeReplicatedKeySpans(d) {
msDelta, err := storage.ComputeStatsWithVisitors(reader, keySpan.Key, keySpan.EndKey, nowNanos, visitors)
if err != nil {
return enginepb.MVCCStats{}, err
}
Expand Down
15 changes: 11 additions & 4 deletions pkg/kv/kvserver/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,9 @@ func CalcReplicaDigest(
return limiter.WaitN(ctx, tokens)
}

pointKeyVisitor := func(unsafeKey storage.MVCCKey, unsafeValue []byte) error {
var visitors storage.ComputeStatsVisitors

visitors.PointKey = func(unsafeKey storage.MVCCKey, unsafeValue []byte) error {
// Rate limit the scan through the range.
if err := wait(int64(len(unsafeKey.Key) + len(unsafeValue))); err != nil {
return err
Expand Down Expand Up @@ -549,7 +551,7 @@ func CalcReplicaDigest(
return err
}

rangeKeyVisitor := func(rangeKV storage.MVCCRangeKeyValue) error {
visitors.RangeKey = func(rangeKV storage.MVCCRangeKeyValue) error {
// Rate limit the scan through the range.
err := wait(
int64(len(rangeKV.RangeKey.StartKey) + len(rangeKV.RangeKey.EndKey) + len(rangeKV.Value)))
Expand Down Expand Up @@ -591,12 +593,17 @@ func CalcReplicaDigest(
return err
}

visitors.LockTableKey = func(unsafeKey storage.LockTableKey, unsafeValue []byte) error {
// TODO(nvanbenschoten): rate limit scan through lock table and add to
// checksum to be included in consistency checks.
return nil
}

// In statsOnly mode, we hash only the RangeAppliedState. In regular mode, hash
// all of the replicated key space.
var result ReplicaDigest
if !statsOnly {
ms, err := rditer.ComputeStatsForRangeWithVisitors(&desc, snap, 0, /* nowNanos */
pointKeyVisitor, rangeKeyVisitor)
ms, err := rditer.ComputeStatsForRangeWithVisitors(&desc, snap, 0 /* nowNanos */, visitors)
// Consume the remaining quota borrowed in the visitors. Do it even on
// iteration error, but prioritize returning the latter if it occurs.
if wErr := limiter.WaitN(ctx, batchSize); wErr != nil && err == nil {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
echo
----
14062697193383087404
2796048770313977431
3 changes: 2 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/builtin_function
Original file line number Diff line number Diff line change
Expand Up @@ -4005,7 +4005,8 @@ FROM crdb_internal.check_consistency(true, crdb_internal.tenant_span()[1], crdb_
ORDER BY range_id
LIMIT 1
----
RANGE_CONSISTENT stats: {ContainsEstimates: LastUpdateNanos: LockAge: GCBytesAge: LiveBytes: LiveCount: KeyBytes: KeyCount: ValBytes: ValCount: IntentBytes: IntentCount: LockCount: RangeKeyCount: RangeKeyBytes: RangeValCount: RangeValBytes: SysBytes: SysCount: AbortSpanBytes:}
RANGE_CONSISTENT stats: {ContainsEstimates: LastUpdateNanos: LockAge: GCBytesAge: LiveBytes: LiveCount: KeyBytes: KeyCount: ValBytes: ValCount: IntentBytes: IntentCount: LockBytes: LockCount: RangeKeyCount: RangeKeyBytes: RangeValCount: RangeValBytes: SysBytes: SysCount: AbortSpanBytes:}



# Fill a table with consistency check results. This used to panic.
Expand Down
5 changes: 5 additions & 0 deletions pkg/storage/engine_key.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,11 @@ func (lk LockTableKey) ToEngineKey(buf []byte) (EngineKey, []byte) {
return k, buf
}

// EncodedSize reports how many bytes the LockTableKey occupies once encoded.
// The result is the length of lk.Key plus engineKeyVersionLockTableLen
// (presumably the fixed overhead of the lock table version suffix; confirm
// against the constant's definition).
func (lk LockTableKey) EncodedSize() int64 {
	keyLen := int64(len(lk.Key))
	return keyLen + engineKeyVersionLockTableLen
}

// EngineRangeKeyValue is a raw value for a general range key as stored in the
// engine. It consists of a version (suffix) and corresponding value. The range
// key bounds are not included, but are surfaced via EngineRangeBounds().
Expand Down
2 changes: 2 additions & 0 deletions pkg/storage/enginepb/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ func (ms *MVCCStats) Add(oms MVCCStats) {
ms.KeyCount += oms.KeyCount
ms.ValCount += oms.ValCount
ms.IntentCount += oms.IntentCount
ms.LockBytes += oms.LockBytes
ms.LockCount += oms.LockCount
ms.RangeKeyCount += oms.RangeKeyCount
ms.RangeKeyBytes += oms.RangeKeyBytes
Expand Down Expand Up @@ -190,6 +191,7 @@ func (ms *MVCCStats) Subtract(oms MVCCStats) {
ms.KeyCount -= oms.KeyCount
ms.ValCount -= oms.ValCount
ms.IntentCount -= oms.IntentCount
ms.LockBytes -= oms.LockBytes
ms.LockCount -= oms.LockCount
ms.RangeKeyCount -= oms.RangeKeyCount
ms.RangeKeyBytes -= oms.RangeKeyBytes
Expand Down
13 changes: 13 additions & 0 deletions pkg/storage/enginepb/mvcc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,19 @@ message MVCCStats {
// It is equal to the number of meta keys in the system with
// a non-empty Transaction proto.
optional sfixed64 intent_count = 11 [(gogoproto.nullable) = false];
// lock_bytes is the encoded size of replicated locks with shared or
// exclusive strengths, which are stored in the lock table keyspace.
// The field includes the size of the locks' keys and their values.
//
// For historical reasons, the field excludes the size of intent
// metadata key-values, even though they are also stored in the lock
// table keyspace. Intent metadata keys are tracked under key_bytes
// and their values are tracked under val_bytes. This is not to be
// confused with the provisional versioned values protected by the
// intents, which are tracked by the intent_bytes field (and also by
// key_bytes and val_bytes). Hence the vague "without their meta keys"
// comment above.
optional sfixed64 lock_bytes = 21 [(gogoproto.nullable) = false];
// lock_count is the number of replicated locks (shared, exclusive, or
// intent strength) that are in the lock table. It is >= intent_count.
optional sfixed64 lock_count = 16 [(gogoproto.nullable) = false];
Expand Down
1 change: 1 addition & 0 deletions pkg/storage/enginepb/mvcc3.proto
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ message MVCCStatsDelta {
sint64 val_count = 9;
sint64 intent_bytes = 10;
sint64 intent_count = 11;
sint64 lock_bytes = 21;
sint64 lock_count = 16;
sint64 range_key_count = 17;
sint64 range_key_bytes = 18;
Expand Down
10 changes: 10 additions & 0 deletions pkg/storage/lock_table_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,16 @@ func (i *LockTableIterator) UnsafeRawEngineKey() []byte {
return i.iter.UnsafeRawEngineKey()
}

// UnsafeLockTableKey decodes the iterator's current position into a
// LockTableKey. It first fetches the unsafe engine key from the wrapped
// iterator and then converts it with ToLockTableKey. If the engine key cannot
// be retrieved, a zero LockTableKey is returned together with the wrapped
// error.
//
// NOTE(review): as the "Unsafe" prefix suggests, the returned key presumably
// aliases iterator-owned memory and is only valid until the iterator is
// repositioned; confirm against UnsafeEngineKey's contract.
//
// TODO(nvanbenschoten): use this more widely.
func (i *LockTableIterator) UnsafeLockTableKey() (LockTableKey, error) {
	engineKey, err := i.iter.UnsafeEngineKey()
	if err != nil {
		var zero LockTableKey
		return zero, errors.Wrap(err, "retrieving lock table key")
	}
	return engineKey.ToLockTableKey()
}

// LockTableKeyVersion returns the strength and txn ID from the version of the
// current key.
func (i *LockTableIterator) LockTableKeyVersion() (lock.Strength, uuid.UUID, error) {
Expand Down
Loading

0 comments on commit c44ee5b

Please sign in to comment.