From 7f61dfd1484469cece2ad70ff83f8f81f39976e9 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 3 Aug 2022 20:36:46 +0000 Subject: [PATCH] storage: remove key comparison in MVCC stats computations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch restructures MVCC stats computations to use an MVCC iterator with appropriate bounds. This allows omitting a key comparison in the hot path, yielding a ~10% performance improvement. The new stats functions are: * `ComputeStats`: calculates stats for a `Reader` key span. * `ComputeStatsWithVisitors`: calculates stats for a `Reader` key span, calling the given visitor callbacks for every point/range key. * `ComputeStatsForIter`: calculates stats for a given `MVCCIterator`, by scanning it until exhausted. It also removes the `MVCCIterator.ComputeStats` method, which had no business being part of the interface. ``` name old time/op new time/op delta MVCCComputeStats_Pebble/valueSize=64-24 130ms ± 1% 119ms ± 1% -8.77% (p=0.000 n=10+10) name old speed new speed delta MVCCComputeStats_Pebble/valueSize=64-24 516MB/s ± 1% 565MB/s ± 1% +9.61% (p=0.000 n=10+10) ``` Release note: None --- pkg/kv/bulk/sst_batcher.go | 18 ++++- pkg/kv/kvserver/batcheval/cmd_add_sstable.go | 7 +- pkg/kv/kvserver/batcheval/cmd_clear_range.go | 8 +- .../batcheval/cmd_delete_range_test.go | 9 +-- .../kvserver/batcheval/cmd_end_transaction.go | 9 +-- .../batcheval/cmd_revert_range_test.go | 20 ++--- pkg/kv/kvserver/batcheval/cmd_truncate_log.go | 8 +- pkg/kv/kvserver/consistency_queue_test.go | 5 +- pkg/kv/kvserver/rditer/stats.go | 37 +++++---- pkg/kv/kvserver/replica_consistency.go | 24 ++---- pkg/kv/kvserver/replica_raft.go | 8 +- pkg/kv/kvserver/spanset/BUILD.bazel | 1 - pkg/kv/kvserver/spanset/batch.go | 17 ---- pkg/storage/bench_test.go | 26 ++----- pkg/storage/engine.go | 17 ---- pkg/storage/intent_interleaving_iter.go | 7 -- pkg/storage/mvcc.go | 77 ++++++++++++------- pkg/storage/mvcc_history_test.go | 17 ++-- pkg/storage/mvcc_incremental_iterator_test.go | 11 +-- pkg/storage/mvcc_stats_test.go | 62 +++++++-------- pkg/storage/mvcc_test.go | 50 +++--------- pkg/storage/pebble_iterator.go | 8 -- pkg/storage/point_synthesizing_iter.go | 8 -- pkg/testutils/storageutils/stats.go | 12 +-- 24 files changed, 161 insertions(+), 305 deletions(-) diff --git a/pkg/kv/bulk/sst_batcher.go b/pkg/kv/bulk/sst_batcher.go index 5bd31c8c78c5..2db090484dd7 100644 --- a/pkg/kv/bulk/sst_batcher.go +++ b/pkg/kv/bulk/sst_batcher.go @@ -661,7 +661,8 @@ func (b *SSTBatcher) addSSTable( defer iter.Close() if (stats == enginepb.MVCCStats{}) { - stats, err = storage.ComputeStatsForRange(iter, start, end, sendStart.UnixNano()) + iter.SeekGE(storage.MVCCKey{Key: start}) + stats, err = storage.ComputeStatsForIter(iter, sendStart.UnixNano()) if err != nil { return errors.Wrapf(err, "computing stats for SST [%s, %s)", start, end) } @@ -779,9 +780,18 @@ func (b *SSTBatcher) addSSTable( return err } - right.stats, err = storage.ComputeStatsForRange( - iter, right.start, right.end, sendStart.Unix(), - ) + // Needs a new iterator with new bounds. + statsIter, err := storage.NewPebbleMemSSTIterator(sstBytes, true, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsOnly, + LowerBound: right.start, + UpperBound: right.end, + }) + if err != nil { + return err + } + statsIter.SeekGE(storage.MVCCKey{Key: right.start}) + right.stats, err = storage.ComputeStatsForIter(statsIter, sendStart.Unix()) + statsIter.Close() if err != nil { return err } diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index e3d78d5a5a5b..3fd05266cead 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -228,6 +228,7 @@ func EvalAddSSTable( // compute the expected MVCC stats delta of ingesting the SST. sstIter, err := storage.NewPebbleMemSSTIterator(sst, true /* verify */, storage.IterOptions{ KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: keys.MinKey, UpperBound: keys.MaxKey, }) if err != nil { @@ -252,7 +253,7 @@ func EvalAddSSTable( stats = *args.MVCCStats } else { log.VEventf(ctx, 2, "computing MVCCStats for SSTable [%s,%s)", start.Key, end.Key) - stats, err = storage.ComputeStatsForRange(sstIter, start.Key, end.Key, h.Timestamp.WallTime) + stats, err = storage.ComputeStatsForIter(sstIter, h.Timestamp.WallTime) if err != nil { return result.Result{}, errors.Wrap(err, "computing SSTable MVCC stats") } @@ -569,10 +570,10 @@ func assertSSTContents(sst []byte, sstTimestamp hlc.Timestamp, stats *enginepb.M return err } defer iter.Close() + iter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) given := *stats - actual, err := storage.ComputeStatsForRange( - iter, keys.MinKey, keys.MaxKey, given.LastUpdateNanos) + actual, err := storage.ComputeStatsForIter(iter, given.LastUpdateNanos) if err != nil { return errors.Wrap(err, "failed to compare stats: %w") } diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index e5b0e2f37bb9..88ca15ed92a8 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -176,13 +176,7 @@ func computeStatsDelta( // If we can't use the fast stats path, or race test is enabled, compute stats // across the key span to be cleared. if !entireRange || util.RaceEnabled { - iter := readWriter.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: from, - UpperBound: to, - }) - computed, err := iter.ComputeStats(from, to, delta.LastUpdateNanos) - iter.Close() + computed, err := storage.ComputeStats(readWriter, from, to, delta.LastUpdateNanos) if err != nil { return enginepb.MVCCStats{}, err } diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go index b8cfb058cdde..ab727c925fed 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go @@ -376,14 +376,7 @@ func computeStats( if len(to) == 0 { to = keys.MaxKey } - - iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: from, - UpperBound: to, - }) - defer iter.Close() - ms, err := storage.ComputeStatsForRange(iter, from, to, nowNanos) + ms, err := storage.ComputeStats(reader, from, to, nowNanos) require.NoError(t, err) return ms } diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 39b873d3f9b6..25562a7a9d4a 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -1205,14 +1205,7 @@ func mergeTrigger( { ridPrefix := keys.MakeRangeIDReplicatedPrefix(merge.RightDesc.RangeID) - // NB: Range-ID local keys have no versions and no intents. - iter := batch.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: ridPrefix, - UpperBound: ridPrefix.PrefixEnd(), - }) - defer iter.Close() - sysMS, err := iter.ComputeStats(ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */) + sysMS, err := storage.ComputeStats(batch, ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */) if err != nil { return result.Result{}, err } diff --git a/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go b/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go index 8d6f4d2cfa47..d9330a1fc78f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go @@ -44,17 +44,6 @@ func hashRange(t *testing.T, reader storage.Reader, start, end roachpb.Key) []by return h.Sum(nil) } -func getStats(t *testing.T, reader storage.Reader) enginepb.MVCCStats { - t.Helper() - iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax}) - defer iter.Close() - s, err := storage.ComputeStatsForRange(iter, keys.LocalMax, roachpb.KeyMax, 1100) - if err != nil { - t.Fatalf("%+v", err) - } - return s -} - // createTestPebbleEngine returns a new in-memory Pebble storage engine. func createTestPebbleEngine(ctx context.Context) (storage.Engine, error) { return storage.Open(ctx, storage.InMemory(), @@ -138,7 +127,8 @@ func TestCmdRevertRange(t *testing.T) { cArgs := batcheval.CommandArgs{Header: roachpb.Header{RangeID: desc.RangeID, Timestamp: tsC, MaxSpanRequestKeys: 2}} evalCtx := &batcheval.MockEvalCtx{Desc: &desc, Clock: hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */), Stats: stats} cArgs.EvalCtx = evalCtx.EvalContext() - afterStats := getStats(t, eng) + afterStats, err := storage.ComputeStats(eng, keys.LocalMax, keys.MaxKey, 0) + require.NoError(t, err) for _, tc := range []struct { name string ts hlc.Timestamp @@ -187,9 +177,9 @@ func TestCmdRevertRange(t *testing.T) { } evalStats := afterStats evalStats.Add(*cArgs.Stats) - if realStats := getStats(t, batch); !evalStats.Equal(evalStats) { - t.Fatalf("stats mismatch:\npre-revert\t%+v\nevaled:\t%+v\neactual\t%+v", afterStats, evalStats, realStats) - } + realStats, err := storage.ComputeStats(batch, keys.LocalMax, keys.MaxKey, evalStats.LastUpdateNanos) + require.NoError(t, err) + require.Equal(t, realStats, evalStats) }) } diff --git a/pkg/kv/kvserver/batcheval/cmd_truncate_log.go b/pkg/kv/kvserver/batcheval/cmd_truncate_log.go index cdc3a06d320e..d9e7a36ff507 100644 --- a/pkg/kv/kvserver/batcheval/cmd_truncate_log.go +++ b/pkg/kv/kvserver/batcheval/cmd_truncate_log.go @@ -117,14 +117,8 @@ func TruncateLog( // Note that any sideloaded payloads that may be removed by this truncation // are not tracked in the raft log delta. The delta will be adjusted below // raft. - iter := readWriter.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: start, - UpperBound: end, - }) - defer iter.Close() // We can pass zero as nowNanos because we're only interested in SysBytes. - ms, err := iter.ComputeStats(start, end, 0 /* nowNanos */) + ms, err := storage.ComputeStats(readWriter, start, end, 0 /* nowNanos */) if err != nil { return result.Result{}, errors.Wrap(err, "while computing stats of Raft log freed by truncation") } diff --git a/pkg/kv/kvserver/consistency_queue_test.go b/pkg/kv/kvserver/consistency_queue_test.go index 7fad8cf85733..587011be3332 100644 --- a/pkg/kv/kvserver/consistency_queue_test.go +++ b/pkg/kv/kvserver/consistency_queue_test.go @@ -453,12 +453,9 @@ func TestCheckConsistencyInconsistent(t *testing.T) { cpEng := storage.InMemFromFS(context.Background(), fs, cps[0], storage.CacheSize(1<<20)) defer cpEng.Close() - iter := cpEng.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: []byte("\xff")}) - defer iter.Close() - // The range is specified using only global keys, since the implementation // may use an intentInterleavingIter. - ms, err := storage.ComputeStatsForRange(iter, keys.LocalMax, roachpb.KeyMax, 0 /* nowNanos */) + ms, err := storage.ComputeStats(cpEng, keys.LocalMax, roachpb.KeyMax, 0 /* nowNanos */) assert.NoError(t, err) assert.NotZero(t, ms.KeyBytes) diff --git a/pkg/kv/kvserver/rditer/stats.go b/pkg/kv/kvserver/rditer/stats.go index 16d7714de4dd..51fbf9a28f36 100644 --- a/pkg/kv/kvserver/rditer/stats.go +++ b/pkg/kv/kvserver/rditer/stats.go @@ -16,32 +16,31 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" ) -// ComputeStatsForRange computes the stats for a given range by -// iterating over all key spans for the given range that should -// be accounted for in its stats. +// ComputeStatsForRange computes the stats for a given range by iterating over +// all key spans for the given range that should be accounted for in its stats. func ComputeStatsForRange( d *roachpb.RangeDescriptor, reader storage.Reader, nowNanos int64, ) (enginepb.MVCCStats, error) { - ms := enginepb.MVCCStats{} - var err error - for _, keySpan := range MakeReplicatedKeySpansExceptLockTable(d) { - func() { - iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: keySpan.Key, - UpperBound: keySpan.EndKey, - }) - defer iter.Close() + return ComputeStatsForRangeWithVisitors(d, reader, nowNanos, nil, nil) +} - var msDelta enginepb.MVCCStats - if msDelta, err = iter.ComputeStats(keySpan.Key, keySpan.EndKey, nowNanos); err != nil { - return - } - ms.Add(msDelta) - }() +// ComputeStatsForRangeWithVisitors is like ComputeStatsForRange but also +// calls the given callbacks for every key. +func ComputeStatsForRangeWithVisitors( + d *roachpb.RangeDescriptor, + reader storage.Reader, + nowNanos int64, + pointKeyVisitor func(storage.MVCCKey, []byte) error, + rangeKeyVisitor func(storage.MVCCRangeKeyValue) error, +) (enginepb.MVCCStats, error) { + var ms enginepb.MVCCStats + for _, keySpan := range MakeReplicatedKeySpansExceptLockTable(d) { + msDelta, err := storage.ComputeStatsWithVisitors(reader, keySpan.Key, keySpan.EndKey, nowNanos, + pointKeyVisitor, rangeKeyVisitor) if err != nil { return enginepb.MVCCStats{}, err } + ms.Add(msDelta) } return ms, nil } diff --git a/pkg/kv/kvserver/replica_consistency.go b/pkg/kv/kvserver/replica_consistency.go index 688b22e3f6f5..15c118c082bc 100644 --- a/pkg/kv/kvserver/replica_consistency.go +++ b/pkg/kv/kvserver/replica_consistency.go @@ -700,25 +700,11 @@ func (*Replica) sha512( // In statsOnly mode, we hash only the RangeAppliedState. In regular mode, hash // all of the replicated key space. if !statsOnly { - // Do not want the lock table ranges since the iter has been constructed - // using MVCCKeyAndIntentsIterKind. - // - // TODO(sumeer): When we have replicated locks other than exclusive locks, - // we will probably not have any interleaved intents so we could stop - // using MVCCKeyAndIntentsIterKind and consider all locks here. - for _, span := range rditer.MakeReplicatedKeySpansExceptLockTable(&desc) { - iter := snap.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: span.Key, - UpperBound: span.EndKey, - }) - spanMS, err := storage.ComputeStatsForRangeWithVisitors( - iter, span.Key, span.EndKey, 0 /* nowNanos */, pointKeyVisitor, rangeKeyVisitor) - iter.Close() - if err != nil { - return nil, err - } - ms.Add(spanMS) + var err error + ms, err = rditer.ComputeStatsForRangeWithVisitors(&desc, snap, 0, /* nowNanos */ + pointKeyVisitor, rangeKeyVisitor) + if err != nil { + return nil, err } } diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index fd9cc8ee0a2b..e3675b20ce92 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ b/pkg/kv/kvserver/replica_raft.go @@ -2198,13 +2198,7 @@ func ComputeRaftLogSize( ) (int64, error) { prefix := keys.RaftLogPrefix(rangeID) prefixEnd := prefix.PrefixEnd() - iter := reader.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: prefix, - UpperBound: prefixEnd, - }) - defer iter.Close() - ms, err := iter.ComputeStats(prefix, prefixEnd, 0 /* nowNanos */) + ms, err := storage.ComputeStats(reader, prefix, prefixEnd, 0 /* nowNanos */) if err != nil { return 0, err } diff --git a/pkg/kv/kvserver/spanset/BUILD.bazel b/pkg/kv/kvserver/spanset/BUILD.bazel index 70f5b4ad3d10..4d29597ff164 100644 --- a/pkg/kv/kvserver/spanset/BUILD.bazel +++ b/pkg/kv/kvserver/spanset/BUILD.bazel @@ -14,7 +14,6 @@ go_library( "//pkg/keys", "//pkg/roachpb", "//pkg/storage", - "//pkg/storage/enginepb", "//pkg/util/hlc", "//pkg/util/log", "//pkg/util/protoutil", diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index 77de83c2b3d3..f503c7fafe6a 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" - "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -190,22 +189,6 @@ func (i *MVCCIterator) RangeKeys() storage.MVCCRangeKeyStack { return i.i.RangeKeys() } -// ComputeStats is part of the storage.MVCCIterator interface. -func (i *MVCCIterator) ComputeStats( - start, end roachpb.Key, nowNanos int64, -) (enginepb.MVCCStats, error) { - if i.spansOnly { - if err := i.spans.CheckAllowed(SpanReadOnly, roachpb.Span{Key: start, EndKey: end}); err != nil { - return enginepb.MVCCStats{}, err - } - } else { - if err := i.spans.CheckAllowedAt(SpanReadOnly, roachpb.Span{Key: start, EndKey: end}, i.ts); err != nil { - return enginepb.MVCCStats{}, err - } - } - return i.i.ComputeStats(start, end, nowNanos) -} - // FindSplitKey is part of the storage.MVCCIterator interface. func (i *MVCCIterator) FindSplitKey( start, end, minSplitKey roachpb.Key, targetSize int64, diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index f037a25b12b1..424716b1226e 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -886,13 +886,9 @@ func runMVCCScan(ctx context.Context, b *testing.B, emk engineMaker, opts benchS // Pull all of the sstables into the RocksDB cache in order to make the // timings more stable. Otherwise, the first run will be penalized pulling // data into the cache while later runs will not. - iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: keys.LocalMax, - UpperBound: roachpb.KeyMax, - }) - _, _ = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) - iter.Close() + if _, err := ComputeStats(eng, keys.LocalMax, roachpb.KeyMax, 0); err != nil { + b.Fatalf("stats failed: %s", err) + } } var startKey, endKey roachpb.Key @@ -1349,13 +1345,7 @@ func runMVCCDeleteRangeUsingTombstone( var msCovered *enginepb.MVCCStats var leftPeekBound, rightPeekBound roachpb.Key if entireRange { - iter := eng.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: keys.LocalMax, - UpperBound: keys.MaxKey, - }) - ms, err := ComputeStatsForRange(iter, keys.LocalMax, keys.MaxKey, 0) - iter.Close() + ms, err := ComputeStats(eng, keys.LocalMax, keys.MaxKey, 0) require.NoError(b, err) leftPeekBound = keys.LocalMax @@ -1457,13 +1447,7 @@ func runMVCCComputeStats( var stats enginepb.MVCCStats var err error for i := 0; i < b.N; i++ { - iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: keys.LocalMax, - UpperBound: roachpb.KeyMax, - }) - stats, err = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) - iter.Close() + stats, err = ComputeStats(eng, keys.LocalMax, keys.MaxKey, 0) if err != nil { b.Fatal(err) } diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 73c5f3513580..74ca8f773084 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -252,23 +252,6 @@ type MVCCIterator interface { // ValueProto unmarshals the value the iterator is currently // pointing to using a protobuf decoder. ValueProto(msg protoutil.Message) error - // ComputeStats scans the underlying engine from start to end keys and - // computes stats counters based on the values. This method is used after a - // range is split to recompute stats for each subrange. The nowNanos arg - // specifies the wall time in nanoseconds since the epoch and is used to - // compute the total age of intents and garbage. - // - // To properly account for intents and range keys, the iterator must be - // created with MVCCKeyAndIntentsIterKind and IterKeyTypePointsAndRanges, - // and the LowerBound and UpperBound must be set equal to start and end - // in order for range keys to be truncated to the bounds. - // - // TODO(erikgrinaker): This should be replaced by ComputeStatsForRange - // instead, which should set up its own iterator with appropriate options. - // This isn't currently done in order to do spanset assertions on it, but this - // could be better solved by checking the iterator bounds in NewMVCCIterator - // and requiring callers to set them appropriately. - ComputeStats(start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) // FindSplitKey finds a key from the given span such that the left side of // the split is roughly targetSize bytes. The returned key will never be // chosen from the key ranges listed in keys.NoSplitSpans and will always diff --git a/pkg/storage/intent_interleaving_iter.go b/pkg/storage/intent_interleaving_iter.go index 35f4cf723f14..6c72c2e4c631 100644 --- a/pkg/storage/intent_interleaving_iter.go +++ b/pkg/storage/intent_interleaving_iter.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -1167,12 +1166,6 @@ func (i *intentInterleavingIter) ValueProto(msg protoutil.Message) error { return protoutil.Unmarshal(value, msg) } -func (i *intentInterleavingIter) ComputeStats( - start, end roachpb.Key, nowNanos int64, -) (enginepb.MVCCStats, error) { - return ComputeStatsForRange(i, start, end, nowNanos) -} - func (i *intentInterleavingIter) FindSplitKey( start, end, minSplitKey roachpb.Key, targetSize int64, ) (MVCCKey, error) { diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 68dc1e98a509..6ff2bcc6a7df 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -5250,41 +5250,65 @@ func willOverflow(a, b int64) bool { return math.MinInt64-b > a } -// ComputeStatsForRange scans the iterator from start to end keys and computes -// stats counters based on the values. This method is used after a range is -// split to recompute stats for each subrange. The nowNanos arg specifies the -// wall time in nanoseconds since the epoch and is used to compute the total age -// of all intents. +// ComputeStats scans the given key span and computes MVCC stats. nowNanos +// specifies the wall time in nanoseconds since the epoch and is used to compute +// age-related stats quantities. +func ComputeStats(r Reader, start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) { + return ComputeStatsWithVisitors(r, start, end, nowNanos, nil, nil) +} + +// ComputeStatsWithVisitors is like ComputeStats, but also takes a point and/or +// range key callback that is invoked for each key. +func ComputeStatsWithVisitors( + r Reader, + start, end roachpb.Key, + nowNanos int64, + pointKeyVisitor func(MVCCKey, []byte) error, + rangeKeyVisitor func(MVCCRangeKeyValue) error, +) (enginepb.MVCCStats, error) { + iter := r.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: start, + UpperBound: end, + }) + defer iter.Close() + iter.SeekGE(MVCCKey{Key: start}) + return computeStatsForIterWithVisitors(iter, nowNanos, pointKeyVisitor, rangeKeyVisitor) +} + +// ComputeStatsForIter is like ComputeStats, but scans across the given iterator +// until exhausted. The iterator must have appropriate bounds, key types, and +// intent options set, and it must have been seeked to the appropriate starting +// point. // -// To account for intents and range keys, the iterator must be created with -// MVCCKeyAndIntentsIterKind and IterKeyTypePointsAndRanges. To correctly -// account for range key truncation bounds, the iterator must have an -// appropriate UpperBound and LowerBound. +// We don't take start/end here, because that would require expensive key +// comparisons. We also don't seek to e.g. MinKey, because that might violate +// spanset assertions. // -// TODO(erikgrinaker): Consider removing the start,end parameters, forcing the -// caller to set appropriate bounds on the iterator instead. -func ComputeStatsForRange( - iter SimpleMVCCIterator, start, end roachpb.Key, nowNanos int64, -) (enginepb.MVCCStats, error) { - return ComputeStatsForRangeWithVisitors(iter, start, end, nowNanos, nil, nil) +// Most callers should use ComputeStats() instead. This exists primarily for use +// with SST iterators. +func ComputeStatsForIter(iter SimpleMVCCIterator, nowNanos int64) (enginepb.MVCCStats, error) { + return computeStatsForIterWithVisitors(iter, nowNanos, nil, nil) } -// ComputeStatsForRangeWithVisitors is like ComputeStatsForRange, but also -// takes a point and/or range key callback that is invoked for each physical -// key-value pair (i.e. not for implicit meta records), and iteration is aborted -// on the first error returned from either of them. +// computeStatsForIterWithVisitors performs the actual stats computation for the +// other ComputeStats methods. +// +// The iterator must already have been seeked. This requirement is to comply +// with spanset assertions, such that ComputeStats can seek to the given start +// key (satisfying the spanset asserter), while ComputeStatsForIter can seek to +// MinKey (in effect the iterator's lower bound) as it's geared towards SST +// iterators which are not subject to spanset assertions. // -// Callbacks must copy any data they intend to hold on to. -func ComputeStatsForRangeWithVisitors( +// Notably, we do not want to take the start/end key here, and instead rely on +// the iterator's bounds, to avoid expensive key comparisons. +func computeStatsForIterWithVisitors( iter SimpleMVCCIterator, - start, end roachpb.Key, nowNanos int64, pointKeyVisitor func(MVCCKey, []byte) error, rangeKeyVisitor func(MVCCRangeKeyValue) error, ) (enginepb.MVCCStats, error) { var ms enginepb.MVCCStats - // Only some callers are providing an MVCCIterator. The others don't have - // any intents. var meta enginepb.MVCCMetadata var prevKey, prevRangeStart []byte first := false @@ -5296,12 +5320,11 @@ func ComputeStatsForRangeWithVisitors( // of the point in time at which the current key begins to age. var accrueGCAgeNanos int64 var rangeKeys MVCCRangeKeyStack - mvccEndKey := MakeMVCCMetadataKey(end) - for iter.SeekGE(MakeMVCCMetadataKey(start)); ; iter.Next() { + for ; ; iter.Next() { if ok, err := iter.Valid(); err != nil { return ms, err - } else if !ok || !iter.UnsafeKey().Less(mvccEndKey) { + } else if !ok { break } diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index 3efde0013836..aa122b737189 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -516,7 +516,8 @@ func TestMVCCHistories(t *testing.T) { // that we can compare the deltas. var msEngineBefore enginepb.MVCCStats if stats { - msEngineBefore = computeStats(e.t, e.engine, span.Key, span.EndKey, statsTS) + msEngineBefore, err = ComputeStats(e.engine, span.Key, span.EndKey, statsTS) + require.NoError(t, err) } msEvalBefore := *e.ms @@ -533,7 +534,8 @@ func TestMVCCHistories(t *testing.T) { if stats && cmd.typ == typDataUpdate { // If stats are enabled, emit evaluated stats returned by the // command, and compare them with the real computed stats diff. - msEngineDiff := computeStats(e.t, e.engine, span.Key, span.EndKey, statsTS) + msEngineDiff, err := ComputeStats(e.engine, span.Key, span.EndKey, statsTS) + require.NoError(t, err) msEngineDiff.Subtract(msEngineBefore) msEvalDiff := *e.ms @@ -576,7 +578,8 @@ func TestMVCCHistories(t *testing.T) { // Calculate and output final stats if requested and the data changed. if stats && dataChange { - ms := computeStats(t, e.engine, span.Key, span.EndKey, statsTS) + ms, err := ComputeStats(e.engine, span.Key, span.EndKey, statsTS) + require.NoError(t, err) buf.Printf("stats: %s\n", formatStats(ms, false)) } @@ -1056,13 +1059,7 @@ func cmdDeleteRangeTombstone(e *evalCtx) error { // Some tests will submit invalid MVCC range keys, where e.g. the end key is // before the start key -- ignore them to avoid iterator panics. if key.Compare(endKey) < 0 { - iter := e.engine.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: key, - UpperBound: endKey, - }) - ms, err := ComputeStatsForRange(iter, key, endKey, ts.WallTime) - iter.Close() + ms, err := ComputeStats(e.engine, key, endKey, ts.WallTime) if err != nil { return err } diff --git a/pkg/storage/mvcc_incremental_iterator_test.go b/pkg/storage/mvcc_incremental_iterator_test.go index 1c38e433bcc8..7b57b454f03b 100644 --- a/pkg/storage/mvcc_incremental_iterator_test.go +++ b/pkg/storage/mvcc_incremental_iterator_test.go @@ -1502,13 +1502,10 @@ func runIncrementalBenchmark( // Pull all of the sstables into the cache. This // probably defeats a lot of the benefits of the // time-based optimization. - iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: roachpb.LocalMax, - UpperBound: roachpb.KeyMax, - }) - _, _ = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) - iter.Close() + _, err := ComputeStats(eng, keys.LocalMax, roachpb.KeyMax, 0) + if err != nil { + b.Fatalf("stats failed: %s", err) + } } defer eng.Close() diff --git a/pkg/storage/mvcc_stats_test.go b/pkg/storage/mvcc_stats_test.go index 8a5d5fdbc851..c0ef14c24a54 100644 --- a/pkg/storage/mvcc_stats_test.go +++ b/pkg/storage/mvcc_stats_test.go @@ -58,15 +58,9 @@ func assertEqImpl( keyMin = keys.LocalMax keyMax = roachpb.KeyMax } - it := rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: keyMin, - UpperBound: keyMax, - }) - defer it.Close() for _, mvccStatsTest := range mvccStatsTests { - compMS, err := mvccStatsTest.fn(it, keyMin, keyMax, ms.LastUpdateNanos) + compMS, err := mvccStatsTest.fn(rw, keyMin, keyMax, ms.LastUpdateNanos) if err != nil { t.Fatal(err) } @@ -1512,18 +1506,25 @@ func TestMVCCStatsSysPutPut(t *testing.T) { var mvccStatsTests = []struct { name string - fn func(MVCCIterator, roachpb.Key, roachpb.Key, int64) (enginepb.MVCCStats, error) + fn func(Reader, roachpb.Key, roachpb.Key, int64) (enginepb.MVCCStats, error) }{ { name: "ComputeStats", - fn: func(iter MVCCIterator, start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) { - return iter.ComputeStats(start, end, nowNanos) + fn: func(r Reader, start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) { + return ComputeStats(r, start, end, nowNanos) }, }, { - name: "ComputeStatsForRange", - fn: func(iter MVCCIterator, start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) { - return ComputeStatsForRange(iter, start, end, nowNanos) + name: "ComputeStatsForIter", + fn: func(r Reader, start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) { + iter := r.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: start, + UpperBound: end, + }) + defer iter.Close() + iter.SeekGE(MVCCKey{Key: start}) + return ComputeStatsForIter(iter, nowNanos) }, }, } @@ -1792,30 +1793,21 @@ func TestMVCCStatsRandomized(t *testing.T) { func TestMVCCComputeStatsError(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - for _, engineImpl := range mvccEngineImpls { - t.Run(engineImpl.name, func(t *testing.T) { - engine := engineImpl.create() - defer engine.Close() - // Write a MVCC metadata key where the value is not an encoded MVCCMetadata - // protobuf. - if err := engine.PutUnversioned(roachpb.Key("garbage"), []byte("garbage")); err != nil { - t.Fatal(err) - } + engine := NewDefaultInMemForTesting() + defer engine.Close() - iter := engine.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: roachpb.LocalMax, - UpperBound: roachpb.KeyMax, - }) - defer iter.Close() - for _, mvccStatsTest := range mvccStatsTests { - t.Run(mvccStatsTest.name, func(t *testing.T) { - _, err := mvccStatsTest.fn(iter, keys.LocalMax, roachpb.KeyMax, 100) - if e := "unable to decode MVCCMetadata"; !testutils.IsError(err, e) { - t.Fatalf("expected %s, got %v", e, err) - } - }) + // Write a MVCC metadata key where the value is not an encoded MVCCMetadata + // protobuf. + if err := engine.PutUnversioned(roachpb.Key("garbage"), []byte("garbage")); err != nil { + t.Fatal(err) + } + + for _, mvccStatsTest := range mvccStatsTests { + t.Run(mvccStatsTest.name, func(t *testing.T) { + _, err := mvccStatsTest.fn(engine, keys.LocalMax, roachpb.KeyMax, 100) + if e := "unable to decode MVCCMetadata"; !testutils.IsError(err, e) { + t.Fatalf("expected %s, got %v", e, err) } }) } diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index 1ec70356e633..5b9b01ab5c87 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -872,7 +872,7 @@ func TestMVCCGetProtoInconsistent(t *testing.T) { } // Regression test for #28205: MVCCGet and MVCCScan, FindSplitKey, and -// ComputeStats need to invalidate the cached iterator data. +// ComputeStatsForIter need to invalidate the cached iterator data. func TestMVCCInvalidateIterator(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -923,9 +923,10 @@ func TestMVCCInvalidateIterator(t *testing.T) { _, err = MVCCScan(ctx, batch, key, roachpb.KeyMax, ts2, MVCCScanOptions{}) case "findSplitKey": _, err = MVCCFindSplitKey(ctx, batch, roachpb.RKeyMin, roachpb.RKeyMax, 64<<20) - case "computeStats": + case "computeStatsForIter": iter := batch.NewMVCCIterator(MVCCKeyAndIntentsIterKind, iterOptions) - _, err = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) + iter.SeekGE(MVCCKey{Key: iterOptions.LowerBound}) + _, err = ComputeStatsForIter(iter, 0) iter.Close() } if err != nil { @@ -2179,29 +2180,6 @@ func TestMVCCClearTimeRange(t *testing.T) { }) } -func computeStats( - t *testing.T, reader Reader, from, to roachpb.Key, nowNanos int64, -) enginepb.MVCCStats { - t.Helper() - - if len(from) == 0 { - from = keys.LocalMax - } - if len(to) == 0 { - to = keys.MaxKey - } - - iter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: from, - UpperBound: to, - }) - defer iter.Close() - ms, err := ComputeStatsForRange(iter, from, to, nowNanos) - require.NoError(t, err) - return ms -} - // TestMVCCClearTimeRangeOnRandomData sets up mostly random KVs and then picks // some random times to which to revert, ensuring that a MVCC-Scan at each of // those times before reverting matches the result of an MVCC-Scan done at a @@ -2268,7 +2246,9 @@ func TestMVCCClearTimeRangeOnRandomData(t *testing.T) { ms.AgeTo(2000) // Sanity check starting stats. - require.Equal(t, computeStats(t, e, localMax, keyMax, 2000), ms) + msComputed, err := ComputeStats(e, localMax, keyMax, 2000) + require.NoError(t, err) + require.Equal(t, msComputed, ms) // Pick timestamps to which we'll revert, and sort them so we can go back // though them in order. The largest will still be less than randTimeRange so @@ -2304,7 +2284,9 @@ func TestMVCCClearTimeRangeOnRandomData(t *testing.T) { batch.Close() } - require.Equal(t, computeStats(t, e, localMax, keyMax, 2000), ms) + msComputed, err := ComputeStats(e, localMax, keyMax, 2000) + require.NoError(t, err) + require.Equal(t, msComputed, ms) // Scanning at "now" post-revert should yield the same result as scanning // at revert-time pre-revert. resAfter, err := MVCCScan(ctx, e, localMax, keyMax, now, MVCCScanOptions{MaxKeys: numKVs}) @@ -4897,12 +4879,9 @@ func TestMVCCGarbageCollect(t *testing.T) { } // Verify aggregated stats match computed stats after GC. - iter := engine.NewMVCCIterator(MVCCKeyAndIntentsIterKind, - IterOptions{UpperBound: roachpb.KeyMax, KeyTypes: IterKeyTypePointsAndRanges}) - defer iter.Close() for _, mvccStatsTest := range mvccStatsTests { t.Run(mvccStatsTest.name, func(t *testing.T) { - expMS, err := mvccStatsTest.fn(iter, localMax, roachpb.KeyMax, gcTime.WallTime) + expMS, err := mvccStatsTest.fn(engine, localMax, roachpb.KeyMax, gcTime.WallTime) if err != nil { t.Fatal(err) } @@ -5761,12 +5740,7 @@ func TestMVCCGarbageCollectRanges(t *testing.T) { "not all range tombstone expectations were consumed") ms.AgeTo(tsMax.WallTime) - it = engine.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: d.rangeStart, - UpperBound: d.rangeEnd, - }) - expMs, err := ComputeStatsForRange(it, rangeStart, rangeEnd, tsMax.WallTime) + expMs, err := ComputeStats(engine, d.rangeStart, d.rangeEnd, tsMax.WallTime) require.NoError(t, err, "failed to compute stats for range") require.EqualValues(t, expMs, ms, "computed range stats vs gc'd") }) diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index d6e565dba19b..2b7ebef7e61c 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -17,7 +17,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" @@ -699,13 +698,6 @@ func (p *pebbleIterator) EngineRangeKeys() []EngineRangeKeyValue { return rkvs } -// ComputeStats implements the MVCCIterator interface. -func (p *pebbleIterator) ComputeStats( - start, end roachpb.Key, nowNanos int64, -) (enginepb.MVCCStats, error) { - return ComputeStatsForRange(p, start, end, nowNanos) -} - // Go-only version of IsValidSplitKey. Checks if the specified key is in // NoSplitSpans. func isValidSplitKey(key roachpb.Key, noSplitSpans []roachpb.Span) bool { diff --git a/pkg/storage/point_synthesizing_iter.go b/pkg/storage/point_synthesizing_iter.go index a0a8d09a6b6e..643caf86f275 100644 --- a/pkg/storage/point_synthesizing_iter.go +++ b/pkg/storage/point_synthesizing_iter.go @@ -15,7 +15,6 @@ import ( "sync" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -636,13 +635,6 @@ func (i *pointSynthesizingIter) RangeKeys() MVCCRangeKeyStack { return MVCCRangeKeyStack{} } -// ComputeStats implements MVCCIterator. -func (i *pointSynthesizingIter) ComputeStats( - start, end roachpb.Key, nowNanos int64, -) (enginepb.MVCCStats, error) { - return i.iter.ComputeStats(start, end, nowNanos) -} - // FindSplitKey implements MVCCIterator. func (i *pointSynthesizingIter) FindSplitKey( start, end, minSplitKey roachpb.Key, targetSize int64, diff --git a/pkg/testutils/storageutils/stats.go b/pkg/testutils/storageutils/stats.go index 17e611243065..cf6bf6ff8e5f 100644 --- a/pkg/testutils/storageutils/stats.go +++ b/pkg/testutils/storageutils/stats.go @@ -24,13 +24,7 @@ import ( func EngineStats(t *testing.T, engine storage.Reader, nowNanos int64) *enginepb.MVCCStats { t.Helper() - iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ - KeyTypes: storage.IterKeyTypePointsAndRanges, - LowerBound: keys.LocalMax, - UpperBound: keys.MaxKey, - }) - defer iter.Close() - stats, err := storage.ComputeStatsForRange(iter, keys.LocalMax, keys.MaxKey, nowNanos) + stats, err := storage.ComputeStats(engine, keys.LocalMax, keys.MaxKey, nowNanos) require.NoError(t, err) return &stats } @@ -41,11 +35,13 @@ func SSTStats(t *testing.T, sst []byte, nowNanos int64) *enginepb.MVCCStats { iter, err := storage.NewPebbleMemSSTIterator(sst, true /* verify */, storage.IterOptions{ KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: keys.MinKey, UpperBound: keys.MaxKey, }) require.NoError(t, err) defer iter.Close() - stats, err := storage.ComputeStatsForRange(iter, keys.MinKey, keys.MaxKey, nowNanos) + iter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) + stats, err := storage.ComputeStatsForIter(iter, nowNanos) require.NoError(t, err) return &stats }