Skip to content

Commit

Permalink
storage: remove key comparison in MVCC stats computations
Browse files Browse the repository at this point in the history
This patch restructures MVCC stats computations to use an MVCC iterator
with appropriate bounds. This allows omitting a key comparison in the
hot path, yielding a ~10% performance improvement. The new stats
functions are:

* `ComputeStats`: calculates stats for a `Reader` key span.

* `ComputeStatsWithVisitors`: calculates stats for a `Reader` key span,
  calling the given visitor callbacks for every point/range key.

* `ComputeStatsForIter`: calculates stats for a given `MVCCIterator`, by
  scanning it until exhausted.

It also removes the `MVCCIterator.ComputeStats` method, which had no
business being part of the interface.

```
name                                     old time/op   new time/op   delta
MVCCComputeStats_Pebble/valueSize=64-24    130ms ± 1%    119ms ± 1%  -8.77%  (p=0.000 n=10+10)

name                                     old speed     new speed     delta
MVCCComputeStats_Pebble/valueSize=64-24  516MB/s ± 1%  565MB/s ± 1%  +9.61%  (p=0.000 n=10+10)
```

Release note: None
  • Loading branch information
erikgrinaker committed Aug 11, 2022
1 parent 33124be commit 7f61dfd
Show file tree
Hide file tree
Showing 24 changed files with 161 additions and 305 deletions.
18 changes: 14 additions & 4 deletions pkg/kv/bulk/sst_batcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,8 @@ func (b *SSTBatcher) addSSTable(
defer iter.Close()

if (stats == enginepb.MVCCStats{}) {
stats, err = storage.ComputeStatsForRange(iter, start, end, sendStart.UnixNano())
iter.SeekGE(storage.MVCCKey{Key: start})
stats, err = storage.ComputeStatsForIter(iter, sendStart.UnixNano())
if err != nil {
return errors.Wrapf(err, "computing stats for SST [%s, %s)", start, end)
}
Expand Down Expand Up @@ -779,9 +780,18 @@ func (b *SSTBatcher) addSSTable(
return err
}

right.stats, err = storage.ComputeStatsForRange(
iter, right.start, right.end, sendStart.Unix(),
)
// Needs a new iterator with new bounds.
statsIter, err := storage.NewPebbleMemSSTIterator(sstBytes, true, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsOnly,
LowerBound: right.start,
UpperBound: right.end,
})
if err != nil {
return err
}
statsIter.SeekGE(storage.MVCCKey{Key: right.start})
right.stats, err = storage.ComputeStatsForIter(statsIter, sendStart.Unix())
statsIter.Close()
if err != nil {
return err
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/kv/kvserver/batcheval/cmd_add_sstable.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ func EvalAddSSTable(
// compute the expected MVCC stats delta of ingesting the SST.
sstIter, err := storage.NewPebbleMemSSTIterator(sst, true /* verify */, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: keys.MinKey,
UpperBound: keys.MaxKey,
})
if err != nil {
Expand All @@ -252,7 +253,7 @@ func EvalAddSSTable(
stats = *args.MVCCStats
} else {
log.VEventf(ctx, 2, "computing MVCCStats for SSTable [%s,%s)", start.Key, end.Key)
stats, err = storage.ComputeStatsForRange(sstIter, start.Key, end.Key, h.Timestamp.WallTime)
stats, err = storage.ComputeStatsForIter(sstIter, h.Timestamp.WallTime)
if err != nil {
return result.Result{}, errors.Wrap(err, "computing SSTable MVCC stats")
}
Expand Down Expand Up @@ -569,10 +570,10 @@ func assertSSTContents(sst []byte, sstTimestamp hlc.Timestamp, stats *enginepb.M
return err
}
defer iter.Close()
iter.SeekGE(storage.MVCCKey{Key: keys.MinKey})

given := *stats
actual, err := storage.ComputeStatsForRange(
iter, keys.MinKey, keys.MaxKey, given.LastUpdateNanos)
actual, err := storage.ComputeStatsForIter(iter, given.LastUpdateNanos)
if err != nil {
return errors.Wrap(err, "failed to compare stats: %w")
}
Expand Down
8 changes: 1 addition & 7 deletions pkg/kv/kvserver/batcheval/cmd_clear_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,7 @@ func computeStatsDelta(
// If we can't use the fast stats path, or race test is enabled, compute stats
// across the key span to be cleared.
if !entireRange || util.RaceEnabled {
iter := readWriter.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: from,
UpperBound: to,
})
computed, err := iter.ComputeStats(from, to, delta.LastUpdateNanos)
iter.Close()
computed, err := storage.ComputeStats(readWriter, from, to, delta.LastUpdateNanos)
if err != nil {
return enginepb.MVCCStats{}, err
}
Expand Down
9 changes: 1 addition & 8 deletions pkg/kv/kvserver/batcheval/cmd_delete_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,14 +376,7 @@ func computeStats(
if len(to) == 0 {
to = keys.MaxKey
}

iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: from,
UpperBound: to,
})
defer iter.Close()
ms, err := storage.ComputeStatsForRange(iter, from, to, nowNanos)
ms, err := storage.ComputeStats(reader, from, to, nowNanos)
require.NoError(t, err)
return ms
}
9 changes: 1 addition & 8 deletions pkg/kv/kvserver/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -1205,14 +1205,7 @@ func mergeTrigger(

{
ridPrefix := keys.MakeRangeIDReplicatedPrefix(merge.RightDesc.RangeID)
// NB: Range-ID local keys have no versions and no intents.
iter := batch.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: ridPrefix,
UpperBound: ridPrefix.PrefixEnd(),
})
defer iter.Close()
sysMS, err := iter.ComputeStats(ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */)
sysMS, err := storage.ComputeStats(batch, ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */)
if err != nil {
return result.Result{}, err
}
Expand Down
20 changes: 5 additions & 15 deletions pkg/kv/kvserver/batcheval/cmd_revert_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,6 @@ func hashRange(t *testing.T, reader storage.Reader, start, end roachpb.Key) []by
return h.Sum(nil)
}

func getStats(t *testing.T, reader storage.Reader) enginepb.MVCCStats {
t.Helper()
iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax})
defer iter.Close()
s, err := storage.ComputeStatsForRange(iter, keys.LocalMax, roachpb.KeyMax, 1100)
if err != nil {
t.Fatalf("%+v", err)
}
return s
}

// createTestPebbleEngine returns a new in-memory Pebble storage engine.
func createTestPebbleEngine(ctx context.Context) (storage.Engine, error) {
return storage.Open(ctx, storage.InMemory(),
Expand Down Expand Up @@ -138,7 +127,8 @@ func TestCmdRevertRange(t *testing.T) {
cArgs := batcheval.CommandArgs{Header: roachpb.Header{RangeID: desc.RangeID, Timestamp: tsC, MaxSpanRequestKeys: 2}}
evalCtx := &batcheval.MockEvalCtx{Desc: &desc, Clock: hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */), Stats: stats}
cArgs.EvalCtx = evalCtx.EvalContext()
afterStats := getStats(t, eng)
afterStats, err := storage.ComputeStats(eng, keys.LocalMax, keys.MaxKey, 0)
require.NoError(t, err)
for _, tc := range []struct {
name string
ts hlc.Timestamp
Expand Down Expand Up @@ -187,9 +177,9 @@ func TestCmdRevertRange(t *testing.T) {
}
evalStats := afterStats
evalStats.Add(*cArgs.Stats)
if realStats := getStats(t, batch); !evalStats.Equal(evalStats) {
t.Fatalf("stats mismatch:\npre-revert\t%+v\nevaled:\t%+v\neactual\t%+v", afterStats, evalStats, realStats)
}
realStats, err := storage.ComputeStats(batch, keys.LocalMax, keys.MaxKey, evalStats.LastUpdateNanos)
require.NoError(t, err)
require.Equal(t, realStats, evalStats)
})
}

Expand Down
8 changes: 1 addition & 7 deletions pkg/kv/kvserver/batcheval/cmd_truncate_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,8 @@ func TruncateLog(
// Note that any sideloaded payloads that may be removed by this truncation
// are not tracked in the raft log delta. The delta will be adjusted below
// raft.
iter := readWriter.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: start,
UpperBound: end,
})
defer iter.Close()
// We can pass zero as nowNanos because we're only interested in SysBytes.
ms, err := iter.ComputeStats(start, end, 0 /* nowNanos */)
ms, err := storage.ComputeStats(readWriter, start, end, 0 /* nowNanos */)
if err != nil {
return result.Result{}, errors.Wrap(err, "while computing stats of Raft log freed by truncation")
}
Expand Down
5 changes: 1 addition & 4 deletions pkg/kv/kvserver/consistency_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -453,12 +453,9 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
cpEng := storage.InMemFromFS(context.Background(), fs, cps[0], storage.CacheSize(1<<20))
defer cpEng.Close()

iter := cpEng.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: []byte("\xff")})
defer iter.Close()

// The range is specified using only global keys, since the implementation
// may use an intentInterleavingIter.
ms, err := storage.ComputeStatsForRange(iter, keys.LocalMax, roachpb.KeyMax, 0 /* nowNanos */)
ms, err := storage.ComputeStats(cpEng, keys.LocalMax, roachpb.KeyMax, 0 /* nowNanos */)
assert.NoError(t, err)

assert.NotZero(t, ms.KeyBytes)
Expand Down
37 changes: 18 additions & 19 deletions pkg/kv/kvserver/rditer/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,31 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
)

// ComputeStatsForRange computes the stats for a given range by
// iterating over all key spans for the given range that should
// be accounted for in its stats.
// ComputeStatsForRange computes the stats for a given range by iterating over
// all key spans for the given range that should be accounted for in its stats.
func ComputeStatsForRange(
d *roachpb.RangeDescriptor, reader storage.Reader, nowNanos int64,
) (enginepb.MVCCStats, error) {
ms := enginepb.MVCCStats{}
var err error
for _, keySpan := range MakeReplicatedKeySpansExceptLockTable(d) {
func() {
iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: keySpan.Key,
UpperBound: keySpan.EndKey,
})
defer iter.Close()
return ComputeStatsForRangeWithVisitors(d, reader, nowNanos, nil, nil)
}

var msDelta enginepb.MVCCStats
if msDelta, err = iter.ComputeStats(keySpan.Key, keySpan.EndKey, nowNanos); err != nil {
return
}
ms.Add(msDelta)
}()
// ComputeStatsForRangeWithVisitors is like ComputeStatsForRange but also
// calls the given callbacks for every key.
func ComputeStatsForRangeWithVisitors(
d *roachpb.RangeDescriptor,
reader storage.Reader,
nowNanos int64,
pointKeyVisitor func(storage.MVCCKey, []byte) error,
rangeKeyVisitor func(storage.MVCCRangeKeyValue) error,
) (enginepb.MVCCStats, error) {
var ms enginepb.MVCCStats
for _, keySpan := range MakeReplicatedKeySpansExceptLockTable(d) {
msDelta, err := storage.ComputeStatsWithVisitors(reader, keySpan.Key, keySpan.EndKey, nowNanos,
pointKeyVisitor, rangeKeyVisitor)
if err != nil {
return enginepb.MVCCStats{}, err
}
ms.Add(msDelta)
}
return ms, nil
}
24 changes: 5 additions & 19 deletions pkg/kv/kvserver/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,25 +700,11 @@ func (*Replica) sha512(
// In statsOnly mode, we hash only the RangeAppliedState. In regular mode, hash
// all of the replicated key space.
if !statsOnly {
// Do not want the lock table ranges since the iter has been constructed
// using MVCCKeyAndIntentsIterKind.
//
// TODO(sumeer): When we have replicated locks other than exclusive locks,
// we will probably not have any interleaved intents so we could stop
// using MVCCKeyAndIntentsIterKind and consider all locks here.
for _, span := range rditer.MakeReplicatedKeySpansExceptLockTable(&desc) {
iter := snap.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: span.Key,
UpperBound: span.EndKey,
})
spanMS, err := storage.ComputeStatsForRangeWithVisitors(
iter, span.Key, span.EndKey, 0 /* nowNanos */, pointKeyVisitor, rangeKeyVisitor)
iter.Close()
if err != nil {
return nil, err
}
ms.Add(spanMS)
var err error
ms, err = rditer.ComputeStatsForRangeWithVisitors(&desc, snap, 0, /* nowNanos */
pointKeyVisitor, rangeKeyVisitor)
if err != nil {
return nil, err
}
}

Expand Down
8 changes: 1 addition & 7 deletions pkg/kv/kvserver/replica_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -2198,13 +2198,7 @@ func ComputeRaftLogSize(
) (int64, error) {
prefix := keys.RaftLogPrefix(rangeID)
prefixEnd := prefix.PrefixEnd()
iter := reader.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: prefix,
UpperBound: prefixEnd,
})
defer iter.Close()
ms, err := iter.ComputeStats(prefix, prefixEnd, 0 /* nowNanos */)
ms, err := storage.ComputeStats(reader, prefix, prefixEnd, 0 /* nowNanos */)
if err != nil {
return 0, err
}
Expand Down
1 change: 0 additions & 1 deletion pkg/kv/kvserver/spanset/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ go_library(
"//pkg/keys",
"//pkg/roachpb",
"//pkg/storage",
"//pkg/storage/enginepb",
"//pkg/util/hlc",
"//pkg/util/log",
"//pkg/util/protoutil",
Expand Down
17 changes: 0 additions & 17 deletions pkg/kv/kvserver/spanset/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
Expand Down Expand Up @@ -190,22 +189,6 @@ func (i *MVCCIterator) RangeKeys() storage.MVCCRangeKeyStack {
return i.i.RangeKeys()
}

// ComputeStats is part of the storage.MVCCIterator interface.
func (i *MVCCIterator) ComputeStats(
start, end roachpb.Key, nowNanos int64,
) (enginepb.MVCCStats, error) {
if i.spansOnly {
if err := i.spans.CheckAllowed(SpanReadOnly, roachpb.Span{Key: start, EndKey: end}); err != nil {
return enginepb.MVCCStats{}, err
}
} else {
if err := i.spans.CheckAllowedAt(SpanReadOnly, roachpb.Span{Key: start, EndKey: end}, i.ts); err != nil {
return enginepb.MVCCStats{}, err
}
}
return i.i.ComputeStats(start, end, nowNanos)
}

// FindSplitKey is part of the storage.MVCCIterator interface.
func (i *MVCCIterator) FindSplitKey(
start, end, minSplitKey roachpb.Key, targetSize int64,
Expand Down
26 changes: 5 additions & 21 deletions pkg/storage/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -886,13 +886,9 @@ func runMVCCScan(ctx context.Context, b *testing.B, emk engineMaker, opts benchS
// Pull all of the sstables into the RocksDB cache in order to make the
// timings more stable. Otherwise, the first run will be penalized pulling
// data into the cache while later runs will not.
iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
LowerBound: keys.LocalMax,
UpperBound: roachpb.KeyMax,
})
_, _ = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0)
iter.Close()
if _, err := ComputeStats(eng, keys.LocalMax, roachpb.KeyMax, 0); err != nil {
b.Fatalf("stats failed: %s", err)
}
}

var startKey, endKey roachpb.Key
Expand Down Expand Up @@ -1349,13 +1345,7 @@ func runMVCCDeleteRangeUsingTombstone(
var msCovered *enginepb.MVCCStats
var leftPeekBound, rightPeekBound roachpb.Key
if entireRange {
iter := eng.NewMVCCIterator(MVCCKeyIterKind, IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
LowerBound: keys.LocalMax,
UpperBound: keys.MaxKey,
})
ms, err := ComputeStatsForRange(iter, keys.LocalMax, keys.MaxKey, 0)
iter.Close()
ms, err := ComputeStats(eng, keys.LocalMax, keys.MaxKey, 0)
require.NoError(b, err)

leftPeekBound = keys.LocalMax
Expand Down Expand Up @@ -1457,13 +1447,7 @@ func runMVCCComputeStats(
var stats enginepb.MVCCStats
var err error
for i := 0; i < b.N; i++ {
iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
LowerBound: keys.LocalMax,
UpperBound: roachpb.KeyMax,
})
stats, err = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0)
iter.Close()
stats, err = ComputeStats(eng, keys.LocalMax, keys.MaxKey, 0)
if err != nil {
b.Fatal(err)
}
Expand Down
17 changes: 0 additions & 17 deletions pkg/storage/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,23 +252,6 @@ type MVCCIterator interface {
// ValueProto unmarshals the value the iterator is currently
// pointing to using a protobuf decoder.
ValueProto(msg protoutil.Message) error
// ComputeStats scans the underlying engine from start to end keys and
// computes stats counters based on the values. This method is used after a
// range is split to recompute stats for each subrange. The nowNanos arg
// specifies the wall time in nanoseconds since the epoch and is used to
// compute the total age of intents and garbage.
//
// To properly account for intents and range keys, the iterator must be
// created with MVCCKeyAndIntentsIterKind and IterKeyTypePointsAndRanges,
// and the LowerBound and UpperBound must be set equal to start and end
// in order for range keys to be truncated to the bounds.
//
// TODO(erikgrinaker): This should be replaced by ComputeStatsForRange
// instead, which should set up its own iterator with appropriate options.
// This isn't currently done in order to do spanset assertions on it, but this
// could be better solved by checking the iterator bounds in NewMVCCIterator
// and requiring callers to set them appropriately.
ComputeStats(start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error)
// FindSplitKey finds a key from the given span such that the left side of
// the split is roughly targetSize bytes. The returned key will never be
// chosen from the key ranges listed in keys.NoSplitSpans and will always
Expand Down
Loading

0 comments on commit 7f61dfd

Please sign in to comment.