Skip to content

Commit

Permalink
roachpb: disable roachpb.Value checksum computation
Browse files Browse the repository at this point in the history
Fixes #21435.

This change disables roachpb.Value checksum computation, leaving
the checksum field blank on roachpb.Values.

See #21435 (comment)
for details on the measured performance improvement from this change.

Release note (performance improvement): Unnecessary value
checksums are no longer computed, speeding up database writes.
  • Loading branch information
nvanbenschoten committed Feb 9, 2018
1 parent c93d304 commit 75cbeb8
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 29 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/storageccl/add_sstable_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ func runTestDBAddSSTable(ctx context.Context, t *testing.T, db *client.DB) {
{
key := engine.MVCCKey{Key: []byte("bb"), Timestamp: hlc.Timestamp{WallTime: 1}}
value := roachpb.MakeValueFromString("1")
value.InitChecksum([]byte("foo"))
value.MustInitChecksum([]byte("foo"))
data, err := singleKVSSTable(key, value.RawBytes)
if err != nil {
t.Fatalf("%+v", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/storageccl/engineccl/rocksdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func TestVerifyBatchRepr(t *testing.T) {
var batch engine.RocksDBBatchBuilder
key := engine.MVCCKey{Key: []byte("bb"), Timestamp: hlc.Timestamp{WallTime: 1}}
value := roachpb.MakeValueFromString("1")
value.InitChecksum([]byte("foo"))
value.MustInitChecksum([]byte("foo"))
batch.Put(key, value.RawBytes)
data := batch.Finish()

Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/storageccl/writebatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func TestDBWriteBatch(t *testing.T) {
var batch engine.RocksDBBatchBuilder
key := engine.MVCCKey{Key: []byte("bb"), Timestamp: hlc.Timestamp{WallTime: 1}}
value := roachpb.MakeValueFromString("1")
value.InitChecksum([]byte("foo"))
value.MustInitChecksum([]byte("foo"))
batch.Put(key, value.RawBytes)
data := batch.Finish()

Expand Down
31 changes: 28 additions & 3 deletions pkg/roachpb/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

"github.com/cockroachdb/apd"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
Expand Down Expand Up @@ -236,13 +237,37 @@ func (v *Value) setChecksum(cksum uint32) {
}
}

// InitChecksum initializes a checksum based on the provided key and
// the contents of the value. If the value contains a byte slice, the
// checksum includes it directly.
// ChecksumInterceptor will be called with every value before its checksum is
// conditionally computed. This is used to prove that the decision to init
// checksums on values or not does not affect beneath-raft consistency.
// Interceptor is not safe to modify concurrently with calls to InitChecksum.
var ChecksumInterceptor = func(_ *Value) {}

// InitChecksum initializes a checksum based on the provided key and the
// contents of the value if checksum computation is enabled. If the value
// contains a byte slice, the checksum includes it directly.
//
// If you require that the checksum be set even if checksum computations are
// disabled, use MustInitChecksum insted.
//
// TODO(peter): This method should return an error if the Value is corrupted
// (e.g. the RawBytes field is > 0 but smaller than the header size).
//
// TODO(nvanbenschoten): This method and all users can be removed in any
// binary version > 2.1.
func (v *Value) InitChecksum(key []byte) {
// The branch is behind a constant conditional so that this
// entire method is optimized out in production builds.
if util.RaceEnabled {
ChecksumInterceptor(v)
}
}

// MustInitChecksum is like InitChecksum, but does not depend on the
// shouldInitChecksum setting. This method can be used beneath raft where
// different binaries cannot decide independently whether checksums should
// be initialized or not.
func (v *Value) MustInitChecksum(key []byte) {
if v.RawBytes == nil {
return
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/roachpb/data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func TestValueChecksumEmpty(t *testing.T) {
func TestValueChecksumWithBytes(t *testing.T) {
k := []byte("key")
v := MakeValueFromString("abc")
v.InitChecksum(k)
v.MustInitChecksum(k)
if err := v.Verify(k); err != nil {
t.Error(err)
}
Expand All @@ -245,7 +245,7 @@ func TestValueChecksumWithBytes(t *testing.T) {
}
// Test ClearChecksum and reinitialization of checksum.
v.ClearChecksum()
v.InitChecksum(k)
v.MustInitChecksum(k)
if err := v.Verify(k); err != nil {
t.Error(err)
}
Expand Down
10 changes: 9 additions & 1 deletion pkg/storage/stateloader/stateloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,15 @@ func (rsl StateLoader) LoadMVCCStats(
func (rsl StateLoader) SetMVCCStats(
ctx context.Context, eng engine.ReadWriter, newMS *enginepb.MVCCStats,
) error {
return engine.MVCCPutProto(ctx, eng, nil, rsl.RangeStatsKey(), hlc.Timestamp{}, nil, newMS)
key := rsl.RangeStatsKey()
value := roachpb.Value{}
if err := value.SetProto(newMS); err != nil {
return err
}
// We're beneath Raft and need to maintain strict consistency on this key
// because it is replicated, so we must always initialize the checksum.
value.MustInitChecksum(key)
return engine.MVCCPut(ctx, eng, nil /* ms */, key, hlc.Timestamp{}, value, nil /* txn */)
}

// The rest is not technically part of ReplicaState.
Expand Down
69 changes: 49 additions & 20 deletions pkg/storage/track_raft_protos.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,20 @@ func funcName(f interface{}) string {
return runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name()
}

func forCallers(fn func(runtime.Frame) bool) {
var pcs [256]uintptr
if numCallers := runtime.Callers(1, pcs[:]); numCallers == len(pcs) {
panic(fmt.Sprintf("number of callers %d might have exceeded slice size %d", numCallers, len(pcs)))
}
frames := runtime.CallersFrames(pcs[:])
for {
f, more := frames.Next()
if !fn(f) || !more {
return
}
}
}

// TrackRaftProtos instruments proto marshaling to track protos which are
// marshaled downstream of raft. It returns a function that removes the
// instrumentation and returns the list of downstream-of-raft protos.
Expand All @@ -40,14 +54,27 @@ func TrackRaftProtos() func() []reflect.Type {
processRaftFunc := funcName((*Replica).processRaftCommand)
// We only need to track protos that could cause replica divergence
// by being written to disk downstream of raft.
whitelist := []string{
marshalWhitelist := []string{
// Some raft operations trigger gossip, but we don't require
// strict consistency there.
funcName((*gossip.Gossip).AddInfoProto),
// Replica destroyed errors are written to disk, but they are
// deliberately per-replica values.
funcName((stateloader.StateLoader).SetReplicaDestroyedError),
}
// We only need to track checksum computations for values that could
// cause replica divergence by being written to disk downstream of raft.
checksumWhitelist := []string{
// Some raft operations trigger gossip, but we don't require
// strict consistency there.
funcName((*gossip.Gossip).AddInfo),
// Replica destroyed errors are written to disk, but they are
// deliberately per-replica values.
funcName((stateloader.StateLoader).SetReplicaDestroyedError),
// HardState is unreplicated, so we don't require strict
// consistency on its value.
funcName((stateloader.StateLoader).SetHardState),
}

belowRaftProtos := struct {
syncutil.Mutex
Expand Down Expand Up @@ -81,39 +108,41 @@ func TrackRaftProtos() func() []reflect.Type {
return
}

var pcs [256]uintptr
if numCallers := runtime.Callers(0, pcs[:]); numCallers == len(pcs) {
panic(fmt.Sprintf("number of callers %d might have exceeded slice size %d", numCallers, len(pcs)))
}
frames := runtime.CallersFrames(pcs[:])
for {
f, more := frames.Next()

whitelisted := false
for _, s := range whitelist {
forCallers(func(f runtime.Frame) bool {
for _, s := range marshalWhitelist {
if strings.Contains(f.Function, s) {
whitelisted = true
break
return false
}
}
if whitelisted {
break
}

if strings.Contains(f.Function, processRaftFunc) {
belowRaftProtos.Lock()
belowRaftProtos.inner[t] = struct{}{}
belowRaftProtos.Unlock()
break
return false
}
if !more {
break
return true
})
}

roachpb.ChecksumInterceptor = func(v *roachpb.Value) {
forCallers(func(f runtime.Frame) bool {
for _, s := range checksumWhitelist {
if strings.Contains(f.Function, s) {
return false
}
}
}

if strings.Contains(f.Function, processRaftFunc) {
panic(fmt.Sprintf("unexpected below raft checksum computation on value %v", v))
}
return true
})
}

return func() []reflect.Type {
protoutil.Interceptor = func(_ protoutil.Message) {}
roachpb.ChecksumInterceptor = func(_ *roachpb.Value) {}

belowRaftProtos.Lock()
types := make([]reflect.Type, 0, len(belowRaftProtos.inner))
Expand Down

0 comments on commit 75cbeb8

Please sign in to comment.