From d47f932e9b609b9835da8c7a6dcc90178982c2bb Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 19 Jan 2022 09:57:00 +0000 Subject: [PATCH] kvserver: add `SSTTimestamp` parameter for `AddSSTable` This patch adds an `SSTTimestamp` parameter for `AddSSTable`. When set, the client promises that all MVCC timestamps in the given SST are equal to `SSTTimestamp`. When used together with `WriteAtRequestTimestamp`, this can avoid the cost of rewriting the SST timestamps if the `SSTTimestamp` already equals the request `Timestamp`. Release note: None --- pkg/kv/kvserver/batcheval/BUILD.bazel | 1 + pkg/kv/kvserver/batcheval/cmd_add_sstable.go | 38 ++++++++++++- .../batcheval/cmd_add_sstable_test.go | 57 ++++++++++++++----- pkg/roachpb/api.proto | 16 +++++- 4 files changed, 92 insertions(+), 20 deletions(-) diff --git a/pkg/kv/kvserver/batcheval/BUILD.bazel b/pkg/kv/kvserver/batcheval/BUILD.bazel index 093634e70587..fd98007f876a 100644 --- a/pkg/kv/kvserver/batcheval/BUILD.bazel +++ b/pkg/kv/kvserver/batcheval/BUILD.bazel @@ -140,6 +140,7 @@ go_test( "//pkg/testutils/skip", "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", + "//pkg/util", "//pkg/util/hlc", "//pkg/util/leaktest", "//pkg/util/log", diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index 7340b0871c85..88c639b291a5 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/errors" @@ -56,8 +57,15 @@ func EvalAddSSTable( // If requested, rewrite the SST's MVCC timestamps to the request timestamp. // This ensures the writes comply with the timestamp cache and closed // timestamp, i.e. by not writing to timestamps that have already been - // observed or closed. - if args.WriteAtRequestTimestamp { + // observed or closed. If the race detector is enabled, also assert that + // the provided SST only contains the expected timestamps. + if util.RaceEnabled && !args.SSTTimestamp.IsEmpty() { + if err := assertSSTTimestamp(sst, args.SSTTimestamp); err != nil { + return result.Result{}, err + } + } + if args.WriteAtRequestTimestamp && + (args.SSTTimestamp.IsEmpty() || h.Timestamp != args.SSTTimestamp) { sst, err = storage.UpdateSSTTimestamps(sst, h.Timestamp) if err != nil { return result.Result{}, errors.Wrap(err, "updating SST timestamps") @@ -260,3 +268,29 @@ func EvalAddSSTable( }, }, nil } + +func assertSSTTimestamp(sst []byte, ts hlc.Timestamp) error { + iter, err := storage.NewMemSSTIterator(sst, true) + if err != nil { + return err + } + defer iter.Close() + + iter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) + for { + ok, err := iter.Valid() + if err != nil { + return err + } + if !ok { + return nil + } + + key := iter.UnsafeKey() + if key.Timestamp != ts { + return errors.AssertionFailedf("incorrect timestamp %s for SST key %s (expected %s)", + key.Timestamp, key.Key, ts) + } + iter.Next() + } +} diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index 1344a30b13ab..b0f73a384f72 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -30,6 +30,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -70,15 +71,17 @@ func TestEvalAddSSTable(t *testing.T) { // These are run with IngestAsWrites both disabled and enabled. testcases := map[string]struct { - data []mvccKV - sst []mvccKV - atReqTS int64 // WriteAtRequestTimestamp with given timestamp - noConflict bool // DisallowConflicts - noShadow bool // DisallowShadowing - noShadowBelow int64 // DisallowShadowingBelow - expect []mvccKV - expectErr interface{} // error type, substring, or true (any error) - expectStatsEst bool // expect MVCCStats.ContainsEstimates, don't check stats + data []mvccKV + sst []mvccKV + sstTimestamp int64 // SSTTimestamp set to given timestamp + atReqTS int64 // WriteAtRequestTimestamp with given timestamp + noConflict bool // DisallowConflicts + noShadow bool // DisallowShadowing + noShadowBelow int64 // DisallowShadowingBelow + expect []mvccKV + expectErr interface{} // error type, substring, or true (any error) + expectErrUnderRace interface{} + expectStatsEst bool // expect MVCCStats.ContainsEstimates, don't check stats }{ // Blind writes. "blind writes below existing": { @@ -610,6 +613,25 @@ func TestEvalAddSSTable(t *testing.T) { sst: []mvccKV{{"a", 7, "a8"}}, expectErr: &roachpb.WriteTooOldError{}, }, + + // SSTTimestamp + "SSTTimestamp works with WriteAtRequestTimestamp": { + atReqTS: 7, + data: []mvccKV{{"a", 6, "a6"}}, + sst: []mvccKV{{"a", 7, "a7"}}, + sstTimestamp: 7, + expect: []mvccKV{{"a", 7, "a7"}, {"a", 6, "a6"}}, + expectStatsEst: true, + }, + "SSTTimestamp doesn't rewrite with incorrect timestamp, but errors under race": { + atReqTS: 8, + data: []mvccKV{{"a", 6, "a6"}}, + sst: []mvccKV{{"a", 7, "a7"}}, + sstTimestamp: 8, + expect: []mvccKV{{"a", 7, "a7"}, {"a", 6, "a6"}}, + expectErrUnderRace: `incorrect timestamp 0.000000007,0 for SST key "a" (expected 0.000000008,0)`, + expectStatsEst: true, + }, } testutils.RunTrueAndFalse(t, "IngestAsWrites", func(t *testing.T, ingestAsWrites bool) { for name, tc := range testcases { @@ -657,20 +679,25 @@ func TestEvalAddSSTable(t *testing.T) { DisallowShadowing: tc.noShadow, DisallowShadowingBelow: hlc.Timestamp{WallTime: tc.noShadowBelow}, WriteAtRequestTimestamp: tc.atReqTS != 0, + SSTTimestamp: hlc.Timestamp{WallTime: tc.sstTimestamp}, IngestAsWrites: ingestAsWrites, }, }, resp) - if tc.expectErr != nil { + expectErr := tc.expectErr + if expectErr == nil && tc.expectErrUnderRace != nil && util.RaceEnabled { + expectErr = tc.expectErrUnderRace + } + if expectErr != nil { require.Error(t, err) - if b, ok := tc.expectErr.(bool); ok && b { + if b, ok := expectErr.(bool); ok && b { // any error is fine - } else if expectMsg, ok := tc.expectErr.(string); ok { + } else if expectMsg, ok := expectErr.(string); ok { require.Contains(t, err.Error(), expectMsg) - } else if expectErr, ok := tc.expectErr.(error); ok { - require.True(t, errors.HasType(err, expectErr), "expected %T, got %v", expectErr, err) + } else if e, ok := expectErr.(error); ok { + require.True(t, errors.HasType(err, e), "expected %T, got %v", e, err) } else { - require.Fail(t, "invalid expectErr", "expectErr=%v", tc.expectErr) + require.Fail(t, "invalid expectErr", "expectErr=%v", expectErr) } return } diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index eb8e377b5c02..89191599ced1 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -1572,9 +1572,9 @@ message AdminVerifyProtectedTimestampResponse { } // AddSSTableRequest contains arguments to the AddSSTable method, which links an -// SST file into the Pebble log-structured merge-tree. The SST should only -// contain committed versioned values with non-zero MVCC timestamps (no intents -// or inline values) and no tombstones, but this is only fully enforced when +// SST file into the Pebble log-structured merge-tree. The SST must only contain +// committed versioned values with non-zero MVCC timestamps (no intents or +// inline values) and no tombstones, but this is only fully enforced when // WriteAtRequestTimestamp is enabled, for performance. It cannot be used in a // transaction, cannot be split across ranges, and must be alone in a batch. // @@ -1633,6 +1633,16 @@ message AddSSTableRequest { // Added in 22.1, so check the MVCCAddSSTable version gate before using. bool write_at_request_timestamp = 6; + // SSTTimestamp is a promise from the client that all MVCC timestamps in the + // SST equal the provided timestamp, and that there are no inline values, + // intents, or tombstones. When used together with WriteAtRequestTimestamp, + // this can avoid an SST rewrite (and the associated overhead) if the SST + // timestamp equals the request timestamp (i.e. if it was provided by the + // client and the request was not pushed due to e.g. the closed timestamp or + // contention). + util.hlc.Timestamp sst_timestamp = 9 + [(gogoproto.customname) = "SSTTimestamp", (gogoproto.nullable) = false]; + // DisallowConflicts will check for MVCC conflicts with existing keys, i.e. // scan for existing keys with a timestamp at or above the SST key and // return WriteTooOldError (possibly retrying). It also ensures MVCC