From 343335ada6d05016b26dc20c2aa1bcebe71d351d Mon Sep 17 00:00:00 2001
From: Rafi Shamim
Date: Wed, 15 Jun 2022 15:40:57 -0400
Subject: [PATCH 1/2] builtins: add pg_backend_pid()
Release note (sql change): Updated the pg_backend_pid() builtin function
so it matches with the data in the query cancellation key created during
session initialization. The function is just for compatibility, and it
does not return a real process ID.
---
docs/generated/sql/functions.md | 2 ++
pkg/sql/conn_executor.go | 1 +
.../pgwire/pgwirecancel/backend_key_data.go | 7 ++++
pkg/sql/sem/builtins/pg_builtins.go | 10 ++++--
pkg/sql/sem/eval/BUILD.bazel | 1 +
pkg/sql/sem/eval/context.go | 5 +++
pkg/testutils/pgtest/pgtest.go | 34 +++++++++++++++++++
7 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index 53728b7ed912..7e68f34673fa 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -3419,6 +3419,8 @@ A write probe will effectively probe reads as well.
pg_column_is_updatable(reloid: oid, attnum: int2, include_triggers: bool) → bool | Returns whether the given column can be updated.
diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go
index a5c13e97649e..637289efc691 100644
--- a/pkg/sql/conn_executor.go
+++ b/pkg/sql/conn_executor.go
@@ -2674,6 +2674,7 @@ func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalCo
RangeProber: p.execCfg.RangeProber,
StmtDiagnosticsRequestInserter: ex.server.cfg.StmtDiagnosticsRecorder.InsertRequest,
CatalogBuiltins: &p.evalCatalogBuiltins,
+ QueryCancelKey: ex.queryCancelKey,
},
Tracing: &ex.sessionTracing,
MemMetrics: &ex.memMetrics,
diff --git a/pkg/sql/pgwire/pgwirecancel/backend_key_data.go b/pkg/sql/pgwire/pgwirecancel/backend_key_data.go
index 82b3ac4272ee..1b151d9bc2dc 100644
--- a/pkg/sql/pgwire/pgwirecancel/backend_key_data.go
+++ b/pkg/sql/pgwire/pgwirecancel/backend_key_data.go
@@ -70,5 +70,12 @@ func (b BackendKeyData) GetSQLInstanceID() base.SQLInstanceID {
bits = bits &^ leadingBitMask
// Use the upper 32 bits as the sqlInstanceID.
return base.SQLInstanceID(bits >> 32)
+}
+
+// GetPGBackendPID returns the upper 32 bits of this BackendKeyData. In Postgres,
+// this is the process ID, but we expose it only for compatibility.
+func (b BackendKeyData) GetPGBackendPID() uint32 {
+ bits := uint64(b)
+ return uint32(bits >> 32)
}
diff --git a/pkg/sql/sem/builtins/pg_builtins.go b/pkg/sql/sem/builtins/pg_builtins.go
index e6c6e90813c1..5aa8c26696c0 100644
--- a/pkg/sql/sem/builtins/pg_builtins.go
+++ b/pkg/sql/sem/builtins/pg_builtins.go
@@ -540,10 +540,14 @@ var pgBuiltins = map[string]builtinDefinition{
tree.Overload{
Types: tree.ArgTypes{},
ReturnType: tree.FixedReturnType(types.Int),
- Fn: func(_ *eval.Context, _ tree.Datums) (tree.Datum, error) {
- return tree.NewDInt(-1), nil
+ Fn: func(ctx *eval.Context, _ tree.Datums) (tree.Datum, error) {
+ pid := ctx.QueryCancelKey.GetPGBackendPID()
+ return tree.NewDInt(tree.DInt(pid)), nil
},
- Info: notUsableInfo,
+ Info: "Returns a numerical ID attached to this session. This ID is " +
+ "part of the query cancellation key used by the wire protocol. This " +
+ "function was only added for compatibility, and unlike in Postgres, the" +
+ "returned value does not correspond to a real process ID.",
Volatility: volatility.Stable,
},
),
diff --git a/pkg/sql/sem/eval/BUILD.bazel b/pkg/sql/sem/eval/BUILD.bazel
index 7b20225264f0..adca2fb4ca0f 100644
--- a/pkg/sql/sem/eval/BUILD.bazel
+++ b/pkg/sql/sem/eval/BUILD.bazel
@@ -50,6 +50,7 @@ go_library(
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/pgwire/pgnotice",
+ "//pkg/sql/pgwire/pgwirecancel",
"//pkg/sql/privilege",
"//pkg/sql/roleoption",
"//pkg/sql/sem/cast",
diff --git a/pkg/sql/sem/eval/context.go b/pkg/sql/sem/eval/context.go
index 26eff83ebd06..da12a3c3df18 100644
--- a/pkg/sql/sem/eval/context.go
+++ b/pkg/sql/sem/eval/context.go
@@ -24,6 +24,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
+ "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirecancel"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
@@ -211,6 +212,10 @@ type Context struct {
// CatalogBuiltins is used by various builtins which depend on looking up
// catalog information. Unlike the Planner, it is available in DistSQL.
CatalogBuiltins CatalogBuiltins
+
+ // QueryCancelKey is the key used by the pgwire protocol to cancel the
+ // query currently running in this session.
+ QueryCancelKey pgwirecancel.BackendKeyData
}
var _ tree.ParseTimeContext = &Context{}
diff --git a/pkg/testutils/pgtest/pgtest.go b/pkg/testutils/pgtest/pgtest.go
index ddc414dc1294..b22a41b00a9b 100644
--- a/pkg/testutils/pgtest/pgtest.go
+++ b/pkg/testutils/pgtest/pgtest.go
@@ -18,6 +18,7 @@ import (
"fmt"
"net"
"reflect"
+ "strconv"
"strings"
"testing"
@@ -78,11 +79,44 @@ func NewPGTest(ctx context.Context, addr, user string) (*PGTest, error) {
if backendKeyData == nil {
return nil, errors.Errorf("did not receive BackendKeyData")
}
+ if err := checkPGBackendPID(p, backendKeyData); err != nil {
+ return nil, err
+ }
+
p.isCockroachDB = foundCrdb
success = err == nil
return p, err
}
+func checkPGBackendPID(p *PGTest, backendKeyData *pgproto3.BackendKeyData) error {
+ if err := p.fe.Send(&pgproto3.Query{
+ String: "SELECT pg_backend_pid();",
+ }); err != nil {
+ return errors.Wrap(err, "fetching pg_backend_pid")
+ }
+ msgs, err := p.Until(false /* keepErrMsg */, &pgproto3.ReadyForQuery{})
+ if err != nil {
+ return errors.Wrap(err, "fetching pg_backend_pid")
+ }
+ matched := false
+ for _, msg := range msgs {
+ if d, ok := msg.(*pgproto3.DataRow); ok {
+ pid, err := strconv.Atoi(string(d.Values[0]))
+ if err != nil {
+ return errors.Wrap(err, "parsing pg_backend_pid")
+ }
+ if uint32(pid) != backendKeyData.ProcessID {
+ return errors.Errorf("wrong pg_backend_pid; wanted %d, got %d", backendKeyData.ProcessID, pid)
+ }
+ matched = true
+ }
+ }
+ if !matched {
+ return errors.Errorf("could not retrieve pg_backend_pid")
+ }
+ return nil
+}
+
// Close sends a Terminate message and closes the connection.
func (p *PGTest) Close() error {
defer p.conn.Close()
From 04be3bca9e201a2503c298c4d96494f483150abc Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Wed, 15 Jun 2022 09:12:49 +0000
Subject: [PATCH 2/2] batcheval: allow writing tombstones in `AddSSTable`
This patch allows writing MVCC point tombstones in `AddSSTable`, which
will be needed by cluster-to-cluster replication. Previously, SSTs
containing tombstones could error or be silently accepted. The version
gate `AddSSTableTombstones` has been added to allow clients to rely on
this behavior.
Note that `AddSSTable` has never made any attempt at correctly
calculating `MVCCStats.GCBytesAge` when shadowing existing data, and
this patch does not attempt to either. This will be addressed later.
Release note: None
---
.../settings/settings-for-tenants.txt | 2 +-
docs/generated/settings/settings.html | 2 +-
pkg/clusterversion/cockroach_versions.go | 7 +
pkg/clusterversion/key_string.go | 5 +-
pkg/kv/kvserver/batcheval/cmd_add_sstable.go | 19 +--
.../batcheval/cmd_add_sstable_test.go | 143 ++++++++++--------
pkg/roachpb/api.proto | 9 +-
pkg/storage/sst.go | 17 ++-
8 files changed, 113 insertions(+), 91 deletions(-)
diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index 2c239f57fa8c..dad2d8c8f11a 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -281,4 +281,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as :. If no port is specified, 4317 will be used.
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https:///#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used.
-version version 22.1-18 set the active cluster version in the format '.'
+version version 22.1-20 set the active cluster version in the format '.'
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index 1a8892748f39..bd41c08e3c9b 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -212,6 +212,6 @@
trace.opentelemetry.collector | string |
| address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as :. If no port is specified, 4317 will be used. |
trace.span_registry.enabled | boolean | true | if set, ongoing traces can be seen at https:///#/debug/tracez |
trace.zipkin.collector | string |
| the address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. |
-version | version | 22.1-18 | set the active cluster version in the format '.' |
+version | version | 22.1-20 | set the active cluster version in the format '.' |
diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go
index 4fc346f197c2..d414b2571e33 100644
--- a/pkg/clusterversion/cockroach_versions.go
+++ b/pkg/clusterversion/cockroach_versions.go
@@ -377,6 +377,9 @@ const (
// probabilistically collects stmt bundles, controlled by the user provided
// sampling rate.
SampledStmtDiagReqs
+ // AddSSTableTombstones allows writing MVCC point tombstones via AddSSTable.
+ // Previously, SSTs containing these could error.
+ AddSSTableTombstones
// *************************************************
// Step (1): Add new versions here.
@@ -666,6 +669,10 @@ var versionsSingleton = keyedVersions{
Key: SampledStmtDiagReqs,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 18},
},
+ {
+ Key: AddSSTableTombstones,
+ Version: roachpb.Version{Major: 22, Minor: 1, Internal: 20},
+ },
// *************************************************
// Step (2): Add new versions here.
diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go
index f583164116bb..29f2e5f723b6 100644
--- a/pkg/clusterversion/key_string.go
+++ b/pkg/clusterversion/key_string.go
@@ -71,11 +71,12 @@ func _() {
_ = x[MVCCRangeTombstones-60]
_ = x[UpgradeSequenceToBeReferencedByID-61]
_ = x[SampledStmtDiagReqs-62]
+ _ = x[AddSSTableTombstones-63]
}
-const _Key_name = "V21_2Start22_1TargetBytesAvoidExcessAvoidDrainingNamesDrainingNamesMigrationTraceIDDoesntImplyStructuredRecordingAlterSystemTableStatisticsAddAvgSizeColMVCCAddSSTableInsertPublicSchemaNamespaceEntryOnRestoreUnsplitRangesInAsyncGCJobsValidateGrantOptionPebbleFormatBlockPropertyCollectorProbeRequestSelectRPCsTakeTracingInfoInbandPreSeedTenantSpanConfigsSeedTenantSpanConfigsPublicSchemasWithDescriptorsEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStoreScanWholeRowsSCRAMAuthenticationUnsafeLossOfQuorumRecoveryRangeLogAlterSystemProtectedTimestampAddColumnEnableProtectedTimestampsForTenantDeleteCommentsWithDroppedIndexesRemoveIncompatibleDatabasePrivilegesAddRaftAppliedIndexTermMigrationPostAddRaftAppliedIndexTermMigrationDontProposeWriteTimestampForLeaseTransfersTenantSettingsTableEnablePebbleFormatVersionBlockPropertiesDisableSystemConfigGossipTriggerMVCCIndexBackfillerEnableLeaseHolderRemovalBackupResolutionInJobLooselyCoupledRaftLogTruncationChangefeedIdlenessBackupDoesNotOverwriteLatestAndCheckpointEnableDeclarativeSchemaChangerRowLevelTTLPebbleFormatSplitUserKeysMarkedIncrementalBackupSubdirDateStyleIntervalStyleCastRewriteEnableNewStoreRebalancerClusterLocksVirtualTableAutoStatsTableSettingsForecastStatsSuperRegionsEnableNewChangefeedOptionsSpanCountTablePreSeedSpanCountTableSeedSpanCountTableV22_1Start22_2LocalTimestampsEnsurePebbleFormatVersionRangeKeysEnablePebbleFormatVersionRangeKeysTrigramInvertedIndexesRemoveGrantPrivilegeMVCCRangeTombstonesUpgradeSequenceToBeReferencedByIDSampledStmtDiagReqs"
+const _Key_name = "V21_2Start22_1TargetBytesAvoidExcessAvoidDrainingNamesDrainingNamesMigrationTraceIDDoesntImplyStructuredRecordingAlterSystemTableStatisticsAddAvgSizeColMVCCAddSSTableInsertPublicSchemaNamespaceEntryOnRestoreUnsplitRangesInAsyncGCJobsValidateGrantOptionPebbleFormatBlockPropertyCollectorProbeRequestSelectRPCsTakeTracingInfoInbandPreSeedTenantSpanConfigsSeedTenantSpanConfigsPublicSchemasWithDescriptorsEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStoreScanWholeRowsSCRAMAuthenticationUnsafeLossOfQuorumRecoveryRangeLogAlterSystemProtectedTimestampAddColumnEnableProtectedTimestampsForTenantDeleteCommentsWithDroppedIndexesRemoveIncompatibleDatabasePrivilegesAddRaftAppliedIndexTermMigrationPostAddRaftAppliedIndexTermMigrationDontProposeWriteTimestampForLeaseTransfersTenantSettingsTableEnablePebbleFormatVersionBlockPropertiesDisableSystemConfigGossipTriggerMVCCIndexBackfillerEnableLeaseHolderRemovalBackupResolutionInJobLooselyCoupledRaftLogTruncationChangefeedIdlenessBackupDoesNotOverwriteLatestAndCheckpointEnableDeclarativeSchemaChangerRowLevelTTLPebbleFormatSplitUserKeysMarkedIncrementalBackupSubdirDateStyleIntervalStyleCastRewriteEnableNewStoreRebalancerClusterLocksVirtualTableAutoStatsTableSettingsForecastStatsSuperRegionsEnableNewChangefeedOptionsSpanCountTablePreSeedSpanCountTableSeedSpanCountTableV22_1Start22_2LocalTimestampsEnsurePebbleFormatVersionRangeKeysEnablePebbleFormatVersionRangeKeysTrigramInvertedIndexesRemoveGrantPrivilegeMVCCRangeTombstonesUpgradeSequenceToBeReferencedByIDSampledStmtDiagReqsAddSSTableTombstones"
-var _Key_index = [...]uint16{0, 5, 14, 36, 54, 76, 113, 152, 166, 207, 233, 252, 286, 298, 329, 353, 374, 402, 432, 460, 481, 494, 513, 547, 585, 619, 651, 687, 719, 755, 797, 816, 856, 888, 907, 931, 952, 983, 1001, 1042, 1072, 1083, 1114, 1137, 1170, 1194, 1218, 1240, 1253, 1265, 1291, 1305, 1326, 1344, 1349, 1358, 1373, 1407, 1441, 1463, 1483, 1502, 1535, 1554}
+var _Key_index = [...]uint16{0, 5, 14, 36, 54, 76, 113, 152, 166, 207, 233, 252, 286, 298, 329, 353, 374, 402, 432, 460, 481, 494, 513, 547, 585, 619, 651, 687, 719, 755, 797, 816, 856, 888, 907, 931, 952, 983, 1001, 1042, 1072, 1083, 1114, 1137, 1170, 1194, 1218, 1240, 1253, 1265, 1291, 1305, 1326, 1344, 1349, 1358, 1373, 1407, 1441, 1463, 1483, 1502, 1535, 1554, 1574}
func (i Key) String() string {
if i < 0 || i >= Key(len(_Key_index)-1) {
diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go
index d8ad1988a18e..da319d2e975a 100644
--- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go
+++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go
@@ -428,32 +428,21 @@ func assertSSTContents(sst []byte, sstTimestamp hlc.Timestamp, stats *enginepb.M
defer iter.Close()
// Check SST KV pairs.
- iter.SeekGE(storage.MVCCKey{Key: keys.MinKey})
- for {
- ok, err := iter.Valid()
- if err != nil {
+ for iter.SeekGE(storage.MVCCKey{Key: keys.MinKey}); ; iter.Next() {
+ if ok, err := iter.Valid(); err != nil {
return err
- }
- if !ok {
+ } else if !ok {
break
}
- key, valueRaw := iter.UnsafeKey(), iter.UnsafeValue()
- value, err := storage.DecodeMVCCValue(valueRaw)
- if err != nil {
- return err
- }
+ key := iter.UnsafeKey()
if key.Timestamp.IsEmpty() {
return errors.AssertionFailedf("SST contains inline value or intent for key %s", key)
}
- if value.IsTombstone() {
- return errors.AssertionFailedf("SST contains tombstone for key %s", key)
- }
if sstTimestamp.IsSet() && key.Timestamp != sstTimestamp {
return errors.AssertionFailedf("SST has unexpected timestamp %s (expected %s) for key %s",
key.Timestamp, sstTimestamp, key.Key)
}
- iter.Next()
}
// Compare statistics with those passed by client. We calculate them at the
diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
index 3de05a8b8ef9..bc1f9d96d251 100644
--- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
+++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
@@ -67,15 +67,17 @@ func TestEvalAddSSTable(t *testing.T) {
}{
// Blind writes.
"blind writes below existing": {
- data: []sstutil.KV{{"a", 5, "a5"}, {"b", 7, ""}},
- sst: []sstutil.KV{{"a", 3, "sst"}, {"b", 2, "sst"}},
- expect: []sstutil.KV{{"a", 5, "a5"}, {"a", 3, "sst"}, {"b", 7, ""}, {"b", 2, "sst"}},
+ data: []sstutil.KV{{"a", 5, "a5"}, {"b", 7, ""}, {"c", 6, "c6"}},
+ sst: []sstutil.KV{{"a", 3, "sst"}, {"b", 2, "sst"}, {"c", 3, ""}},
+ expect: []sstutil.KV{
+ {"a", 5, "a5"}, {"a", 3, "sst"}, {"b", 7, ""}, {"b", 2, "sst"}, {"c", 6, "c6"}, {"c", 3, ""},
+ },
expectStatsEst: true,
},
"blind replaces existing": {
- data: []sstutil.KV{{"a", 2, "a2"}},
- sst: []sstutil.KV{{"a", 2, "sst"}},
- expect: []sstutil.KV{{"a", 2, "sst"}},
+ data: []sstutil.KV{{"a", 2, "a2"}, {"b", 2, "b2"}},
+ sst: []sstutil.KV{{"a", 2, "sst"}, {"b", 2, ""}},
+ expect: []sstutil.KV{{"a", 2, "sst"}, {"b", 2, ""}},
expectStatsEst: true,
},
"blind errors on AddSSTableRequireAtRequestTimestamp": {
@@ -100,11 +102,10 @@ func TestEvalAddSSTable(t *testing.T) {
expect: []sstutil.KV{{"b", intentTS, "b0"}, {"c", 1, "sst"}, {"d", 1, "sst"}},
expectStatsEst: true,
},
- "blind writes tombstones unless race": { // unfortunately, for performance
+ "blind writes tombstones": {
sst: []sstutil.KV{{"a", 1, ""}},
expect: []sstutil.KV{{"a", 1, ""}},
expectStatsEst: true,
- expectErrRace: `SST contains tombstone for key "a"/0.000000001,0`,
},
"blind writes SST inline values unless race": { // unfortunately, for performance
sst: []sstutil.KV{{"a", 0, "inline"}},
@@ -135,12 +136,11 @@ func TestEvalAddSSTable(t *testing.T) {
expect: []sstutil.KV{{"a", 10, "a1"}, {"b", 10, "b1"}},
expectStatsEst: true,
},
- "SSTTimestampToRequestTimestamp writes tombstones unless race": { // unfortunately, for performance
+ "SSTTimestampToRequestTimestamp writes tombstones": {
reqTS: 10,
toReqTS: 1,
sst: []sstutil.KV{{"a", 1, ""}},
expect: []sstutil.KV{{"a", 10, ""}},
- expectErrRace: `SST contains tombstone for key "a"/0.000000001,0`,
expectStatsEst: true,
},
"SSTTimestampToRequestTimestamp rejects incorrect SST timestamp": {
@@ -221,9 +221,9 @@ func TestEvalAddSSTable(t *testing.T) {
reqTS: 5,
toReqTS: 1,
noShadow: true,
- data: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}},
- sst: []sstutil.KV{{"a", 1, "a5"}, {"b", 1, "b5"}},
- expect: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}},
+ data: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}, {"c", 5, ""}},
+ sst: []sstutil.KV{{"a", 1, "a5"}, {"b", 1, "b5"}, {"c", 1, ""}},
+ expect: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}, {"c", 5, ""}},
},
"SSTTimestampToRequestTimestamp errors with DisallowShadowingBelow equal value above existing below limit": {
reqTS: 7,
@@ -295,18 +295,16 @@ func TestEvalAddSSTable(t *testing.T) {
sst: []sstutil.KV{{"a", 3, "a3"}},
expectErr: &roachpb.WriteTooOldError{},
},
- "DisallowConflicts allows new SST tombstones": { // unfortunately, for performance
- noConflict: true,
- sst: []sstutil.KV{{"a", 3, ""}},
- expect: []sstutil.KV{{"a", 3, ""}},
- expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`,
+ "DisallowConflicts allows new SST tombstones": {
+ noConflict: true,
+ sst: []sstutil.KV{{"a", 3, ""}},
+ expect: []sstutil.KV{{"a", 3, ""}},
},
- "DisallowConflicts rejects SST tombstones when shadowing": {
- noConflict: true,
- data: []sstutil.KV{{"a", 2, "a2"}},
- sst: []sstutil.KV{{"a", 3, ""}},
- expectErr: "SST values cannot be tombstones",
- expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`,
+ "DisallowConflicts allows SST tombstones when shadowing": {
+ noConflict: true,
+ data: []sstutil.KV{{"a", 2, "a2"}},
+ sst: []sstutil.KV{{"a", 3, ""}},
+ expect: []sstutil.KV{{"a", 3, ""}, {"a", 2, "a2"}},
},
"DisallowConflicts allows new SST inline values": { // unfortunately, for performance
noConflict: true,
@@ -383,18 +381,16 @@ func TestEvalAddSSTable(t *testing.T) {
sst: []sstutil.KV{{"a", 3, "a3"}},
expect: []sstutil.KV{{"a", 3, "a3"}},
},
- "DisallowShadowing allows new SST tombstones": { // unfortunately, for performance
- noShadow: true,
- sst: []sstutil.KV{{"a", 3, ""}},
- expect: []sstutil.KV{{"a", 3, ""}},
- expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`,
+ "DisallowShadowing allows new SST tombstones": {
+ noShadow: true,
+ sst: []sstutil.KV{{"a", 3, ""}},
+ expect: []sstutil.KV{{"a", 3, ""}},
},
"DisallowShadowing rejects SST tombstones when shadowing": {
- noShadow: true,
- data: []sstutil.KV{{"a", 2, "a2"}},
- sst: []sstutil.KV{{"a", 3, ""}},
- expectErr: "SST values cannot be tombstones",
- expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`,
+ noShadow: true,
+ data: []sstutil.KV{{"a", 2, "a2"}},
+ sst: []sstutil.KV{{"a", 3, ""}},
+ expectErr: `ingested key collides with an existing one: "a"`,
},
"DisallowShadowing allows new SST inline values": { // unfortunately, for performance
noShadow: true,
@@ -502,18 +498,22 @@ func TestEvalAddSSTable(t *testing.T) {
sst: []sstutil.KV{{"a", 3, "a3"}},
expectErr: `ingested key collides with an existing one: "a"`,
},
- "DisallowShadowingBelow allows new SST tombstones": { // unfortunately, for performance
+ "DisallowShadowingBelow is not generally idempotent with tombstones": {
+ noShadowBelow: 5,
+ data: []sstutil.KV{{"a", 3, ""}},
+ sst: []sstutil.KV{{"a", 3, ""}},
+ expectErr: &roachpb.WriteTooOldError{},
+ },
+ "DisallowShadowingBelow allows new SST tombstones": {
noShadowBelow: 5,
sst: []sstutil.KV{{"a", 3, ""}},
expect: []sstutil.KV{{"a", 3, ""}},
- expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`,
},
- "DisallowShadowingBelow rejects SST tombstones when shadowing": {
+ "DisallowShadowingBelow rejects SST tombstones when shadowing below": {
noShadowBelow: 5,
data: []sstutil.KV{{"a", 2, "a2"}},
sst: []sstutil.KV{{"a", 3, ""}},
- expectErr: "SST values cannot be tombstones",
- expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`,
+ expectErr: `ingested key collides with an existing one: "a"`,
},
"DisallowShadowingBelow allows new SST inline values": { // unfortunately, for performance
noShadowBelow: 5,
@@ -558,6 +558,12 @@ func TestEvalAddSSTable(t *testing.T) {
sst: []sstutil.KV{{"a", 3, "sst"}, {"b", 1, "sst"}},
expectErr: `ingested key collides with an existing one: "b"`,
},
+ "DisallowShadowingBelow tombstone above tombstone": {
+ noShadowBelow: 5,
+ data: []sstutil.KV{{"a", 2, ""}, {"a", 1, "a1"}},
+ sst: []sstutil.KV{{"a", 3, ""}},
+ expect: []sstutil.KV{{"a", 3, ""}, {"a", 2, ""}, {"a", 1, "a1"}},
+ },
"DisallowShadowingBelow at limit writes": {
noShadowBelow: 5,
sst: []sstutil.KV{{"a", 5, "sst"}},
@@ -598,6 +604,12 @@ func TestEvalAddSSTable(t *testing.T) {
sst: []sstutil.KV{{"a", 7, "sst"}},
expectErr: `ingested key collides with an existing one: "a"`,
},
+ "DisallowShadowingBelow tombstone above limit errors on existing below limit": {
+ noShadowBelow: 5,
+ data: []sstutil.KV{{"a", 4, "a4"}},
+ sst: []sstutil.KV{{"a", 7, ""}},
+ expectErr: `ingested key collides with an existing one: "a"`,
+ },
"DisallowShadowingBelow above limit errors on existing below limit with same value": {
noShadowBelow: 5,
data: []sstutil.KV{{"a", 4, "a4"}},
@@ -640,6 +652,12 @@ func TestEvalAddSSTable(t *testing.T) {
sst: []sstutil.KV{{"a", 7, "a7"}},
expect: []sstutil.KV{{"a", 7, "a7"}},
},
+ "DisallowShadowingBelow above limit is idempotent with tombstone": {
+ noShadowBelow: 5,
+ data: []sstutil.KV{{"a", 7, ""}},
+ sst: []sstutil.KV{{"a", 7, ""}},
+ expect: []sstutil.KV{{"a", 7, ""}},
+ },
"DisallowShadowingBelow above limit errors below existing": {
noShadowBelow: 5,
data: []sstutil.KV{{"a", 8, "a8"}},
@@ -764,9 +782,8 @@ func TestEvalAddSSTable(t *testing.T) {
UpperBound: keys.MaxKey,
})
defer iter.Close()
- iter.SeekGE(storage.MVCCKey{Key: keys.SystemPrefix})
scan := []sstutil.KV{}
- for {
+ for iter.SeekGE(storage.MVCCKey{Key: keys.SystemPrefix}); ; iter.Next() {
ok, err := iter.Valid()
require.NoError(t, err)
if !ok {
@@ -787,23 +804,21 @@ func TestEvalAddSSTable(t *testing.T) {
require.NoError(t, protoutil.Unmarshal(iter.UnsafeValue(), &meta))
if meta.RawBytes == nil {
// Skip intent metadata records (value emitted separately).
- iter.Next()
continue
}
value, err = roachpb.Value{RawBytes: meta.RawBytes}.GetBytes()
require.NoError(t, err)
}
scan = append(scan, sstutil.KV{key, ts, string(value)})
- iter.Next()
}
require.Equal(t, tc.expect, scan)
// Check that stats were updated correctly.
if tc.expectStatsEst {
- require.True(t, stats.ContainsEstimates > 0, "expected stats to be estimated")
+ require.NotZero(t, stats.ContainsEstimates, "expected stats to be estimated")
} else {
- require.False(t, stats.ContainsEstimates > 0, "found estimated stats")
- require.Equal(t, stats, engineStats(t, engine, stats.LastUpdateNanos))
+ require.Zero(t, stats.ContainsEstimates, "found estimated stats")
+ require.Equal(t, engineStats(t, engine, stats.LastUpdateNanos), stats)
}
})
}
@@ -1121,6 +1136,7 @@ func TestAddSSTableMVCCStats(t *testing.T) {
{"d", 1, "d"},
{"d", 2, ""},
{"e", 1, "e"},
+ {"u", 3, "u"},
{"z", 2, "zzzzzz"},
} {
require.NoError(t, engine.PutMVCC(kv.MVCCKey(), kv.MVCCValue()))
@@ -1133,14 +1149,16 @@ func TestAddSSTableMVCCStats(t *testing.T) {
{"d", 4, "dddd"}, // mvcc-shadow existing deleted d.
{"e", 4, "eeee"}, // mvcc-shadow existing 1b.
{"j", 2, "jj"}, // no colission – via MVCC or LSM – with existing.
+ {"t", 3, ""}, // tombstone, no collission
+ {"u", 5, ""}, // tombstone, shadows existing
})
statsDelta := enginepb.MVCCStats{
- // the sst will think it added 4 keys here, but a, c, and e shadow or are shadowed.
- LiveCount: -3,
- LiveBytes: -109,
- // the sst will think it added 5 keys, but only j is new so 4 are over-counted.
- KeyCount: -4,
- KeyBytes: -20,
+ // the sst will think it added 5 keys here, but a, c, e, and t shadow or are shadowed.
+ LiveCount: -4,
+ LiveBytes: -129,
+ // the sst will think it added 5 keys, but only j and t are new so 5 are over-counted.
+ KeyCount: -5,
+ KeyBytes: -22,
// the sst will think it added 6 values, but since one was a perfect (key+ts)
// collision, it *replaced* the existing value and is over-counted.
ValCount: -1,
@@ -1229,6 +1247,7 @@ func TestAddSSTableMVCCStatsDisallowShadowing(t *testing.T) {
{"b", 6, ""},
{"g", 5, "gg"},
{"r", 1, "rr"},
+ {"t", 3, ""},
{"y", 1, "yy"},
{"y", 2, ""},
{"y", 5, "yyy"},
@@ -1298,12 +1317,14 @@ func TestAddSSTableMVCCStatsDisallowShadowing(t *testing.T) {
// Check that there has been no double counting of stats. All keys in second SST are shadowing.
require.Equal(t, firstSSTStats, *cArgs.Stats)
- // Evaluate the third SST. Two of the three KVs are perfectly shadowing, but
- // there is one valid KV which should contribute to the stats.
+ // Evaluate the third SST. Some of the KVs are perfectly shadowing, but there
+ // are two valid KVs which should contribute to the stats.
sst, start, end = sstutil.MakeSST(t, st, []sstutil.KV{
{"c", 2, "bb"}, // key has the same timestamp and value as the one present in the existing data.
{"e", 2, "ee"},
{"h", 6, "hh"}, // key has the same timestamp and value as the one present in the existing data.
+ {"t", 3, ""}, // identical to existing tombstone.
+ {"x", 7, ""}, // new tombstone.
})
cArgs.Args = &roachpb.AddSSTableRequest{
@@ -1315,15 +1336,15 @@ func TestAddSSTableMVCCStatsDisallowShadowing(t *testing.T) {
_, err = batcheval.EvalAddSSTable(ctx, engine, cArgs, &roachpb.AddSSTableResponse{})
require.NoError(t, err)
- // This is the stats contribution of the KV {"e", 2, "ee"}. This should be
- // the only addition to the cumulative stats, as the other two KVs are
- // perfect shadows of existing data.
+ // This is the stats contribution of the KVs {"e", 2, "ee"} and {"x", 7, ""}.
+ // This should be the only addition to the cumulative stats, as the other
+ // KVs are perfect shadows of existing data.
delta := enginepb.MVCCStats{
LiveCount: 1,
LiveBytes: 21,
- KeyCount: 1,
- KeyBytes: 14,
- ValCount: 1,
+ KeyCount: 2,
+ KeyBytes: 28,
+ ValCount: 2,
ValBytes: 7,
}
@@ -1489,8 +1510,6 @@ func engineStats(t *testing.T, engine storage.Engine, nowNanos int64) *enginepb.
UpperBound: keys.MaxKey,
})
defer iter.Close()
- // We don't care about nowNanos, because the SST can't contain intents or
- // tombstones and all existing intents will be resolved.
stats, err := storage.ComputeStatsForRange(iter, keys.LocalMax, keys.MaxKey, nowNanos)
require.NoError(t, err)
return &stats
diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto
index b423a972a295..b7f558761ada 100644
--- a/pkg/roachpb/api.proto
+++ b/pkg/roachpb/api.proto
@@ -1688,8 +1688,13 @@ message AdminVerifyProtectedTimestampResponse {
// AddSSTableRequest contains arguments to the AddSSTable method, which links an
// SST file into the Pebble log-structured merge-tree. The SST must only contain
// committed versioned values with non-zero MVCC timestamps (no intents or
-// inline values) and no tombstones. It cannot be used in a transaction, cannot
-// be split across ranges, and must be alone in a batch.
+// inline values). It cannot be used in a transaction, cannot be split across
+// ranges, and must be alone in a batch.
+//
+// In 22.1, AddSSTable does not support MVCC point tombstones in SSTs, and may
+// return an error when given these. 22.2 does allow this, but check the
+// AddSSTableTombstones version gate first to make sure there are no 22.1 nodes
+// left in the cluster.
//
// By default, AddSSTable will blindly write the SST contents into Pebble, with
// fixed MVCC timestamps unaffected by pushes. This can violate many CRDB
diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go
index 36fffbbf6abc..64204b93462f 100644
--- a/pkg/storage/sst.go
+++ b/pkg/storage/sst.go
@@ -38,8 +38,8 @@ import (
// is non-empty, disallowShadowing is ignored.
//
// The given SST and reader cannot contain intents or inline values (i.e. zero
-// timestamps), nor tombstones (i.e. empty values), but this is only checked for
-// keys that exist in both sides, for performance.
+// timestamps), but this is only checked for keys that exist in both sides, for
+// performance.
//
// The returned MVCC statistics is a delta between the SST-only statistics and
// their effect when applied, which when added to the SST statistics will adjust
@@ -98,9 +98,6 @@ func CheckSSTConflicts(
if err != nil {
return err
}
- if sstValue.IsTombstone() {
- return errors.New("SST values cannot be tombstones")
- }
if !extKey.IsValue() {
var mvccMeta enginepb.MVCCMetadata
if err = extIter.ValueProto(&mvccMeta); err != nil {
@@ -149,15 +146,19 @@ func CheckSSTConflicts(
totalBytes := metaKeySize + metaValSize
// Update the skipped stats to account for the skipped meta key.
- statsDiff.LiveBytes -= totalBytes
- statsDiff.LiveCount--
+ if !sstValue.IsTombstone() {
+ statsDiff.LiveBytes -= totalBytes
+ statsDiff.LiveCount--
+ }
statsDiff.KeyBytes -= metaKeySize
statsDiff.ValBytes -= metaValSize
statsDiff.KeyCount--
// Update the stats to account for the skipped versioned key/value.
totalBytes = int64(len(sstValueRaw)) + MVCCVersionTimestampSize
- statsDiff.LiveBytes -= totalBytes
+ if !sstValue.IsTombstone() {
+ statsDiff.LiveBytes -= totalBytes
+ }
statsDiff.KeyBytes -= MVCCVersionTimestampSize
statsDiff.ValBytes -= int64(len(sstValueRaw))
statsDiff.ValCount--
|