diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index 2c239f57fa8c..dad2d8c8f11a 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -281,4 +281,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as trace.opentelemetry.collector
string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as trace.span_registry.enabled
boolean true
if set, ongoing traces can be seen at https://
-trace.zipkin.collector
string the address of a Zipkin instance to receive traces, as
+version
version 22.1-18
set the active cluster version in the format '
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index 0a74169e65ab..3657bcfa71ac 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -3419,6 +3419,8 @@ A write probe will effectively probe reads as well.version
version 22.1-20
set the active cluster version in the format '
oid(int: int) → oid
Converts an integer to an OID.
pg_backend_pid() → int
Returns a numerical ID attached to this session. This ID is part of the query cancellation key used by the wire protocol. This function was only added for compatibility, and unlike in Postgres, thereturned value does not correspond to a real process ID.
+pg_collation_for(str: anyelement) → string
Returns the collation of the argument
pg_column_is_updatable(reloid: oid, attnum: int2, include_triggers: bool) → bool
Returns whether the given column can be updated.
diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 4fc346f197c2..d414b2571e33 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -377,6 +377,9 @@ const ( // probabilistically collects stmt bundles, controlled by the user provided // sampling rate. SampledStmtDiagReqs + // AddSSTableTombstones allows writing MVCC point tombstones via AddSSTable. + // Previously, SSTs containing these could error. + AddSSTableTombstones // ************************************************* // Step (1): Add new versions here. @@ -666,6 +669,10 @@ var versionsSingleton = keyedVersions{ Key: SampledStmtDiagReqs, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 18}, }, + { + Key: AddSSTableTombstones, + Version: roachpb.Version{Major: 22, Minor: 1, Internal: 20}, + }, // ************************************************* // Step (2): Add new versions here. diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index f583164116bb..29f2e5f723b6 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -71,11 +71,12 @@ func _() { _ = x[MVCCRangeTombstones-60] _ = x[UpgradeSequenceToBeReferencedByID-61] _ = x[SampledStmtDiagReqs-62] + _ = x[AddSSTableTombstones-63] } -const _Key_name = "V21_2Start22_1TargetBytesAvoidExcessAvoidDrainingNamesDrainingNamesMigrationTraceIDDoesntImplyStructuredRecordingAlterSystemTableStatisticsAddAvgSizeColMVCCAddSSTableInsertPublicSchemaNamespaceEntryOnRestoreUnsplitRangesInAsyncGCJobsValidateGrantOptionPebbleFormatBlockPropertyCollectorProbeRequestSelectRPCsTakeTracingInfoInbandPreSeedTenantSpanConfigsSeedTenantSpanConfigsPublicSchemasWithDescriptorsEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStoreScanWholeRowsSCRAMAuthenticationUnsafeLossOfQuorumRecoveryRangeLogAlterSystemProtectedTimestampAddColumnEnableProtectedTimestampsForTenantDeleteCommentsWithDroppedIndexesRemoveIncompatibleDatabasePrivilegesAddRaftAppliedIndexTermMigrationPostAddRaftAppliedIndexTermMigrationDontProposeWriteTimestampForLeaseTransfersTenantSettingsTableEnablePebbleFormatVersionBlockPropertiesDisableSystemConfigGossipTriggerMVCCIndexBackfillerEnableLeaseHolderRemovalBackupResolutionInJobLooselyCoupledRaftLogTruncationChangefeedIdlenessBackupDoesNotOverwriteLatestAndCheckpointEnableDeclarativeSchemaChangerRowLevelTTLPebbleFormatSplitUserKeysMarkedIncrementalBackupSubdirDateStyleIntervalStyleCastRewriteEnableNewStoreRebalancerClusterLocksVirtualTableAutoStatsTableSettingsForecastStatsSuperRegionsEnableNewChangefeedOptionsSpanCountTablePreSeedSpanCountTableSeedSpanCountTableV22_1Start22_2LocalTimestampsEnsurePebbleFormatVersionRangeKeysEnablePebbleFormatVersionRangeKeysTrigramInvertedIndexesRemoveGrantPrivilegeMVCCRangeTombstonesUpgradeSequenceToBeReferencedByIDSampledStmtDiagReqs" +const _Key_name = "V21_2Start22_1TargetBytesAvoidExcessAvoidDrainingNamesDrainingNamesMigrationTraceIDDoesntImplyStructuredRecordingAlterSystemTableStatisticsAddAvgSizeColMVCCAddSSTableInsertPublicSchemaNamespaceEntryOnRestoreUnsplitRangesInAsyncGCJobsValidateGrantOptionPebbleFormatBlockPropertyCollectorProbeRequestSelectRPCsTakeTracingInfoInbandPreSeedTenantSpanConfigsSeedTenantSpanConfigsPublicSchemasWithDescriptorsEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStoreScanWholeRowsSCRAMAuthenticationUnsafeLossOfQuorumRecoveryRangeLogAlterSystemProtectedTimestampAddColumnEnableProtectedTimestampsForTenantDeleteCommentsWithDroppedIndexesRemoveIncompatibleDatabasePrivilegesAddRaftAppliedIndexTermMigrationPostAddRaftAppliedIndexTermMigrationDontProposeWriteTimestampForLeaseTransfersTenantSettingsTableEnablePebbleFormatVersionBlockPropertiesDisableSystemConfigGossipTriggerMVCCIndexBackfillerEnableLeaseHolderRemovalBackupResolutionInJobLooselyCoupledRaftLogTruncationChangefeedIdlenessBackupDoesNotOverwriteLatestAndCheckpointEnableDeclarativeSchemaChangerRowLevelTTLPebbleFormatSplitUserKeysMarkedIncrementalBackupSubdirDateStyleIntervalStyleCastRewriteEnableNewStoreRebalancerClusterLocksVirtualTableAutoStatsTableSettingsForecastStatsSuperRegionsEnableNewChangefeedOptionsSpanCountTablePreSeedSpanCountTableSeedSpanCountTableV22_1Start22_2LocalTimestampsEnsurePebbleFormatVersionRangeKeysEnablePebbleFormatVersionRangeKeysTrigramInvertedIndexesRemoveGrantPrivilegeMVCCRangeTombstonesUpgradeSequenceToBeReferencedByIDSampledStmtDiagReqsAddSSTableTombstones" -var _Key_index = [...]uint16{0, 5, 14, 36, 54, 76, 113, 152, 166, 207, 233, 252, 286, 298, 329, 353, 374, 402, 432, 460, 481, 494, 513, 547, 585, 619, 651, 687, 719, 755, 797, 816, 856, 888, 907, 931, 952, 983, 1001, 1042, 1072, 1083, 1114, 1137, 1170, 1194, 1218, 1240, 1253, 1265, 1291, 1305, 1326, 1344, 1349, 1358, 1373, 1407, 1441, 1463, 1483, 1502, 1535, 1554} +var _Key_index = [...]uint16{0, 5, 14, 36, 54, 76, 113, 152, 166, 207, 233, 252, 286, 298, 329, 353, 374, 402, 432, 460, 481, 494, 513, 547, 585, 619, 651, 687, 719, 755, 797, 816, 856, 888, 907, 931, 952, 983, 1001, 1042, 1072, 1083, 1114, 1137, 1170, 1194, 1218, 1240, 1253, 1265, 1291, 1305, 1326, 1344, 1349, 1358, 1373, 1407, 1441, 1463, 1483, 1502, 1535, 1554, 1574} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index d8ad1988a18e..da319d2e975a 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -428,32 +428,21 @@ func assertSSTContents(sst []byte, sstTimestamp hlc.Timestamp, stats *enginepb.M defer iter.Close() // Check SST KV pairs. - iter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) - for { - ok, err := iter.Valid() - if err != nil { + for iter.SeekGE(storage.MVCCKey{Key: keys.MinKey}); ; iter.Next() { + if ok, err := iter.Valid(); err != nil { return err - } - if !ok { + } else if !ok { break } - key, valueRaw := iter.UnsafeKey(), iter.UnsafeValue() - value, err := storage.DecodeMVCCValue(valueRaw) - if err != nil { - return err - } + key := iter.UnsafeKey() if key.Timestamp.IsEmpty() { return errors.AssertionFailedf("SST contains inline value or intent for key %s", key) } - if value.IsTombstone() { - return errors.AssertionFailedf("SST contains tombstone for key %s", key) - } if sstTimestamp.IsSet() && key.Timestamp != sstTimestamp { return errors.AssertionFailedf("SST has unexpected timestamp %s (expected %s) for key %s", key.Timestamp, sstTimestamp, key.Key) } - iter.Next() } // Compare statistics with those passed by client. We calculate them at the diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index 3de05a8b8ef9..bc1f9d96d251 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -67,15 +67,17 @@ func TestEvalAddSSTable(t *testing.T) { }{ // Blind writes. "blind writes below existing": { - data: []sstutil.KV{{"a", 5, "a5"}, {"b", 7, ""}}, - sst: []sstutil.KV{{"a", 3, "sst"}, {"b", 2, "sst"}}, - expect: []sstutil.KV{{"a", 5, "a5"}, {"a", 3, "sst"}, {"b", 7, ""}, {"b", 2, "sst"}}, + data: []sstutil.KV{{"a", 5, "a5"}, {"b", 7, ""}, {"c", 6, "c6"}}, + sst: []sstutil.KV{{"a", 3, "sst"}, {"b", 2, "sst"}, {"c", 3, ""}}, + expect: []sstutil.KV{ + {"a", 5, "a5"}, {"a", 3, "sst"}, {"b", 7, ""}, {"b", 2, "sst"}, {"c", 6, "c6"}, {"c", 3, ""}, + }, expectStatsEst: true, }, "blind replaces existing": { - data: []sstutil.KV{{"a", 2, "a2"}}, - sst: []sstutil.KV{{"a", 2, "sst"}}, - expect: []sstutil.KV{{"a", 2, "sst"}}, + data: []sstutil.KV{{"a", 2, "a2"}, {"b", 2, "b2"}}, + sst: []sstutil.KV{{"a", 2, "sst"}, {"b", 2, ""}}, + expect: []sstutil.KV{{"a", 2, "sst"}, {"b", 2, ""}}, expectStatsEst: true, }, "blind errors on AddSSTableRequireAtRequestTimestamp": { @@ -100,11 +102,10 @@ func TestEvalAddSSTable(t *testing.T) { expect: []sstutil.KV{{"b", intentTS, "b0"}, {"c", 1, "sst"}, {"d", 1, "sst"}}, expectStatsEst: true, }, - "blind writes tombstones unless race": { // unfortunately, for performance + "blind writes tombstones": { sst: []sstutil.KV{{"a", 1, ""}}, expect: []sstutil.KV{{"a", 1, ""}}, expectStatsEst: true, - expectErrRace: `SST contains tombstone for key "a"/0.000000001,0`, }, "blind writes SST inline values unless race": { // unfortunately, for performance sst: []sstutil.KV{{"a", 0, "inline"}}, @@ -135,12 +136,11 @@ func TestEvalAddSSTable(t *testing.T) { expect: []sstutil.KV{{"a", 10, "a1"}, {"b", 10, "b1"}}, expectStatsEst: true, }, - "SSTTimestampToRequestTimestamp writes tombstones unless race": { // unfortunately, for performance + "SSTTimestampToRequestTimestamp writes tombstones": { reqTS: 10, toReqTS: 1, sst: []sstutil.KV{{"a", 1, ""}}, expect: []sstutil.KV{{"a", 10, ""}}, - expectErrRace: `SST contains tombstone for key "a"/0.000000001,0`, expectStatsEst: true, }, "SSTTimestampToRequestTimestamp rejects incorrect SST timestamp": { @@ -221,9 +221,9 @@ func TestEvalAddSSTable(t *testing.T) { reqTS: 5, toReqTS: 1, noShadow: true, - data: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}}, - sst: []sstutil.KV{{"a", 1, "a5"}, {"b", 1, "b5"}}, - expect: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}}, + data: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}, {"c", 5, ""}}, + sst: []sstutil.KV{{"a", 1, "a5"}, {"b", 1, "b5"}, {"c", 1, ""}}, + expect: []sstutil.KV{{"a", 5, "a5"}, {"b", 5, "b5"}, {"c", 5, ""}}, }, "SSTTimestampToRequestTimestamp errors with DisallowShadowingBelow equal value above existing below limit": { reqTS: 7, @@ -295,18 +295,16 @@ func TestEvalAddSSTable(t *testing.T) { sst: []sstutil.KV{{"a", 3, "a3"}}, expectErr: &roachpb.WriteTooOldError{}, }, - "DisallowConflicts allows new SST tombstones": { // unfortunately, for performance - noConflict: true, - sst: []sstutil.KV{{"a", 3, ""}}, - expect: []sstutil.KV{{"a", 3, ""}}, - expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`, + "DisallowConflicts allows new SST tombstones": { + noConflict: true, + sst: []sstutil.KV{{"a", 3, ""}}, + expect: []sstutil.KV{{"a", 3, ""}}, }, - "DisallowConflicts rejects SST tombstones when shadowing": { - noConflict: true, - data: []sstutil.KV{{"a", 2, "a2"}}, - sst: []sstutil.KV{{"a", 3, ""}}, - expectErr: "SST values cannot be tombstones", - expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`, + "DisallowConflicts allows SST tombstones when shadowing": { + noConflict: true, + data: []sstutil.KV{{"a", 2, "a2"}}, + sst: []sstutil.KV{{"a", 3, ""}}, + expect: []sstutil.KV{{"a", 3, ""}, {"a", 2, "a2"}}, }, "DisallowConflicts allows new SST inline values": { // unfortunately, for performance noConflict: true, @@ -383,18 +381,16 @@ func TestEvalAddSSTable(t *testing.T) { sst: []sstutil.KV{{"a", 3, "a3"}}, expect: []sstutil.KV{{"a", 3, "a3"}}, }, - "DisallowShadowing allows new SST tombstones": { // unfortunately, for performance - noShadow: true, - sst: []sstutil.KV{{"a", 3, ""}}, - expect: []sstutil.KV{{"a", 3, ""}}, - expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`, + "DisallowShadowing allows new SST tombstones": { + noShadow: true, + sst: []sstutil.KV{{"a", 3, ""}}, + expect: []sstutil.KV{{"a", 3, ""}}, }, "DisallowShadowing rejects SST tombstones when shadowing": { - noShadow: true, - data: []sstutil.KV{{"a", 2, "a2"}}, - sst: []sstutil.KV{{"a", 3, ""}}, - expectErr: "SST values cannot be tombstones", - expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`, + noShadow: true, + data: []sstutil.KV{{"a", 2, "a2"}}, + sst: []sstutil.KV{{"a", 3, ""}}, + expectErr: `ingested key collides with an existing one: "a"`, }, "DisallowShadowing allows new SST inline values": { // unfortunately, for performance noShadow: true, @@ -502,18 +498,22 @@ func TestEvalAddSSTable(t *testing.T) { sst: []sstutil.KV{{"a", 3, "a3"}}, expectErr: `ingested key collides with an existing one: "a"`, }, - "DisallowShadowingBelow allows new SST tombstones": { // unfortunately, for performance + "DisallowShadowingBelow is not generally idempotent with tombstones": { + noShadowBelow: 5, + data: []sstutil.KV{{"a", 3, ""}}, + sst: []sstutil.KV{{"a", 3, ""}}, + expectErr: &roachpb.WriteTooOldError{}, + }, + "DisallowShadowingBelow allows new SST tombstones": { noShadowBelow: 5, sst: []sstutil.KV{{"a", 3, ""}}, expect: []sstutil.KV{{"a", 3, ""}}, - expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`, }, - "DisallowShadowingBelow rejects SST tombstones when shadowing": { + "DisallowShadowingBelow rejects SST tombstones when shadowing below": { noShadowBelow: 5, data: []sstutil.KV{{"a", 2, "a2"}}, sst: []sstutil.KV{{"a", 3, ""}}, - expectErr: "SST values cannot be tombstones", - expectErrRace: `SST contains tombstone for key "a"/0.000000003,0`, + expectErr: `ingested key collides with an existing one: "a"`, }, "DisallowShadowingBelow allows new SST inline values": { // unfortunately, for performance noShadowBelow: 5, @@ -558,6 +558,12 @@ func TestEvalAddSSTable(t *testing.T) { sst: []sstutil.KV{{"a", 3, "sst"}, {"b", 1, "sst"}}, expectErr: `ingested key collides with an existing one: "b"`, }, + "DisallowShadowingBelow tombstone above tombstone": { + noShadowBelow: 5, + data: []sstutil.KV{{"a", 2, ""}, {"a", 1, "a1"}}, + sst: []sstutil.KV{{"a", 3, ""}}, + expect: []sstutil.KV{{"a", 3, ""}, {"a", 2, ""}, {"a", 1, "a1"}}, + }, "DisallowShadowingBelow at limit writes": { noShadowBelow: 5, sst: []sstutil.KV{{"a", 5, "sst"}}, @@ -598,6 +604,12 @@ func TestEvalAddSSTable(t *testing.T) { sst: []sstutil.KV{{"a", 7, "sst"}}, expectErr: `ingested key collides with an existing one: "a"`, }, + "DisallowShadowingBelow tombstone above limit errors on existing below limit": { + noShadowBelow: 5, + data: []sstutil.KV{{"a", 4, "a4"}}, + sst: []sstutil.KV{{"a", 7, ""}}, + expectErr: `ingested key collides with an existing one: "a"`, + }, "DisallowShadowingBelow above limit errors on existing below limit with same value": { noShadowBelow: 5, data: []sstutil.KV{{"a", 4, "a4"}}, @@ -640,6 +652,12 @@ func TestEvalAddSSTable(t *testing.T) { sst: []sstutil.KV{{"a", 7, "a7"}}, expect: []sstutil.KV{{"a", 7, "a7"}}, }, + "DisallowShadowingBelow above limit is idempotent with tombstone": { + noShadowBelow: 5, + data: []sstutil.KV{{"a", 7, ""}}, + sst: []sstutil.KV{{"a", 7, ""}}, + expect: []sstutil.KV{{"a", 7, ""}}, + }, "DisallowShadowingBelow above limit errors below existing": { noShadowBelow: 5, data: []sstutil.KV{{"a", 8, "a8"}}, @@ -764,9 +782,8 @@ func TestEvalAddSSTable(t *testing.T) { UpperBound: keys.MaxKey, }) defer iter.Close() - iter.SeekGE(storage.MVCCKey{Key: keys.SystemPrefix}) scan := []sstutil.KV{} - for { + for iter.SeekGE(storage.MVCCKey{Key: keys.SystemPrefix}); ; iter.Next() { ok, err := iter.Valid() require.NoError(t, err) if !ok { @@ -787,23 +804,21 @@ func TestEvalAddSSTable(t *testing.T) { require.NoError(t, protoutil.Unmarshal(iter.UnsafeValue(), &meta)) if meta.RawBytes == nil { // Skip intent metadata records (value emitted separately). - iter.Next() continue } value, err = roachpb.Value{RawBytes: meta.RawBytes}.GetBytes() require.NoError(t, err) } scan = append(scan, sstutil.KV{key, ts, string(value)}) - iter.Next() } require.Equal(t, tc.expect, scan) // Check that stats were updated correctly. if tc.expectStatsEst { - require.True(t, stats.ContainsEstimates > 0, "expected stats to be estimated") + require.NotZero(t, stats.ContainsEstimates, "expected stats to be estimated") } else { - require.False(t, stats.ContainsEstimates > 0, "found estimated stats") - require.Equal(t, stats, engineStats(t, engine, stats.LastUpdateNanos)) + require.Zero(t, stats.ContainsEstimates, "found estimated stats") + require.Equal(t, engineStats(t, engine, stats.LastUpdateNanos), stats) } }) } @@ -1121,6 +1136,7 @@ func TestAddSSTableMVCCStats(t *testing.T) { {"d", 1, "d"}, {"d", 2, ""}, {"e", 1, "e"}, + {"u", 3, "u"}, {"z", 2, "zzzzzz"}, } { require.NoError(t, engine.PutMVCC(kv.MVCCKey(), kv.MVCCValue())) @@ -1133,14 +1149,16 @@ func TestAddSSTableMVCCStats(t *testing.T) { {"d", 4, "dddd"}, // mvcc-shadow existing deleted d. {"e", 4, "eeee"}, // mvcc-shadow existing 1b. {"j", 2, "jj"}, // no colission – via MVCC or LSM – with existing. + {"t", 3, ""}, // tombstone, no collission + {"u", 5, ""}, // tombstone, shadows existing }) statsDelta := enginepb.MVCCStats{ - // the sst will think it added 4 keys here, but a, c, and e shadow or are shadowed. - LiveCount: -3, - LiveBytes: -109, - // the sst will think it added 5 keys, but only j is new so 4 are over-counted. - KeyCount: -4, - KeyBytes: -20, + // the sst will think it added 5 keys here, but a, c, e, and t shadow or are shadowed. + LiveCount: -4, + LiveBytes: -129, + // the sst will think it added 5 keys, but only j and t are new so 5 are over-counted. + KeyCount: -5, + KeyBytes: -22, // the sst will think it added 6 values, but since one was a perfect (key+ts) // collision, it *replaced* the existing value and is over-counted. ValCount: -1, @@ -1229,6 +1247,7 @@ func TestAddSSTableMVCCStatsDisallowShadowing(t *testing.T) { {"b", 6, ""}, {"g", 5, "gg"}, {"r", 1, "rr"}, + {"t", 3, ""}, {"y", 1, "yy"}, {"y", 2, ""}, {"y", 5, "yyy"}, @@ -1298,12 +1317,14 @@ func TestAddSSTableMVCCStatsDisallowShadowing(t *testing.T) { // Check that there has been no double counting of stats. All keys in second SST are shadowing. require.Equal(t, firstSSTStats, *cArgs.Stats) - // Evaluate the third SST. Two of the three KVs are perfectly shadowing, but - // there is one valid KV which should contribute to the stats. + // Evaluate the third SST. Some of the KVs are perfectly shadowing, but there + // are two valid KVs which should contribute to the stats. sst, start, end = sstutil.MakeSST(t, st, []sstutil.KV{ {"c", 2, "bb"}, // key has the same timestamp and value as the one present in the existing data. {"e", 2, "ee"}, {"h", 6, "hh"}, // key has the same timestamp and value as the one present in the existing data. + {"t", 3, ""}, // identical to existing tombstone. + {"x", 7, ""}, // new tombstone. }) cArgs.Args = &roachpb.AddSSTableRequest{ @@ -1315,15 +1336,15 @@ func TestAddSSTableMVCCStatsDisallowShadowing(t *testing.T) { _, err = batcheval.EvalAddSSTable(ctx, engine, cArgs, &roachpb.AddSSTableResponse{}) require.NoError(t, err) - // This is the stats contribution of the KV {"e", 2, "ee"}. This should be - // the only addition to the cumulative stats, as the other two KVs are - // perfect shadows of existing data. + // This is the stats contribution of the KVs {"e", 2, "ee"} and {"x", 7, ""}. + // This should be the only addition to the cumulative stats, as the other + // KVs are perfect shadows of existing data. delta := enginepb.MVCCStats{ LiveCount: 1, LiveBytes: 21, - KeyCount: 1, - KeyBytes: 14, - ValCount: 1, + KeyCount: 2, + KeyBytes: 28, + ValCount: 2, ValBytes: 7, } @@ -1489,8 +1510,6 @@ func engineStats(t *testing.T, engine storage.Engine, nowNanos int64) *enginepb. UpperBound: keys.MaxKey, }) defer iter.Close() - // We don't care about nowNanos, because the SST can't contain intents or - // tombstones and all existing intents will be resolved. stats, err := storage.ComputeStatsForRange(iter, keys.LocalMax, keys.MaxKey, nowNanos) require.NoError(t, err) return &stats diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index b423a972a295..b7f558761ada 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -1688,8 +1688,13 @@ message AdminVerifyProtectedTimestampResponse { // AddSSTableRequest contains arguments to the AddSSTable method, which links an // SST file into the Pebble log-structured merge-tree. The SST must only contain // committed versioned values with non-zero MVCC timestamps (no intents or -// inline values) and no tombstones. It cannot be used in a transaction, cannot -// be split across ranges, and must be alone in a batch. +// inline values). It cannot be used in a transaction, cannot be split across +// ranges, and must be alone in a batch. +// +// In 22.1, AddSSTable does not support MVCC point tombstones in SSTs, and may +// return an error when given these. 22.2 does allow this, but check the +// AddSSTableTombstones version gate first to make sure there are no 22.1 nodes +// left in the cluster. // // By default, AddSSTable will blindly write the SST contents into Pebble, with // fixed MVCC timestamps unaffected by pushes. This can violate many CRDB diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index a5c13e97649e..637289efc691 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -2674,6 +2674,7 @@ func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalCo RangeProber: p.execCfg.RangeProber, StmtDiagnosticsRequestInserter: ex.server.cfg.StmtDiagnosticsRecorder.InsertRequest, CatalogBuiltins: &p.evalCatalogBuiltins, + QueryCancelKey: ex.queryCancelKey, }, Tracing: &ex.sessionTracing, MemMetrics: &ex.memMetrics, diff --git a/pkg/sql/pgwire/pgwirecancel/backend_key_data.go b/pkg/sql/pgwire/pgwirecancel/backend_key_data.go index 82b3ac4272ee..1b151d9bc2dc 100644 --- a/pkg/sql/pgwire/pgwirecancel/backend_key_data.go +++ b/pkg/sql/pgwire/pgwirecancel/backend_key_data.go @@ -70,5 +70,12 @@ func (b BackendKeyData) GetSQLInstanceID() base.SQLInstanceID { bits = bits &^ leadingBitMask // Use the upper 32 bits as the sqlInstanceID. return base.SQLInstanceID(bits >> 32) +} + +// GetPGBackendPID returns the upper 32 bits of this BackendKeyData. In Postgres, +// this is the process ID, but we expose it only for compatibility. +func (b BackendKeyData) GetPGBackendPID() uint32 { + bits := uint64(b) + return uint32(bits >> 32) } diff --git a/pkg/sql/sem/builtins/pg_builtins.go b/pkg/sql/sem/builtins/pg_builtins.go index e6c6e90813c1..5aa8c26696c0 100644 --- a/pkg/sql/sem/builtins/pg_builtins.go +++ b/pkg/sql/sem/builtins/pg_builtins.go @@ -540,10 +540,14 @@ var pgBuiltins = map[string]builtinDefinition{ tree.Overload{ Types: tree.ArgTypes{}, ReturnType: tree.FixedReturnType(types.Int), - Fn: func(_ *eval.Context, _ tree.Datums) (tree.Datum, error) { - return tree.NewDInt(-1), nil + Fn: func(ctx *eval.Context, _ tree.Datums) (tree.Datum, error) { + pid := ctx.QueryCancelKey.GetPGBackendPID() + return tree.NewDInt(tree.DInt(pid)), nil }, - Info: notUsableInfo, + Info: "Returns a numerical ID attached to this session. This ID is " + + "part of the query cancellation key used by the wire protocol. This " + + "function was only added for compatibility, and unlike in Postgres, the" + + "returned value does not correspond to a real process ID.", Volatility: volatility.Stable, }, ), diff --git a/pkg/sql/sem/eval/BUILD.bazel b/pkg/sql/sem/eval/BUILD.bazel index 7b20225264f0..adca2fb4ca0f 100644 --- a/pkg/sql/sem/eval/BUILD.bazel +++ b/pkg/sql/sem/eval/BUILD.bazel @@ -50,6 +50,7 @@ go_library( "//pkg/sql/pgwire/pgcode", "//pkg/sql/pgwire/pgerror", "//pkg/sql/pgwire/pgnotice", + "//pkg/sql/pgwire/pgwirecancel", "//pkg/sql/privilege", "//pkg/sql/roleoption", "//pkg/sql/sem/cast", diff --git a/pkg/sql/sem/eval/context.go b/pkg/sql/sem/eval/context.go index eec11b2d01c7..a1f9e8f949ef 100644 --- a/pkg/sql/sem/eval/context.go +++ b/pkg/sql/sem/eval/context.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirecancel" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" @@ -211,6 +212,10 @@ type Context struct { // CatalogBuiltins is used by various builtins which depend on looking up // catalog information. Unlike the Planner, it is available in DistSQL. CatalogBuiltins CatalogBuiltins + + // QueryCancelKey is the key used by the pgwire protocol to cancel the + // query currently running in this session. + QueryCancelKey pgwirecancel.BackendKeyData } var _ tree.ParseTimeContext = &Context{} diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index 36fffbbf6abc..64204b93462f 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -38,8 +38,8 @@ import ( // is non-empty, disallowShadowing is ignored. // // The given SST and reader cannot contain intents or inline values (i.e. zero -// timestamps), nor tombstones (i.e. empty values), but this is only checked for -// keys that exist in both sides, for performance. +// timestamps), but this is only checked for keys that exist in both sides, for +// performance. // // The returned MVCC statistics is a delta between the SST-only statistics and // their effect when applied, which when added to the SST statistics will adjust @@ -98,9 +98,6 @@ func CheckSSTConflicts( if err != nil { return err } - if sstValue.IsTombstone() { - return errors.New("SST values cannot be tombstones") - } if !extKey.IsValue() { var mvccMeta enginepb.MVCCMetadata if err = extIter.ValueProto(&mvccMeta); err != nil { @@ -149,15 +146,19 @@ func CheckSSTConflicts( totalBytes := metaKeySize + metaValSize // Update the skipped stats to account for the skipped meta key. - statsDiff.LiveBytes -= totalBytes - statsDiff.LiveCount-- + if !sstValue.IsTombstone() { + statsDiff.LiveBytes -= totalBytes + statsDiff.LiveCount-- + } statsDiff.KeyBytes -= metaKeySize statsDiff.ValBytes -= metaValSize statsDiff.KeyCount-- // Update the stats to account for the skipped versioned key/value. totalBytes = int64(len(sstValueRaw)) + MVCCVersionTimestampSize - statsDiff.LiveBytes -= totalBytes + if !sstValue.IsTombstone() { + statsDiff.LiveBytes -= totalBytes + } statsDiff.KeyBytes -= MVCCVersionTimestampSize statsDiff.ValBytes -= int64(len(sstValueRaw)) statsDiff.ValCount-- diff --git a/pkg/testutils/pgtest/pgtest.go b/pkg/testutils/pgtest/pgtest.go index ddc414dc1294..b22a41b00a9b 100644 --- a/pkg/testutils/pgtest/pgtest.go +++ b/pkg/testutils/pgtest/pgtest.go @@ -18,6 +18,7 @@ import ( "fmt" "net" "reflect" + "strconv" "strings" "testing" @@ -78,11 +79,44 @@ func NewPGTest(ctx context.Context, addr, user string) (*PGTest, error) { if backendKeyData == nil { return nil, errors.Errorf("did not receive BackendKeyData") } + if err := checkPGBackendPID(p, backendKeyData); err != nil { + return nil, err + } + p.isCockroachDB = foundCrdb success = err == nil return p, err } +func checkPGBackendPID(p *PGTest, backendKeyData *pgproto3.BackendKeyData) error { + if err := p.fe.Send(&pgproto3.Query{ + String: "SELECT pg_backend_pid();", + }); err != nil { + return errors.Wrap(err, "fetching pg_backend_pid") + } + msgs, err := p.Until(false /* keepErrMsg */, &pgproto3.ReadyForQuery{}) + if err != nil { + return errors.Wrap(err, "fetching pg_backend_pid") + } + matched := false + for _, msg := range msgs { + if d, ok := msg.(*pgproto3.DataRow); ok { + pid, err := strconv.Atoi(string(d.Values[0])) + if err != nil { + return errors.Wrap(err, "parsing pg_backend_pid") + } + if uint32(pid) != backendKeyData.ProcessID { + return errors.Errorf("wrong pg_backend_pid; wanted %d, got %d", backendKeyData.ProcessID, pid) + } + matched = true + } + } + if !matched { + return errors.Errorf("could not retrieve pg_backend_pid") + } + return nil +} + // Close sends a Terminate message and closes the connection. func (p *PGTest) Close() error { defer p.conn.Close()