diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index d713911a2c5e..05d9189b947b 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -118,6 +118,12 @@ func TestEvalAddSSTable(t *testing.T) { expect: kvs{rangeKV("a", "d", 1, "")}, expectStatsEst: true, }, + "blind extends overlapping range tombstone": { + data: kvs{rangeKV("c", "e", 1, "")}, + sst: kvs{rangeKV("d", "f", 1, "")}, + expect: kvs{rangeKV("c", "f", 1, "")}, + expectStatsEst: true, + }, "blind rejects SST inline values under race only": { // unfortunately, for performance sst: kvs{pointKV("a", 0, "inline")}, expect: kvs{pointKV("a", 0, "inline")}, diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index 09b4d9e03196..1083edb7f0eb 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -1526,14 +1526,15 @@ message ExportRequest { // blowing up on memory usage. This option is only allowed together with // return_sst since caller should reconstruct full tables. // - // NB: If the result contains MVCC range tombstones, this can cause MVCC range - // tombstones in two subsequent SSTs to overlap. For example, given the range - // tombstone [a-f)@5, if we return a resume key at c@3 then the response will - // contain a truncated MVCC range tombstone [a-c\0)@5 which covers the point - // keys at c, but resuming from c@3 will contain the MVCC range tombstone - // [c-f)@5 which overlaps with the MVCC range tombstone in the previous - // response for the interval [c-c\0)@5. AddSSTable will allow this overlap - // during ingestion. + // NB: If the result contains MVCC range tombstones, this can cause MVCC range + // tombstones in two subsequent SSTs to overlap. For example, given the range + // tombstone [a-f)@5, if we stop between c@4 and c@2 and return a resume key c@2, + // then the response will contain a truncated MVCC range tombstone [a-c\0)@5 + // which covers the point key at c, but resuming from c@2 will contain the + // MVCC range tombstone [c-f)@5 which overlaps with the MVCC range tombstone + // in the previous response in the interval [c-c\0)@5. This overlap will not + // cause problems with multiplexed iteration using NewPebbleSSTIterator(), + // nor when ingesting the SSTs via `AddSSTable`. bool split_mid_key = 13; // Return the exported SST data in the response. diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index a2b8b5eed5d2..5e3af3068456 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -5060,12 +5060,13 @@ type MVCCExportOptions struct { // // NB: If the result contains MVCC range tombstones, this can cause MVCC range // tombstones in two subsequent SSTs to overlap. For example, given the range - // tombstone [a-f)@5, if we return a resume key at c@3 then the response will - // contain a truncated MVCC range tombstone [a-c\0)@5 which covers the point - // keys at c, but resuming from c@3 will contain the MVCC range tombstone - // [c-f)@5 which overlaps with the MVCC range tombstone in the previous - // response for the interval [c-c\0)@5. AddSSTable will allow this overlap - // during ingestion. + // tombstone [a-f)@5, if we stop between c@4 and c@2 and return a resume key c@2, + // then the response will contain a truncated MVCC range tombstone [a-c\0)@5 + // which covers the point key at c, but resuming from c@2 will contain the + // MVCC range tombstone [c-f)@5 which overlaps with the MVCC range tombstone + // in the previous response in the interval [c-c\0)@5. This overlap will not + // cause problems with multiplexed iteration using NewPebbleSSTIterator(), + // nor when ingesting the SSTs via `AddSSTable`. StopMidKey bool // ResourceLimiter limits how long iterator could run until it exhausts allocated // resources. Export queries limiter in its iteration loop to break out once diff --git a/pkg/storage/testdata/mvcc_histories/sst_iter_multi b/pkg/storage/testdata/mvcc_histories/sst_iter_multi index 2465abe7dbb3..ef1d4a19ecb0 100644 --- a/pkg/storage/testdata/mvcc_histories/sst_iter_multi +++ b/pkg/storage/testdata/mvcc_histories/sst_iter_multi @@ -267,3 +267,46 @@ iter_seek_ge: "b"/3.000000000,0=/ {b-d}/[4.000000000,0=/] iter_seek_ge: {b-d}/[4.000000000,0=/] iter_seek_ge: "b"/1.000000000,0=/BYTES/b1 {b-d}/[4.000000000,0=/] iter_seek_ge: {b-d}/[4.000000000,0=/] + +# Clear the span. Write two SSTs: +# +# 1. c@3 point key, [a-c\0)@2 range key. +# 2. c@1 point key, [c-f)@2 range keys. +# +# This simulates what would happen in an Export with SplitMidKey that +# emits a resume span at c@1, where we have to ensure that the "c" +# point is covered by the range key in both SSTs, so they will +# overlap at [c-c\0)@2. +run ok +sst_reset +sst_put k=c ts=3 v=c3 +sst_put_rangekey k=a end=+c ts=2 +sst_finish +sst_put k=c ts=1 v=c3 +sst_put_rangekey k=c end=f ts=2 +---- +>> at end: + +>> sst-0: +set: "c"/3.000000000,0 -> /BYTES/c3 +rangekeyset: {a-c\x00}/2.000000000,0 -> / +>> sst-1: +set: "c"/1.000000000,0 -> /BYTES/c3 +rangekeyset: {c-f}/2.000000000,0 -> / + +run ok +sst_iter_new +iter_seek_ge k=c ts=3 +iter_seek_ge k=c ts=2 +iter_seek_ge k=c ts=1 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "c"/3.000000000,0=/BYTES/c3 {a-f}/[2.000000000,0=/] +iter_seek_ge: {a-f}/[2.000000000,0=/] +iter_seek_ge: "c"/1.000000000,0=/BYTES/c3 {a-f}/[2.000000000,0=/] +iter_seek_ge: {a-f}/[2.000000000,0=/] +iter_scan: {a-f}/[2.000000000,0=/] +iter_scan: "c"/3.000000000,0=/BYTES/c3 {a-f}/[2.000000000,0=/] +iter_scan: "c"/1.000000000,0=/BYTES/c3 {a-f}/[2.000000000,0=/] +iter_scan: .