diff --git a/pkg/keys/constants.go b/pkg/keys/constants.go index 4390db95ef57..caee9bcdec3b 100644 --- a/pkg/keys/constants.go +++ b/pkg/keys/constants.go @@ -133,6 +133,10 @@ var ( // LocalRangeLastReplicaGCTimestampSuffix is the suffix for a range's last // replica GC timestamp (for GC of old replicas). LocalRangeLastReplicaGCTimestampSuffix = []byte("rlrt") + // LocalRangeMVCCRangeKeyGCLockSuffix is the suffix for a lock obtained + // by range tombstone operations to ensure they don't overlap with + // GC requests while allowing point traffic to go through unobstructed. + LocalRangeMVCCRangeKeyGCLockSuffix = []byte("rltu") // localRangeLastVerificationTimestampSuffix is DEPRECATED and remains to // prevent reuse. localRangeLastVerificationTimestampSuffix = []byte("rlvt") diff --git a/pkg/keys/keys.go b/pkg/keys/keys.go index 1d39b9c0e700..ff9b17703049 100644 --- a/pkg/keys/keys.go +++ b/pkg/keys/keys.go @@ -1055,3 +1055,9 @@ func (b RangeIDPrefixBuf) RaftReplicaIDKey() roachpb.Key { func (b RangeIDPrefixBuf) RangeLastReplicaGCTimestampKey() roachpb.Key { return append(b.unreplicatedPrefix(), LocalRangeLastReplicaGCTimestampSuffix...) } + +// RangeTombstoneStatsUpdateKey returns a range local key protecting range +// tombstone mvcc stats calculations during range tombstone GC. +func (b RangeIDPrefixBuf) RangeTombstoneStatsUpdateKey() roachpb.Key { + return append(b.unreplicatedPrefix(), LocalRangeMVCCRangeKeyGCLockSuffix...) +} diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index 4a794e33b21a..debbf2d0eb6e 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -59,6 +59,12 @@ func declareKeysAddSSTable( // span will be tightened during evaluation. l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } } diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index 88ca15ed92a8..a8df770e704f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -59,6 +59,12 @@ func declareKeysClearRange( args := req.(*roachpb.ClearRangeRequest) l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } // ClearRange wipes all MVCC versions of keys covered by the specified diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range.go b/pkg/kv/kvserver/batcheval/cmd_delete_range.go index 54acdf8a0744..be7b99ef4158 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range.go @@ -56,6 +56,12 @@ func declareKeysDeleteRange( latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ Key: keys.RangeDescriptorKey(rs.GetStartKey()), }) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } } diff --git a/pkg/kv/kvserver/batcheval/cmd_gc.go b/pkg/kv/kvserver/batcheval/cmd_gc.go index b148481d6d3b..86c4979d93ae 100644 --- a/pkg/kv/kvserver/batcheval/cmd_gc.go +++ b/pkg/kv/kvserver/batcheval/cmd_gc.go @@ -37,22 +37,25 @@ func declareKeysGC( _ time.Duration, ) { gcr := req.(*roachpb.GCRequest) - // When GC-ing MVCC range key tombstones or individual range keys, we need to - // serialize with all writes that overlap the MVCC range tombstone, as well as - // the immediate left/right neighboring keys. This is because a range key - // write has non-local effects, i.e. it can fragment or merge other range keys - // at other timestamps and at its boundaries, and this has a non-commutative - // effect on MVCC stats -- if someone writes a new range key while we're GCing - // one below, the stats would come out wrong. - // Note that we only need to serialize with writers (including other GC - // processes) and not with readers (that are guaranteed to be above the GC - // threshold). To achieve this, we declare read-write access at - // hlc.MaxTimestamp which will not block any readers. - for _, span := range mergeAdjacentSpans(makeLookupBoundariesForGCRanges( - rs.GetStartKey().AsRawKey(), nil, gcr.RangeKeys, - )) { - latchSpans.AddMVCC(spanset.SpanReadWrite, span, hlc.MaxTimestamp) + if gcr.RangeKeys != nil { + // When GC-ing MVCC range key tombstones, we need to serialize with + // range key writes that overlap the MVCC range tombstone, as well as + // the immediate left/right neighboring keys. This is because a range + // key write has non-local effects, i.e. it can fragment or merge other + // range keys at other timestamps and at its boundaries, and this has + // a non-commutative effect on MVCC stats -- if someone writes a new + // range key while we're GCing one below, the stats would come out wrong. + // + // To achieve this, we use a virtual latch key that will prevent any + // range key write operations until GC request is done. All other range + // tombstone write operations obtain a read latch on this key and can + // run concurrently. + latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } + // For ClearRangeKey request we still obtain a wide write lock as we don't + // expect any operations running on the range. if rk := gcr.ClearRangeKey; rk != nil { latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: rk.StartKey, EndKey: rk.EndKey}, hlc.MaxTimestamp) diff --git a/pkg/kv/kvserver/batcheval/cmd_revert_range.go b/pkg/kv/kvserver/batcheval/cmd_revert_range.go index bbf862dc5cd0..4c45ef905320 100644 --- a/pkg/kv/kvserver/batcheval/cmd_revert_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_revert_range.go @@ -56,6 +56,12 @@ func declareKeysRevertRange( // span will be tightened during evaluation. l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } // isEmptyKeyTimeRange checks if the span has no writes in (since,until].