From 722e313c64fe8a051411e1e92a3826e671e00adb Mon Sep 17 00:00:00 2001 From: Oleg Afanasyev Date: Mon, 22 Aug 2022 20:55:53 +0100 Subject: [PATCH] batcheval: add latch key protecting range key stats update Previously GC needed to get a read latch with max timestamp to ensure that range tombstones are not modified during GC. This is causing all writers to get stuck in queue while GC is validating request and removing range tombstone. This commit adds a dedicated latch key LocalRangeRangeTombstoneStatsUpdateLockSuffix to address the problem. All range tombstone writers obtain this read latch on top of the write latches for the ranges they are interested to update. GC on the other hand will obtain write latch on that key. This approach allows point writers to proceed during GC, but will block new range tombstones from being written. Non conflicting writes of range tombstones could still proceed since their write latch ranges don't overlap. Release justification: this is a safe change as range tombstone behaviour is not enabled yet and the change is needed to address potential performance regressions. Release note: None --- pkg/keys/constants.go | 4 +++ pkg/keys/keys.go | 6 ++++ pkg/kv/kvserver/batcheval/cmd_add_sstable.go | 6 ++++ pkg/kv/kvserver/batcheval/cmd_clear_range.go | 6 ++++ pkg/kv/kvserver/batcheval/cmd_delete_range.go | 6 ++++ pkg/kv/kvserver/batcheval/cmd_gc.go | 33 ++++++++++--------- pkg/kv/kvserver/batcheval/cmd_revert_range.go | 6 ++++ 7 files changed, 52 insertions(+), 15 deletions(-) diff --git a/pkg/keys/constants.go b/pkg/keys/constants.go index 4390db95ef57..caee9bcdec3b 100644 --- a/pkg/keys/constants.go +++ b/pkg/keys/constants.go @@ -133,6 +133,10 @@ var ( // LocalRangeLastReplicaGCTimestampSuffix is the suffix for a range's last // replica GC timestamp (for GC of old replicas). LocalRangeLastReplicaGCTimestampSuffix = []byte("rlrt") + // LocalRangeMVCCRangeKeyGCLockSuffix is the suffix for a lock obtained + // by range tombstone operations to ensure they don't overlap with + // GC requests while allowing point traffic to go through unobstructed. + LocalRangeMVCCRangeKeyGCLockSuffix = []byte("rltu") // localRangeLastVerificationTimestampSuffix is DEPRECATED and remains to // prevent reuse. localRangeLastVerificationTimestampSuffix = []byte("rlvt") diff --git a/pkg/keys/keys.go b/pkg/keys/keys.go index 1d39b9c0e700..ff9b17703049 100644 --- a/pkg/keys/keys.go +++ b/pkg/keys/keys.go @@ -1055,3 +1055,9 @@ func (b RangeIDPrefixBuf) RaftReplicaIDKey() roachpb.Key { func (b RangeIDPrefixBuf) RangeLastReplicaGCTimestampKey() roachpb.Key { return append(b.unreplicatedPrefix(), LocalRangeLastReplicaGCTimestampSuffix...) } + +// RangeTombstoneStatsUpdateKey returns a range local key protecting range +// tombstone mvcc stats calculations during range tombstone GC. +func (b RangeIDPrefixBuf) RangeTombstoneStatsUpdateKey() roachpb.Key { + return append(b.unreplicatedPrefix(), LocalRangeMVCCRangeKeyGCLockSuffix...) +} diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index 4a794e33b21a..debbf2d0eb6e 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -59,6 +59,12 @@ func declareKeysAddSSTable( // span will be tightened during evaluation. l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } } diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index 88ca15ed92a8..a8df770e704f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -59,6 +59,12 @@ func declareKeysClearRange( args := req.(*roachpb.ClearRangeRequest) l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } // ClearRange wipes all MVCC versions of keys covered by the specified diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range.go b/pkg/kv/kvserver/batcheval/cmd_delete_range.go index 54acdf8a0744..be7b99ef4158 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range.go @@ -56,6 +56,12 @@ func declareKeysDeleteRange( latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ Key: keys.RangeDescriptorKey(rs.GetStartKey()), }) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } } diff --git a/pkg/kv/kvserver/batcheval/cmd_gc.go b/pkg/kv/kvserver/batcheval/cmd_gc.go index b148481d6d3b..86c4979d93ae 100644 --- a/pkg/kv/kvserver/batcheval/cmd_gc.go +++ b/pkg/kv/kvserver/batcheval/cmd_gc.go @@ -37,22 +37,25 @@ func declareKeysGC( _ time.Duration, ) { gcr := req.(*roachpb.GCRequest) - // When GC-ing MVCC range key tombstones or individual range keys, we need to - // serialize with all writes that overlap the MVCC range tombstone, as well as - // the immediate left/right neighboring keys. This is because a range key - // write has non-local effects, i.e. it can fragment or merge other range keys - // at other timestamps and at its boundaries, and this has a non-commutative - // effect on MVCC stats -- if someone writes a new range key while we're GCing - // one below, the stats would come out wrong. - // Note that we only need to serialize with writers (including other GC - // processes) and not with readers (that are guaranteed to be above the GC - // threshold). To achieve this, we declare read-write access at - // hlc.MaxTimestamp which will not block any readers. - for _, span := range mergeAdjacentSpans(makeLookupBoundariesForGCRanges( - rs.GetStartKey().AsRawKey(), nil, gcr.RangeKeys, - )) { - latchSpans.AddMVCC(spanset.SpanReadWrite, span, hlc.MaxTimestamp) + if gcr.RangeKeys != nil { + // When GC-ing MVCC range key tombstones, we need to serialize with + // range key writes that overlap the MVCC range tombstone, as well as + // the immediate left/right neighboring keys. This is because a range + // key write has non-local effects, i.e. it can fragment or merge other + // range keys at other timestamps and at its boundaries, and this has + // a non-commutative effect on MVCC stats -- if someone writes a new + // range key while we're GCing one below, the stats would come out wrong. + // + // To achieve this, we use a virtual latch key that will prevent any + // range key write operations until GC request is done. All other range + // tombstone write operations obtain a read latch on this key and can + // run concurrently. + latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } + // For ClearRangeKey request we still obtain a wide write lock as we don't + // expect any operations running on the range. if rk := gcr.ClearRangeKey; rk != nil { latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: rk.StartKey, EndKey: rk.EndKey}, hlc.MaxTimestamp) diff --git a/pkg/kv/kvserver/batcheval/cmd_revert_range.go b/pkg/kv/kvserver/batcheval/cmd_revert_range.go index bbf862dc5cd0..4c45ef905320 100644 --- a/pkg/kv/kvserver/batcheval/cmd_revert_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_revert_range.go @@ -56,6 +56,12 @@ func declareKeysRevertRange( // span will be tightened during evaluation. l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // Obtain a read only lock on range key stats update key to serialize with + // range tombstone GC requests. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(), + }) } // isEmptyKeyTimeRange checks if the span has no writes in (since,until].