Skip to content

Commit

Permalink
batcheval: add latch key protecting range key stats update
Browse files Browse the repository at this point in the history
Previously GC needed to get a read latch with max timestamp to
ensure that range tombstones are not modified during GC. This
is causing all writers to get stuck in queue while GC is validating
request and removing range tombstone.
This commit adds a dedicated latch key
LocalRangeRangeTombstoneStatsUpdateLockSuffix to address the problem.
All range tombstone writers obtain this read latch on top of the write
latches for the ranges they are interested to update.
GC on the other hand will obtain write latch on that key. This
approach allows point writers to proceed during GC, but will block new
range tombstones from being written. Non conflicting writes of range
tombstones could still proceed since their write latch ranges don't
overlap.

Release justification: this is a safe change as range tombstone
behaviour is not enabled yet and the change is needed to address
potential performance regressions.

Release note: None
  • Loading branch information
aliher1911 committed Aug 24, 2022
1 parent a50a783 commit 722e313
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 15 deletions.
4 changes: 4 additions & 0 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ var (
// LocalRangeLastReplicaGCTimestampSuffix is the suffix for a range's last
// replica GC timestamp (for GC of old replicas).
LocalRangeLastReplicaGCTimestampSuffix = []byte("rlrt")
// LocalRangeMVCCRangeKeyGCLockSuffix is the suffix for a lock obtained
// by range tombstone operations to ensure they don't overlap with
// GC requests while allowing point traffic to go through unobstructed.
LocalRangeMVCCRangeKeyGCLockSuffix = []byte("rltu")
// localRangeLastVerificationTimestampSuffix is DEPRECATED and remains to
// prevent reuse.
localRangeLastVerificationTimestampSuffix = []byte("rlvt")
Expand Down
6 changes: 6 additions & 0 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -1055,3 +1055,9 @@ func (b RangeIDPrefixBuf) RaftReplicaIDKey() roachpb.Key {
func (b RangeIDPrefixBuf) RangeLastReplicaGCTimestampKey() roachpb.Key {
return append(b.unreplicatedPrefix(), LocalRangeLastReplicaGCTimestampSuffix...)
}

// RangeTombstoneStatsUpdateKey returns a range local key protecting range
// tombstone mvcc stats calculations during range tombstone GC.
func (b RangeIDPrefixBuf) RangeTombstoneStatsUpdateKey() roachpb.Key {
return append(b.unreplicatedPrefix(), LocalRangeMVCCRangeKeyGCLockSuffix...)
}
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_add_sstable.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ func declareKeysAddSSTable(
// span will be tightened during evaluation.
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(),
})
}
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_clear_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ func declareKeysClearRange(
args := req.(*roachpb.ClearRangeRequest)
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(),
})
}

// ClearRange wipes all MVCC versions of keys covered by the specified
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_delete_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ func declareKeysDeleteRange(
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.RangeDescriptorKey(rs.GetStartKey()),
})

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(),
})
}
}

Expand Down
33 changes: 18 additions & 15 deletions pkg/kv/kvserver/batcheval/cmd_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,25 @@ func declareKeysGC(
_ time.Duration,
) {
gcr := req.(*roachpb.GCRequest)
// When GC-ing MVCC range key tombstones or individual range keys, we need to
// serialize with all writes that overlap the MVCC range tombstone, as well as
// the immediate left/right neighboring keys. This is because a range key
// write has non-local effects, i.e. it can fragment or merge other range keys
// at other timestamps and at its boundaries, and this has a non-commutative
// effect on MVCC stats -- if someone writes a new range key while we're GCing
// one below, the stats would come out wrong.
// Note that we only need to serialize with writers (including other GC
// processes) and not with readers (that are guaranteed to be above the GC
// threshold). To achieve this, we declare read-write access at
// hlc.MaxTimestamp which will not block any readers.
for _, span := range mergeAdjacentSpans(makeLookupBoundariesForGCRanges(
rs.GetStartKey().AsRawKey(), nil, gcr.RangeKeys,
)) {
latchSpans.AddMVCC(spanset.SpanReadWrite, span, hlc.MaxTimestamp)
if gcr.RangeKeys != nil {
// When GC-ing MVCC range key tombstones, we need to serialize with
// range key writes that overlap the MVCC range tombstone, as well as
// the immediate left/right neighboring keys. This is because a range
// key write has non-local effects, i.e. it can fragment or merge other
// range keys at other timestamps and at its boundaries, and this has
// a non-commutative effect on MVCC stats -- if someone writes a new
// range key while we're GCing one below, the stats would come out wrong.
//
// To achieve this, we use a virtual latch key that will prevent any
// range key write operations until GC request is done. All other range
// tombstone write operations obtain a read latch on this key and can
// run concurrently.
latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(),
})
}
// For ClearRangeKey request we still obtain a wide write lock as we don't
// expect any operations running on the range.
if rk := gcr.ClearRangeKey; rk != nil {
latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: rk.StartKey, EndKey: rk.EndKey},
hlc.MaxTimestamp)
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_revert_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ func declareKeysRevertRange(
// span will be tightened during evaluation.
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).RangeTombstoneStatsUpdateKey(),
})
}

// isEmptyKeyTimeRange checks if the span has no writes in (since,until].
Expand Down

0 comments on commit 722e313

Please sign in to comment.