Skip to content

Commit

Permalink
batcheval: add latch key protecting range key stats update
Browse files Browse the repository at this point in the history
Previously GC needed to get a read latch with max timestamp to
ensure that range tombstones are not modified during GC. This
is causing all writers to get stuck in queue while GC is validating
request and removing range tombstone.
This commit adds a dedicated latch key
LocalRangeMVCCRangeKeyGCLockSuffix to address the problem.
All range tombstone writers obtain this read latch on top of the write
latches for the ranges they are interested to update.
GC on the other hand will obtain write latch on that key. This
approach allows point writers to proceed during GC, but will block new
range tombstones from being written. Non conflicting writes of range
tombstones could still proceed since their write latch ranges don't
overlap.

Release justification: this is a safe change as range tombstone
behaviour is not enabled yet and the change is needed to address
potential performance regressions.

Release note: None
  • Loading branch information
aliher1911 committed Aug 24, 2022
1 parent fe11e4b commit 249575f
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 15 deletions.
4 changes: 4 additions & 0 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ var (
// LocalRangeLastReplicaGCTimestampSuffix is the suffix for a range's last
// replica GC timestamp (for GC of old replicas).
LocalRangeLastReplicaGCTimestampSuffix = []byte("rlrt")
// LocalRangeMVCCRangeKeyGCLockSuffix is the suffix for a lock obtained
// by range tombstone operations to ensure they don't overlap with
// GC requests while allowing point traffic to go through unobstructed.
LocalRangeMVCCRangeKeyGCLockSuffix = []byte("rltu")
// localRangeLastVerificationTimestampSuffix is DEPRECATED and remains to
// prevent reuse.
localRangeLastVerificationTimestampSuffix = []byte("rlvt")
Expand Down
12 changes: 12 additions & 0 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,12 @@ func RangeGCThresholdKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RangeGCThresholdKey()
}

// MVCCRangeKeyGCKey returns a range local key protecting range
// tombstone mvcc stats calculations during range tombstone GC.
func MVCCRangeKeyGCKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).MVCCRangeKeyGCKey()
}

// RangeVersionKey returns a system-local for the range version.
func RangeVersionKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RangeVersionKey()
Expand Down Expand Up @@ -1055,3 +1061,9 @@ func (b RangeIDPrefixBuf) RaftReplicaIDKey() roachpb.Key {
func (b RangeIDPrefixBuf) RangeLastReplicaGCTimestampKey() roachpb.Key {
return append(b.unreplicatedPrefix(), LocalRangeLastReplicaGCTimestampSuffix...)
}

// MVCCRangeKeyGCKey returns a range local key protecting range
// tombstone mvcc stats calculations during range tombstone GC.
func (b RangeIDPrefixBuf) MVCCRangeKeyGCKey() roachpb.Key {
return append(b.unreplicatedPrefix(), LocalRangeMVCCRangeKeyGCLockSuffix...)
}
9 changes: 9 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_add_sstable.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,17 @@ func declareKeysAddSSTable(
// NB: The range end key is not available, so this will pessimistically
// latch up to args.EndKey.Next(). If EndKey falls on the range end key, the
// span will be tightened during evaluation.
// Even if we obtain latches beyond the end range here, it won't cause
// contention with the subsequent range because latches are enforced per
// range.
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).MVCCRangeKeyGCKey(),
})
}
}

Expand Down
9 changes: 9 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_clear_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,18 @@ func declareKeysClearRange(
// NB: The range end key is not available, so this will pessimistically latch
// up to args.EndKey.Next(). If EndKey falls on the range end key, the span
// will be tightened during evaluation.
// Even if we obtain latches beyond the end range here, it won't cause
// contention with the subsequent range because latches are enforced per
// range.
args := req.(*roachpb.ClearRangeRequest)
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).MVCCRangeKeyGCKey(),
})
}

// ClearRange wipes all MVCC versions of keys covered by the specified
Expand Down
9 changes: 9 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_delete_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,22 @@ func declareKeysDeleteRange(
// NB: The range end key is not available, so this will pessimistically
// latch up to args.EndKey.Next(). If EndKey falls on the range end key, the
// span will be tightened during evaluation.
// Even if we obtain latches beyond the end range here, it won't cause
// contention with the subsequent range because latches are enforced per
// range.
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// We need to read the range descriptor to determine the bounds during eval.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.RangeDescriptorKey(rs.GetStartKey()),
})

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).MVCCRangeKeyGCKey(),
})
}
}

Expand Down
11 changes: 11 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ func declareKeysEndTxn(
Key: abortspan.MinKey(rs.GetRangeID()),
EndKey: abortspan.MaxKey(rs.GetRangeID()),
})

// Protect range tombstones from collection by GC to avoid interference
// with MVCCStats calculation.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MVCCRangeKeyGCKey(rs.GetRangeID()),
})
}
if mt := et.InternalCommitTrigger.MergeTrigger; mt != nil {
// Merges copy over the RHS abort span to the LHS, and compute
Expand Down Expand Up @@ -183,6 +189,11 @@ func declareKeysEndTxn(
Key: leftPeekBound,
EndKey: rightPeekBound,
})
// Protect range tombstones from collection by GC to avoid interference
// with MVCCStats calculation.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MVCCRangeKeyGCKey(mt.LeftDesc.RangeID),
})
}
}
}
Expand Down
33 changes: 18 additions & 15 deletions pkg/kv/kvserver/batcheval/cmd_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,25 @@ func declareKeysGC(
_ time.Duration,
) {
gcr := req.(*roachpb.GCRequest)
// When GC-ing MVCC range key tombstones or individual range keys, we need to
// serialize with all writes that overlap the MVCC range tombstone, as well as
// the immediate left/right neighboring keys. This is because a range key
// write has non-local effects, i.e. it can fragment or merge other range keys
// at other timestamps and at its boundaries, and this has a non-commutative
// effect on MVCC stats -- if someone writes a new range key while we're GCing
// one below, the stats would come out wrong.
// Note that we only need to serialize with writers (including other GC
// processes) and not with readers (that are guaranteed to be above the GC
// threshold). To achieve this, we declare read-write access at
// hlc.MaxTimestamp which will not block any readers.
for _, span := range mergeAdjacentSpans(makeLookupBoundariesForGCRanges(
rs.GetStartKey().AsRawKey(), nil, gcr.RangeKeys,
)) {
latchSpans.AddMVCC(spanset.SpanReadWrite, span, hlc.MaxTimestamp)
if gcr.RangeKeys != nil {
// When GC-ing MVCC range key tombstones, we need to serialize with
// range key writes that overlap the MVCC range tombstone, as well as
// the immediate left/right neighboring keys. This is because a range
// key write has non-local effects, i.e. it can fragment or merge other
// range keys at other timestamps and at its boundaries, and this has
// a non-commutative effect on MVCC stats -- if someone writes a new
// range key while we're GCing one below, the stats would come out wrong.
//
// To achieve this, we use a virtual latch key that will prevent any
// range key write operations until GC request is done. All other range
// tombstone write operations obtain a read latch on this key and can
// run concurrently.
latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
Key: keys.MVCCRangeKeyGCKey(rs.GetRangeID()),
})
}
// For ClearRangeKey request we still obtain a wide write lock as we don't
// expect any operations running on the range.
if rk := gcr.ClearRangeKey; rk != nil {
latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: rk.StartKey, EndKey: rk.EndKey},
hlc.MaxTimestamp)
Expand Down
9 changes: 9 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_revert_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,17 @@ func declareKeysRevertRange(
// NB: The range end key is not available, so this will pessimistically
// latch up to args.EndKey.Next(). If EndKey falls on the range end key, the
// span will be tightened during evaluation.
// Even if we obtain latches beyond the end range here, it won't cause
// contention with the subsequent range because latches are enforced per
// range.
l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil)
latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp)

// Obtain a read only lock on range key stats update key to serialize with
// range tombstone GC requests.
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MakeRangeIDPrefixBuf(rs.GetRangeID()).MVCCRangeKeyGCKey(),
})
}

// isEmptyKeyTimeRange checks if the span has no writes in (since,until].
Expand Down

0 comments on commit 249575f

Please sign in to comment.