Skip to content

Commit

Permalink
kvserver/gc: remove range tombstones during GC
Browse files Browse the repository at this point in the history
Previously range tombstones were taken into account when doing point key
GC, but were never removed themselves. This PR is adding support for
removal of old range keys.

This PR is extending GCRequest to include range tombstones. Range tombstones
are populated by GC in requests sent after all the point keys under the
GC threshold are deleted thus guaranteeing that point keys are not accidentally
exposed.
When processing GC range tombstone requests, replica does an addtional step to
validate these assumptions and fail deletions if there's data still covered by
range tombstones.

Release note: None
  • Loading branch information
aliher1911 committed Jul 18, 2022
1 parent 3c0dc6f commit 285aead
Show file tree
Hide file tree
Showing 20 changed files with 2,048 additions and 146 deletions.
2 changes: 2 additions & 0 deletions pkg/kv/kvserver/batcheval/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ go_library(
"declare.go",
"eval_context.go",
"intent.go",
"ranges.go",
"split_stats_helper.go",
"stateloader.go",
"transaction.go",
Expand Down Expand Up @@ -118,6 +119,7 @@ go_test(
"declare_test.go",
"intent_test.go",
"main_test.go",
"ranges_test.go",
"testutils_test.go",
"transaction_test.go",
],
Expand Down
20 changes: 0 additions & 20 deletions pkg/kv/kvserver/batcheval/cmd_delete_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,23 +117,3 @@ func DeleteRange(
// error is not consumed by the caller because the result will be discarded.
return result.FromAcquiredLocks(h.Txn, deleted...), err
}

// rangeTombstonePeekBounds returns the left and right bounds that
// MVCCDeleteRangeUsingTombstone can read in order to detect adjacent range
// tombstones to merge with or fragment. The bounds will be truncated to the
// Raft range bounds if given.
func rangeTombstonePeekBounds(
startKey, endKey, rangeStart, rangeEnd roachpb.Key,
) (roachpb.Key, roachpb.Key) {
leftPeekBound := startKey.Prevish(roachpb.PrevishKeyLength)
if len(rangeStart) > 0 && leftPeekBound.Compare(rangeStart) <= 0 {
leftPeekBound = rangeStart
}

rightPeekBound := endKey.Next()
if len(rangeEnd) > 0 && rightPeekBound.Compare(rangeEnd) >= 0 {
rightPeekBound = rangeEnd
}

return leftPeekBound.Clone(), rightPeekBound.Clone()
}
84 changes: 83 additions & 1 deletion pkg/kv/kvserver/batcheval/cmd_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package batcheval

import (
"context"
"sort"
"time"

"github.com/cockroachdb/cockroach/pkg/keys"
Expand Down Expand Up @@ -44,6 +45,17 @@ func declareKeysGC(
latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: key.Key}, header.Timestamp)
}
}
// Extend the range key latches by feather to ensure MVCC stats
// computations correctly account for adjacent range keys tombstones if they
// need to be split.
// TODO(oleg): These latches are very broad and will be disruptive to read and
// write operations despite only accessing "stale" data. We should think of
// better integrating it with latchless GC approach.
for _, span := range mergeAdjacentSpans(makeLookupBoundariesForGCRanges(rs.GetStartKey().AsRawKey(),
nil, gcr.RangeKeys)) {
latchSpans.AddMVCC(spanset.SpanReadWrite, span,
header.Timestamp)
}
// Be smart here about blocking on the threshold keys. The MVCC GC queue can
// send an empty request first to bump the thresholds, and then another one
// that actually does work but can avoid declaring these keys below.
Expand All @@ -54,6 +66,25 @@ func declareKeysGC(
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{Key: keys.RangeDescriptorKey(rs.GetStartKey())})
}

// Create latches and merge adjacent.
func mergeAdjacentSpans(spans []roachpb.Span) []roachpb.Span {
if len(spans) == 0 {
return nil
}
sort.Slice(spans, func(i, j int) bool {
return spans[i].Key.Compare(spans[j].Key) < 0
})
j := 0
for i := 1; i < len(spans); i++ {
if spans[i].Key.Compare(spans[j].EndKey) < 0 {
spans[j].EndKey = spans[i].EndKey
} else {
j++
}
}
return spans[0 : j+1]
}

// GC iterates through the list of keys to garbage collect
// specified in the arguments. MVCCGarbageCollect is invoked on each
// listed key along with the expiration timestamp. The GC metadata
Expand Down Expand Up @@ -85,7 +116,8 @@ func GC(
// 2. the read could be served off a follower, which could be applying the
// GC request's effect from the raft log. Latches held on the leaseholder
// would have no impact on a follower read.
if !args.Threshold.IsEmpty() && len(args.Keys) != 0 &&
if !args.Threshold.IsEmpty() &&
(len(args.Keys) != 0 || len(args.RangeKeys) != 0) &&
!cArgs.EvalCtx.EvalKnobs().AllowGCWithNewThresholdAndKeys {
return result.Result{}, errors.AssertionFailedf(
"GC request can set threshold or it can GC keys, but it is unsafe for it to do both")
Expand Down Expand Up @@ -119,6 +151,16 @@ func GC(
}
}

// Garbage collect range keys. Note that we pass latch range boundaries for
// each key as we may need to merge range keys with adjacent ones, but we
// are restricted on how far we are allowed to read.
desc := cArgs.EvalCtx.Desc()
rangeKeys := makeCollectableGCRangesFromGCRequests(desc.StartKey.AsRawKey(),
desc.EndKey.AsRawKey(), args.RangeKeys)
if err := storage.MVCCGarbageCollectRangeKeys(ctx, readWriter, cArgs.Stats, rangeKeys); err != nil {
return result.Result{}, err
}

// Optionally bump the GC threshold timestamp.
var res result.Result
if !args.Threshold.IsEmpty() {
Expand Down Expand Up @@ -147,3 +189,43 @@ func GC(

return res, nil
}

// makeLookupBoundariesForGCRanges creates spans that could be used for latches
// and iterators when performing range tombstone garbage collection. Each of
// spans includes additional keys to the left and right of the GD'd range to
// ensure merging of range tombstones could be performed and at the same time
// no data is accessed outside of latches.
func makeLookupBoundariesForGCRanges(
rangeStart, rangeEnd roachpb.Key, rangeKeys []roachpb.GCRequest_GCRangeKey,
) []roachpb.Span {
spans := make([]roachpb.Span, len(rangeKeys))
for i := range rangeKeys {
l, r := rangeTombstonePeekBounds(rangeKeys[i].StartKey, rangeKeys[i].EndKey, rangeStart, rangeEnd)
spans[i] = roachpb.Span{
Key: l,
EndKey: r,
}
}
return spans
}

// makeCollectableGCRangesFromGCRequests creates GC collectable ranges
// containing ranges to be removed as well as safe iteration boundaries.
// See makeLookupBoundariesForGCRanges for why additional boundaries are used.
func makeCollectableGCRangesFromGCRequests(
rangeStart, rangeEnd roachpb.Key, rangeKeys []roachpb.GCRequest_GCRangeKey,
) []storage.CollectableGCRangeKey {
latches := makeLookupBoundariesForGCRanges(rangeStart, rangeEnd, rangeKeys)
collectableKeys := make([]storage.CollectableGCRangeKey, len(rangeKeys))
for i, rk := range rangeKeys {
collectableKeys[i] = storage.CollectableGCRangeKey{
MVCCRangeKey: storage.MVCCRangeKey{
StartKey: rk.StartKey,
EndKey: rk.EndKey,
Timestamp: rk.Timestamp,
},
LatchSpan: latches[i],
}
}
return collectableKeys
}
33 changes: 33 additions & 0 deletions pkg/kv/kvserver/batcheval/ranges.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package batcheval

import "github.com/cockroachdb/cockroach/pkg/roachpb"

// rangeTombstonePeekBounds returns the left and right bounds that
// MVCCDeleteRangeUsingTombstone can read in order to detect adjacent range
// tombstones to merge with or fragment. The bounds will be truncated to the
// Raft range bounds if given.
func rangeTombstonePeekBounds(
startKey, endKey, rangeStart, rangeEnd roachpb.Key,
) (roachpb.Key, roachpb.Key) {
leftPeekBound := startKey.Prevish(roachpb.PrevishKeyLength)
if len(rangeStart) > 0 && leftPeekBound.Compare(rangeStart) <= 0 {
leftPeekBound = rangeStart
}

rightPeekBound := endKey.Next()
if len(rangeEnd) > 0 && rightPeekBound.Compare(rangeEnd) >= 0 {
rightPeekBound = rangeEnd
}

return leftPeekBound.Clone(), rightPeekBound.Clone()
}
117 changes: 117 additions & 0 deletions pkg/kv/kvserver/batcheval/ranges_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package batcheval

import (
"testing"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/stretchr/testify/require"
)

func TestMergeGCRangeBoundaries(t *testing.T) {
defer leaktest.AfterTest(t)()

gcr := func(start, end roachpb.Key) roachpb.GCRequest_GCRangeKey {
return roachpb.GCRequest_GCRangeKey{
StartKey: start,
EndKey: end,
}
}
span := func(start, end roachpb.Key) roachpb.Span {
return roachpb.Span{
Key: start,
EndKey: end,
}
}
key := func(k string) roachpb.Key {
return roachpb.Key(k)
}
preKey := func(k string) roachpb.Key {
l, _ := rangeTombstonePeekBounds(key(k), key(k+"zzzzzzz"), nil, nil)
return l
}
postKey := func(k string) roachpb.Key {
_, r := rangeTombstonePeekBounds(key(""), key(k), nil, nil)
return r
}

for _, d := range []struct {
name string
rangeStart roachpb.Key
rangeEnd roachpb.Key
rangeKeys []roachpb.GCRequest_GCRangeKey
spans []roachpb.Span
}{
{
name: "empty",
rangeStart: key("a"),
rangeEnd: key("b"),
rangeKeys: []roachpb.GCRequest_GCRangeKey{},
spans: nil,
},
{
name: "full range",
rangeStart: key("a"),
rangeEnd: key("b"),
rangeKeys: []roachpb.GCRequest_GCRangeKey{
gcr(key("a"), key("b")),
},
spans: []roachpb.Span{
span(key("a"), key("b")),
},
},
{
name: "sub range",
rangeStart: key("a"),
rangeEnd: key("z"),
rangeKeys: []roachpb.GCRequest_GCRangeKey{
gcr(key("c"), key("d")),
},
spans: []roachpb.Span{
span(preKey("c"), postKey("d")),
},
},
{
name: "non adjacent",
rangeStart: key("a"),
rangeEnd: key("z"),
rangeKeys: []roachpb.GCRequest_GCRangeKey{
gcr(key("c"), key("d")),
gcr(key("e"), key("f")),
},
spans: []roachpb.Span{
span(preKey("c"), postKey("d")),
span(preKey("e"), postKey("f")),
},
},
{
name: "merge adjacent",
rangeStart: key("a"),
rangeEnd: key("z"),
rangeKeys: []roachpb.GCRequest_GCRangeKey{
gcr(key("a"), key("b")),
gcr(key("b"), key("c")),
gcr(key("c"), key("d")),
},
spans: []roachpb.Span{
span(key("a"), postKey("d")),
},
},
} {
t.Run(d.name, func(t *testing.T) {
spans := makeLookupBoundariesForGCRanges(d.rangeStart, d.rangeEnd, d.rangeKeys)
merged := mergeAdjacentSpans(spans)
require.Equal(t, d.spans, merged, "combined spans")
})
}
}
21 changes: 17 additions & 4 deletions pkg/kv/kvserver/gc/data_distribution_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ func newDataDistribution(
}
retries = 0
}
return nextKey, nil, keyTimestamps, hasIntent
return nextKey, unusedEndKey, keyTimestamps, hasIntent
}

generateRangeKey := func() (startKey, endKey roachpb.Key, timestamps []hlc.Timestamp, hasIntent bool) {
Expand Down Expand Up @@ -375,14 +375,27 @@ type distSpec interface {
// uniformDistSpec is a distSpec which represents uniform distributions over its
// various dimensions.
type uniformDistSpec struct {
tsSecFrom, tsSecTo int64 // seconds
tsSecFrom, tsSecTo int64
// Intents are split into two categories with distinct time ranges.
// All intents have lower timestamp bound to ensure they don't overlap with
// range tombstones since we will not be able to put a range tombstone over
// intent.
// Additionally, we have two time thresholds for intents. This is needed to
// ensure that we have certain fraction of intents GC'able since they lay
// below certain threshold.
tsSecMinIntent, tsSecOldIntentTo int64
keySuffixMin, keySuffixMax int
valueLenMin, valueLenMax int
deleteFrac float64
// keysPerValue parameters determine number of versions for a key. This number
// includes tombstones and intents which may be present on top of the history.
keysPerValueMin, keysPerValueMax int
intentFrac, oldIntentFrac float64
rangeKeyFrac float64
// Fractions define how likely is that a key will belong to one of categories.
// If we only had a single version for each key, then that would be fraction
// of total number of objects, but if we have many versions, this value would
// roughly be total objects/avg(keysPerValueMin, keysPerValueMax) * frac.
intentFrac, oldIntentFrac float64
rangeKeyFrac float64
}

var _ distSpec = uniformDistSpec{}
Expand Down
Loading

0 comments on commit 285aead

Please sign in to comment.