gc: take range tombstones into account in mvcc gc
Previously GC didn't take range tombstones into account
when removing old point data. This patch adds support for
removal of data hidden by range tombstones.

Release note: None
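
For context, a minimal sketch of the rule this change enables, using simplified, hypothetical names (this is not the code in this commit): a point version becomes collectible once it is shadowed at or below the GC threshold, where the shadowing write may now be an MVCC range tombstone rather than only a newer point version.

package main

import "fmt"

// ts is a simplified stand-in for hlc.Timestamp (wall nanos only); 0 means "none".
type ts int64

// isGarbagePoint reports whether a point version written at versionTS could be
// garbage collected. Hypothetical, illustrative logic only: the version must be
// shadowed by something newer -- a newer point version or a range tombstone
// covering its key -- and the oldest such shadowing write must be at or below
// the GC threshold.
func isGarbagePoint(versionTS, newerPointTS, coveringRangeTombTS, gcThreshold ts) bool {
	shadowedAt := newerPointTS
	if coveringRangeTombTS > versionTS &&
		(shadowedAt == 0 || coveringRangeTombTS < shadowedAt) {
		shadowedAt = coveringRangeTombTS
	}
	return shadowedAt != 0 && shadowedAt <= gcThreshold
}

func main() {
	// A point at ts=5 whose only cover is a range tombstone at ts=7, with a GC
	// threshold of 10: previously ignored by GC, now collectible.
	fmt.Println(isGarbagePoint(5, 0, 7, 10)) // true
	// Same point, but the range tombstone is above the threshold: must be kept.
	fmt.Println(isGarbagePoint(5, 0, 12, 10)) // false
}
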
aliher1911 committed Jun 27, 2022
1 parent ccb8ef1 commit 3283bde
Showing 12 changed files with 957 additions and 198 deletions.
190 changes: 172 additions & 18 deletions pkg/kv/kvserver/gc/data_distribution_test.go
@@ -35,7 +35,9 @@ import (
// MVCCKeyValues. The stream may indicate that a value is an intent by returning
// a non-nil transaction. If an intent is returned it must have a higher
// timestamp than any other version written for the key.
type dataDistribution func() (storage.MVCCKeyValue, *roachpb.Transaction, bool)
type dataDistribution func() (
storage.MVCCKeyValue, storage.MVCCRangeKey, *roachpb.Transaction, bool,
)

// setupTest writes the data from this distribution into eng. All data should
// be a part of the range represented by desc.
@@ -46,11 +48,17 @@ func (ds dataDistribution) setupTest(
var maxTs hlc.Timestamp
var ms enginepb.MVCCStats
for {
kv, txn, ok := ds()
kv, rangeTS, txn, ok := ds()
if !ok {
break
}
if txn == nil {
if len(rangeTS.StartKey) > 0 {
require.Nil(t, txn, "invalid test data, range can't use transaction")
require.Zero(t, len(kv.Key.Key), "invalid test data, range can't be used together with value: key=%s, rangeKey=%s", kv.Key.String(), rangeTS.String())
err := storage.ExperimentalMVCCDeleteRangeUsingTombstone(ctx, eng, &ms, rangeTS.StartKey,
rangeTS.EndKey, rangeTS.Timestamp, hlc.ClockTimestamp{}, 1)
require.NoError(t, err, "failed to put delete range")
} else if txn == nil {
if kv.Key.Timestamp.IsEmpty() {
require.NoError(t, eng.PutUnversioned(kv.Key.Key, kv.Value))
} else {
@@ -73,6 +81,9 @@ func (ds dataDistribution) setupTest(
if !kv.Key.Timestamp.Less(maxTs) {
maxTs = kv.Key.Timestamp
}
if !rangeTS.Timestamp.Less(maxTs) {
maxTs = rangeTS.Timestamp
}
}
require.NoError(t, eng.Flush())
snap := eng.NewSnapshot()
@@ -82,6 +93,99 @@ func (ds dataDistribution) setupTest(
return ms
}

// dataFeedItem is a single element of a data distribution: either a point
// key-value (optionally an intent with its transaction) or a range key.
type dataFeedItem struct {
kv storage.MVCCKeyValue
rk storage.MVCCRangeKey
txn *roachpb.Transaction
}

func (i *dataFeedItem) String() string {
if i.txn != nil {
return fmt.Sprintf("%s ! %s", i.kv.Key.String(), i.txn.ID.String())
}
if len(i.kv.Key.Key) > 0 {
return i.kv.Key.String()
}
return i.rk.String()
}

// sortedDistribution is a sorting decorator for a distribution: it ensures
// versions are emitted old to new and that range keys don't overlap with
// point timestamps.
func sortedDistribution(dist dataDistribution) dataDistribution {
var allData []dataFeedItem
for {
kv, rk, txn, ok := dist()
if !ok {
break
}
allData = append(allData, dataFeedItem{kv: kv, rk: rk, txn: txn})
}
isPoint := func(d dataFeedItem) bool {
return len(d.kv.Key.Key) > 0
}
meta := func(i int) (roachpb.Key, hlc.Timestamp, bool) {
if !isPoint(allData[i]) {
return allData[i].rk.StartKey, allData[i].rk.Timestamp, false
}
return allData[i].kv.Key.Key, allData[i].kv.Key.Timestamp, true
}
sort.Slice(allData, func(i, j int) bool {
ki, ti, pi := meta(i)
kj, tj, _ := meta(j)
switch ti.Compare(tj) {
case -1:
return true
case 1:
return false
}
switch ki.Compare(kj) {
case -1:
return true
case 1:
return false
}
return pi
})

var lastTs hlc.Timestamp
var lastIsPoint = true
for i, v := range allData {
switch {
case isPoint(v) && !lastIsPoint && v.kv.Key.Timestamp.LessEq(lastTs):
lastTs.WallTime++
allData[i].kv.Key.Timestamp = lastTs
lastIsPoint = true
case isPoint(v) && lastIsPoint && v.kv.Key.Timestamp.Less(lastTs):
allData[i].kv.Key.Timestamp = lastTs
case !isPoint(v) && !lastIsPoint && v.rk.Timestamp.LessEq(lastTs):
lastTs.WallTime++
allData[i].rk.Timestamp = lastTs
case !isPoint(v) && lastIsPoint && v.rk.Timestamp.LessEq(lastTs):
lastTs.WallTime++
allData[i].rk.Timestamp = lastTs
lastIsPoint = false
default:
lastIsPoint = isPoint(v)
if lastIsPoint {
lastTs = v.kv.Key.Timestamp
} else {
lastTs = v.rk.Timestamp
}
}
}

return func() (storage.MVCCKeyValue, storage.MVCCRangeKey, *roachpb.Transaction, bool) {
if len(allData) == 0 {
return storage.MVCCKeyValue{}, storage.MVCCRangeKey{}, nil, false
}
defer func() {
allData = allData[1:]
}()
return allData[0].kv, allData[0].rk, allData[0].txn, true
}
}
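
// Illustrative reading of the adjustment loop above (a sketch of its intent,
// not an exhaustive spec): items arrive sorted by timestamp, then key, with
// points ordered before range keys on ties. Whenever a range key, or a point
// following a range key, would land at or below the previous item's adjusted
// timestamp, it is bumped one wall nanosecond past it, so range keys never
// share a timestamp with adjacent point versions; a point following another
// point is at most aligned up to the running timestamp. For example, points
// p1@3 and p2@3 followed by a range key r@3 are emitted as p1@3, p2@3, r@4.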

// maxRetriesAllowed limits how many times we can retry when generating
// keys and timestamps for objects that are restricted by some criteria (e.g.
// keys are unique, timestamps shouldn't be duplicated in history, intents
@@ -101,17 +205,16 @@ func newDataDistribution(
versionsPerKey func() int,
intentFrac float64,
oldIntentFrac float64, // within intents(!)
rangeKeyFrac float64,
totalKeys int,
rng *rand.Rand,
) dataDistribution {
// TODO(ajwerner): provide a mechanism to control the rate of expired intents
// or the intent age. Such a knob would likely require decoupling intents from
// other keys.
rangeKeyDist := rangeKeyDistribution(keyDist)
var (
// Remaining values (all versions of all keys together with intents).
remaining = totalKeys
// Key for the objects currently emitted (if versions are not empty).
key roachpb.Key
key, endKey roachpb.Key
// Set of key.String() to avoid generating data for the same key multiple
// times.
seen = map[string]struct{}{}
Expand All @@ -121,7 +224,7 @@ func newDataDistribution(
hasIntent bool
)

generatePointKey := func() (nextKey roachpb.Key, keyTimestamps []hlc.Timestamp, hasIntent bool) {
generatePointKey := func() (nextKey, _ roachpb.Key, keyTimestamps []hlc.Timestamp, hasIntent bool) {
hasIntent = rng.Float64() < intentFrac
oldIntent := hasIntent && rng.Float64() < oldIntentFrac
for retries := 0; len(keyTimestamps) == 0; retries++ {
@@ -186,27 +289,54 @@ func newDataDistribution(
}
retries = 0
}
return nextKey, keyTimestamps, hasIntent
return nextKey, nil, keyTimestamps, hasIntent
}

generateRangeKey := func() (startKey, endKey roachpb.Key, timestamps []hlc.Timestamp, _ bool) {
var ts hlc.Timestamp
for {
ts = tsDist()
if ts.Less(minIntentTs) {
break
}
}
timestamps = []hlc.Timestamp{ts}
startKey, endKey = rangeKeyDist()
return startKey, endKey, timestamps, false
}

return func() (storage.MVCCKeyValue, *roachpb.Transaction, bool) {
return func() (storage.MVCCKeyValue, storage.MVCCRangeKey, *roachpb.Transaction, bool) {
if remaining == 0 {
// Throw away temp key data because we reached the end of the sequence.
seen = nil
return storage.MVCCKeyValue{}, nil, false
return storage.MVCCKeyValue{}, storage.MVCCRangeKey{}, nil, false
}
defer func() { remaining-- }()

if len(timestamps) == 0 {
// Loop because we can get duplicate keys or unacceptable values, in which
// case we retry the key from scratch.
for len(timestamps) == 0 {
key, timestamps, hasIntent = generatePointKey()
if rng.Float64() < rangeKeyFrac {
key, endKey, timestamps, hasIntent = generateRangeKey()
} else {
key, endKey, timestamps, hasIntent = generatePointKey()
}
}
seen[string(key)] = struct{}{}
}
ts := timestamps[0]
timestamps = timestamps[1:]

if len(endKey) > 0 {
return storage.MVCCKeyValue{},
storage.MVCCRangeKey{
StartKey: key,
EndKey: endKey,
Timestamp: ts,
}, nil, true
}

var txn *roachpb.Transaction
// On the last version, we generate a transaction as needed.
if len(timestamps) == 0 && hasIntent {
@@ -222,7 +352,7 @@ func newDataDistribution(
return storage.MVCCKeyValue{
Key: storage.MVCCKey{Key: key, Timestamp: ts},
Value: valueDist().RawBytes,
}, txn, true
}, storage.MVCCRangeKey{}, txn, true
}
}

@@ -243,19 +373,30 @@ type uniformDistSpec struct {
deleteFrac float64
keysPerValueMin, keysPerValueMax int
intentFrac, oldIntentFrac float64
rangeKeyFrac float64
}

var _ distSpec = uniformDistSpec{}

func (ds uniformDistSpec) dist(maxRows int, rng *rand.Rand) dataDistribution {
if ds.tsSecMinIntent <= ds.tsSecFrom && ds.rangeKeyFrac > 0 {
panic("min intent ts should be set if range key generation is needed")
}
if ds.tsSecOldIntentTo <= ds.tsSecMinIntent && ds.oldIntentFrac > 0 {
panic("old intent ts must be lower than min intent ts if old intents are enabled")
}
return newDataDistribution(
uniformTimestampDistribution(ds.tsSecFrom*time.Second.Nanoseconds(), ds.tsSecTo*time.Second.Nanoseconds(), rng),
uniformTimestampDistribution(ds.tsSecFrom*time.Second.Nanoseconds(),
ds.tsSecTo*time.Second.Nanoseconds(), rng),
hlc.Timestamp{WallTime: ds.tsSecMinIntent * time.Second.Nanoseconds()},
hlc.Timestamp{WallTime: ds.tsSecOldIntentTo * time.Second.Nanoseconds()},
uniformTableStringKeyDistribution(ds.desc().StartKey.AsRawKey(), ds.keySuffixMin, ds.keySuffixMax, rng),
uniformTableStringKeyDistribution(ds.desc().StartKey.AsRawKey(), ds.keySuffixMin,
ds.keySuffixMax, rng),
uniformValueStringDistribution(ds.valueLenMin, ds.valueLenMax, ds.deleteFrac, rng),
uniformValuesPerKey(ds.keysPerValueMin, ds.keysPerValueMax, rng),
ds.intentFrac, ds.oldIntentFrac,
ds.intentFrac,
ds.oldIntentFrac,
ds.rangeKeyFrac,
maxRows,
rng,
)
@@ -275,12 +416,12 @@ func (ds uniformDistSpec) String() string {
"keySuffix=[%d,%d],"+
"valueLen=[%d,%d],"+
"keysPerValue=[%d,%d],"+
"deleteFrac=%f,intentFrac=%f",
"deleteFrac=%f,intentFrac=%f,oldIntentFrac=%f,rangeFrac=%f",
ds.tsSecFrom, ds.tsSecTo,
ds.keySuffixMin, ds.keySuffixMax,
ds.valueLenMin, ds.valueLenMax,
ds.keysPerValueMin, ds.keysPerValueMax,
ds.deleteFrac, ds.intentFrac)
ds.deleteFrac, ds.intentFrac, ds.oldIntentFrac, ds.rangeKeyFrac)
}

// uniformTimestamp returns an hlc timestamp distribution with a wall time
@@ -374,3 +515,16 @@ func uniformTableStringKeyDistribution(
return encoding.EncodeBytesAscending(prefix[0:len(prefix):len(prefix)], []byte(key))
}
}

func rangeKeyDistribution(keyDist func() roachpb.Key) func() (roachpb.Key, roachpb.Key) {
return func() (roachpb.Key, roachpb.Key) {
k1 := keyDist()
k2 := keyDist()
for ; k1.Equal(k2); k2 = keyDist() {
}
if k1.Compare(k2) > 0 {
return k2, k1
}
return k1, k2
}
}