Skip to content

Commit

Permalink
storage: add range tombstone handling in MVCC scans and gets
Browse files Browse the repository at this point in the history
This patch adds range tombstone handling for MVCC scans and gets. It
uses a point synthesizing iterator that synthesizes point tombstones for
range tombstones that overlap a point key. `pebbleMVCCScanner` then
remains unchanged, and will automatically take range tombstones into
account similarly to point tombstones, including for uncertainty checks.

For MVCC gets, we additionally synthesize point tombstones for the seek
key if it does not contain a point key. These tombstones would not be
visible to an MVCC scan.

When the `Tombstones` option is disabled, range key masking is enabled
to omit point keys below older range tombstones during iteration.
However, point synthesis must still be enabled in this case to account
for future range tombstones in uncertainty checks.

Release note: None
  • Loading branch information
erikgrinaker committed Apr 12, 2022
1 parent f6d2aca commit ed3987c
Show file tree
Hide file tree
Showing 7 changed files with 654 additions and 4 deletions.
1 change: 1 addition & 0 deletions pkg/storage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ go_library(
"pebble_iterator.go",
"pebble_merge.go",
"pebble_mvcc_scanner.go",
"point_synthesizing_iter.go",
"replicas_storage.go",
"resource_limiter.go",
"row_counter.go",
Expand Down
29 changes: 25 additions & 4 deletions pkg/storage/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,8 @@ func newMVCCIterator(reader Reader, inlineMeta bool, opts IterOptions) MVCCItera
//
// In tombstones mode, if the most recent value is a deletion tombstone, the
// result will be a non-nil roachpb.Value whose RawBytes field is nil.
// Otherwise, a deletion tombstone results in a nil roachpb.Value.
// Otherwise, a deletion tombstone results in a nil roachpb.Value. Range
// tombstones will be emitted as if they were point tombstones.
//
// In inconsistent mode, if an intent is encountered, it will be placed in the
// dedicated return parameter. By contrast, in consistent mode, an intent will
Expand All @@ -724,7 +725,15 @@ func newMVCCIterator(reader Reader, inlineMeta bool, opts IterOptions) MVCCItera
func MVCCGet(
ctx context.Context, reader Reader, key roachpb.Key, timestamp hlc.Timestamp, opts MVCCGetOptions,
) (*roachpb.Value, *roachpb.Intent, error) {
iter := newMVCCIterator(reader, timestamp.IsEmpty(), IterOptions{Prefix: true})
var rangeKeyMaskingBelow hlc.Timestamp
if !opts.Tombstones {
rangeKeyMaskingBelow = timestamp
}
iter := newPointSynthesizingIter(newMVCCIterator(reader, timestamp.IsEmpty(), IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
Prefix: true,
RangeKeyMaskingBelow: rangeKeyMaskingBelow,
}), true /* emitOnSeek */)
defer iter.Close()
value, intent, err := mvccGet(ctx, iter, key, timestamp, opts)
return value.ToPointer(), intent, err
Expand Down Expand Up @@ -2730,7 +2739,9 @@ type MVCCScanResult struct {
// In tombstones mode, if the most recent value for a key is a deletion
// tombstone, the scan result will contain a roachpb.KeyValue for that key whose
// RawBytes field is nil. Otherwise, the key-value pair will be omitted from the
// result entirely.
// result entirely. If a point key was deleted by an MVCC range tombstone,
// a synthesized point tombstone is returned -- range tombstones by themselves
// are not surfaced (in particular if they don't cover any point keys).
//
// When scanning inconsistently, any encountered intents will be placed in the
// dedicated result parameter. By contrast, when scanning consistently, any
Expand All @@ -2753,7 +2764,16 @@ func MVCCScan(
timestamp hlc.Timestamp,
opts MVCCScanOptions,
) (MVCCScanResult, error) {
iter := newMVCCIterator(reader, timestamp.IsEmpty(), IterOptions{LowerBound: key, UpperBound: endKey})
var rangeKeyMaskingBelow hlc.Timestamp
if !opts.Tombstones {
rangeKeyMaskingBelow = timestamp
}
iter := newPointSynthesizingIter(newMVCCIterator(reader, timestamp.IsEmpty(), IterOptions{
KeyTypes: IterKeyTypePointsWithRanges,
LowerBound: key,
UpperBound: endKey,
RangeKeyMaskingBelow: rangeKeyMaskingBelow,
}), false /* emitOnSeek */)
defer iter.Close()
return mvccScanToKvs(ctx, iter, key, endKey, timestamp, opts)
}
Expand All @@ -2766,6 +2786,7 @@ func MVCCScanToBytes(
timestamp hlc.Timestamp,
opts MVCCScanOptions,
) (MVCCScanResult, error) {
// TODO(erikgrinaker): Update this.
iter := newMVCCIterator(reader, timestamp.IsEmpty(), IterOptions{LowerBound: key, UpperBound: endKey})
defer iter.Close()
return mvccScanToBytes(ctx, iter, key, endKey, timestamp, opts)
Expand Down
3 changes: 3 additions & 0 deletions pkg/storage/mvcc_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,9 @@ func cmdIterNew(e *evalCtx) error {
MVCCIterator: r.NewMVCCIterator(kind, opts),
closeReader: closeReader,
}
if e.hasArg("pointSynthesis") {
e.iter = newPointSynthesizingIter(e.iter, false)
}
return nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/storage/mvcc_key.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ func (k MVCCKey) Next() MVCCKey {
}
}

// Clone returns a copy of the key.
func (k MVCCKey) Clone() MVCCKey {
k.Key = k.Key.Clone()
return k
}

// Compare returns -1 if this key is less than the given key, 0 if they're
// equal, or 1 if this is greater. Comparison is by key,timestamp, where larger
// timestamps sort before smaller ones except empty ones which sort first (like
Expand Down
269 changes: 269 additions & 0 deletions pkg/storage/point_synthesizing_iter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package storage

import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)

// pointSynthesizingIter wraps an MVCCIterator, and synthesizes point tombstones
// for range tombstones above/below existing point keys. It does not emit range
// keys at all, since these would appear to conflict with the synthesized point
// keys.
type pointSynthesizingIter struct {
iter MVCCIterator
rangeKeys []MVCCRangeKey
rangeKeysIdx int
rangeKeysStart roachpb.Key
// emitOnSeek will cause a SeekGE() call to emit synthetic points for the seek
// key even if it has no existing point keys.
emitOnSeek bool
// curKey is the current key position of the synthesizing iterator. This may
// be out of sync with the point iterator (when there are no further real
// point versions for the current key).
curKey roachpb.Key
// atPoint is true if the synthesizing iterator is positioned on the real
// point key. In that case, rangeKeysIdx points to the next range key below
// the point key, or past the end of rangeKeys if there are none.
//
// If atPoint is false, then the point iterator will be positioned on the next
// point key after the current range tombstone, which can either be an older
// version of the current key or a different key (or invalid if exhausted).
atPoint bool
}

var _ MVCCIterator = new(pointSynthesizingIter)

func newPointSynthesizingIter(iter MVCCIterator, emitOnSeek bool) *pointSynthesizingIter {
return &pointSynthesizingIter{
iter: iter,
emitOnSeek: emitOnSeek,
}
}

// updateWithFirstPointKey scans to the first point key and updates the iterator
// for its position.
func (i *pointSynthesizingIter) updateWithFirstPointKey() {
for {
if ok, err := i.iter.Valid(); !ok || err != nil {
break
}
if hasPoint, _ := i.iter.HasPointAndRange(); hasPoint {
break
}
i.iter.Next()
}
i.updatePosition()
}

// maybeUpdateWithBareRangeKey updates the iterator with synthetic point keys
// for the current range key if it is a bare range key and there are no point
// keys overlapping the range key for the current key. It will move ahead to
// check that and leave the iterator at the next key position regardless.
func (i *pointSynthesizingIter) maybeUpdateWithBareRangeKey() bool {
if ok, err := i.iter.Valid(); !ok || err != nil {
return false
}
hasPoint, hasRange := i.iter.HasPointAndRange()
if hasPoint || !hasRange {
return false
}
i.updatePosition()

i.iter.Next()
if ok, err := i.iter.Valid(); !ok || err != nil {
return true
}
if hasPoint, _ := i.iter.HasPointAndRange(); hasPoint {
if i.iter.UnsafeKey().Key.Equal(i.curKey) {
// We found a point key for the seek key, update the position again.
i.updatePosition()
}
}
return true
}

// updatePosition will update the iterator position to the newest version of the
// current point iterator's key.
func (i *pointSynthesizingIter) updatePosition() {
if ok, err := i.iter.Valid(); !ok || err != nil {
i.curKey = nil
i.rangeKeys = nil
return
}

hasPoint, hasRange := i.iter.HasPointAndRange()

if hasRange {
if rangeStart, _ := i.iter.RangeBounds(); !rangeStart.Equal(i.rangeKeysStart) {
i.rangeKeys = i.iter.RangeKeys()
i.rangeKeysStart = rangeStart.Clone()
}
}

key := i.iter.UnsafeKey()
i.curKey = key.Key.Clone()
i.rangeKeysIdx = 0
i.atPoint = hasPoint && (len(i.rangeKeys) == 0 || key.Timestamp.IsEmpty() ||
i.rangeKeys[0].Timestamp.LessEq(key.Timestamp))
}

func (i *pointSynthesizingIter) SeekGE(key MVCCKey) {
i.iter.SeekGE(key)
if !i.emitOnSeek || !i.maybeUpdateWithBareRangeKey() {
i.updateWithFirstPointKey()
}
}

func (i *pointSynthesizingIter) SeekIntentGE(key roachpb.Key, txnUUID uuid.UUID) {
i.iter.SeekIntentGE(key, txnUUID)
i.updateWithFirstPointKey()
}

func (i *pointSynthesizingIter) Next() {
if i.atPoint {
// Pass by the current point key and onto the next one. This may be a
// different version of the current key, or a different key entirely.
i.atPoint = false
i.iter.Next()
} else {
// Move onto the next range key. This may be below the current point key,
// we'll find out below.
i.rangeKeysIdx++
}
var key MVCCKey
if ok, _ := i.iter.Valid(); ok {
if hasPoint, _ := i.iter.HasPointAndRange(); hasPoint {
key = i.iter.UnsafeKey()
}
}
if len(key.Key) > 0 && key.Key.Equal(i.curKey) && (i.rangeKeysIdx >= len(i.rangeKeys) ||
key.Timestamp.IsEmpty() || i.rangeKeys[i.rangeKeysIdx].Timestamp.LessEq(key.Timestamp)) {
// If the iter point key is at the same position as us and newer than the current
// range key, then position on the point key.
i.atPoint = true
} else if i.rangeKeysIdx >= len(i.rangeKeys) {
// If we've exhausted the range keys then synthesize points for the current point key,
// which must now be a different key from curKey.
i.updateWithFirstPointKey()
}
// Otherwise, we're now on the correct range key.
}

func (i *pointSynthesizingIter) NextKey() {
i.iter.NextKey()
i.updateWithFirstPointKey()
}

func (i *pointSynthesizingIter) SeekLT(key MVCCKey) {
panic("not implemented")
}

func (i *pointSynthesizingIter) Prev() {
panic("not implemented")
}

func (i *pointSynthesizingIter) Valid() (bool, error) {
if !i.atPoint && i.rangeKeysIdx < len(i.rangeKeys) {
return true, nil
}
return i.iter.Valid()
}

func (i *pointSynthesizingIter) HasPointAndRange() (bool, bool) {
ok, _ := i.Valid()
return ok, false
}

func (i *pointSynthesizingIter) RangeBounds() (roachpb.Key, roachpb.Key) {
return nil, nil
}

func (i *pointSynthesizingIter) RangeKeys() []MVCCRangeKey {
return nil
}

func (i *pointSynthesizingIter) Key() MVCCKey {
return i.UnsafeKey().Clone()
}

func (i *pointSynthesizingIter) UnsafeKey() MVCCKey {
if i.atPoint {
return i.iter.UnsafeKey()
}
if len(i.curKey) == 0 || i.rangeKeysIdx >= len(i.rangeKeys) {
return MVCCKey{}
}
return MVCCKey{
Key: i.curKey,
Timestamp: i.rangeKeys[i.rangeKeysIdx].Timestamp,
}
}

func (i *pointSynthesizingIter) UnsafeRawKey() []byte {
if i.atPoint {
return i.iter.UnsafeRawKey()
}
return EncodeMVCCKeyPrefix(i.curKey)
}

func (i *pointSynthesizingIter) UnsafeRawMVCCKey() []byte {
if i.atPoint {
return i.iter.UnsafeRawMVCCKey()
}
return EncodeMVCCKey(i.UnsafeKey())
}

func (i *pointSynthesizingIter) Value() []byte {
v := i.UnsafeValue()
if v != nil {
v = append([]byte{}, v...)
}
return v
}

func (i *pointSynthesizingIter) UnsafeValue() []byte {
if i.atPoint {
return i.iter.UnsafeValue()
}
return nil
}

func (i *pointSynthesizingIter) Close() {
i.iter.Close()
}

func (i *pointSynthesizingIter) ValueProto(msg protoutil.Message) error {
panic("not implemented")
}

func (i *pointSynthesizingIter) ComputeStats(start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) {
panic("not implemented")
}

func (i *pointSynthesizingIter) FindSplitKey(start, end, minSplitKey roachpb.Key, targetSize int64) (MVCCKey, error) {
panic("not implemented")
}

func (i *pointSynthesizingIter) SetUpperBound(key roachpb.Key) {
panic("not implemented")
}

func (i *pointSynthesizingIter) Stats() IteratorStats {
return i.iter.Stats()
}

func (i *pointSynthesizingIter) SupportsPrev() bool {
panic("not implemented")
}
Loading

0 comments on commit ed3987c

Please sign in to comment.