Skip to content

Commit

Permalink
Merge pull request #91028 from erikgrinaker/backport22.2-90899-90953
Browse files Browse the repository at this point in the history
release-22.2: storage: fix point synthesis activation during reverse scans
  • Loading branch information
erikgrinaker authored Nov 3, 2022
2 parents 8afb672 + ffd1485 commit 3d34142
Show file tree
Hide file tree
Showing 3 changed files with 259 additions and 11 deletions.
39 changes: 31 additions & 8 deletions pkg/storage/pebble_mvcc_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,11 @@ func extractResultKey(repr []byte) roachpb.Key {
// by switching to a pointSynthesizingIter.
type pebbleMVCCScanner struct {
parent MVCCIterator
// parentReverse is true if the previous parent positioning operation was a
// reverse operation (SeekLT or Prev). This is needed to correctly initialize
// pointIter when encountering an MVCC range tombstone in reverse.
// TODO(erikgrinaker): Consider adding MVCCIterator.IsReverse() instead.
parentReverse bool
// pointIter is a point synthesizing iterator that wraps and replaces parent
// when an MVCC range tombstone is encountered. A separate reference to it is
// kept in order to release it back to its pool when the scanner is done.
Expand Down Expand Up @@ -447,10 +452,13 @@ func (p *pebbleMVCCScanner) get(ctx context.Context) {
// HasPointAndRange() call.
if ok, _ := p.parent.Valid(); ok {
if _, hasRange := p.parent.HasPointAndRange(); hasRange {
p.enablePointSynthesis()
if !p.enablePointSynthesis() {
return
}
}
}

p.parentReverse = false
p.parent.SeekGE(MVCCKey{Key: p.start})
if !p.updateCurrent() {
return
Expand All @@ -477,7 +485,9 @@ func (p *pebbleMVCCScanner) scan(
// HasPointAndRange() call.
if ok, _ := p.parent.Valid(); ok {
if _, hasRange := p.parent.HasPointAndRange(); hasRange {
p.enablePointSynthesis()
if !p.enablePointSynthesis() {
return nil, 0, 0, p.err
}
}
}

Expand Down Expand Up @@ -959,6 +969,7 @@ func (p *pebbleMVCCScanner) advanceKeyAtEnd() bool {
// Iterating to the next key might have caused the iterator to reach the
// end of the key space. If that happens, back up to the very last key.
p.peeked = false
p.parentReverse = true
p.parent.SeekLT(MVCCKey{Key: p.end})
if !p.updateCurrent() {
return false
Expand Down Expand Up @@ -1199,8 +1210,8 @@ func (p *pebbleMVCCScanner) updateCurrent() bool {
// synthesizes MVCC point tombstones for MVCC range tombstones and never emits
// range keys itself. p.parent must be valid.
//
// gcassert:inline
func (p *pebbleMVCCScanner) enablePointSynthesis() {
// Returns true if the iterator is valid.
func (p *pebbleMVCCScanner) enablePointSynthesis() bool {
if util.RaceEnabled {
if ok, _ := p.parent.Valid(); !ok {
panic(errors.AssertionFailedf("enablePointSynthesis called with invalid iter"))
Expand All @@ -1213,13 +1224,18 @@ func (p *pebbleMVCCScanner) enablePointSynthesis() {
p.parent.UnsafeKey()))
}
}
p.pointIter = NewPointSynthesizingIterAtParent(p.parent)
p.parent = p.pointIter
pointIter, err := NewPointSynthesizingIterAtParent(p.parent, p.parentReverse)
if err != nil {
p.err = err
return false
}
if util.RaceEnabled {
if ok, _ := p.parent.Valid(); !ok {
if ok, _ := pointIter.Valid(); !ok {
panic(errors.AssertionFailedf("invalid pointSynthesizingIter with valid iter"))
}
}
p.pointIter, p.parent = pointIter, pointIter
return true
}

func (p *pebbleMVCCScanner) decodeCurrentMetadata() bool {
Expand Down Expand Up @@ -1260,7 +1276,9 @@ func (p *pebbleMVCCScanner) iterValid() bool {
// Since iterValid() is called after every iterator positioning operation, it
// is convenient to check for any range keys and enable point synthesis here.
if p.parent.RangeKeyChanged() {
p.enablePointSynthesis()
if !p.enablePointSynthesis() {
return false
}
}
if util.RaceEnabled && p.pointIter == nil {
if _, hasRange := p.parent.HasPointAndRange(); hasRange {
Expand All @@ -1273,13 +1291,15 @@ func (p *pebbleMVCCScanner) iterValid() bool {

// iterSeek seeks to the latest revision of the specified key (or a greater key).
func (p *pebbleMVCCScanner) iterSeek(key MVCCKey) bool {
p.parentReverse = false
p.clearPeeked()
p.parent.SeekGE(key)
return p.updateCurrent()
}

// iterSeekReverse seeks to the latest revision of the key before the specified key.
func (p *pebbleMVCCScanner) iterSeekReverse(key MVCCKey) bool {
p.parentReverse = true
p.clearPeeked()
p.parent.SeekLT(key)
if !p.updateCurrent() {
Expand All @@ -1298,6 +1318,7 @@ func (p *pebbleMVCCScanner) iterSeekReverse(key MVCCKey) bool {

// iterNext advances to the next MVCC key.
func (p *pebbleMVCCScanner) iterNext() bool {
p.parentReverse = false
if p.reverse && p.peeked {
// If we have peeked at the previous entry, we need to advance the iterator
// twice.
Expand All @@ -1323,6 +1344,7 @@ func (p *pebbleMVCCScanner) iterNext() bool {

// iterPrev advances to the previous MVCC Key.
func (p *pebbleMVCCScanner) iterPrev() bool {
p.parentReverse = true
if p.peeked {
p.peeked = false
return p.updateCurrent()
Expand All @@ -1337,6 +1359,7 @@ func (p *pebbleMVCCScanner) iterPrev() bool {
func (p *pebbleMVCCScanner) iterPeekPrev() ([]byte, bool) {
if !p.peeked {
p.peeked = true
p.parentReverse = true
// We need to save a copy of the current iterator key and value and adjust
// curRawKey, curKey and curValue to point to this saved data. We use a
// single buffer for this purpose: savedBuf.
Expand Down
51 changes: 48 additions & 3 deletions pkg/storage/point_synthesizing_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,63 @@ func NewPointSynthesizingIter(parent MVCCIterator) *PointSynthesizingIter {

// NewPointSynthesizingIterAtParent creates a new pointSynthesizingIter and
// loads the position from the parent iterator.
func NewPointSynthesizingIterAtParent(parent MVCCIterator) *PointSynthesizingIter {
//
// If reverse is true, the point synthesizing iterator will reposition on
// a synthetic point tombstone after the current position if appropriate.
// Consider the following dataset:
//
// 2 a2 b2
// 1 [---)
// a b
//
// If the caller was positioned on b@2, then called Prev() and landed on a@2,
// where it detected the [a-b)@2 range key and enabled point synthesis, it would
// appear to have skipped over the synthetic point tombstone at a@1. Setting
// reverse=true in this case will position the iterator on the synthetic point
// tombstone a@1.
//
// However, the above assumes that the previous positioning operation was from a
// different key -- if it was e.g. a SeekLT(a@1) then this repositioning will be
// incorrect. The caller must take care to use reverse only when appropriate.
func NewPointSynthesizingIterAtParent(
parent MVCCIterator, reverse bool,
) (*PointSynthesizingIter, error) {
iter := NewPointSynthesizingIter(parent)
iter.rangeKeyChanged = true // force range key detection
if ok, err := iter.updateIter(); ok && err == nil {
if ok, _ := iter.updateIter(); ok {
// updateSeekGEPosition may step parent and then compare against seekKey, so
// we need to clone it.
seekKey := parent.UnsafeKey()
iter.seekKeyBuf = append(iter.seekKeyBuf[:0], seekKey.Key...)
seekKey.Key = iter.seekKeyBuf
iter.updateSeekGEPosition(seekKey)
if err := iter.iterErr; err != nil {
iter.release()
return nil, err
}

// If we're on the start key of a range key in the reverse direction, we
// must take care not to skip any synthetic point tombstones that the caller
// would have expected to be emitted. This essentially means switching the
// iterator to reverse iteration and repositioning on the oldest version
// (real or synthetic).
if reverse && len(iter.rangeKeys) > 0 && iter.rangeKeysPos.Equal(iter.rangeKeysStart) {
if !iter.atPoint {
if _, err := iter.iterPrev(); err != nil {
iter.release()
return nil, err
}
}
iter.reverse = true
iter.rangeKeysIdx = iter.rangeKeysEnd - 1
iter.updateAtPoint()
}
}
return iter
if err := iter.iterErr; err != nil {
iter.release()
return nil, err
}
return iter, nil
}

// Close implements MVCCIterator.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Regression test for https://github.com/cockroachdb/cockroach/issues/90642.
#
# REAL DATASET SYNTHETIC DATASET
# 3 [b3] 3 [b3]
# 2 a2 2 a2
# 1 [---) 1 x
# a b a b
#
# Recall that pebbleMVCCScanner only enables pointSynthesizingIter when it
# encounters a range key. In the case above, during a reverse scan, the [a-b)@1
# range key will first become visible to pebbleMVCCScanner when it lands on a@2,
# so it enabled point synthesis positioned at the a@2 point key. Notice how the
# iterator has now skipped over the synthetic point tombstone a@1.
#
# This is particularly problematic when combined with pebbleMVCCScanner peeking,
# which assumes that following a iterPeekPrev() call, an iterNext() call can
# step the parent iterator forward once to get back to the original position.
# With the above bug, that is no longer true, as it instead lands on the
# synthetic point tombstone which was skipped during reverse iteration. During
# intent processing for b@3, such an iterNext() call is expected to land on the
# intent's provisional value at b@3, but it instead lands on the intent itself
# at b@0. This in turn caused a value checksum or decoding failure, where it was
# expecting the current key to be b@3, but the actual key was b@0.
run ok
del_range_ts k=a end=b ts=1
put k=a ts=2 v=a2
with t=A
txn_begin k=b ts=3
put k=b v=b3
----
>> at end:
txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=3.000000000,0 wto=false gul=0,0
rangekey: {a-b}/[1.000000000,0=/<empty>]
data: "a"/2.000000000,0 -> /BYTES/a2
meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
data: "b"/3.000000000,0 -> /BYTES/b3

run ok
scan t=A k=a end=z reverse
----
scan: "b" -> /BYTES/b3 @3.000000000,0
scan: "a" -> /BYTES/a2 @2.000000000,0

# We also test the same scenario with a double range tombstone, i.e.:
#
# 3 [b3]
# 2 [---)
# 1 [---)
# a b
run ok
txn_remove t=A
clear_range k=a end=z
del_range_ts k=a end=b ts=1
del_range_ts k=a end=b ts=2
with t=A
txn_begin k=b ts=3
put k=b v=b3
----
>> at end:
txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=3.000000000,0 wto=false gul=0,0
rangekey: {a-b}/[2.000000000,0=/<empty> 1.000000000,0=/<empty>]
meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
data: "b"/3.000000000,0 -> /BYTES/b3

run ok
scan t=A k=a end=z reverse
----
scan: "b" -> /BYTES/b3 @3.000000000,0

run ok
scan t=A k=a end=z reverse tombstones
----
scan: "b" -> /BYTES/b3 @3.000000000,0
scan: "a" -> /<empty> @2.000000000,0

# And with a point between the range tombstone. We place the intent at the
# lowest timestamp, which is a contrived/unrealistic scenario, and do tombstone
# scans at all timestamps.
#
# 3 [---)
# 3 a3
# 2 [---)
# 1 [b1]
# a b
run ok
txn_remove t=A
clear_range k=a end=z
del_range_ts k=a end=b ts=2
put k=a ts=3 v=a3
del_range_ts k=a end=b ts=4
with t=A
txn_begin k=b ts=1
put k=b v=b1
----
>> at end:
txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=1.000000000,0 wto=false gul=0,0
rangekey: {a-b}/[4.000000000,0=/<empty> 2.000000000,0=/<empty>]
data: "a"/3.000000000,0 -> /BYTES/a3
meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} ts=1.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
data: "b"/1.000000000,0 -> /BYTES/b1

run ok
scan t=A k=a end=z reverse
----
scan: "b" -> /BYTES/b1 @1.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=4
----
scan: "b" -> /BYTES/b1 @1.000000000,0
scan: "a" -> /<empty> @4.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=3
----
scan: "b" -> /BYTES/b1 @1.000000000,0
scan: "a" -> /BYTES/a3 @3.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=2
----
scan: "b" -> /BYTES/b1 @1.000000000,0
scan: "a" -> /<empty> @2.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=1
----
scan: "b" -> /BYTES/b1 @1.000000000,0

# And a point below them.
#
# 4 [b4]
# 3 [---)
# 2 [---)
# 1 a1
# a b
run ok
txn_remove t=A
clear_range k=a end=z
put k=a ts=2 v=a2
del_range_ts k=a end=b ts=3
del_range_ts k=a end=b ts=4
with t=A
txn_begin k=b ts=1
put k=b v=b1
----
>> at end:
txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=1.000000000,0 wto=false gul=0,0
rangekey: {a-b}/[4.000000000,0=/<empty> 3.000000000,0=/<empty>]
data: "a"/2.000000000,0 -> /BYTES/a2
meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} ts=1.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
data: "b"/1.000000000,0 -> /BYTES/b1

run ok
scan t=A k=a end=z reverse
----
scan: "b" -> /BYTES/b1 @1.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=4
----
scan: "b" -> /BYTES/b1 @1.000000000,0
scan: "a" -> /<empty> @4.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=3
----
scan: "b" -> /BYTES/b1 @1.000000000,0
scan: "a" -> /<empty> @3.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=2
----
scan: "b" -> /BYTES/b1 @1.000000000,0
scan: "a" -> /BYTES/a2 @2.000000000,0

run ok
scan t=A k=a end=z reverse tombstones ts=1
----
scan: "b" -> /BYTES/b1 @1.000000000,0

0 comments on commit 3d34142

Please sign in to comment.