From 5217021a675209bffcd2ca71c04ca655738c4435 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 31 Oct 2022 17:53:50 +0000 Subject: [PATCH 1/3] storage: fix error handling in `NewPointSynthesizingIterAtParent` Release note: None --- pkg/storage/point_synthesizing_iter.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/storage/point_synthesizing_iter.go b/pkg/storage/point_synthesizing_iter.go index cdfe62d2ddbe..43b5d86ececf 100644 --- a/pkg/storage/point_synthesizing_iter.go +++ b/pkg/storage/point_synthesizing_iter.go @@ -158,9 +158,9 @@ func NewPointSynthesizingIterAtParent(parent MVCCIterator) (*PointSynthesizingIt seekKey.Key = iter.seekKeyBuf iter.updateSeekGEPosition(seekKey) } - if iter.iterErr != nil { + if err := iter.iterErr; err != nil { iter.release() - return nil, iter.iterErr + return nil, err } return iter, nil } From 51d70e82d848c1065785184febf7c52e69ff2e9a Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 30 Oct 2022 12:58:31 +0000 Subject: [PATCH 2/3] storage: fix point synthesis activation during reverse scans This patch fixes a bug where `pointSynthesizingIter` could appear to have skipped a synthetic point tombstone in the reverse direction when enabled dynamically by `pebbleMVCCScanner`. Consider the following case: ``` REAL DATASET SYNTHETIC DATASET 3 [b3] 3 [b3] 2 a2 2 a2 1 [---) 1 x a b a b ``` Recall that `pebbleMVCCScanner` only enables `pointSynthesizingIter` when it encounters a range key. In the case above, during a reverse scan, the `[a-b)@1` range key will first become visible to `pebbleMVCCScanner` when it lands on `a@2`, so it enabled point synthesis positioned at the `a@2` point key. Notice how the iterator has now skipped over the synthetic point tombstone `a@1`. This is particularly problematic when combined with `pebbleMVCCScanner` peeking, which assumes that following a `iterPeekPrev()` call, an `iterNext()` call can step the parent iterator forward once to get back to the original position. With the above bug, that is no longer true, as it instead lands on the synthetic point tombstone which was skipped during reverse iteration. During intent processing for `b@3`, such an `iterNext()` call is expected to land on the intent's provisional value at `b@3`, but it instead lands on the intent itself at `b@0`. This in turn caused a value checksum or decoding failure, where it was expecting the current key to be `b@3`, but the actual key was `b@0`. This patch fixes the bug by keeping track of the direction of the previous positioning operation in `pebbleMVCCScanner`, and then repositioning the `pointSynthesizingIter` as appropriate during dynamic activation in the reverse direction. This doesn't have a significant effect on performance (in particular, the point get path is never in reverse). Several alternative approaches were attempted but abandoned: * Don't synthesize points at a range key's start bound. While compelling for a range of reasons (complexity, performance, etc) this has two flaws: on a bare range key, the iterator still needs to know which direction to skip in, and we also need to know the timestamps of bare range keys for `FailOnMoreRecent` handling. * Only synthesize a single point at a range key's start bound. This would solve the problem at hand, but has rather strange semantics. Longer-term, it may be better to merge range key logic into `pebbleMVCCScanner` itself -- `pointSynthesizingIter` was intended to reduce complexity, but it's not clear that the overall complexity is actually reduced. Release note: None --- pkg/storage/pebble_mvcc_scanner.go | 14 +- pkg/storage/point_synthesizing_iter.go | 43 ++++- ...ge_tombstone_scans_reverse_skip_regression | 180 ++++++++++++++++++ 3 files changed, 235 insertions(+), 2 deletions(-) create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_scans_reverse_skip_regression diff --git a/pkg/storage/pebble_mvcc_scanner.go b/pkg/storage/pebble_mvcc_scanner.go index 29934eb07c14..e170f648d630 100644 --- a/pkg/storage/pebble_mvcc_scanner.go +++ b/pkg/storage/pebble_mvcc_scanner.go @@ -309,6 +309,11 @@ func extractResultKey(repr []byte) roachpb.Key { // by switching to a pointSynthesizingIter. type pebbleMVCCScanner struct { parent MVCCIterator + // parentReverse is true if the previous parent positioning operation was a + // reverse operation (SeekLT or Prev). This is needed to correctly initialize + // pointIter when encountering an MVCC range tombstone in reverse. + // TODO(erikgrinaker): Consider adding MVCCIterator.IsReverse() instead. + parentReverse bool // pointIter is a point synthesizing iterator that wraps and replaces parent // when an MVCC range tombstone is encountered. A separate reference to it is // kept in order to release it back to its pool when the scanner is done. @@ -453,6 +458,7 @@ func (p *pebbleMVCCScanner) get(ctx context.Context) { } } + p.parentReverse = false p.parent.SeekGE(MVCCKey{Key: p.start}) if !p.updateCurrent() { return @@ -963,6 +969,7 @@ func (p *pebbleMVCCScanner) advanceKeyAtEnd() bool { // Iterating to the next key might have caused the iterator to reach the // end of the key space. If that happens, back up to the very last key. p.peeked = false + p.parentReverse = true p.parent.SeekLT(MVCCKey{Key: p.end}) if !p.updateCurrent() { return false @@ -1217,7 +1224,7 @@ func (p *pebbleMVCCScanner) enablePointSynthesis() bool { p.parent.UnsafeKey())) } } - pointIter, err := NewPointSynthesizingIterAtParent(p.parent) + pointIter, err := NewPointSynthesizingIterAtParent(p.parent, p.parentReverse) if err != nil { p.err = err return false @@ -1284,6 +1291,7 @@ func (p *pebbleMVCCScanner) iterValid() bool { // iterSeek seeks to the latest revision of the specified key (or a greater key). func (p *pebbleMVCCScanner) iterSeek(key MVCCKey) bool { + p.parentReverse = false p.clearPeeked() p.parent.SeekGE(key) return p.updateCurrent() @@ -1291,6 +1299,7 @@ func (p *pebbleMVCCScanner) iterSeek(key MVCCKey) bool { // iterSeekReverse seeks to the latest revision of the key before the specified key. func (p *pebbleMVCCScanner) iterSeekReverse(key MVCCKey) bool { + p.parentReverse = true p.clearPeeked() p.parent.SeekLT(key) if !p.updateCurrent() { @@ -1309,6 +1318,7 @@ func (p *pebbleMVCCScanner) iterSeekReverse(key MVCCKey) bool { // iterNext advances to the next MVCC key. func (p *pebbleMVCCScanner) iterNext() bool { + p.parentReverse = false if p.reverse && p.peeked { // If we have peeked at the previous entry, we need to advance the iterator // twice. @@ -1334,6 +1344,7 @@ func (p *pebbleMVCCScanner) iterNext() bool { // iterPrev advances to the previous MVCC Key. func (p *pebbleMVCCScanner) iterPrev() bool { + p.parentReverse = true if p.peeked { p.peeked = false return p.updateCurrent() @@ -1348,6 +1359,7 @@ func (p *pebbleMVCCScanner) iterPrev() bool { func (p *pebbleMVCCScanner) iterPeekPrev() ([]byte, bool) { if !p.peeked { p.peeked = true + p.parentReverse = true // We need to save a copy of the current iterator key and value and adjust // curRawKey, curKey and curValue to point to this saved data. We use a // single buffer for this purpose: savedBuf. diff --git a/pkg/storage/point_synthesizing_iter.go b/pkg/storage/point_synthesizing_iter.go index 43b5d86ececf..95276e671a4f 100644 --- a/pkg/storage/point_synthesizing_iter.go +++ b/pkg/storage/point_synthesizing_iter.go @@ -147,7 +147,27 @@ func NewPointSynthesizingIter(parent MVCCIterator) *PointSynthesizingIter { // NewPointSynthesizingIterAtParent creates a new pointSynthesizingIter and // loads the position from the parent iterator. -func NewPointSynthesizingIterAtParent(parent MVCCIterator) (*PointSynthesizingIter, error) { +// +// If reverse is true, the point synthesizing iterator will reposition on +// a synthetic point tombstone after the current position if appropriate. +// Consider the following dataset: +// +// 2 a2 b2 +// 1 [---) +// a b +// +// If the caller was positioned on b@2, then called Prev() and landed on a@2, +// where it detected the [a-b)@2 range key and enabled point synthesis, it would +// appear to have skipped over the synthetic point tombstone at a@1. Setting +// reverse=true in this case will position the iterator on the synthetic point +// tombstone a@1. +// +// However, the above assumes that the previous positioning operation was from a +// different key -- if it was e.g. a SeekLT(a@1) then this repositioning will be +// incorrect. The caller must take care to use reverse only when appropriate. +func NewPointSynthesizingIterAtParent( + parent MVCCIterator, reverse bool, +) (*PointSynthesizingIter, error) { iter := NewPointSynthesizingIter(parent) iter.rangeKeyChanged = true // force range key detection if ok, _ := iter.updateIter(); ok { @@ -157,6 +177,27 @@ func NewPointSynthesizingIterAtParent(parent MVCCIterator) (*PointSynthesizingIt iter.seekKeyBuf = append(iter.seekKeyBuf[:0], seekKey.Key...) seekKey.Key = iter.seekKeyBuf iter.updateSeekGEPosition(seekKey) + if err := iter.iterErr; err != nil { + iter.release() + return nil, err + } + + // If we're on the start key of a range key in the reverse direction, we + // must take care not to skip any synthetic point tombstones that the caller + // would have expected to be emitted. This essentially means switching the + // iterator to reverse iteration and repositioning on the oldest version + // (real or synthetic). + if reverse && len(iter.rangeKeys) > 0 && iter.rangeKeysPos.Equal(iter.rangeKeysStart) { + if !iter.atPoint { + if _, err := iter.iterPrev(); err != nil { + iter.release() + return nil, err + } + } + iter.reverse = true + iter.rangeKeysIdx = iter.rangeKeysEnd - 1 + iter.updateAtPoint() + } } if err := iter.iterErr; err != nil { iter.release() diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_scans_reverse_skip_regression b/pkg/storage/testdata/mvcc_histories/range_tombstone_scans_reverse_skip_regression new file mode 100644 index 000000000000..f2fcba081bd9 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_scans_reverse_skip_regression @@ -0,0 +1,180 @@ +# Regression test for https://github.com/cockroachdb/cockroach/issues/90642. +# +# REAL DATASET SYNTHETIC DATASET +# 3 [b3] 3 [b3] +# 2 a2 2 a2 +# 1 [---) 1 x +# a b a b +# +# Recall that pebbleMVCCScanner only enables pointSynthesizingIter when it +# encounters a range key. In the case above, during a reverse scan, the [a-b)@1 +# range key will first become visible to pebbleMVCCScanner when it lands on a@2, +# so it enabled point synthesis positioned at the a@2 point key. Notice how the +# iterator has now skipped over the synthetic point tombstone a@1. +# +# This is particularly problematic when combined with pebbleMVCCScanner peeking, +# which assumes that following a iterPeekPrev() call, an iterNext() call can +# step the parent iterator forward once to get back to the original position. +# With the above bug, that is no longer true, as it instead lands on the +# synthetic point tombstone which was skipped during reverse iteration. During +# intent processing for b@3, such an iterNext() call is expected to land on the +# intent's provisional value at b@3, but it instead lands on the intent itself +# at b@0. This in turn caused a value checksum or decoding failure, where it was +# expecting the current key to be b@3, but the actual key was b@0. +run ok +del_range_ts k=a end=b ts=1 +put k=a ts=2 v=a2 +with t=A + txn_begin k=b ts=3 + put k=b v=b3 +---- +>> at end: +txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=3.000000000,0 wto=false gul=0,0 +rangekey: {a-b}/[1.000000000,0=/] +data: "a"/2.000000000,0 -> /BYTES/a2 +meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/3.000000000,0 -> /BYTES/b3 + +run ok +scan t=A k=a end=z reverse +---- +scan: "b" -> /BYTES/b3 @3.000000000,0 +scan: "a" -> /BYTES/a2 @2.000000000,0 + +# We also test the same scenario with a double range tombstone, i.e.: +# +# 3 [b3] +# 2 [---) +# 1 [---) +# a b +run ok +txn_remove t=A +clear_range k=a end=z +del_range_ts k=a end=b ts=1 +del_range_ts k=a end=b ts=2 +with t=A + txn_begin k=b ts=3 + put k=b v=b3 +---- +>> at end: +txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=3.000000000,0 wto=false gul=0,0 +rangekey: {a-b}/[2.000000000,0=/ 1.000000000,0=/] +meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/3.000000000,0 -> /BYTES/b3 + +run ok +scan t=A k=a end=z reverse +---- +scan: "b" -> /BYTES/b3 @3.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones +---- +scan: "b" -> /BYTES/b3 @3.000000000,0 +scan: "a" -> / @2.000000000,0 + +# And with a point between the range tombstone. We place the intent at the +# lowest timestamp, which is a contrived/unrealistic scenario, and do tombstone +# scans at all timestamps. +# +# 3 [---) +# 3 a3 +# 2 [---) +# 1 [b1] +# a b +run ok +txn_remove t=A +clear_range k=a end=z +del_range_ts k=a end=b ts=2 +put k=a ts=3 v=a3 +del_range_ts k=a end=b ts=4 +with t=A + txn_begin k=b ts=1 + put k=b v=b1 +---- +>> at end: +txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=1.000000000,0 wto=false gul=0,0 +rangekey: {a-b}/[4.000000000,0=/ 2.000000000,0=/] +data: "a"/3.000000000,0 -> /BYTES/a3 +meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} ts=1.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/1.000000000,0 -> /BYTES/b1 + +run ok +scan t=A k=a end=z reverse +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=4 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 +scan: "a" -> / @4.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=3 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 +scan: "a" -> /BYTES/a3 @3.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=2 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 +scan: "a" -> / @2.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=1 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 + +# And a point below them. +# +# 4 [b4] +# 3 [---) +# 2 [---) +# 1 a1 +# a b +run ok +txn_remove t=A +clear_range k=a end=z +put k=a ts=2 v=a2 +del_range_ts k=a end=b ts=3 +del_range_ts k=a end=b ts=4 +with t=A + txn_begin k=b ts=1 + put k=b v=b1 +---- +>> at end: +txn: "A" meta={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=1.000000000,0 wto=false gul=0,0 +rangekey: {a-b}/[4.000000000,0=/ 3.000000000,0=/] +data: "a"/2.000000000,0 -> /BYTES/a2 +meta: "b"/0,0 -> txn={id=00000000 key="b" pri=0.00000000 epo=0 ts=1.000000000,0 min=0,0 seq=0} ts=1.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/1.000000000,0 -> /BYTES/b1 + +run ok +scan t=A k=a end=z reverse +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=4 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 +scan: "a" -> / @4.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=3 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 +scan: "a" -> / @3.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=2 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 +scan: "a" -> /BYTES/a2 @2.000000000,0 + +run ok +scan t=A k=a end=z reverse tombstones ts=1 +---- +scan: "b" -> /BYTES/b1 @1.000000000,0 From 0b361f143808b5376dbf4f248e0b854b1eefd911 Mon Sep 17 00:00:00 2001 From: Rail Aliiev Date: Mon, 31 Oct 2022 16:36:13 -0400 Subject: [PATCH 3/3] release: fix download url Previously, we changed the download url to always use `s3.amazonaws.com` as the suffix. In cases when a bucket name contains dots, it breaks the SSL verification procedure. This change will revert the change and curl will be using an explicit download domain. Release note: None --- .../internal/release/process/publish-cockroach-release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/teamcity/internal/release/process/publish-cockroach-release.sh b/build/teamcity/internal/release/process/publish-cockroach-release.sh index 1d98a3f553dc..7b01c1cfad1d 100755 --- a/build/teamcity/internal/release/process/publish-cockroach-release.sh +++ b/build/teamcity/internal/release/process/publish-cockroach-release.sh @@ -119,7 +119,7 @@ for platform_name in "${platform_names[@]}"; do --silent \ --show-error \ --output /dev/stdout \ - --url "https://${bucket}.s3.amazonaws.com/cockroach-${build_name}.${linux_platform}-${tarball_arch}.tgz" \ + --url "https://${s3_download_hostname}/cockroach-${build_name}.${linux_platform}-${tarball_arch}.tgz" \ | tar \ --directory="build/deploy-${docker_arch}" \ --extract \