Skip to content

Commit

Permalink
sstable: fix interaction between bpf and monotonic bounds optimization
Browse files Browse the repository at this point in the history
The sstable iterator has an optimization for when iterator bounds are
moved monotonically forward or backward. The first seek after the bounds
adjustment can reuse the existing iterator position to avoid a more
expensive full seek.

In iterators over sstables with two-level indexes, this optimization
could interact with block-property filters, improperly allowing the
second seek after the bounds adjustment to also reuse the existing
iterator position. If the second seek key was smaller than the first
seek key, the iterator would skip keys.

Informs #1963.
Informs cockroachdb/cockroach#88296.
  • Loading branch information
jbowens committed Sep 23, 2022
1 parent 46a6a53 commit 0f464e9
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 2 deletions.
24 changes: 24 additions & 0 deletions iterator_histories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/cockroachdb/pebble/internal/datadriven"
"github.com/cockroachdb/pebble/internal/testkeys"
"github.com/cockroachdb/pebble/internal/testkeys/blockprop"
"github.com/cockroachdb/pebble/sstable"
"github.com/cockroachdb/pebble/vfs"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -84,6 +85,14 @@ func TestIterHistories(t *testing.T) {
for i := range opts.Levels {
opts.Levels[i].BlockSize = v
}
case "index-block-size":
v, err := strconv.Atoi(cmdArg.Vals[0])
if err != nil {
return err.Error()
}
for i := range opts.Levels {
opts.Levels[i].IndexBlockSize = v
}
case "target-file-size":
v, err := strconv.Atoi(cmdArg.Vals[0])
if err != nil {
Expand Down Expand Up @@ -271,6 +280,21 @@ func TestIterHistories(t *testing.T) {
if reader == nil {
return fmt.Sprintf("unknown reader %q", arg.Vals[0])
}
case "point-key-filter":
if len(arg.Vals) != 2 {
return fmt.Sprintf("blockprop-filter expects 2 arguments, received %d", len(arg.Vals))
}
min, err := strconv.ParseUint(arg.Vals[0], 10, 64)
if err != nil {
return err.Error()
}
max, err := strconv.ParseUint(arg.Vals[1], 10, 64)
if err != nil {
return err.Error()
}
o.PointKeyFilters = []sstable.BlockPropertyFilter{
blockprop.NewBlockPropertyFilter(min, max),
}
}
}
var iter *Iterator
Expand Down
18 changes: 16 additions & 2 deletions sstable/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -1337,8 +1337,8 @@ var _ base.InternalIterator = (*twoLevelIterator)(nil)
// encountered, which may be nil if we have simply exhausted the entire table.
// This is used for two level indexes.
func (i *twoLevelIterator) loadIndex(dir int8) loadBlockResult {
// Ensure the data block iterator is invalidated even if loading of the
// index fails.
// Ensure the index data block iterators are invalidated even if loading of
// the index fails.
i.data.invalidate()
if !i.topLevelIndex.valid() {
i.index.offset = 0
Expand Down Expand Up @@ -1525,6 +1525,7 @@ func (i *twoLevelIterator) SeekGE(key []byte, flags base.SeekGEFlags) (*Internal

result := i.loadIndex(+1)
if result == loadBlockFailed {
i.boundsCmp = 0
return nil, nil
}
if result == loadBlockIrrelevant {
Expand All @@ -1539,6 +1540,12 @@ func (i *twoLevelIterator) SeekGE(key []byte, flags base.SeekGEFlags) (*Internal
}
// Fall through to skipForward.
dontSeekWithinSingleLevelIter = true
// Clear boundsCmp. Normally, singleLevelIterator.SeekGE will clear
// it, but if we're skipping an index block altogether, it's
// possible we'll return without ever seeking within the
// single-level iterator, in which case boundsCmp may be improperly
// left == 0.
i.boundsCmp = 0
}
}
// Else fast-path: There are two possible cases, from
Expand Down Expand Up @@ -1643,6 +1650,7 @@ func (i *twoLevelIterator) SeekPrefixGE(

result := i.loadIndex(+1)
if result == loadBlockFailed {
i.boundsCmp = 0
return nil, nil
}
if result == loadBlockIrrelevant {
Expand All @@ -1657,6 +1665,12 @@ func (i *twoLevelIterator) SeekPrefixGE(
}
// Fall through to skipForward.
dontSeekWithinSingleLevelIter = true
// Clear boundsCmp. Normally, singleLevelIterator.SeekPrefixGE will
// clear it, but if we're skipping this index block altogether, it's
// possible we'll return without ever seeking within the
// single-level iterator, in which case boundsCmp may be improperly
// left == 0.
i.boundsCmp = 0
}
}
// Else fast-path: The bounds have moved forward and this SeekGE is
Expand Down
104 changes: 104 additions & 0 deletions testdata/iter_histories/iter_optimizations
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,107 @@ seek-prefix-ge p
p@5: (., [p-"p\x00") @1=foo UPDATED)
.
p: (., [p-"p\x00") @1=foo UPDATED)

# Regression test for #1963 / cockroachdb/cockroach#88296.
#
# The iterators in this test move their bounds monotonically forward
# [a,b)→[b,e). This enables the sstable iterator optimization for monotonically
# moving bounds (see boundsCmp in sstable/reader.go). With this optimization,
# the first seek after the SetBounds may use the fact that the bounds moved
# forward monotonically to avoid re-seeking within the index.
#
# The test cases below exercise a seek to a key, followed by a seek to a smaller
# key. The second seek should not make use of the bounds optimization because
# doing so may incorrectly skip all keys between the lower bound and the first
# seek key. Previously, the code paths that handled block-property filtering on
# a two-level iterator could leave the iterator in a state such that the second
# seek would improperly also exercise the monotonic bounds optimization. In the
# test cases below, this would result in the key 'b' not being found. Each test
# case exercises a different combination of seek-ge and seek-prefix-ge.

reset block-size=1 index-block-size=1
----

batch commit
set a a
set b b
set b@4 b@4
set z@6 z@6
----
committed 4 keys

flush
----

combined-iter lower=a upper=b point-key-filter=(1,4)
seek-ge a
set-bounds lower=b upper=e
seek-prefix-ge d@5
seek-prefix-ge b
----
a: (a, .)
.
.
b: (b, .)

combined-iter lower=a upper=b point-key-filter=(1,4)
seek-ge a
set-bounds lower=b upper=e
seek-ge d@5
seek-prefix-ge b
----
a: (a, .)
.
.
b: (b, .)

combined-iter lower=a upper=b point-key-filter=(1,4)
seek-ge a
set-bounds lower=b upper=e
seek-ge d@5
seek-ge b
----
a: (a, .)
.
.
b: (b, .)

combined-iter lower=a upper=b point-key-filter=(1,4)
seek-ge a
set-bounds lower=b upper=e
seek-prefix-ge d@5
seek-ge b
----
a: (a, .)
.
.
b: (b, .)

# Test a similar case with range key masking. The previous bug did not apply to
# this case, because range-key masking never skips blocks on a seek.

reset block-size=1 index-block-size=1
----

batch commit
set a a
set b b
set b@4 b@4
set z@6 z@6
range-key-set a z @9 v
----
committed 5 keys

flush
----

combined-iter lower=a upper=b mask-suffix=@10 mask-filter
seek-ge a
set-bounds lower=b upper=e
seek-prefix-ge d@5
seek-ge b
----
a: (a, [a-b) @9=v UPDATED)
.
d@5: (., [d-"d\x00") @9=v UPDATED)
b: (b, [b-e) @9=v UPDATED)

0 comments on commit 0f464e9

Please sign in to comment.