Skip to content

Commit

Permalink
compact: add facility to split and encode spans
Browse files Browse the repository at this point in the history
Add `SplitAndEncodeSpan` which can split a range del or range key span
and encode the first split.
  • Loading branch information
RaduBerinde committed May 6, 2024
1 parent bc2d51c commit ab6cd13
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 17 deletions.
44 changes: 44 additions & 0 deletions internal/compact/spans.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/keyspan"
"github.com/cockroachdb/pebble/internal/rangekey"
"github.com/cockroachdb/pebble/sstable"
)

// RangeDelSpanCompactor coalesces RANGEDELs within snapshot stripes and elides
Expand Down Expand Up @@ -169,3 +170,46 @@ func (c *RangeKeySpanCompactor) elideInLastStripe(
}
return keys[:k]
}

// SplitAndEncodeSpan writes the portion of span that lies before upToKey to
// the table writer and leaves the remainder in span.
//
// The outcomes are:
//   - span is empty: nothing is written and span is left untouched.
//   - upToKey is nil, or span.End <= upToKey: the whole span is written and
//     span is reset via span.Reset().
//   - span.Start >= upToKey: nothing is written and span is left untouched.
//   - otherwise: [span.Start, upToKey) is written with all of span's keys, and
//     span.Start is advanced to upToKey.
//
// When span.Start is advanced, the contents of upToKey are copied into the
// existing span.Start slice (reusing its storage where capacity allows), so
// span does not retain a reference to upToKey.
//
// The span must hold either only RANGEDEL keys or only range keys.
//
// Any error returned by the writer is propagated unchanged; in that case span
// is not modified.
func SplitAndEncodeSpan(
	cmp base.Compare, span *keyspan.Span, upToKey []byte, tw *sstable.Writer,
) error {
	switch {
	case span.Empty():
		return nil

	case upToKey == nil || cmp(span.End, upToKey) <= 0:
		// The span ends at or before the split point: emit all of it.
		err := tw.EncodeSpan(span)
		if err == nil {
			span.Reset()
		}
		return err

	case cmp(span.Start, upToKey) >= 0:
		// The span begins at or after the split point: nothing to emit yet.
		return nil
	}

	// The split point falls strictly inside the span. Emit the leading piece,
	// which shares the Start and Keys slices with the original span.
	head := keyspan.Span{Start: span.Start, End: upToKey, Keys: span.Keys}
	if err := tw.EncodeSpan(&head); err != nil {
		return err
	}
	// Overwrite Start in place with a copy of upToKey; what remains is
	// [upToKey, span.End).
	span.Start = append(span.Start[:0], upToKey...)
	return nil
}
48 changes: 46 additions & 2 deletions internal/compact/spans_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import (
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/keyspan"
"github.com/cockroachdb/pebble/objstorage"
"github.com/cockroachdb/pebble/sstable"
"github.com/stretchr/testify/require"
)

// TestRangeDelSpanCompactor tests the range del coalescing and striping logic.
Expand Down Expand Up @@ -40,7 +43,8 @@ func TestRangeDelSpanCompactor(t *testing.T) {
return output.String()

default:
return fmt.Sprintf("unknown command: %s", td.Cmd)
td.Fatalf(t, "unknown command: %s", td.Cmd)
return ""
}
})
}
Expand Down Expand Up @@ -71,7 +75,8 @@ func TestRangeKeySpanCompactor(t *testing.T) {
return output.String()

default:
return fmt.Sprintf("unknown command: %s", td.Cmd)
td.Fatalf(t, "unknown command: %s", td.Cmd)
return ""
}
})
}
Expand All @@ -90,3 +95,42 @@ func maybeParseInUseKeyRanges(td *datadriven.TestData) []base.UserKeyBounds {
}
return keyRanges
}

// TestSplitAndEncodeSpan is a data-driven test for SplitAndEncodeSpan.
//
// Supported commands:
//   - set: parses the input as a span and stores it as the current span.
//   - encode [up-to=<key>]: runs SplitAndEncodeSpan on the current span using
//     a new sstable writer backed by an objstorage.MemObj, reads the table
//     back, and prints the span that was written along with whatever is left
//     in the current span ("." stands for none).
func TestSplitAndEncodeSpan(t *testing.T) {
	var span keyspan.Span
	datadriven.RunTest(t, "testdata/split_and_encode_span", func(t *testing.T, td *datadriven.TestData) string {
		if td.Cmd == "set" {
			span = keyspan.ParseSpan(td.Input)
			return ""
		}
		if td.Cmd != "encode" {
			td.Fatalf(t, "unknown command: %s", td.Cmd)
			return ""
		}

		// When the up-to argument is absent (or empty) upToKey stays nil,
		// which asks SplitAndEncodeSpan to encode the entire span.
		var upToArg string
		td.MaybeScanArgs(t, "up-to", &upToArg)
		var upToKey []byte
		if upToArg != "" {
			upToKey = []byte(upToArg)
		}

		mem := &objstorage.MemObj{}
		w := sstable.NewWriter(mem, sstable.WriterOptions{TableFormat: sstable.TableFormatMax})
		require.NoError(t, SplitAndEncodeSpan(base.DefaultComparer.Compare, &span, upToKey, w))
		require.NoError(t, w.Close())

		// A single call encodes at most one span, of one kind.
		_, rangeDels, rangeKeys := sstable.ReadAll(mem)
		require.LessOrEqual(t, len(rangeDels)+len(rangeKeys), 1)

		encoded, remaining := ".", "."
		if written := append(rangeDels, rangeKeys...); len(written) == 1 {
			encoded = written[0].String()
		}
		if !span.Empty() {
			remaining = span.String()
		}
		return fmt.Sprintf("Encoded: %s\nRemaining: %s\n", encoded, remaining)
	})
}
51 changes: 51 additions & 0 deletions internal/compact/testdata/split_and_encode_span
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
set
a-c:{(#9,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}
----

encode
----
Encoded: a-c:{(#9,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}
Remaining: .

set
a-c:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}
----

encode up-to=A
----
Encoded: .
Remaining: a-c:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}

encode up-to=a
----
Encoded: .
Remaining: a-c:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}

encode up-to=b
----
Encoded: a-b:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}
Remaining: b-c:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}

encode up-to=c
----
Encoded: b-c:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@3,foo3) (#3,RANGEKEYSET,@3,foo2)}
Remaining: .

set
a-c:{(#9,RANGEDEL) (#8,RANGEDEL) (#4,RANGEDEL)}
----

encode up-to=b
----
Encoded: a-b:{(#9,RANGEDEL) (#8,RANGEDEL) (#4,RANGEDEL)}
Remaining: b-c:{(#9,RANGEDEL) (#8,RANGEDEL) (#4,RANGEDEL)}

encode
----
Encoded: b-c:{(#9,RANGEDEL) (#8,RANGEDEL) (#4,RANGEDEL)}
Remaining: .

encode
----
Encoded: .
Remaining: .
43 changes: 28 additions & 15 deletions sstable/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/keyspan"
"github.com/cockroachdb/pebble/internal/private"
"github.com/cockroachdb/pebble/internal/rangedel"
"github.com/cockroachdb/pebble/internal/rangekey"
"github.com/cockroachdb/pebble/objstorage"
)
Expand Down Expand Up @@ -1937,6 +1938,33 @@ func (w *Writer) assertFormatCompatibility() error {
return nil
}

// UnsafeLastPointUserKey returns the user key of the last point key that was
// written to the Writer. It returns nil if the Writer is nil or if its current
// data block holds no entries.
//
// The returned slice points directly into a buffer owned by the Writer; it is
// only valid until the next point key is added to the Writer.
//
// Must not be called after Writer is closed.
func (w *Writer) UnsafeLastPointUserKey() []byte {
	if w == nil || w.dataBlockBuf.dataBlock.nEntries < 1 {
		return nil
	}
	// The data block's curKey is guaranteed to refer to the last point key
	// which was added to the Writer.
	return w.dataBlockBuf.dataBlock.getCurUserKey()
}

// EncodeSpan writes the keys of the given span to the table. An empty span is
// a no-op.
//
// The span must hold either only RANGEDEL keys or only range keys. The kind of
// the first key decides the encoding: a RANGEDEL first key routes the span
// through rangedel.Encode and w.Add, anything else through rangekey.Encode and
// w.AddRangeKey.
func (w *Writer) EncodeSpan(span *keyspan.Span) error {
	switch {
	case span.Empty():
		return nil
	case span.Keys[0].Kind() == base.InternalKeyKindRangeDelete:
		return rangedel.Encode(span, w.Add)
	default:
		return rangekey.Encode(span, w.AddRangeKey)
	}
}

// Close finishes writing the table and closes the underlying file that the
// table was written to.
func (w *Writer) Close() (err error) {
Expand Down Expand Up @@ -2246,21 +2274,6 @@ type WriterOption interface {
writerApply(*Writer)
}

// UnsafeLastPointUserKey returns the user key of the last point key that was
// written to the Writer. It returns nil if the Writer is nil or if its current
// data block holds no entries.
//
// The returned slice points directly into a buffer owned by the Writer; it is
// only valid until the next point key is added to the Writer.
//
// Must not be called after Writer is closed.
func (w *Writer) UnsafeLastPointUserKey() []byte {
	if w == nil || w.dataBlockBuf.dataBlock.nEntries < 1 {
		return nil
	}
	// The data block's curKey is guaranteed to refer to the last point key
	// which was added to the Writer.
	return w.dataBlockBuf.dataBlock.getCurUserKey()
}

// NewWriter returns a new table writer for the file. Closing the writer will
// close the file.
func NewWriter(writable objstorage.Writable, o WriterOptions, extraOpts ...WriterOption) *Writer {
Expand Down

0 comments on commit ab6cd13

Please sign in to comment.