Skip to content

Commit

Permalink
manifest: add a helper for DebugString parsing
Browse files Browse the repository at this point in the history
We add a parsing helper and use it to simplify the
`ParseFileMetadataDebug` and `ParseVersionDebug` code. This will make
it easier to extend these functions (e.g. to support virtual
sstables).
  • Loading branch information
RaduBerinde committed Mar 11, 2024
1 parent 623524f commit a034560
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 66 deletions.
144 changes: 144 additions & 0 deletions internal/manifest/testutils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package manifest

import (
"fmt"
"regexp"
"strconv"
"strings"

"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
)

// debugParser is a helper used to implement parsing of debug strings, like
// ParseFileMetadataDebug.
//
// It takes a string and splits it into tokens. Tokens are separated by
// whitespace; in addition separators ':', '[', ']', '-' are always separate
// tokens. For example, the string `000001:[a - b]` results in tokens `000001`,
// `:`, `[`, `a`, `-`, `b`, `]`.
//
// All debugParser methods throw panics instead of returning errors. The code
// that uses a debugParser can recover them and convert them to errors.
type debugParser struct {
	// original is the full input string; it is only used when building error
	// messages.
	original string

	// tokens holds the tokens that have not been consumed yet, in input order.
	tokens []string

	// lastToken is the token most recently returned by Peek (and therefore by
	// Next), or "" if the input was exhausted at that point. It is included in
	// error messages to show where parsing stopped.
	lastToken string
}

// debugParserSeparators lists the characters that makeDebugParser always emits
// as standalone one-character tokens, even when they are not surrounded by
// whitespace.
const debugParserSeparators = ":[]-"

// makeDebugParser tokenizes s and returns a parser positioned at the first
// token. The input is first split on whitespace; each resulting field is then
// cut around every separator character, with the separator itself emitted as
// its own token.
func makeDebugParser(s string) debugParser {
	var toks []string
	for _, field := range strings.Fields(s) {
		// start marks the beginning of the pending (not yet emitted) run of
		// non-separator bytes. All separators are single ASCII bytes, so a
		// byte-wise scan never splits a multi-byte rune.
		start := 0
		for i := 0; i < len(field); i++ {
			if strings.IndexByte(debugParserSeparators, field[i]) < 0 {
				continue
			}
			if start < i {
				toks = append(toks, field[start:i])
			}
			toks = append(toks, field[i:i+1])
			start = i + 1
		}
		if start < len(field) {
			toks = append(toks, field[start:])
		}
	}
	return debugParser{original: s, tokens: toks}
}

// Done reports whether every token has been consumed.
func (p *debugParser) Done() bool {
	remaining := len(p.tokens)
	return remaining == 0
}

// Peek returns the upcoming token and leaves it in place. When the input is
// exhausted it returns "". In both cases the returned value is also recorded
// as the parser's last token, which is what error messages report.
func (p *debugParser) Peek() string {
	tok := ""
	if !p.Done() {
		tok = p.tokens[0]
	}
	p.lastToken = tok
	return tok
}

// Next consumes and returns the upcoming token. When the input is exhausted it
// returns "" and consumes nothing.
func (p *debugParser) Next() string {
	tok := p.Peek()
	if tok == "" {
		return ""
	}
	p.tokens = p.tokens[1:]
	return tok
}

// Expect consumes one token per argument, in order, and reports a parse error
// through Errf as soon as a consumed token differs from the corresponding
// argument. Running out of input counts as a mismatch, because Next then
// yields "".
func (p *debugParser) Expect(tokens ...string) {
	for i := range tokens {
		want := tokens[i]
		got := p.Next()
		if got == want {
			continue
		}
		p.Errf("expected %q, got %q", want, got)
	}
}

// debugParserLevelRegexp matches a level token: 'L', a single digit, and an
// optional ".<digits>" sublevel suffix (e.g. L1, L0.2). It is compiled once at
// package initialization rather than on every TryLevel call.
var debugParserLevelRegexp = regexp.MustCompile(`^L[0-9](|\.[0-9]+)$`)

// TryLevel tries to parse the next token as a level (e.g. L1, L0.2). If the
// token has that form it is consumed and its level digit is returned with
// ok=true; any sublevel suffix is validated but not returned. Otherwise no
// token is consumed and (0, false) is returned.
func (p *debugParser) TryLevel() (level int, ok bool) {
	t := p.Peek()
	if !debugParserLevelRegexp.MatchString(t) {
		return 0, false
	}
	p.Next()
	// The regexp guarantees that t[1] is an ASCII digit.
	return int(t[1] - '0'), true
}

// Level consumes the next token and returns it as a level. If the token is not
// a valid level, a parse error is reported through Errf.
func (p *debugParser) Level() int {
	if level, ok := p.TryLevel(); ok {
		return level
	}
	p.Errf("cannot parse level")
	return 0
}

// Int consumes the next token and returns it converted to an int with
// strconv.Atoi. A conversion failure (including running out of tokens) is
// reported as a parse error through Errf.
func (p *debugParser) Int() int {
	tok := p.Next()
	val, err := strconv.Atoi(tok)
	if err == nil {
		return val
	}
	p.Errf("cannot parse number: %v", err)
	return val
}

// Uint64 consumes the next token and returns it converted to a uint64 (base
// 10). A conversion failure (including running out of tokens) is reported as a
// parse error through Errf.
func (p *debugParser) Uint64() uint64 {
	tok := p.Next()
	val, err := strconv.ParseUint(tok, 10, 64)
	if err == nil {
		return val
	}
	p.Errf("cannot parse number: %v", err)
	return val
}

// FileNum consumes the next token and returns it as a FileNum.
//
// The token is parsed as an unsigned 64-bit decimal via Uint64 rather than via
// Int, so the accepted range does not depend on the platform's int width and
// signed input is rejected. A token that is not a valid number is reported as
// a parse error (see Uint64).
func (p *debugParser) FileNum() base.FileNum {
	return base.FileNum(p.Uint64())
}

// InternalKey consumes the next token and converts it to an internal key with
// base.ParsePrettyInternalKey.
//
// NOTE(review): how malformed keys are reported depends on
// base.ParsePrettyInternalKey, which is not visible here; confirm whether it
// panics.
func (p *debugParser) InternalKey() base.InternalKey {
	tok := p.Next()
	return base.ParsePrettyInternalKey(tok)
}

// Errf formats a message from format and args and panics with an error that
// wraps it together with the original input string and the last token seen by
// the parser. Callers of the parser are expected to recover the panic and
// turn it into a returned error.
func (p *debugParser) Errf(format string, args ...any) {
	err := errors.Errorf(
		"error parsing %q at token %q: %s",
		p.original, p.lastToken, fmt.Sprintf(format, args...),
	)
	panic(err)
}
124 changes: 58 additions & 66 deletions internal/manifest/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@ import (
"bytes"
stdcmp "cmp"
"fmt"
"slices"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"unicode"

"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
Expand Down Expand Up @@ -813,56 +812,55 @@ func (m *FileMetadata) DebugString(format base.FormatKey, verbose bool) string {

// ParseFileMetadataDebug parses a FileMetadata from its DebugString
// representation.
func ParseFileMetadataDebug(s string) (*FileMetadata, error) {
// Split lines of the form:
// 000000:[a#0,SET-z#0,SET] seqnums:[5-5] points:[...] ranges:[...]
fields := strings.FieldsFunc(s, func(c rune) bool {
switch c {
case ':', '[', '-', ']':
return true
default:
return unicode.IsSpace(c) // NB: also trim whitespace padding.
}
})
if len(fields)%3 != 0 {
return nil, errors.Newf("malformed input: %s", s)
}
m := &FileMetadata{}
for len(fields) > 0 {
prefix := fields[0]
if prefix == "seqnums" {
smallestSeqNum, err := strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return m, errors.Newf("malformed input: %s: %s", s, err)
}
largestSeqNum, err := strconv.ParseUint(fields[2], 10, 64)
if err != nil {
return m, errors.Newf("malformed input: %s: %s", s, err)
func ParseFileMetadataDebug(s string) (_ *FileMetadata, err error) {
defer func() {
if r := recover(); r != nil {
var ok bool
err, ok = r.(error)
if !ok {
err = errors.Errorf("%v", r)
}
m.SmallestSeqNum, m.LargestSeqNum = smallestSeqNum, largestSeqNum
fields = fields[3:]
continue
}
smallest := base.ParsePrettyInternalKey(fields[1])
largest := base.ParsePrettyInternalKey(fields[2])
switch prefix {
}()

// Input format:
// 000000:[a#0,SET-z#0,SET] seqnums:[5-5] points:[...] ranges:[...]
m := &FileMetadata{}
p := makeDebugParser(s)
m.FileNum = p.FileNum()
p.Expect(":", "[")
m.Smallest = p.InternalKey()
p.Expect("-")
m.Largest = p.InternalKey()
p.Expect("]")

for !p.Done() {
field := p.Next()
p.Expect(":", "[")
switch field {
case "seqnums":
m.SmallestSeqNum = p.Uint64()
p.Expect("-")
m.LargestSeqNum = p.Uint64()

case "points":
m.SmallestPointKey, m.LargestPointKey = smallest, largest
m.SmallestPointKey = p.InternalKey()
p.Expect("-")
m.LargestPointKey = p.InternalKey()
m.HasPointKeys = true

case "ranges":
m.SmallestRangeKey, m.LargestRangeKey = smallest, largest
m.SmallestRangeKey = p.InternalKey()
p.Expect("-")
m.LargestRangeKey = p.InternalKey()
m.HasRangeKeys = true

default:
fileNum, err := strconv.ParseUint(prefix, 10, 64)
if err != nil {
return m, errors.Newf("malformed input: %s: %s", s, err)
}
m.FileNum = base.FileNum(fileNum)
m.Smallest, m.Largest = smallest, largest
m.boundsSet = true
p.Errf("unknown field %q", field)
}
fields = fields[3:]
p.Expect("]")
}

// By default, when the parser sees just the overall bounds, we set the point
// keys. This preserves backwards compatibility with existing test cases that
// specify only the overall bounds.
Expand Down Expand Up @@ -1255,34 +1253,28 @@ func (v *Version) string(verbose bool) string {

// ParseVersionDebug parses a Version from its DebugString output.
func ParseVersionDebug(comparer *base.Comparer, flushSplitBytes int64, s string) (*Version, error) {
var level int
var files [NumLevels][]*FileMetadata
level := -1
for _, l := range strings.Split(s, "\n") {
l = strings.TrimSpace(l)

switch l[:min(len(l), 3)] {
case "L0.", "L0:", "L1:", "L2:", "L3:", "L4:", "L5:", "L6:":
level = int(l[1] - '0')
p := makeDebugParser(l)
if l, ok := p.TryLevel(); ok {
level = l
continue
}

default:
m, err := ParseFileMetadataDebug(l)
if err != nil {
return nil, err
}
// If we only parsed overall bounds, default to setting the point bounds.
if !m.HasPointKeys && !m.HasRangeKeys {
m.SmallestPointKey, m.LargestPointKey = m.Smallest, m.Largest
m.HasPointKeys = true
}
files[level] = append(files[level], m)
if level == -1 {
return nil, errors.Errorf("version string must start with a level")
}
m, err := ParseFileMetadataDebug(l)
if err != nil {
return nil, err
}
files[level] = append(files[level], m)
}
// Reverse the order of L0 files. This ensures we construct the same
// sublevels. (They're printed from higher sublevel to lower, which means in
// a partial order that represents newest to oldest).
for i := 0; i < len(files[0])/2; i++ {
files[0][i], files[0][len(files[0])-i-1] = files[0][len(files[0])-i-1], files[0][i]
}
// L0 files are printed from higher sublevel to lower, which means in a
// partial order that represents newest to oldest. Reverse the order of L0
// files to ensure we construct the same sublevels.
slices.Reverse(files[0])
return NewVersion(comparer, flushSplitBytes, files), nil
}

Expand Down

0 comments on commit a034560

Please sign in to comment.