From a034560d9ebf519bbb8786361d66d8c6b19fbb13 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 11 Mar 2024 09:45:30 -0700 Subject: [PATCH] manifest: add a helper for DebugString parsing We add a parsing helper and use it to simplify the `ParseFileMetadataDebug` and `ParseVersionDebug` code. This will make it easier to extend these functions (e.g. to support virtual sstables). --- internal/manifest/testutils.go | 144 +++++++++++++++++++++++++++++++++ internal/manifest/version.go | 124 +++++++++++++--------------- 2 files changed, 202 insertions(+), 66 deletions(-) create mode 100644 internal/manifest/testutils.go diff --git a/internal/manifest/testutils.go b/internal/manifest/testutils.go new file mode 100644 index 0000000000..1e6b60e2dc --- /dev/null +++ b/internal/manifest/testutils.go @@ -0,0 +1,144 @@ +// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" +) + +// debugParser is a helper used to implement parsing of debug strings, like +// ParseFileMetadataDebug. +// +// It takes a string and splits it into tokens. Tokens are separated by +// whitespace; in addition separators ':', '[', ']', '-' are always separate +// tokens. For example, the string `000001:[a - b]` results in tokens `000001`, +// `:`, `[`, `a`, `-`, `b`, `]`. +// +// All debugParser methods throw panics instead of returning errors. The code +// that uses a debugParser can recover them and convert them to errors. +type debugParser struct { + original string + tokens []string + lastToken string +} + +const debugParserSeparators = ":[]-" + +func makeDebugParser(s string) debugParser { + p := debugParser{ + original: s, + } + for _, f := range strings.Fields(s) { + for f != "" { + pos := strings.IndexAny(f, debugParserSeparators) + if pos == -1 { + p.tokens = append(p.tokens, f) + break + } + if pos > 0 { + p.tokens = append(p.tokens, f[:pos]) + } + p.tokens = append(p.tokens, f[pos:pos+1]) + f = f[pos+1:] + } + } + return p +} + +// Done returns true if there are no more tokens. +func (p *debugParser) Done() bool { + return len(p.tokens) == 0 +} + +// Peek returns the next token, without consuming the token. Returns "" if there +// are no more tokens. +func (p *debugParser) Peek() string { + if p.Done() { + p.lastToken = "" + return "" + } + p.lastToken = p.tokens[0] + return p.tokens[0] +} + +// Next returns the next token, or "" if there are no more tokens. +func (p *debugParser) Next() string { + res := p.Peek() + if res != "" { + p.tokens = p.tokens[1:] + } + return res +} + +// Expect consumes the next tokens, verifying that they exactly match the +// arguments. +func (p *debugParser) Expect(tokens ...string) { + for _, tok := range tokens { + if res := p.Next(); res != tok { + p.Errf("expected %q, got %q", tok, res) + } + } +} + +// TryLevel tries to parse a token as a level (e.g. L1, L0.2). If successful, +// the token is consumed. +func (p *debugParser) TryLevel() (level int, ok bool) { + t := p.Peek() + if regexp.MustCompile(`^L[0-9](|\.[0-9]+)$`).MatchString(t) { + p.Next() + return int(t[1] - '0'), true + } + return 0, false +} + +// Level parses the next token as a level. +func (p *debugParser) Level() int { + level, ok := p.TryLevel() + if !ok { + p.Errf("cannot parse level") + } + return level +} + +// Int parses the next token as an integer. +func (p *debugParser) Int() int { + x, err := strconv.Atoi(p.Next()) + if err != nil { + p.Errf("cannot parse number: %v", err) + } + return x +} + +// Uint64 parses the next token as an uint64. +func (p *debugParser) Uint64() uint64 { + x, err := strconv.ParseUint(p.Next(), 10, 64) + if err != nil { + p.Errf("cannot parse number: %v", err) + } + return x +} + +// FileNum parses the next token as a FileNum. +func (p *debugParser) FileNum() base.FileNum { + return base.FileNum(p.Int()) +} + +// InternalKey parses the next token as an internal key. +func (p *debugParser) InternalKey() base.InternalKey { + return base.ParsePrettyInternalKey(p.Next()) +} + +// Errf panics with an error which includes the original string and the last +// token. +func (p *debugParser) Errf(format string, args ...any) { + msg := fmt.Sprintf(format, args...) + panic(errors.Errorf("error parsing %q at token %q: %s", p.original, p.lastToken, msg)) +} diff --git a/internal/manifest/version.go b/internal/manifest/version.go index 5b4a650065..553dc17d26 100644 --- a/internal/manifest/version.go +++ b/internal/manifest/version.go @@ -8,12 +8,11 @@ import ( "bytes" stdcmp "cmp" "fmt" + "slices" "sort" - "strconv" "strings" "sync" "sync/atomic" - "unicode" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/base" @@ -813,56 +812,55 @@ func (m *FileMetadata) DebugString(format base.FormatKey, verbose bool) string { // ParseFileMetadataDebug parses a FileMetadata from its DebugString // representation. -func ParseFileMetadataDebug(s string) (*FileMetadata, error) { - // Split lines of the form: - // 000000:[a#0,SET-z#0,SET] seqnums:[5-5] points:[...] ranges:[...] - fields := strings.FieldsFunc(s, func(c rune) bool { - switch c { - case ':', '[', '-', ']': - return true - default: - return unicode.IsSpace(c) // NB: also trim whitespace padding. - } - }) - if len(fields)%3 != 0 { - return nil, errors.Newf("malformed input: %s", s) - } - m := &FileMetadata{} - for len(fields) > 0 { - prefix := fields[0] - if prefix == "seqnums" { - smallestSeqNum, err := strconv.ParseUint(fields[1], 10, 64) - if err != nil { - return m, errors.Newf("malformed input: %s: %s", s, err) - } - largestSeqNum, err := strconv.ParseUint(fields[2], 10, 64) - if err != nil { - return m, errors.Newf("malformed input: %s: %s", s, err) +func ParseFileMetadataDebug(s string) (_ *FileMetadata, err error) { + defer func() { + if r := recover(); r != nil { + var ok bool + err, ok = r.(error) + if !ok { + err = errors.Errorf("%v", r) } - m.SmallestSeqNum, m.LargestSeqNum = smallestSeqNum, largestSeqNum - fields = fields[3:] - continue } - smallest := base.ParsePrettyInternalKey(fields[1]) - largest := base.ParsePrettyInternalKey(fields[2]) - switch prefix { + }() + + // Input format: + // 000000:[a#0,SET-z#0,SET] seqnums:[5-5] points:[...] ranges:[...] + m := &FileMetadata{} + p := makeDebugParser(s) + m.FileNum = p.FileNum() + p.Expect(":", "[") + m.Smallest = p.InternalKey() + p.Expect("-") + m.Largest = p.InternalKey() + p.Expect("]") + + for !p.Done() { + field := p.Next() + p.Expect(":", "[") + switch field { + case "seqnums": + m.SmallestSeqNum = p.Uint64() + p.Expect("-") + m.LargestSeqNum = p.Uint64() + case "points": - m.SmallestPointKey, m.LargestPointKey = smallest, largest + m.SmallestPointKey = p.InternalKey() + p.Expect("-") + m.LargestPointKey = p.InternalKey() m.HasPointKeys = true + case "ranges": - m.SmallestRangeKey, m.LargestRangeKey = smallest, largest + m.SmallestRangeKey = p.InternalKey() + p.Expect("-") + m.LargestRangeKey = p.InternalKey() m.HasRangeKeys = true + default: - fileNum, err := strconv.ParseUint(prefix, 10, 64) - if err != nil { - return m, errors.Newf("malformed input: %s: %s", s, err) - } - m.FileNum = base.FileNum(fileNum) - m.Smallest, m.Largest = smallest, largest - m.boundsSet = true + p.Errf("unknown field %q", field) } - fields = fields[3:] + p.Expect("]") } + // By default, when the parser sees just the overall bounds, we set the point // keys. This preserves backwards compatability with existing test cases that // specify only the overall bounds. @@ -1255,34 +1253,28 @@ func (v *Version) string(verbose bool) string { // ParseVersionDebug parses a Version from its DebugString output. func ParseVersionDebug(comparer *base.Comparer, flushSplitBytes int64, s string) (*Version, error) { - var level int var files [NumLevels][]*FileMetadata + level := -1 for _, l := range strings.Split(s, "\n") { - l = strings.TrimSpace(l) - - switch l[:min(len(l), 3)] { - case "L0.", "L0:", "L1:", "L2:", "L3:", "L4:", "L5:", "L6:": - level = int(l[1] - '0') + p := makeDebugParser(l) + if l, ok := p.TryLevel(); ok { + level = l + continue + } - default: - m, err := ParseFileMetadataDebug(l) - if err != nil { - return nil, err - } - // If we only parsed overall bounds, default to setting the point bounds. - if !m.HasPointKeys && !m.HasRangeKeys { - m.SmallestPointKey, m.LargestPointKey = m.Smallest, m.Largest - m.HasPointKeys = true - } - files[level] = append(files[level], m) + if level == -1 { + return nil, errors.Errorf("version string must start with a level") } + m, err := ParseFileMetadataDebug(l) + if err != nil { + return nil, err + } + files[level] = append(files[level], m) } - // Reverse the order of L0 files. This ensures we construct the same - // sublevels. (They're printed from higher sublevel to lower, which means in - // a partial order that represents newest to oldest). - for i := 0; i < len(files[0])/2; i++ { - files[0][i], files[0][len(files[0])-i-1] = files[0][len(files[0])-i-1], files[0][i] - } + // L0 files are printed from higher sublevel to lower, which means in a + // partial order that represents newest to oldest. Reverse the order of L0 + // files to ensure we construct the same sublevels. + slices.Reverse(files[0]) return NewVersion(comparer, flushSplitBytes, files), nil }