diff --git a/internal/manifest/version.go b/internal/manifest/version.go index 94acac037d..a86835c4c2 100644 --- a/internal/manifest/version.go +++ b/internal/manifest/version.go @@ -654,6 +654,11 @@ type Version struct { Levels [NumLevels]LevelMetadata + // RangeKeyLevels holds a subset of the same files as Levels that contain range + // keys (i.e. fileMeta.HasRangeKeys == true). The memory amplification of this + // duplication should be minimal, as range keys are expected to be rare. + RangeKeyLevels [NumLevels]LevelMetadata + // The callback to invoke when the last reference to a version is // removed. Will be called with list.mu held. Deleted func(obsolete []*FileMetadata) diff --git a/internal/manifest/version_edit.go b/internal/manifest/version_edit.go index 911457c30f..75509ab171 100644 --- a/internal/manifest/version_edit.go +++ b/internal/manifest/version_edit.go @@ -648,6 +648,11 @@ func (b *BulkVersionEdit) Apply( } else { v.Levels[level] = curr.Levels[level].clone() } + if curr == nil || curr.RangeKeyLevels[level].tree.root == nil { + v.RangeKeyLevels[level] = makeLevelMetadata(cmp, level, nil /* files */) + } else { + v.RangeKeyLevels[level] = curr.RangeKeyLevels[level].clone() + } if len(b.Added[level]) == 0 && len(b.Deleted[level]) == 0 { // There are no edits on this level. @@ -667,6 +672,7 @@ func (b *BulkVersionEdit) Apply( // Some edits on this level. lm := &v.Levels[level] + lmRange := &v.RangeKeyLevels[level] addedFiles := b.Added[level] deletedMap := b.Deleted[level] if n := v.Levels[level].Len() + len(addedFiles); n == 0 { @@ -688,6 +694,16 @@ func (b *BulkVersionEdit) Apply( err := errors.Errorf("pebble: internal error: file L%d.%s obsolete during B-Tree removal", level, f.FileNum) return nil, nil, err } + if f.HasRangeKeys { + if obsolete := v.RangeKeyLevels[level].tree.delete(f); obsolete { + // Deleting a file from the B-Tree may decrement its + // reference count. However, because we cloned the + // previous level's B-Tree, this should never result in a + // file's reference count dropping to zero. + err := errors.Errorf("pebble: internal error: file L%d.%s obsolete during range-key B-Tree removal", level, f.FileNum) + return nil, nil, err + } + } } var sm, la *FileMetadata @@ -714,6 +730,12 @@ func (b *BulkVersionEdit) Apply( if err != nil { return nil, nil, errors.Wrap(err, "pebble") } + if f.HasRangeKeys { + err = lmRange.tree.insert(f) + if err != nil { + return nil, nil, errors.Wrap(err, "pebble") + } + } removeZombie(f.FileNum) // Track the keys with the smallest and largest keys, so that we can // check consistency of the modified span. diff --git a/range_keys.go b/range_keys.go index 4ad233db3e..af94490eea 100644 --- a/range_keys.go +++ b/range_keys.go @@ -47,32 +47,35 @@ func (d *DB) newRangeKeyIter( // TODO(bilal): Roll the LevelIter allocation into it.rangeKey.iterConfig. levelIters := make([]keyspan.LevelIter, 0) // Next are the file levels: L0 sub-levels followed by lower levels. - addLevelIterForFiles := func(files manifest.LevelIterator, level manifest.Level) { - rangeIter := files.Filter(manifest.KeyTypeRange) - if rangeIter.First() == nil { - // No files with range keys. - return + // + // Add file-specific iterators for L0 files containing range keys. This is less + // efficient than using levelIters for sublevels of L0 files containing + // range keys, but range keys are expected to be sparse anyway, reducing the + // cost benefit of maintaining a separate L0Sublevels instance for range key + // files and then using it here. + iter := current.RangeKeyLevels[0].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + spanIterOpts := &keyspan.SpanIterOptions{RangeKeyFilters: it.opts.RangeKeyFilters} + spanIter, err := d.tableNewRangeKeyIter(f, spanIterOpts) + if err != nil { + it.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err}) + continue } - levelIters = append(levelIters, keyspan.LevelIter{}) - li := &levelIters[len(levelIters)-1] - spanIterOpts := keyspan.SpanIterOptions{RangeKeyFilters: it.opts.RangeKeyFilters} - - li.Init(spanIterOpts, it.cmp, d.tableNewRangeKeyIter, files, level, d.opts.Logger, manifest.KeyTypeRange) - it.rangeKey.iterConfig.AddLevel(li) - } - - // Add level iterators for the L0 sublevels, iterating from newest to - // oldest. - for i := len(current.L0SublevelFiles) - 1; i >= 0; i-- { - addLevelIterForFiles(current.L0SublevelFiles[i].Iter(), manifest.L0Sublevel(i)) + it.rangeKey.iterConfig.AddLevel(spanIter) } // Add level iterators for the non-empty non-L0 levels. - for level := 1; level < len(current.Levels); level++ { - if current.Levels[level].Empty() { + for level := 1; level < len(current.RangeKeyLevels); level++ { + if current.RangeKeyLevels[level].Empty() { continue } - addLevelIterForFiles(current.Levels[level].Iter(), manifest.Level(level)) + levelIters = append(levelIters, keyspan.LevelIter{}) + li := &levelIters[len(levelIters)-1] + spanIterOpts := keyspan.SpanIterOptions{RangeKeyFilters: it.opts.RangeKeyFilters} + + li.Init(spanIterOpts, it.cmp, d.tableNewRangeKeyIter, current.RangeKeyLevels[level].Iter(), + manifest.Level(level), d.opts.Logger, manifest.KeyTypeRange) + it.rangeKey.iterConfig.AddLevel(li) } return it.rangeKey.rangeKeyIter }