Skip to content

Commit

Permalink
[extension/filestorage] change bbolt DB options (#9004)
Browse files Browse the repository at this point in the history
bbolt doesn't automatically reclaim unused disk space - it expects the
user to manually compact it. This has performance implications in that
it keeps track of disk segments in a freelist structure, which can be
large even if the db doesn't actually contain any data. By default, it
also syncs this freelist to disk on every transaction.

Disable freelist syncing and change freelist type to one which performs
better on large dbs. The result is a massive reduction in the cost of
writes to a large DB file. Include benchmarks to demonstrate this.

Not syncing the freelist makes opening the DB more expensive, but this
only happens on extension start, and the runtime remains under 1s even
on 10 GB DB files.
  • Loading branch information
Mikołaj Świątek authored Apr 5, 2022
1 parent 27feee4 commit 637d773
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- `cmd/mdatagen`: Add resource attributes definition to metadata.yaml and move `pdata.Metrics` creation to the
generated code (#5270)
- Add `make crosslink` target to ensure replace statements are included in `go.mod` for all transitive dependencies within repository (#8822)
- `filestorageextension`: Change bbolt DB settings for better performance (#9004)

### 🛑 Breaking changes 🛑

Expand Down
6 changes: 4 additions & 2 deletions extension/storage/filestorage/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ type fileStorageClient struct {

func newClient(filePath string, timeout time.Duration) (*fileStorageClient, error) {
options := &bbolt.Options{
Timeout: timeout,
NoSync: true,
Timeout: timeout,
NoSync: true,
NoFreelistSync: true,
FreelistType: bbolt.FreelistMapType,
}
db, err := bbolt.Open(filePath, 0600, options)
if err != nil {
Expand Down
70 changes: 70 additions & 0 deletions extension/storage/filestorage/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,76 @@ func BenchmarkClientDelete(b *testing.B) {
}
}

// BenchmarkClientSetLargeDB checks the performance impact of the max lifetime
// DB size on writes. bbolt doesn't compact the freelist automatically, so
// there's a per-write cost even after all the data has been deleted.
func BenchmarkClientSetLargeDB(b *testing.B) {
	entrySizeInBytes := 1024 * 1024
	entryCount := 2000
	entry := make([]byte, entrySizeInBytes)
	var testKey string

	tempDir := newTempDir(b)
	dbFile := filepath.Join(tempDir, "my_db")

	client, err := newClient(dbFile, time.Second)
	require.NoError(b, err)

	ctx := context.Background()

	// Close the DB handle once the benchmark is done so the file lock and
	// memory map are released.
	b.Cleanup(func() {
		require.NoError(b, client.Close(ctx))
	})

	// Grow the DB file by writing a large amount of data.
	for n := 0; n < entryCount; n++ {
		testKey = fmt.Sprintf("testKey-%d", n)
		require.NoError(b, client.Set(ctx, testKey, entry))
	}

	// Delete everything; bbolt keeps the freed pages in its freelist rather
	// than shrinking the file, which is exactly the state this benchmark
	// wants to measure writes against.
	for n := 0; n < entryCount; n++ {
		testKey = fmt.Sprintf("testKey-%d", n)
		require.NoError(b, client.Delete(ctx, testKey))
	}

	testKey = "testKey"
	testValue := []byte("testValue")
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Plain error check to keep per-iteration overhead minimal inside
		// the timed region.
		if err = client.Set(ctx, testKey, testValue); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkClientInitLargeDB checks the cost of opening an existing DB with
// data already in it. This can change depending on the freelist type and
// whether the freelist is synced to disk (with NoFreelistSync, bbolt has to
// rebuild the freelist by scanning the file on open).
func BenchmarkClientInitLargeDB(b *testing.B) {
	entrySizeInBytes := 1024 * 1024
	entry := make([]byte, entrySizeInBytes)
	entryCount := 2000
	var testKey string

	tempDir := newTempDir(b)
	dbFile := filepath.Join(tempDir, "my_db")

	client, err := newClient(dbFile, time.Second)
	require.NoError(b, err)

	ctx := context.Background()

	// Fill the DB so that opening it has a non-trivial amount of pages to
	// account for.
	for n := 0; n < entryCount; n++ {
		testKey = fmt.Sprintf("testKey-%d", n)
		require.NoError(b, client.Set(ctx, testKey, entry))
	}

	err = client.Close(ctx)
	require.NoError(b, err)

	var tempClient *fileStorageClient
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Only the open is timed; the close between iterations is excluded.
		tempClient, err = newClient(dbFile, time.Second)
		require.NoError(b, err)
		b.StopTimer()
		err = tempClient.Close(ctx)
		require.NoError(b, err)
		b.StartTimer()
	}
}

func newTempDir(tb testing.TB) string {
tempDir, err := ioutil.TempDir("", "")
require.NoError(tb, err)
Expand Down
36 changes: 14 additions & 22 deletions extension/storage/filestorage/extension_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,46 +270,37 @@ func TestCompaction(t *testing.T) {
path := filepath.Join(tempDir, file.Name())
stats, err := os.Stat(path)
require.NoError(t, err)
newStats := stats

var key string
i := 0

// add data until database file changes size (we are checking compacted size)
for newStats.Size() <= stats.Size() {
key = fmt.Sprintf("key_%d", i)
err = client.Set(ctx, key, []byte(key))
require.NoError(t, err)
// magic numbers giving enough data to force bbolt to allocate a new page
// see https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/9004 for some discussion
numEntries := 50
entrySize := 512
entry := make([]byte, entrySize)

// compact before checking size
c, fok := client.(*fileStorageClient)
require.True(t, fok)
client, err = c.Compact(ctx, tempDir, cfg.Timeout, 1)
require.NoError(t, err)

// check size after compaction
newStats, err = os.Stat(path)
// add the data to the db
for i = 0; i < numEntries; i++ {
key = fmt.Sprintf("key_%d", i)
err = client.Set(ctx, key, entry)
require.NoError(t, err)
i++
}

stats = newStats

// compact again just in case
// compact the db
c, ok := client.(*fileStorageClient)
require.True(t, ok)
client, err = c.Compact(ctx, tempDir, cfg.Timeout, 1)
require.NoError(t, err)

// check size after compaction
newStats, err = os.Stat(path)
newStats, err := os.Stat(path)
require.NoError(t, err)
require.Equal(t, stats.Size(), newStats.Size())
require.Less(t, stats.Size(), newStats.Size())

// remove data from database
for i = i - 1; i >= 0; i-- {
for i = 0; i < numEntries; i++ {
key = fmt.Sprintf("key_%d", i)
// Set the data
err = client.Delete(ctx, key)
require.NoError(t, err)
}
Expand All @@ -321,6 +312,7 @@ func TestCompaction(t *testing.T) {
require.NoError(t, err)

// check size
stats = newStats
newStats, err = os.Stat(path)
require.NoError(t, err)
require.Less(t, newStats.Size(), stats.Size())
Expand Down

0 comments on commit 637d773

Please sign in to comment.