From 637d7733b0940630f38de205fbe7c3a504722a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20=C5=9Awi=C4=85tek?= Date: Tue, 5 Apr 2022 22:03:02 +0200 Subject: [PATCH] [extension/filestorage] change bbolt DB options (#9004) bbolt doesn't automatically reclaim unused disk space - it expects the user to manually compact it. This has performance implications in that it keeps track of disk segments in a freelist structure, which can be large even if the db doesn't actually contain any data. By default, it also syncs this freelist to disk on every transaction. Disable freelist syncing and change freelist type to one which performs better on large dbs. The result is a massive reduction in the cost of writes to a large DB file. Include benchmarks to demonstrate this. Not syncing the freelist makes opening the DB more expensive, but this only happens on extension start, and the runtime remains under 1s even on 10 Gb db files. --- CHANGELOG.md | 1 + extension/storage/filestorage/client.go | 6 +- extension/storage/filestorage/client_test.go | 70 +++++++++++++++++++ .../storage/filestorage/extension_test.go | 36 ++++------ 4 files changed, 89 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8873c98c98c..ba4e27b4d2b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - `cmd/mdatagen`: Add resource attributes definition to metadata.yaml and move `pdata.Metrics` creation to the generated code (#5270) - Add `make crosslink` target to ensure replace statements are included in `go.mod` for all transitive dependencies within repository (#8822) +- `filestorageextension`: Change bbolt DB settings for better performance (#9004) ### 🛑 Breaking changes 🛑 diff --git a/extension/storage/filestorage/client.go b/extension/storage/filestorage/client.go index 8de0e17b7f06..09ed200d1922 100644 --- a/extension/storage/filestorage/client.go +++ b/extension/storage/filestorage/client.go @@ -33,8 +33,10 @@ type fileStorageClient struct { func newClient(filePath string, timeout time.Duration) (*fileStorageClient, error) { options := &bbolt.Options{ - Timeout: timeout, - NoSync: true, + Timeout: timeout, + NoSync: true, + NoFreelistSync: true, + FreelistType: bbolt.FreelistMapType, } db, err := bbolt.Open(filePath, 0600, options) if err != nil { diff --git a/extension/storage/filestorage/client_test.go b/extension/storage/filestorage/client_test.go index 71090bf96ada..f967e2b4f79c 100644 --- a/extension/storage/filestorage/client_test.go +++ b/extension/storage/filestorage/client_test.go @@ -300,6 +300,76 @@ func BenchmarkClientDelete(b *testing.B) { } } +// check the performance impact of the max lifetime DB size +// bbolt doesn't compact the freelist automatically, so there's a cost even if the data is deleted +func BenchmarkClientSetLargeDB(b *testing.B) { + entrySizeInBytes := 1024 * 1024 + entryCount := 2000 + entry := make([]byte, entrySizeInBytes) + var testKey string + + tempDir := newTempDir(b) + dbFile := filepath.Join(tempDir, "my_db") + + client, err := newClient(dbFile, time.Second) + require.NoError(b, err) + + ctx := context.Background() + + for n := 0; n < entryCount; n++ { + testKey = fmt.Sprintf("testKey-%d", n) + client.Set(ctx, testKey, entry) + } + + for n := 0; n < entryCount; n++ { + testKey = fmt.Sprintf("testKey-%d", n) + client.Delete(ctx, testKey) + } + + testKey = "testKey" + testValue := []byte("testValue") + b.ResetTimer() + for n := 0; n < b.N; n++ { + client.Set(ctx, testKey, testValue) + } +} + +// check the cost of opening an existing DB with data +// this can change depending on freelist type and whether it's synced to disk +func BenchmarkClientInitLargeDB(b *testing.B) { + entrySizeInBytes := 1024 * 1024 + entry := make([]byte, entrySizeInBytes) + entryCount := 2000 + var testKey string + + tempDir := newTempDir(b) + dbFile := filepath.Join(tempDir, "my_db") + + client, err := newClient(dbFile, time.Second) + require.NoError(b, err) + + ctx := context.Background() + + for n := 0; n < entryCount; n++ { + testKey = fmt.Sprintf("testKey-%d", n) + client.Set(ctx, testKey, entry) + } + + err = client.Close(ctx) + require.NoError(b, err) + + var tempClient *fileStorageClient + b.ResetTimer() + for n := 0; n < b.N; n++ { + tempClient, err = newClient(dbFile, time.Second) + require.NoError(b, err) + b.StopTimer() + err = tempClient.Close(ctx) + require.NoError(b, err) + b.StartTimer() + } +} + func newTempDir(tb testing.TB) string { tempDir, err := ioutil.TempDir("", "") require.NoError(tb, err) diff --git a/extension/storage/filestorage/extension_test.go b/extension/storage/filestorage/extension_test.go index 74970713feab..1a7b24fe7201 100644 --- a/extension/storage/filestorage/extension_test.go +++ b/extension/storage/filestorage/extension_test.go @@ -270,46 +270,37 @@ func TestCompaction(t *testing.T) { path := filepath.Join(tempDir, file.Name()) stats, err := os.Stat(path) require.NoError(t, err) - newStats := stats var key string i := 0 - // add data until database file changes size (we are checking compacted size) - for newStats.Size() <= stats.Size() { - key = fmt.Sprintf("key_%d", i) - err = client.Set(ctx, key, []byte(key)) - require.NoError(t, err) + // magic numbers giving enough data to force bbolt to allocate a new page + // see https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/9004 for some discussion + numEntries := 50 + entrySize := 512 + entry := make([]byte, entrySize) - // compact before checking size - c, fok := client.(*fileStorageClient) - require.True(t, fok) - client, err = c.Compact(ctx, tempDir, cfg.Timeout, 1) - require.NoError(t, err) - - // check size after compaction - newStats, err = os.Stat(path) + // add the data to the db + for i = 0; i < numEntries; i++ { + key = fmt.Sprintf("key_%d", i) + err = client.Set(ctx, key, entry) require.NoError(t, err) - i++ } - stats = newStats - - // compact again just in case + // compact the db c, ok := client.(*fileStorageClient) require.True(t, ok) client, err = c.Compact(ctx, tempDir, cfg.Timeout, 1) require.NoError(t, err) // check size after compaction - newStats, err = os.Stat(path) + newStats, err := os.Stat(path) require.NoError(t, err) - require.Equal(t, stats.Size(), newStats.Size()) + require.Less(t, stats.Size(), newStats.Size()) // remove data from database - for i = i - 1; i >= 0; i-- { + for i = 0; i < numEntries; i++ { key = fmt.Sprintf("key_%d", i) - // Set the data err = client.Delete(ctx, key) require.NoError(t, err) } @@ -321,6 +312,7 @@ func TestCompaction(t *testing.T) { require.NoError(t, err) // check size + stats = newStats newStats, err = os.Stat(path) require.NoError(t, err) require.Less(t, newStats.Size(), stats.Size())