From 675efcdfd28807be3ebce92141b3b0eb4bc40ae6 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Wed, 24 Jun 2020 20:46:12 +0530 Subject: [PATCH] Increase default valueThreshold from 32B to 1KB (#1346) Increase value threshold from 32 bytes to 1 KB to allow easier space reclaimation. There have been multiple reports about value log garbage collection being unsatisfactory. This commit increases the default value of threshold from 32 bytes to 1 KB which would allow keys and values upto 1 KB to be co-allocated in the LSM tree. This would allow GC to reclaim space easily for values less than 1 KB. --- batch_test.go | 6 +++++- db2_test.go | 7 +------ db_test.go | 1 + managed_db_test.go | 2 +- options.go | 4 ++-- table/builder.go | 3 +++ value_test.go | 8 ++++++-- 7 files changed, 19 insertions(+), 12 deletions(-) diff --git a/batch_test.go b/batch_test.go index cc2a121fa..8ff14a501 100644 --- a/batch_test.go +++ b/batch_test.go @@ -70,7 +70,11 @@ func TestWriteBatch(t *testing.T) { require.NoError(t, err) } t.Run("disk mode", func(t *testing.T) { - runBadgerTest(t, nil, func(t *testing.T, db *DB) { + opt := getTestOptions("") + // Set value threshold to 32 bytes otherwise write batch will generate + // too many files and we will crash with too many files open error. + opt.ValueThreshold = 32 + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { test(t, db) }) }) diff --git a/db2_test.go b/db2_test.go index 7fa57b172..d86e5a40a 100644 --- a/db2_test.go +++ b/db2_test.go @@ -125,7 +125,6 @@ func TestTruncateVlogNoClose(t *testing.T) { t.Skip("Skipping test meant to be run manually.") return } - fmt.Println("running") dir := "p" opts := getTestOptions(dir) opts.SyncWrites = true @@ -707,8 +706,8 @@ func TestWindowsDataLoss(t *testing.T) { defer removeDir(dir) opt := DefaultOptions(dir).WithSyncWrites(true) + opt.ValueThreshold = 32 - fmt.Println("First DB Open") db, err := Open(opt) require.NoError(t, err) keyCount := 20 @@ -730,8 +729,6 @@ func TestWindowsDataLoss(t *testing.T) { } require.NoError(t, db.Close()) - fmt.Println() - fmt.Println("Second DB Open") opt.Truncate = true db, err = Open(opt) require.NoError(t, err) @@ -753,8 +750,6 @@ func TestWindowsDataLoss(t *testing.T) { require.NoError(t, db.manifest.close()) require.NoError(t, db.lc.close()) - fmt.Println() - fmt.Println("Third DB Open") opt.Truncate = true db, err = Open(opt) require.NoError(t, err) diff --git a/db_test.go b/db_test.go index af80a2d5e..92c73a995 100644 --- a/db_test.go +++ b/db_test.go @@ -1987,6 +1987,7 @@ func TestNoCrash(t *testing.T) { ops := getTestOptions(dir) ops.ValueLogMaxEntries = 1 + ops.ValueThreshold = 32 db, err := Open(ops) require.NoError(t, err, "unable to open db") diff --git a/managed_db_test.go b/managed_db_test.go index 959000657..cd5bd1517 100644 --- a/managed_db_test.go +++ b/managed_db_test.go @@ -604,7 +604,7 @@ func TestWriteBatchManagedMode(t *testing.T) { } opt := DefaultOptions("") opt.managedTxns = true - opt.MaxTableSize = 1 << 15 // This would create multiple transactions in write batch. + opt.MaxTableSize = 1 << 20 // This would create multiple transactions in write batch. runBadgerTest(t, &opt, func(t *testing.T, db *DB) { wb := db.NewWriteBatchAt(1) defer wb.Cancel() diff --git a/options.go b/options.go index 17af22354..4a0d1e0c3 100644 --- a/options.go +++ b/options.go @@ -162,7 +162,7 @@ func DefaultOptions(path string) Options { ValueLogFileSize: 1<<30 - 1, ValueLogMaxEntries: 1000000, - ValueThreshold: 32, + ValueThreshold: 1 << 10, // 1 KB. Truncate: false, Logger: defaultLogger(INFO), LogRotatesToFlush: 2, @@ -357,7 +357,7 @@ func (opt Options) WithMaxLevels(val int) Options { // ValueThreshold sets the threshold used to decide whether a value is stored directly in the LSM // tree or separately in the log value files. // -// The default value of ValueThreshold is 32, but LSMOnlyOptions sets it to maxValueThreshold. +// The default value of ValueThreshold is 1 KB, but LSMOnlyOptions sets it to maxValueThreshold. func (opt Options) WithValueThreshold(val int) Options { opt.ValueThreshold = val return opt diff --git a/table/builder.go b/table/builder.go index adf064d9a..50ba15356 100644 --- a/table/builder.go +++ b/table/builder.go @@ -204,6 +204,9 @@ func (b *Builder) addHelper(key []byte, v y.ValueStruct, vpLen uint64) { diffKey = b.keyDiff(key) } + y.AssertTrue(len(key)-len(diffKey) <= math.MaxUint16) + y.AssertTrue(len(diffKey) <= math.MaxUint16) + h := header{ overlap: uint16(len(key) - len(diffKey)), diff: uint16(len(diffKey)), diff --git a/value_test.go b/value_test.go index 107a90b4b..c60b98aa2 100644 --- a/value_test.go +++ b/value_test.go @@ -41,7 +41,7 @@ func TestValueBasic(t *testing.T) { y.Check(err) defer removeDir(dir) - kv, _ := Open(getTestOptions(dir)) + kv, _ := Open(getTestOptions(dir).WithValueThreshold(32)) defer kv.Close() log := &kv.vlog @@ -472,7 +472,7 @@ func TestPersistLFDiscardStats(t *testing.T) { require.NoError(t, err) } - time.Sleep(1 * time.Second) // wait for compaction to complete + time.Sleep(2 * time.Second) // wait for compaction to complete persistedMap := make(map[uint32]int64) db.vlog.lfDiscardStats.Lock() @@ -509,6 +509,7 @@ func TestChecksums(t *testing.T) { opts := getTestOptions(dir) opts.Truncate = true opts.ValueLogFileSize = 100 * 1024 * 1024 // 100Mb + opts.ValueThreshold = 32 kv, err := Open(opts) require.NoError(t, err) require.NoError(t, kv.Close()) @@ -592,6 +593,7 @@ func TestPartialAppendToValueLog(t *testing.T) { opts := getTestOptions(dir) opts.Truncate = true opts.ValueLogFileSize = 100 * 1024 * 1024 // 100Mb + opts.ValueThreshold = 32 kv, err := Open(opts) require.NoError(t, err) require.NoError(t, kv.Close()) @@ -1167,6 +1169,7 @@ func TestValueEntryChecksum(t *testing.T) { opt := getTestOptions(dir) opt.VerifyValueChecksum = true + opt.ValueThreshold = 32 db, err := Open(opt) require.NoError(t, err) @@ -1195,6 +1198,7 @@ func TestValueEntryChecksum(t *testing.T) { opt := getTestOptions(dir) opt.VerifyValueChecksum = true + opt.ValueThreshold = 32 db, err := Open(opt) require.NoError(t, err)