From 969a8e86294e164180aabd630313a06b97827081 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Fri, 6 Dec 2019 15:56:37 +0530 Subject: [PATCH] Fix windows dataloss issue (#1134) Windows doesn't allow memory mapping a file to a size greater than the file's actual size. To circumvent this, we increase the file size by truncating it. https://github.com/dgraph-io/badger/blob/f5b63211d7f3e2f5f8b698893313b2a54e4df7de/y/mmap_windows.go#L41-L48 When badger would re-open, we try to replay this "truncated" file. Since this truncated file consists of all zeros, the replay would return the last offset as `zero` and then we would truncate the original file to size `zero`. The replay function would return `zero` as the last valid offset which was wrong. The last valid offset is start offset plus the forward movements of the file offset. So instead of https://github.com/dgraph-io/badger/blob/f5b63211d7f3e2f5f8b698893313b2a54e4df7de/value.go#L433 ```go var validEndOff uint32 // notice we're starting from zero, not the start point. ``` we should be doing ```go var validEndOff uint32 = offset ``` Fixes - https://github.com/dgraph-io/badger/issues/1126 --- db2_test.go | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++ value.go | 2 +- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/db2_test.go b/db2_test.go index 4d7ee69a9..9362a7219 100644 --- a/db2_test.go +++ b/db2_test.go @@ -27,6 +27,7 @@ import ( "os" "path" "regexp" + "runtime" "testing" "github.com/dgraph-io/badger/v2/options" @@ -673,3 +674,90 @@ func TestL0GCBug(t *testing.T) { checkKeys(db2) require.NoError(t, db2.Close()) } + +// Regression test for https://github.com/dgraph-io/badger/issues/1126 +// +// The test has 3 steps +// Step 1 - Create badger data. It is necessary that the value size is +// greater than valuethreshold. The value log file size after +// this step is around 170 bytes. +// Step 2 - Re-open the same badger and simulate a crash. 
The value log file +// size after this crash is around 2 GB (we increase the file size to mmap it). +// Step 3 - Re-open the same badger. We should be able to read all the data +// inserted in the first step. +func TestWindowsDataLoss(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("The test is only for Windows.") + } + + dir, err := ioutil.TempDir("", "badger-test") + require.NoError(t, err) + defer removeDir(dir) + + opt := DefaultOptions(dir).WithSyncWrites(true) + + fmt.Println("First DB Open") + db, err := Open(opt) + require.NoError(t, err) + keyCount := 20 + var keyList [][]byte // Stores all the keys generated. + for i := 0; i < keyCount; i++ { + // It is important that we create different transactions for each request. + err := db.Update(func(txn *Txn) error { + key := []byte(fmt.Sprintf("%d", i)) + v := []byte("barValuebarValuebarValuebarValuebarValue") + require.Greater(t, len(v), opt.ValueThreshold) + + //32 bytes length and now it's not working + err := txn.Set(key, v) + require.NoError(t, err) + keyList = append(keyList, key) + return nil + }) + require.NoError(t, err) + } + require.NoError(t, db.Close()) + + fmt.Println() + fmt.Println("Second DB Open") + opt.Truncate = true + db, err = Open(opt) + require.NoError(t, err) + + // Return after reading one entry. We're simulating a crash. + // Simulate a crash by not closing db but releasing the locks. + if db.dirLockGuard != nil { + require.NoError(t, db.dirLockGuard.release()) + } + if db.valueDirGuard != nil { + require.NoError(t, db.valueDirGuard.release()) + } + // Don't use vlog.Close here. We don't want to fix the file size. Only un-mmap + // the data so that we can truncate the file during the next vlog.Open. 
+ require.NoError(t, y.Munmap(db.vlog.filesMap[db.vlog.maxFid].fmap)) + + fmt.Println() + fmt.Println("Third DB Open") + opt.Truncate = true + db, err = Open(opt) + require.NoError(t, err) + defer db.Close() + + txn := db.NewTransaction(false) + defer txn.Discard() + it := txn.NewIterator(DefaultIteratorOptions) + defer it.Close() + + var result [][]byte // stores all the keys read from the db. + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + k := item.Key() + err := item.Value(func(v []byte) error { + _ = v + return nil + }) + require.NoError(t, err) + result = append(result, k) + } + require.ElementsMatch(t, keyList, result) +} diff --git a/value.go b/value.go index f71970e1c..49383f474 100644 --- a/value.go +++ b/value.go @@ -430,7 +430,7 @@ func (vlog *valueLog) iterate(lf *logFile, offset uint32, fn logEntry) (uint32, } var lastCommit uint64 - var validEndOffset uint32 + var validEndOffset uint32 = offset for { e, err := read.Entry(reader) if err == io.EOF {