From 213f7f78777b6d9182f83ed163efdd898251ad3a Mon Sep 17 00:00:00 2001
From: Joe Betz
Date: Wed, 12 Feb 2020 10:12:48 -0800
Subject: [PATCH] mvcc/backend: Delete orphaned db.tmp files before defrag

---
 CHANGELOG-3.5.md                   |  1 +
 etcdserver/api/snap/snapshotter.go | 22 ++++++++++++++++++++++
 mvcc/backend/backend.go            | 20 ++++++++++++++++----
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md
index 6b21c230403..a11ccba202c 100644
--- a/CHANGELOG-3.5.md
+++ b/CHANGELOG-3.5.md
@@ -81,6 +81,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
 - `etcd --experimental-backend-bbolt-freelist-type` has been deprecated.
 - Support [rollback/downgrade](TODO).
 - Deprecate v2 apply on cluster version. [Use v3 request to set cluster version and recover cluster version from v3 backend](https://github.com/etcd-io/etcd/pull/11427).
+- [Fix corruption bug in defrag](https://github.com/etcd-io/etcd/pull/11613).
 
 ### Package `embed`
 
diff --git a/etcdserver/api/snap/snapshotter.go b/etcdserver/api/snap/snapshotter.go
index a933b3519b3..87f33e59805 100644
--- a/etcdserver/api/snap/snapshotter.go
+++ b/etcdserver/api/snap/snapshotter.go
@@ -206,6 +206,9 @@ func (s *Snapshotter) snapNames() ([]string, error) {
 	if err != nil {
 		return nil, err
 	}
+	if err = s.cleanupSnapdir(names); err != nil {
+		return nil, err
+	}
 	snaps := checkSuffix(s.lg, names)
 	if len(snaps) == 0 {
 		return nil, ErrNoSnapshot
@@ -231,3 +234,22 @@ func checkSuffix(lg *zap.Logger, names []string) []string {
 	}
 	return snaps
 }
+
+// cleanupSnapdir removes any files that should not be in the snapshot directory:
+// - db.tmp prefixed files that can be orphaned by defragmentation
+//
+// Orphans are removed whether or not a logger is configured. A file that no
+// longer exists is not an error; any other removal failure is returned.
+func (s *Snapshotter) cleanupSnapdir(filenames []string) error {
+	for _, filename := range filenames {
+		if strings.HasPrefix(filename, "db.tmp") {
+			if s.lg != nil {
+				s.lg.Info("found orphaned defragmentation file; deleting", zap.String("path", filename))
+			}
+			if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) {
+				return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr)
+			}
+		}
+	}
+	return nil
+}
diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go
index 63a4a2e9998..77e2d0dd986 100644
--- a/mvcc/backend/backend.go
+++ b/mvcc/backend/backend.go
@@ -358,13 +358,24 @@ func (b *backend) defrag() error {
 
 	b.batchTx.tx = nil
 
-	tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions)
+	// Create a temporary file to ensure we start with a clean slate.
+	// Snapshotter.cleanupSnapdir cleans up any of these that are found during startup.
+	dir := filepath.Dir(b.db.Path())
+	temp, err := ioutil.TempFile(dir, "db.tmp.*")
+	if err != nil {
+		return err
+	}
+	options := *boltOpenOptions
+	options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) {
+		return temp, nil
+	}
+	tdbp := temp.Name()
+	tmpdb, err := bolt.Open(tdbp, 0600, &options)
 	if err != nil {
 		return err
 	}
 
 	dbp := b.db.Path()
-	tdbp := tmpdb.Path()
 	size1, sizeInUse1 := b.Size(), b.SizeInUse()
 	if b.lg != nil {
 		b.lg.Info(
@@ -376,12 +387,12 @@ func (b *backend) defrag() error {
 			zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse1))),
 		)
 	}
-
+	// gofail: var defragBeforeCopy struct{}
 	err = defragdb(b.db, tmpdb, defragLimit)
 	if err != nil {
 		tmpdb.Close()
 		if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {
-			b.lg.Error("failed to remove dirs under tmpdb", zap.Error(rmErr))
+			b.lg.Error("failed to remove db.tmp after defragmentation failed", zap.Error(rmErr))
 		}
 		return err
 	}
@@ -394,6 +405,7 @@ func (b *backend) defrag() error {
 	if err != nil {
 		b.lg.Fatal("failed to close tmp database", zap.Error(err))
 	}
+	// gofail: var defragBeforeRename struct{}
 	err = os.Rename(tdbp, dbp)
 	if err != nil {
 		b.lg.Fatal("failed to rename tmp database", zap.Error(err))