From 7b16bbd263d39df1d52d939660e57bf07bc5287f Mon Sep 17 00:00:00 2001 From: fanmin shi Date: Thu, 4 May 2017 11:29:28 -0700 Subject: [PATCH] etcdserver: renames xxx.snap.db to db in NewServer() In the case that a follower receives a snapshot from the leader and crashes before renaming xxx.snap.db to db, restarting the follower results in loading the old db. This causes an index mismatch between the snap metadata index and the consistent index from db. This PR fixes the above on init of etcdserver through: 1. check if xxx.snap.db (xxx==snapshot.Metadata.Index) exists. 2. rename xxx.snap.db to db if it exists. 3. load backend again with the new db file. FIXES #7628 --- etcdserver/server.go | 33 +++++++++++++++++++-------------- etcdserver/util.go | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/etcdserver/server.go b/etcdserver/server.go index 33430276bff6..7ec97b2466f2 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -57,6 +57,7 @@ import ( "github.com/coreos/etcd/wal" "github.com/coreos/go-semver/semver" "github.com/coreos/pkg/capnslog" + "golang.org/x/net/context" ) @@ -274,20 +275,7 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) { bepath := filepath.Join(cfg.SnapDir(), databaseFilename) beExist := fileutil.Exist(bepath) - var be backend.Backend - beOpened := make(chan struct{}) - go func() { - be = newBackend(bepath, cfg.QuotaBackendBytes) - beOpened <- struct{}{} - }() - - select { - case <-beOpened: - case <-time.After(time.Second): - plog.Warningf("another etcd process is running with the same data dir and holding the file lock.") - plog.Warningf("waiting for it to exit before starting...") - <-beOpened - } + be := loadBackend(bepath, cfg.QuotaBackendBytes) defer func() { if err != nil { @@ -385,6 +373,23 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) { plog.Panicf("recovered store from snapshot error: %v", err) } plog.Infof("recovered store from snapshot at index %d", 
snapshot.Metadata.Index) + + // if follower receives snapshot from leader and crashes before renaming xxx.snap.db to db, + // restarting follower results in loading an outdated db. + // In this case: + // 1. check if xxx.snap.db (xxx==snapshot.Metadata.Index) exists. + // 2. rename xxx.snap.db to db if it exists. + // 3. load backend again with the new db file. + snapfn, err := snap.GetDBFilePathByID(cfg.SnapDir(), snapshot.Metadata.Index) + if err != nil && err != snap.ErrDBSnapFileNotFound { + return nil, err + } + if snapfn != "" { + if err := os.Rename(snapfn, bepath); err != nil { + plog.Panicf("rename snapshot file error: %v", err) + } + be = loadBackend(bepath, cfg.QuotaBackendBytes) + } } cfg.Print() if !cfg.ForceNewCluster { diff --git a/etcdserver/util.go b/etcdserver/util.go index e3896ffc2d3d..1f9a0f117f6e 100644 --- a/etcdserver/util.go +++ b/etcdserver/util.go @@ -18,6 +18,7 @@ import ( "time" "github.com/coreos/etcd/etcdserver/membership" + "github.com/coreos/etcd/mvcc/backend" "github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/rafthttp" ) @@ -95,3 +96,21 @@ func (nc *notifier) notify(err error) { nc.err = err close(nc.c) } + +func loadBackend(bepath string, quotaBackendBytes int64) (be backend.Backend) { + beOpened := make(chan struct{}) + go func() { + be = newBackend(bepath, quotaBackendBytes) + beOpened <- struct{}{} + }() + + select { + case <-beOpened: + case <-time.After(time.Second): + plog.Warningf("another etcd process is running with the same data dir and holding the file lock.") + plog.Warningf("waiting for it to exit before starting...") + <-beOpened + } + + return be +}