From b6ad71c776ffda46bcfc35bdb3643e0c05318f51 Mon Sep 17 00:00:00 2001
From: fanmin shi <fanmin.shi@coreos.com>
Date: Thu, 4 May 2017 11:29:28 -0700
Subject: [PATCH] etcdserver: renames xxx.snap.db to db in NewServer()

If a follower receives a snapshot from the leader and crashes
before renaming xxx.snap.db to db, restarting the follower
results in loading the old db. This causes an index mismatch
between the snapshot metadata index and the consistent index
from the db.

This PR fixes the above during etcdserver initialization by:

1. checking whether xxx.snap.db (xxx == snapshot.Metadata.Index) exists.
2. renaming xxx.snap.db to db if it exists.
3. loading the backend again with the new db file.

FIXES #7628
---
 etcdserver/server.go | 32 +++++++++++++++++++-------------
 etcdserver/util.go   | 17 +++++++++++++++++
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/etcdserver/server.go b/etcdserver/server.go
index 33430276bff6..bc1514efbd09 100644
--- a/etcdserver/server.go
+++ b/etcdserver/server.go
@@ -57,6 +57,7 @@ import (
 	"github.com/coreos/etcd/wal"
 	"github.com/coreos/go-semver/semver"
 	"github.com/coreos/pkg/capnslog"
+
 	"golang.org/x/net/context"
 )
 
@@ -275,19 +276,7 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
 	beExist := fileutil.Exist(bepath)
 
 	var be backend.Backend
-	beOpened := make(chan struct{})
-	go func() {
-		be = newBackend(bepath, cfg.QuotaBackendBytes)
-		beOpened <- struct{}{}
-	}()
-
-	select {
-	case <-beOpened:
-	case <-time.After(time.Second):
-		plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
-		plog.Warningf("waiting for it to exit before starting...")
-		<-beOpened
-	}
+	loadBackend(bepath, &be, cfg.QuotaBackendBytes)
 
 	defer func() {
 		if err != nil {
@@ -385,6 +374,23 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
 				plog.Panicf("recovered store from snapshot error: %v", err)
 			}
 			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
+
+			// If a follower receives a snapshot from the leader and crashes before renaming xxx.snap.db to db,
+			// restarting the follower results in loading an outdated db.
+			// In this case:
+			// 1. check if xxx.snap.db (xxx==snapshot.Metadata.Index) exists.
+			// 2. rename xxx.snap.db to db if exists.
+			// 3. load backend again with the new db file.
+			snapfn, err := snap.GetDBFilePathByID(cfg.SnapDir(), snapshot.Metadata.Index)
+			if err != nil && err != snap.ErrDBSnapFileNotFound {
+				return nil, err
+			}
+			if snapfn != "" {
+				if err := os.Rename(snapfn, bepath); err != nil {
+					plog.Panicf("rename snapshot file error: %v", err)
+				}
+				loadBackend(bepath, &be, cfg.QuotaBackendBytes)
+			}
 		}
 		cfg.Print()
 		if !cfg.ForceNewCluster {
diff --git a/etcdserver/util.go b/etcdserver/util.go
index e3896ffc2d3d..3aeb26317bc4 100644
--- a/etcdserver/util.go
+++ b/etcdserver/util.go
@@ -18,6 +18,7 @@ import (
 	"time"
 
 	"github.com/coreos/etcd/etcdserver/membership"
+	"github.com/coreos/etcd/mvcc/backend"
 	"github.com/coreos/etcd/pkg/types"
 	"github.com/coreos/etcd/rafthttp"
 )
@@ -95,3 +96,19 @@ func (nc *notifier) notify(err error) {
 	nc.err = err
 	close(nc.c)
 }
+
+func loadBackend(bepath string, be *backend.Backend, QuotaBackendBytes int64) { // stores newBackend(bepath, QuotaBackendBytes) in *be and returns only after that call has finished; be must be non-nil
+	beOpened := make(chan struct{}) // unbuffered; the goroutine sends exactly once after newBackend returns
+	go func() {
+		*be = newBackend(bepath, QuotaBackendBytes) // result is handed back through the out-parameter, written before the send below
+		beOpened <- struct{}{}
+	}()
+	// Wait for the goroutine; if it has not signalled within one second, log a warning and keep waiting (there is no timeout).
+	select {
+	case <-beOpened:
+	case <-time.After(time.Second):
+		plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
+		plog.Warningf("waiting for it to exit before starting...")
+		<-beOpened // still block until newBackend returns; this receive also unblocks the goroutine's send so it can exit
+	}
+}