diff --git a/etcdmain/etcd.go b/etcdmain/etcd.go index 236755eaf78..ba6031652ca 100644 --- a/etcdmain/etcd.go +++ b/etcdmain/etcd.go @@ -189,7 +189,10 @@ func startEtcd(cfg *embed.Config) (<-chan struct{}, <-chan error, error) { return nil, nil, err } osutil.RegisterInterruptHandler(e.Server.Stop) - <-e.Server.ReadyNotify() // wait for e.Server to join the cluster + select { + case <-e.Server.ReadyNotify(): // wait for e.Server to join the cluster + case <-e.Server.StopNotify(): // publish aborted from 'ErrStopped' + } return e.Server.StopNotify(), e.Err(), nil } diff --git a/integration/cluster.go b/integration/cluster.go index d2f5de39f86..b9bc6b964f6 100644 --- a/integration/cluster.go +++ b/integration/cluster.go @@ -465,6 +465,8 @@ type member struct { // serverClient is a clientv3 that directly calls the etcdserver. serverClient *clientv3.Client + + keepDataDirTerminate bool } func (m *member) GRPCAddr() string { return m.grpcAddr } @@ -769,8 +771,10 @@ func (m *member) Restart(t *testing.T) error { func (m *member) Terminate(t *testing.T) { plog.Printf("terminating %s (%s)", m.Name, m.grpcAddr) m.Close() - if err := os.RemoveAll(m.ServerConfig.DataDir); err != nil { - t.Fatal(err) + if !m.keepDataDirTerminate { + if err := os.RemoveAll(m.ServerConfig.DataDir); err != nil { + t.Fatal(err) + } } plog.Printf("terminated %s (%s)", m.Name, m.grpcAddr) } diff --git a/integration/cluster_test.go b/integration/cluster_test.go index 1bf5fc56c48..6e85ba37e2a 100644 --- a/integration/cluster_test.go +++ b/integration/cluster_test.go @@ -27,6 +27,7 @@ import ( "github.com/coreos/etcd/client" "github.com/coreos/etcd/etcdserver" "github.com/coreos/etcd/pkg/testutil" + "github.com/coreos/pkg/capnslog" "golang.org/x/net/context" ) @@ -441,6 +442,51 @@ func TestRejectUnhealthyRemove(t *testing.T) { } } +// TestRestartRemoved ensures that restarting removed member must exit +// if 'initial-cluster-state' is set 'new' and old data directory still exists +// (see https://github.com/coreos/etcd/issues/7512 for more). +func TestRestartRemoved(t *testing.T) { + defer testutil.AfterTest(t) + capnslog.SetGlobalLogLevel(capnslog.INFO) + + // 1. start single-member cluster + c := NewCluster(t, 1) + for _, m := range c.Members { + m.ServerConfig.StrictReconfigCheck = true + } + c.Launch(t) + defer c.Terminate(t) + + // 2. add a new member + c.AddMember(t) + c.WaitLeader(t) + + oldm := c.Members[0] + oldm.keepDataDirTerminate = true + + // 3. remove first member, shut down without deleting data + if err := c.removeMember(t, uint64(c.Members[0].s.ID())); err != nil { + t.Fatalf("expected to remove member, got error %v", err) + } + c.WaitLeader(t) + + // 4. restart first member with 'initial-cluster-state=new' + // wrong config, expects exit within ReqTimeout + oldm.ServerConfig.NewCluster = false + if err := oldm.Restart(t); err != nil { + t.Fatalf("unexpected ForceRestart error: %v", err) + } + defer func() { + oldm.Close() + os.RemoveAll(oldm.ServerConfig.DataDir) + }() + select { + case <-oldm.s.StopNotify(): + case <-time.After(time.Minute): + t.Fatalf("removed member didn't exit within %v", time.Minute) + } +} + // clusterMustProgress ensures that cluster can make progress. It creates // a random key first, and check the new key could be got from all client urls // of the cluster.