From 87d99fe0387ee1df1cf1811d88d37331939ef4ae Mon Sep 17 00:00:00 2001 From: fanmin shi Date: Tue, 9 May 2017 13:45:11 -0700 Subject: [PATCH 1/2] etcd-runner: remove mutex on validate() and release() in global.go election runner can deadlock in atomic release(). suppose election runner has two clients A and B. if A is a leader and B is a follower, B obtains lock for release() and waits for A to close(nextc) which signal next round is ready. However, A can only close(nextc) if it obtains lock for release(); hence deadlock. this pr removes atomicity of validate() and release() in global.go and gives the responsibility of locking to each runner. FIXES #7891 --- .../etcd-runner/command/election_command.go | 4 ++-- tools/functional-tester/etcd-runner/command/global.go | 7 ------- .../etcd-runner/command/lock_racer_command.go | 7 +++++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/functional-tester/etcd-runner/command/election_command.go b/tools/functional-tester/etcd-runner/command/election_command.go index e45c9ec1195..81f1d5cc939 100644 --- a/tools/functional-tester/etcd-runner/command/election_command.go +++ b/tools/functional-tester/etcd-runner/command/election_command.go @@ -89,14 +89,14 @@ func runElectionFunc(cmd *cobra.Command, args []string) { } }() err = e.Campaign(ctx, v) + cancel() + <-donec if err == nil { observedLeader = v } if observedLeader == v { validateWaiters = len(rcs) } - cancel() - <-donec select { case <-ctx.Done(): return nil diff --git a/tools/functional-tester/etcd-runner/command/global.go b/tools/functional-tester/etcd-runner/command/global.go index 1d26f2047ed..4b7ac6eed9f 100644 --- a/tools/functional-tester/etcd-runner/command/global.go +++ b/tools/functional-tester/etcd-runner/command/global.go @@ -56,7 +56,6 @@ func newClient(eps []string, timeout time.Duration) *clientv3.Client { } func doRounds(rcs []roundClient, rounds int, requests int) { - var mu sync.Mutex var wg sync.WaitGroup wg.Add(len(rcs)) @@ -73,22 +72,16 @@ func doRounds(rcs []roundClient, rounds int, requests int) { for rc.acquire() != nil { /* spin */ } - mu.Lock() if err := rc.validate(); err != nil { log.Fatal(err) } - mu.Unlock() time.Sleep(10 * time.Millisecond) rc.progress++ finished <- struct{}{} - mu.Lock() for rc.release() != nil { /* spin */ - mu.Unlock() - mu.Lock() } - mu.Unlock() } }(&rcs[i]) } diff --git a/tools/functional-tester/etcd-runner/command/lock_racer_command.go b/tools/functional-tester/etcd-runner/command/lock_racer_command.go index b0ec491f431..1560362282d 100644 --- a/tools/functional-tester/etcd-runner/command/lock_racer_command.go +++ b/tools/functional-tester/etcd-runner/command/lock_racer_command.go @@ -18,6 +18,7 @@ import ( "context" "errors" "fmt" + "sync" "github.com/coreos/etcd/clientv3/concurrency" @@ -47,6 +48,8 @@ func runRacerFunc(cmd *cobra.Command, args []string) { rcs := make([]roundClient, totalClientConnections) ctx := context.Background() + // mu ensures validate and release funcs are atomic. + var mu sync.Mutex cnt := 0 eps := endpointsFromFlag(cmd) @@ -69,12 +72,16 @@ func runRacerFunc(cmd *cobra.Command, args []string) { m := concurrency.NewMutex(s, racers) rcs[i].acquire = func() error { return m.Lock(ctx) } rcs[i].validate = func() error { + mu.Lock() + defer mu.Unlock() if cnt++; cnt != 1 { return fmt.Errorf("bad lock; count: %d", cnt) } return nil } rcs[i].release = func() error { + mu.Lock() + defer mu.Unlock() if err := m.Unlock(ctx); err != nil { return err } From b44bd6d2a9d769e2ce81cb839384a40bc876f144 Mon Sep 17 00:00:00 2001 From: fanmin shi Date: Wed, 10 May 2017 11:21:17 -0700 Subject: [PATCH 2/2] etcd-runner: fix race on nextc --- .../functional-tester/etcd-runner/command/election_command.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/functional-tester/etcd-runner/command/election_command.go b/tools/functional-tester/etcd-runner/command/election_command.go index 81f1d5cc939..24610187d90 100644 --- a/tools/functional-tester/etcd-runner/command/election_command.go +++ b/tools/functional-tester/etcd-runner/command/election_command.go @@ -129,8 +129,10 @@ func runElectionFunc(cmd *cobra.Command, args []string) { return err } if observedLeader == v { - close(nextc) + oldNextc := nextc nextc = make(chan struct{}) + close(oldNextc) + } <-rcNextc observedLeader = ""