Skip to content

Commit

Permalink
Merge pull request #9569 from planetscale/mysql-teardown
Browse files Browse the repository at this point in the history
Add 30 second wait timeout in mysql stop for cluster Teardown
  • Loading branch information
GuptaManan100 authored Jan 27, 2022
2 parents ca3e191 + 7b5d64c commit 0bd6b78
Showing 1 changed file with 51 additions and 5 deletions.
56 changes: 51 additions & 5 deletions go/test/endtoend/cluster/cluster_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"path"
"regexp"
"strconv"
"strings"
"sync"
"syscall"
"time"
Expand Down Expand Up @@ -731,6 +732,7 @@ func (cluster *LocalProcessCluster) Teardown() {
}

var mysqlctlProcessList []*exec.Cmd
var mysqlctlTabletUIDs []int
for _, keyspace := range cluster.Keyspaces {
for _, shard := range keyspace.Shards {
for _, tablet := range shard.Vttablets {
Expand All @@ -739,6 +741,7 @@ func (cluster *LocalProcessCluster) Teardown() {
log.Errorf("Error in mysqlctl teardown: %v", err)
} else {
mysqlctlProcessList = append(mysqlctlProcessList, proc)
mysqlctlTabletUIDs = append(mysqlctlTabletUIDs, tablet.MysqlctlProcess.TabletUID)
}
}
if tablet.MysqlctldProcess.TabletUID > 0 {
Expand All @@ -754,11 +757,11 @@ func (cluster *LocalProcessCluster) Teardown() {
}
}

for _, proc := range mysqlctlProcessList {
if err := proc.Wait(); err != nil {
log.Errorf("Error in mysqlctl teardown wait: %v", err)
}
}
// On the CI it was noticed that MySQL shutdown hangs sometimes and
// on local investigation it was waiting on SEMI_SYNC acks for an internal command
// of Vitess even after closing the socket file.
// To prevent this process from hanging for 5 minutes, we will add a 30-second timeout.
cluster.waitForMySQLProcessToExit(mysqlctlProcessList, mysqlctlTabletUIDs)

if err := cluster.VtctldProcess.TearDown(); err != nil {
log.Errorf("Error in vtctld teardown: %v", err)
Expand All @@ -774,6 +777,49 @@ func (cluster *LocalProcessCluster) Teardown() {
cluster.teardownCompleted = true
}

// waitForMySQLProcessToExit waits, concurrently, for every mysqlctl command in
// mysqlctlProcessList to exit. mysqlctlTabletUIDs must be parallel to
// mysqlctlProcessList: the UID at index i belongs to the command at index i.
//
// Each command is given up to 30 seconds to finish. If a command exits cleanly
// nothing more is done for it. If it exits with an error, or does not exit
// within the timeout, the PID recorded in
// $VTDATAROOT/vt_<zero-padded UID>/mysql.pid is read and that process is sent
// SIGKILL. The function returns once all commands have been handled.
func (cluster *LocalProcessCluster) waitForMySQLProcessToExit(mysqlctlProcessList []*exec.Cmd, mysqlctlTabletUIDs []int) {
	// Upper bound on how long a single mysqlctl command may take to exit.
	const shutdownTimeout = 30 * time.Second

	var wg sync.WaitGroup
	for i, cmd := range mysqlctlProcessList {
		wg.Add(1)
		go func(cmd *exec.Cmd, tabletUID int) {
			defer wg.Done()

			// Buffered with capacity 1 so the waiter goroutine below can always
			// deliver its result and terminate, even when the timeout fired
			// first and nobody is receiving from the channel any more.
			exit := make(chan error, 1)
			go func() {
				exit <- cmd.Wait()
			}()

			timer := time.NewTimer(shutdownTimeout)
			defer timer.Stop()

			select {
			case <-timer.C:
				// Timed out: fall through to the forced kill below.
			case err := <-exit:
				if err == nil {
					return
				}
				// The command failed: log it and fall through to the forced
				// kill below.
				log.Errorf("Error in mysqlctl teardown wait: %v", err)
			}

			pidFile := path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("/vt_%010d/mysql.pid", tabletUID))
			pidBytes, err := os.ReadFile(pidFile)
			if err != nil {
				// We can't read the file, which is taken to mean the PID file
				// does not exist and the server has already stopped.
				return
			}
			pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
			if err != nil {
				log.Errorf("Error in conversion to integer: %v", err)
				return
			}
			if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
				log.Errorf("Error in killing process: %v", err)
			}
		}(cmd, mysqlctlTabletUIDs[i])
	}
	wg.Wait()
}

// StartVtworker starts a vtworker
func (cluster *LocalProcessCluster) StartVtworker(cell string, extraArgs ...string) error {
httpPort := cluster.GetAndReservePort()
Expand Down

0 comments on commit 0bd6b78

Please sign in to comment.