Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Qemu driver: graceful shutdown feature #3411

Merged
merged 12 commits into from
Nov 3, 2017
Merged
100 changes: 54 additions & 46 deletions client/driver/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,26 @@ import (

var (
reQemuVersion = regexp.MustCompile(`version (\d[\.\d+]+)`)

// Prior to qemu 2.10.1, monitor socket paths are truncated to 108 bytes.
// We should consider this if driver.qemu.version is < 2.10.1 and the
// generated monitor path is too long.
//
// Relevant fix is here:
// https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6
qemuVersionLongSocketPathFix = semver.New("2.10.1")
)

const (
// The key populated in Node Attributes to indicate presence of the Qemu driver
qemuDriverAttr = "driver.qemu"
qemuDriverVersionAttr = "driver.qemu.version"
qemuDriverLongMonitorPathAttr = "driver.qemu.longsocketpaths"
qemuDriverAttr = "driver.qemu"
qemuDriverVersionAttr = "driver.qemu.version"
// Represents an ACPI shutdown request to the VM (emulates pressing a physical power button)
// Reference: https://en.wikibooks.org/wiki/QEMU/Monitor
qemuGracefulShutdownMsg = "system_powerdown\n"
legacyMaxMonitorPathLen = 108
qemuMonitorSocketName = "qemu-monitor.sock"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about windows?

Copy link
Contributor Author

@cheeseprocedure cheeseprocedure Nov 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question! Until today, I didn't realize QEMU was available on Windows hosts. I would need to spin up an EC2 instance and experiment a bit with QEMU monitor interaction on that platform.

It appears an existing QEMU driver option can cause trouble on Windows hosts (accelerator = "kvm"). As a first pass, would it be acceptable to fail a task if graceful shutdown has been enabled for the job and runtime.GOOS == "windows" (ensuring it's called out in documentation, of course)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think that is acceptable. Would you mind adding error cases for both graceful and kvm on windows.

// Maximum socket path length prior to qemu 2.10.1
qemuLegacyMaxMonitorPathLen = 108
)

// QemuDriver is a driver for running images via Qemu
Expand Down Expand Up @@ -75,11 +83,27 @@ type qemuHandle struct {
doneCh chan struct{}
}

func getMonitorPath(dir string, longPathSupport string) (string, error) {
if len(dir) > legacyMaxMonitorPathLen && longPathSupport != "1" {
return "", fmt.Errorf("monitor path is too long")
// getMonitorPath is used to determine whether a qemu monitor socket can be
// safely created and accessed in the task directory by the version of qemu
// present on the host. If it is safe to use, the socket's full path is
// returned along with a nil error. Otherwise, an empty string is returned
// along with a descriptive error.
func (d *QemuDriver) getMonitorPath(dir string) (string, error) {
var longPathSupport bool
currentQemuVer := d.DriverContext.node.Attributes[qemuDriverVersionAttr]
currentQemuSemver := semver.New(currentQemuVer)
if currentQemuSemver.LessThan(*qemuVersionLongSocketPathFix) {
longPathSupport = false
d.logger.Printf("[DEBUG] driver.qemu: long socket paths are not available in this version of QEMU (%s)", currentQemuVer)
} else {
longPathSupport = true
d.logger.Printf("[DEBUG] driver.qemu: long socket paths available in this version of QEMU (%s)", currentQemuVer)
}
fullSocketPath := fmt.Sprintf("%s/%s", dir, qemuMonitorSocketName)
if len(fullSocketPath) > qemuLegacyMaxMonitorPathLen && longPathSupport == false {
return "", fmt.Errorf("monitor path is too long for this version of qemu")
}
return fmt.Sprintf("%s/%s", dir, qemuMonitorSocketName), nil
return fullSocketPath, nil
}

// NewQemuDriver is used to create a new exec driver
Expand Down Expand Up @@ -154,21 +178,6 @@ func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool,
node.Attributes[qemuDriverAttr] = "1"
node.Attributes[qemuDriverVersionAttr] = currentQemuVersion

// Prior to qemu 2.10.1, monitor socket paths are truncated to 108 bytes.
// We should consider this if driver.qemu.version is < 2.10.1 and the
// generated monitor path is too long.
//
// Relevant fix is here:
// https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6
currentQemuSemver := semver.New(currentQemuVersion)
fixedSocketPathLenVer := semver.New("2.10.1")
if currentQemuSemver.LessThan(*fixedSocketPathLenVer) {
node.Attributes[qemuDriverLongMonitorPathAttr] = "0"
d.logger.Printf("[DEBUG] driver.qemu: long socket paths are not available in this version of QEMU (%s)", currentQemuVersion)
} else {
d.logger.Printf("[DEBUG] driver.qemu: long socket paths available in this version of QEMU (%s)", currentQemuVersion)
node.Attributes[qemuDriverLongMonitorPathAttr] = "1"
}
return true, nil
}

Expand Down Expand Up @@ -233,13 +242,13 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse
if d.driverConfig.GracefulShutdown {
// This socket will be used to manage the virtual machine (for example,
// to perform graceful shutdowns)
monitorPath, err = getMonitorPath(ctx.TaskDir.Dir, ctx.TaskEnv.NodeAttrs[qemuDriverLongMonitorPathAttr])
if err == nil {
d.logger.Printf("[DEBUG] driver.qemu: got monitor path OK: %s", monitorPath)
args = append(args, "-monitor", fmt.Sprintf("unix:%s,server,nowait", monitorPath))
} else {
d.logger.Printf("[WARN] driver.qemu: %s", err)
monitorPath, err := d.getMonitorPath(ctx.TaskDir.Dir)
if err != nil {
d.logger.Printf("[ERR] driver.qemu: could not get qemu monitor path - error: %s", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"[ERR] driver.qemu: could not get qemu monitor path: %s"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

return nil, err
}
d.logger.Printf("[DEBUG] driver.qemu: got monitor path OK: %s", monitorPath)
args = append(args, "-monitor", fmt.Sprintf("unix:%s,server,nowait", monitorPath))
}

// Add pass through arguments to qemu executable. A user can specify
Expand Down Expand Up @@ -436,11 +445,9 @@ func (h *qemuHandle) Signal(s os.Signal) error {

func (h *qemuHandle) Kill() error {
// First, try sending a graceful shutdown command via the qemu monitor
err := sendQemuShutdown(h.logger, h.monitorPath, h.userPid)

// If we did not send a graceful shutdown via the monitor socket, we'll
// issue an interrupt to the qemu process as a last resort
if err != nil {
if err := sendQemuShutdown(h.logger, h.monitorPath, h.userPid); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will log several debug messages relating to graceful shutdown even if the user didn't set the option. We should only attempt the graceful shutdown if the user asked for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

h.logger.Printf("[DEBUG] driver.qemu: error sending graceful shutdown for user process pid %d: %s", h.userPid, err)
// Issue an interrupt to the qemu process as a last resort
if err := h.executor.ShutDown(); err != nil {
if h.pluginClient.Exited() {
return nil
Expand Down Expand Up @@ -490,22 +497,23 @@ func (h *qemuHandle) run() {
close(h.waitCh)
}

// sendQemuShutdown attempts to issue an ACPI power-off command via the qemu
// monitor
func sendQemuShutdown(logger *log.Logger, monitorPath string, userPid int) error {
if monitorPath == "" {
logger.Printf("[DEBUG] driver.qemu: monitorPath not set; will not attempt graceful shutdown for user process pid %d", userPid)
return errors.New("monitorPath not set")
} else {
monitorSocket, err := net.Dial("unix", monitorPath)
if err == nil {
defer monitorSocket.Close()
logger.Printf("[DEBUG] driver.qemu: sending graceful shutdown command to qemu monitor socket %q for user process pid %d", monitorPath, userPid)
_, err = monitorSocket.Write([]byte(qemuGracefulShutdownMsg))
if err != nil {
logger.Printf("[WARN] driver.qemu: failed to send shutdown message %q to monitor socket %q for user process pid %d: %s", qemuGracefulShutdownMsg, monitorPath, userPid, err)
}
} else {
logger.Printf("[WARN] driver.qemu: could not connect to qemu monitor %q for user process pid %d: %s", monitorPath, userPid, err)
}
}
monitorSocket, err := net.Dial("unix", monitorPath)
if err != nil {
logger.Printf("[WARN] driver.qemu: could not connect to qemu monitor %q for user process pid %d: %s", monitorPath, userPid, err)
return err
}
defer monitorSocket.Close()
logger.Printf("[DEBUG] driver.qemu: sending graceful shutdown command to qemu monitor socket %q for user process pid %d", monitorPath, userPid)
_, err = monitorSocket.Write([]byte(qemuGracefulShutdownMsg))
if err != nil {
logger.Printf("[WARN] driver.qemu: failed to send shutdown message %q to monitor socket %q for user process pid %d: %s", qemuGracefulShutdownMsg, monitorPath, userPid, err)
}
return err
}
120 changes: 109 additions & 11 deletions client/driver/qemu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ func TestQemuDriver_Fingerprint(t *testing.T) {
if node.Attributes[qemuDriverVersionAttr] == "" {
t.Fatalf("Missing Qemu driver version")
}
if node.Attributes[qemuDriverLongMonitorPathAttr] == "" {
t.Fatalf("Missing Qemu long monitor socket path support flag")
}
}

func TestQemuDriver_StartOpen_Wait(t *testing.T) {
Expand Down Expand Up @@ -145,7 +142,7 @@ func TestQemuDriver_GracefulShutdown(t *testing.T) {
},
// With the use of tcg acceleration, it's very unlikely a qemu instance
// will boot (and gracefully halt) in a reasonable amount of time, so
// this timeout is kept low to reduce test execution time
// this timeout is kept low to reduce test execution time.
KillTimeout: time.Duration(1 * time.Second),
LogConfig: &structs.LogConfig{
MaxFiles: 10,
Expand All @@ -166,6 +163,14 @@ func TestQemuDriver_GracefulShutdown(t *testing.T) {
defer ctx.AllocDir.Destroy()
d := NewQemuDriver(ctx.DriverCtx)

apply, err := d.Fingerprint(&config.Config{}, ctx.DriverCtx.node)
if err != nil {
t.Fatalf("err: %v", err)
}
if !apply {
t.Fatalf("should apply")
}

dst := ctx.ExecCtx.TaskDir.Dir

copyFile("./test-resources/qemu/linux-0.2.img", filepath.Join(dst, "linux-0.2.img"), t)
Expand Down Expand Up @@ -304,25 +309,118 @@ func TestQemuDriverUser(t *testing.T) {
}
}

func TestQemuDriverGetMonitorPath(t *testing.T) {
func TestQemuDriverGetMonitorPathOldQemu(t *testing.T) {
task := &structs.Task{
Name: "linux",
Driver: "qemu",
Config: map[string]interface{}{
"image_path": "linux-0.2.img",
"accelerator": "tcg",
"graceful_shutdown": true,
"port_map": []map[string]int{{
"main": 22,
"web": 8080,
}},
"args": []string{"-nodefconfig", "-nodefaults"},
},
KillTimeout: time.Duration(1 * time.Second),
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: &structs.Resources{
CPU: 500,
MemoryMB: 512,
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}},
},
},
},
}

ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()

// Simulate an older version of qemu which does not support long monitor socket paths
ctx.DriverCtx.node.Attributes[qemuDriverVersionAttr] = "2.0.0"

d := &QemuDriver{DriverContext: *ctx.DriverCtx}

shortPath := strings.Repeat("x", 10)
_, err := getMonitorPath(shortPath, "0")
_, err := d.getMonitorPath(shortPath)
if err != nil {
t.Fatal("Should not have returned an error")
}

longPath := strings.Repeat("x", legacyMaxMonitorPathLen+100)
_, err = getMonitorPath(longPath, "0")
longPath := strings.Repeat("x", qemuLegacyMaxMonitorPathLen+100)
_, err = d.getMonitorPath(longPath)
if err == nil {
t.Fatal("Should have returned an error")
}
_, err = getMonitorPath(longPath, "1")

// Max length includes the '/' separator and socket name
maxLengthCount := qemuLegacyMaxMonitorPathLen - len(qemuMonitorSocketName) - 1
maxLengthLegacyPath := strings.Repeat("x", maxLengthCount)
_, err = d.getMonitorPath(maxLengthLegacyPath)
if err != nil {
t.Fatalf("Should not have returned an error: %s", err)
}
}

func TestQemuDriverGetMonitorPathNewQemu(t *testing.T) {
task := &structs.Task{
Name: "linux",
Driver: "qemu",
Config: map[string]interface{}{
"image_path": "linux-0.2.img",
"accelerator": "tcg",
"graceful_shutdown": true,
"port_map": []map[string]int{{
"main": 22,
"web": 8080,
}},
"args": []string{"-nodefconfig", "-nodefaults"},
},
KillTimeout: time.Duration(1 * time.Second),
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: &structs.Resources{
CPU: 500,
MemoryMB: 512,
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}},
},
},
},
}

ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()

// Simulate a version of qemu which supports long monitor socket paths
ctx.DriverCtx.node.Attributes[qemuDriverVersionAttr] = "2.99.99"

d := &QemuDriver{DriverContext: *ctx.DriverCtx}

shortPath := strings.Repeat("x", 10)
_, err := d.getMonitorPath(shortPath)
if err != nil {
t.Fatal("Should not have returned an error")
}

longPath := strings.Repeat("x", qemuLegacyMaxMonitorPathLen+100)
_, err = d.getMonitorPath(longPath)
if err != nil {
t.Fatal("Should not have returned an error")
}

maxLengthPath := strings.Repeat("x", legacyMaxMonitorPathLen)
_, err = getMonitorPath(maxLengthPath, "0")
maxLengthCount := qemuLegacyMaxMonitorPathLen - len(qemuMonitorSocketName) - 1
maxLengthLegacyPath := strings.Repeat("x", maxLengthCount)
_, err = d.getMonitorPath(maxLengthLegacyPath)
if err != nil {
t.Fatal("Should not have returned an error")
}
Expand Down