From 0e9bae7bb4637f57ff94c61bb7afbd19913d9c0b Mon Sep 17 00:00:00 2001 From: Michael McCracken Date: Thu, 18 Jan 2018 16:03:03 -0800 Subject: [PATCH 1/3] lxc: cleanup partially configured containers after errors in Start If there are any errors in container setup after c.Create() in Start(), the container will be left around, with no way to clean it up because the handle will not be created or returned from Start. Added a wrapper that checks for errors and performs appropriate cleanup. Returning a cleanup function from a wrapped function instead of just doing the cleanup before returning the error helps to ensure that future changes that might add or change error exits can't forget to consider a cleanup function. Adds a check to the invalid config test case to check that a container created with an invalid config doesn't get left behind. Signed-off-by: Michael McCracken --- client/driver/lxc.go | 45 ++++++++++++++++++++++++++++----------- client/driver/lxc_test.go | 12 +++++++++++ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/client/driver/lxc.go b/client/driver/lxc.go index fefb6f2fbd2..14484564474 100644 --- a/client/driver/lxc.go +++ b/client/driver/lxc.go @@ -210,9 +210,20 @@ func (d *LxcDriver) Prestart(*ExecContext, *structs.Task) (*PrestartResponse, er // Start starts the LXC Driver func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) { + sresp, err, errCleanup := d.StartWithCleanup(ctx, task) + if err != nil { + if cleanupErr := errCleanup(); cleanupErr != nil { + d.logger.Printf("[ERR] error occured:\n%v\nwhile cleaning up from error in Start: %v", cleanupErr, err) + } + } + return sresp, err +} + +func (d *LxcDriver) StartWithCleanup(ctx *ExecContext, task *structs.Task) (*StartResponse, error, func() error) { + noCleanup := func() error { return nil } var driverConfig LxcDriverConfig if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil { - return nil, err + return nil, err, noCleanup } lxcPath := lxc.DefaultConfigPath() if path := d.config.Read("driver.lxc.path"); path != "" { @@ -222,7 +233,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, containerName := fmt.Sprintf("%s-%s", task.Name, d.DriverContext.allocID) c, err := lxc.NewContainer(containerName, lxcPath) if err != nil { - return nil, fmt.Errorf("unable to initialize container: %v", err) + return nil, fmt.Errorf("unable to initialize container: %v", err), noCleanup } var verbosity lxc.Verbosity @@ -232,7 +243,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, case "", "quiet": verbosity = lxc.Quiet default: - return nil, fmt.Errorf("lxc driver config 'verbosity' can only be either quiet or verbose") + return nil, fmt.Errorf("lxc driver config 'verbosity' can only be either quiet or verbose"), noCleanup } c.SetVerbosity(verbosity) @@ -249,7 +260,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, case "", "error": logLevel = lxc.ERROR default: - return nil, fmt.Errorf("lxc driver config 'log_level' can only be trace, debug, info, warn or error") + return nil, fmt.Errorf("lxc driver config 'log_level' can only be trace, debug, info, warn or error"), noCleanup } c.SetLogLevel(logLevel) @@ -267,12 +278,12 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, } if err := c.Create(options); err != nil { - return nil, fmt.Errorf("unable to create container: %v", err) + return nil, fmt.Errorf("unable to create container: %v", err), noCleanup } // Set the network type to none if err := c.SetConfigItem("lxc.network.type", "none"); err != nil { - return nil, fmt.Errorf("error setting network type configuration: %v", err) + return nil, fmt.Errorf("error setting network type configuration: %v", err), c.Destroy } // Bind mount the shared alloc dir and task local dir in the container @@ -290,7 +301,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, if filepath.IsAbs(paths[0]) { if !volumesEnabled { - return nil, fmt.Errorf("absolute bind-mount volume in config but '%v' is false", lxcVolumesConfigOption) + return nil, fmt.Errorf("absolute bind-mount volume in config but '%v' is false", lxcVolumesConfigOption), c.Destroy } } else { // Relative source paths are treated as relative to alloc dir @@ -302,21 +313,31 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, for _, mnt := range mounts { if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil { - return nil, fmt.Errorf("error setting bind mount %q error: %v", mnt, err) + return nil, fmt.Errorf("error setting bind mount %q error: %v", mnt, err), c.Destroy } } // Start the container if err := c.Start(); err != nil { - return nil, fmt.Errorf("unable to start container: %v", err) + return nil, fmt.Errorf("unable to start container: %v", err), c.Destroy + } + + stopAndDestroyCleanup := func() error { + if err := c.Stop(); err != nil { + return err + } + if err := c.Destroy(); err != nil { + return err + } + return nil } // Set the resource limits if err := c.SetMemoryLimit(lxc.ByteSize(task.Resources.MemoryMB) * lxc.MB); err != nil { - return nil, fmt.Errorf("unable to set memory limits: %v", err) + return nil, fmt.Errorf("unable to set memory limits: %v", err), stopAndDestroyCleanup } if err := c.SetCgroupItem("cpu.shares", strconv.Itoa(task.Resources.CPU)); err != nil { - return nil, fmt.Errorf("unable to set cpu shares: %v", err) + return nil, fmt.Errorf("unable to set cpu shares: %v", err), stopAndDestroyCleanup } h := lxcDriverHandle{ @@ -335,7 +356,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, go h.run() - return &StartResponse{Handle: &h}, nil + return &StartResponse{Handle: &h}, nil, noCleanup } func (d *LxcDriver) Cleanup(*ExecContext, *CreatedResources) error { return nil } diff --git a/client/driver/lxc_test.go b/client/driver/lxc_test.go index ddc78193c7d..b1d94cf2083 100644 --- a/client/driver/lxc_test.go +++ b/client/driver/lxc_test.go @@ -310,6 +310,7 @@ func TestLxcDriver_Start_NoVolumes(t *testing.T) { ctx := testDriverContexts(t, task) defer ctx.AllocDir.Destroy() + // set lxcVolumesConfigOption to false to disallow absolute paths as the source for the bind mount ctx.DriverCtx.config.Options = map[string]string{lxcVolumesConfigOption: "false"} d := NewLxcDriver(ctx.DriverCtx) @@ -317,8 +318,19 @@ func TestLxcDriver_Start_NoVolumes(t *testing.T) { if _, err := d.Prestart(ctx.ExecCtx, task); err != nil { t.Fatalf("prestart err: %v", err) } + + // expect the "absolute bind-mount volume in config.. " error _, err := d.Start(ctx.ExecCtx, task) if err == nil { t.Fatalf("expected error in start, got nil.") } + + // Because the container was created but not started before + // the expected error, we can test that the destroy-only + // cleanup is done here. + containerName := fmt.Sprintf("%s-%s", task.Name, ctx.DriverCtx.allocID) + if err := exec.Command("bash", "-c", fmt.Sprintf("lxc-ls -1 | grep -q %s", containerName)).Run(); err == nil { + t.Fatalf("error, container '%s' is still around", containerName) + } + } From 81f64eea27e7bf254add89f29a099665cc92881b Mon Sep 17 00:00:00 2001 From: Michael McCracken Date: Tue, 23 Jan 2018 15:03:09 -0800 Subject: [PATCH 2/3] review cleanup don't export an internal function, and simplify some code Signed-off-by: Michael McCracken --- client/driver/lxc.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/client/driver/lxc.go b/client/driver/lxc.go index 14484564474..c4a175f044a 100644 --- a/client/driver/lxc.go +++ b/client/driver/lxc.go @@ -210,16 +210,16 @@ func (d *LxcDriver) Prestart(*ExecContext, *structs.Task) (*PrestartResponse, er // Start starts the LXC Driver func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) { - sresp, err, errCleanup := d.StartWithCleanup(ctx, task) + sresp, err, errCleanup := d.startWithCleanup(ctx, task) if err != nil { if cleanupErr := errCleanup(); cleanupErr != nil { - d.logger.Printf("[ERR] error occured:\n%v\nwhile cleaning up from error in Start: %v", cleanupErr, err) + d.logger.Printf("[ERR] error occured while cleaning up from error in Start: %v", cleanupErr) } } return sresp, err } -func (d *LxcDriver) StartWithCleanup(ctx *ExecContext, task *structs.Task) (*StartResponse, error, func() error) { +func (d *LxcDriver) startWithCleanup(ctx *ExecContext, task *structs.Task) (*StartResponse, error, func() error) { noCleanup := func() error { return nil } var driverConfig LxcDriverConfig if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil { @@ -326,10 +326,7 @@ func (d *LxcDriver) StartWithCleanup(ctx *ExecContext, task *structs.Task) (*Sta if err := c.Stop(); err != nil { return err } - if err := c.Destroy(); err != nil { - return err - } - return nil + return c.Destroy() } // Set the resource limits From 2dd31f2cc7c04e0d859298e3fab41b9c4fa0e8fd Mon Sep 17 00:00:00 2001 From: Michael McCracken Date: Thu, 25 Jan 2018 13:56:14 -0800 Subject: [PATCH 3/3] fix speling in log Signed-off-by: Michael McCracken --- client/driver/lxc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/driver/lxc.go b/client/driver/lxc.go index c4a175f044a..a4f34c501ad 100644 --- a/client/driver/lxc.go +++ b/client/driver/lxc.go @@ -213,7 +213,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, sresp, err, errCleanup := d.startWithCleanup(ctx, task) if err != nil { if cleanupErr := errCleanup(); cleanupErr != nil { - d.logger.Printf("[ERR] error occured while cleaning up from error in Start: %v", cleanupErr) + d.logger.Printf("[ERR] error occurred while cleaning up from error in Start: %v", cleanupErr) } } return sresp, err