From 2f4b9da61afa31ba55b065a1360cf347f187838a Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Wed, 11 Dec 2019 10:28:41 -0500 Subject: [PATCH 1/3] drivers/exec: test all cgroups are destroyed --- drivers/shared/executor/executor_linux.go | 16 ++++ .../shared/executor/executor_linux_test.go | 83 +++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index f81090a56e4..379bab13a8c 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -747,6 +747,22 @@ func configureBasicCgroups(cfg *lconfigs.Config) error { return nil } +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see paths from host, which don't exist in container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { cfg := &lconfigs.Config{ Cgroups: &lconfigs.Cgroup{ diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index 1861ea5ae04..619b219e18c 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/plugins/drivers" tu "github.com/hashicorp/nomad/testutil" + "github.com/opencontainers/runc/libcontainer/cgroups" lconfigs "github.com/opencontainers/runc/libcontainer/configs" "github.com/stretchr/testify/require" "golang.org/x/sys/unix" @@ -217,6 +218,88 @@ func TestExecutor_CgroupPaths(t *testing.T) { }, func(err error) { t.Error(err) }) } +// TestExecutor_CgroupPaths asserts that all cgroups created for a task +// are destroyed on shutdown +func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) { + t.Parallel() + require := require.New(t) + testutil.ExecCompatible(t) + + testExecCmd := testExecutorCommandWithChroot(t) + execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir + execCmd.Cmd = "/bin/bash" + execCmd.Args = []string{"-c", "sleep 0.2; cat /proc/self/cgroup"} + defer allocDir.Destroy() + + execCmd.ResourceLimits = true + + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) + defer executor.Shutdown("SIGKILL", 0) + + ps, err := executor.Launch(execCmd) + require.NoError(err) + require.NotZero(ps.Pid) + + state, err := executor.Wait(context.Background()) + require.NoError(err) + require.Zero(state.ExitCode) + + var cgroupsPaths string + tu.WaitForResult(func() (bool, error) { + output := strings.TrimSpace(testExecCmd.stdout.String()) + // sanity check that we got some cgroups + if !strings.Contains(output, ":devices:") { + return false, fmt.Errorf("was expected cgroup files but found:\n%v", output) + } + lines := strings.Split(output, "\n") + for _, line := range lines { + // Every cgroup entry should be /nomad/$ALLOC_ID + if line == "" { + continue + } + + // Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker + // don't isolate it by default. + if strings.Contains(line, ":rdma:") { + continue + } + + if !strings.Contains(line, ":/nomad/") { + return false, fmt.Errorf("Not a member of the alloc's cgroup: expected=...:/nomad/... -- found=%q", line) + } + } + + cgroupsPaths = output + return true, nil + }, func(err error) { t.Error(err) }) + + // shutdown executor and test that cgroups are destroyed + executor.Shutdown("SIGKILL", 0) + + // test that the cgroup paths are not visible + tmpFile, err := ioutil.TempFile("", "") + require.NoError(err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString(cgroupsPaths) + require.NoError(err) + tmpFile.Close() + + subsystems, err := cgroups.ParseCgroupFile(tmpFile.Name()) + require.NoError(err) + + for subsystem, cgroup := range subsystems { + if !strings.Contains(cgroup, "nomad/") { + // this should only be rdma at this point + continue + } + + p, err := getCgroupPathHelper(subsystem, cgroup) + require.NoError(err) + require.Falsef(cgroups.PathExists(p), "cgroup for %s %s still exists", subsystem, cgroup) + } +} + func TestUniversalExecutor_LookupTaskBin(t *testing.T) { t.Parallel() require := require.New(t) From 596d0be5d80c8ff3ff0ec8e253184fae22a79385 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Wed, 11 Dec 2019 11:28:09 -0500 Subject: [PATCH 2/3] executor: stop joining executor to container cgroup Stop joining libcontainer executor process into the newly created task container cgroup, to ensure that the cgroups are fully destroyed on shutdown, and to make it consistent with other plugin processes. Previously, executor process is added to the container cgroup so the executor process resources get aggregated along with user processes in our metric aggregation. However, adding executor process to container cgroup adds some complications with much benefits: First, it complicates cleanup. We must ensure that the executor is removed from container cgroup on shutdown. Though, we had a bug where we missed removing it from the systemd cgroup. Because executor uses `containerState.CgroupPaths` on launch, which includes systemd, but `cgroups.GetAllSubsystems` which doesn't. Second, it may have advese side-effects. When a user process is cpu bound or uses too much memory, executor should remain functioning without risk of being killed (by OOM killer) or throttled. Third, it is inconsistent with other drivers and plugins. Logmon and DockerLogger processes aren't in the task cgroups. Neither are containerd processes, though it is equivalent to executor in responsibility. Fourth, in my experience when executor process moves cgroup while it's running, the cgroup aggregation is odd. The cgroup `memory.usage_in_bytes` doesn't seem to capture the full memory usage of the executor process and becomes a red-harring when investigating memory issues. For all the reasons above, I opted to have executor remain in nomad agent cgroup and we can revisit this when we have a better story for plugin process cgroup management. --- drivers/shared/executor/executor_linux.go | 50 ----------------------- 1 file changed, 50 deletions(-) diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 379bab13a8c..7cf06c05769 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -17,7 +17,6 @@ import ( "github.com/armon/circbuf" "github.com/hashicorp/consul-template/signals" hclog "github.com/hashicorp/go-hclog" - multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/stats" cstructs "github.com/hashicorp/nomad/client/structs" @@ -92,15 +91,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro l.command = command - // Move to the root cgroup until process is started - subsystems, err := cgroups.GetAllSubsystems() - if err != nil { - return nil, err - } - if err := JoinRootCgroup(subsystems); err != nil { - return nil, err - } - // create a new factory which will store the container state in the allocDir factory, err := libcontainer.New( path.Join(command.TaskDir, "../alloc/container"), @@ -194,15 +184,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro return nil, err } - // Join process cgroups - containerState, err := container.State() - if err != nil { - l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err) - } - if err := cgroups.EnterPid(containerState.CgroupPaths, os.Getpid()); err != nil { - l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err) - } - // start a goroutine to wait on the process to complete, so Wait calls can // be multiplexed l.userProcExited = make(chan interface{}) @@ -287,15 +268,6 @@ func (l *LibcontainerExecutor) Shutdown(signal string, grace time.Duration) erro return nil } - // move executor to root cgroup - subsystems, err := cgroups.GetAllSubsystems() - if err != nil { - return err - } - if err := JoinRootCgroup(subsystems); err != nil { - return err - } - status, err := l.container.Status() if err != nil { return err @@ -787,28 +759,6 @@ func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { return cfg, nil } -// JoinRootCgroup moves the current process to the cgroups of the init process -func JoinRootCgroup(subsystems []string) error { - mErrs := new(multierror.Error) - paths := map[string]string{} - for _, s := range subsystems { - mnt, _, err := cgroups.FindCgroupMountpointAndRoot("", s) - if err != nil { - multierror.Append(mErrs, fmt.Errorf("error getting cgroup path for subsystem: %s", s)) - continue - } - - paths[s] = mnt - } - - err := cgroups.EnterPid(paths, os.Getpid()) - if err != nil { - multierror.Append(mErrs, err) - } - - return mErrs.ErrorOrNil() -} - // cmdDevices converts a list of driver.DeviceConfigs into excutor.Devices. func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) { if len(devices) == 0 { From f794b49ec636243c7dce0798c554adfa72a33a1b Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Wed, 11 Dec 2019 11:39:16 -0500 Subject: [PATCH 3/3] simplify cgroup path lookup --- drivers/shared/executor/executor_linux.go | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 7cf06c05769..7a9c55b8ae7 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -26,7 +26,6 @@ import ( "github.com/hashicorp/nomad/plugins/drivers" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" - cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" lconfigs "github.com/opencontainers/runc/libcontainer/configs" ldevices "github.com/opencontainers/runc/libcontainer/devices" lutils "github.com/opencontainers/runc/libcontainer/utils" @@ -35,7 +34,7 @@ import ( ) const ( - defaultCgroupParent = "nomad" + defaultCgroupParent = "/nomad" ) var ( @@ -692,30 +691,21 @@ func configureBasicCgroups(cfg *lconfigs.Config) error { id := uuid.Generate() // Manually create freezer cgroup - cfg.Cgroups.Paths = map[string]string{} - root, err := cgroups.FindCgroupMountpointDir() - if err != nil { - return err - } - if _, err := os.Stat(root); err != nil { - return err - } + subsystem := "freezer" - freezer := cgroupFs.FreezerGroup{} - subsystem := freezer.Name() - path, err := cgroups.FindCgroupMountpoint("", subsystem) + path, err := getCgroupPathHelper(subsystem, filepath.Join(defaultCgroupParent, id)) if err != nil { return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err) } - // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. - path = filepath.Join(root, filepath.Base(path), defaultCgroupParent, id) if err = os.MkdirAll(path, 0755); err != nil { return err } - cfg.Cgroups.Paths[subsystem] = path + cfg.Cgroups.Paths = map[string]string{ + subsystem: path, + } return nil }