From 0be58d72f4ec8db6a5671b8f357afb190dc8e3c4 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Thu, 3 Jun 2021 14:15:50 -0400 Subject: [PATCH] drivers/exec: Don't inherit Nomad oom_score_adj value (#10698) Explicitly set the `oom_score_adj` value for `exec` and `java` tasks. We recommend that the Nomad service have a low `oom_score_adj` value (e.g. -1000) to avoid having the Nomad agent OOM-killed if the node is oversubscribed. However, Nomad's workloads should not inherit the `oom_score_adj` value of Nomad's process, which is the default behavior. Fixes #10663 --- drivers/shared/executor/executor_linux.go | 4 ++ .../shared/executor/executor_linux_test.go | 56 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 5c4919e8358..54be7ed79f7 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -764,6 +764,10 @@ func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { configureCapabilities(cfg, command) + // children should not inherit Nomad agent oom_score_adj value + oomScoreAdj := 0 + cfg.OomScoreAdj = &oomScoreAdj + if err := configureIsolation(cfg, command); err != nil { return nil, err } diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index b3576797ad4..a8a71f88448 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -465,6 +465,62 @@ func TestExecutor_EscapeContainer(t *testing.T) { require.NoError(err) } +// TestExecutor_DoesNotInheritOomScoreAdj asserts that the exec processes do not +// inherit the oom_score_adj value of Nomad agent/executor process +func TestExecutor_DoesNotInheritOomScoreAdj(t *testing.T) { + t.Parallel() + testutil.ExecCompatible(t) + + oomPath := "/proc/self/oom_score_adj" + origValue, err := os.ReadFile(oomPath) + require.NoError(t, err, "reading oom_score_adj") + + err 
= os.WriteFile(oomPath, []byte("-100"), 0644) + require.NoError(t, err, "setting temporary oom_score_adj") + + defer func() { + err := os.WriteFile(oomPath, origValue, 0644) + require.NoError(t, err, "restoring oom_score_adj") + }() + + testExecCmd := testExecutorCommandWithChroot(t) + execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir + defer allocDir.Destroy() + + execCmd.ResourceLimits = true + execCmd.Cmd = "/bin/bash" + execCmd.Args = []string{"-c", "cat /proc/self/oom_score_adj"} + + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) + defer executor.Shutdown("SIGKILL", 0) + + _, err = executor.Launch(execCmd) + require.NoError(t, err) + + ch := make(chan interface{}) + go func() { + executor.Wait(context.Background()) + close(ch) + }() + + select { + case <-ch: + // all good + case <-time.After(5 * time.Second): + require.Fail(t, "timeout waiting for exec to shutdown") + } + + expected := "0" + tu.WaitForResult(func() (bool, error) { + output := strings.TrimSpace(testExecCmd.stdout.String()) + if output != expected { + return false, fmt.Errorf("oom_score_adj didn't match: want\n%v\n; got:\n%v\n", expected, output) + } + return true, nil + }, func(err error) { require.NoError(t, err) }) + +} + func TestExecutor_Capabilities(t *testing.T) { t.Parallel() testutil.ExecCompatible(t)