From 3ef9637886bbcb96590c53f122cb5a50fe603f21 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 18 Dec 2023 17:01:37 -0800 Subject: [PATCH 1/5] drivers/executor: set oom_score_adj for raw_exec This might not be wholly true since I don't know all configurations of Nomad, but in our use cases, we run some of our tasks as `raw_exec` for reasons. We observed that our tasks were running with `oom_score_adj = -1000`, which prevents them from being OOM'd. This value is being inherited from the nomad agent parent process, as configured by systemd. Similar to #10698, we also were shocked to have this value inherited down to every child process and believe that we should also set this value to 0 explicitly. I have no idea if there are other paths that might leverage this or other ways that `raw_exec` can manifest, but this is how I was able to observe and fix in one of our configurations. We have been running in production our tasks wrapped in a script that does: `echo 0 > /proc/self/oom_score_adj` to avoid this issue. --- drivers/shared/executor/executor_universal_linux.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/shared/executor/executor_universal_linux.go b/drivers/shared/executor/executor_universal_linux.go index 4aab9199d22..63d942a817f 100644 --- a/drivers/shared/executor/executor_universal_linux.go +++ b/drivers/shared/executor/executor_universal_linux.go @@ -5,6 +5,7 @@ package executor import ( "fmt" + "os" "os/exec" "strconv" "syscall" @@ -112,6 +113,11 @@ func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) { func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) { cgroup := command.StatsCgroup() + // children should not inherit Nomad agent oom_score_adj value + if err := os.WriteFile("/proc/self/oom_score_adj", []byte("0"), 0644); err != nil { + return nil, err + } + // cgCleanup will be called after the task has been launched // v1: remove the executor process from the task's cgroups // v2: let go of the file descriptor of the task's cgroup From b5c285ed28a84ab030a98e9ee504811bea0d2f73 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 2 Jan 2024 17:17:17 +0000 Subject: [PATCH 2/5] drivers/executor: minor cleanup of setting oom adjustment --- drivers/shared/executor/executor_universal_linux.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/shared/executor/executor_universal_linux.go b/drivers/shared/executor/executor_universal_linux.go index 63d942a817f..32c4bd90295 100644 --- a/drivers/shared/executor/executor_universal_linux.go +++ b/drivers/shared/executor/executor_universal_linux.go @@ -113,8 +113,8 @@ func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) { func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) { cgroup := command.StatsCgroup() - // children should not inherit Nomad agent oom_score_adj value - if err := os.WriteFile("/proc/self/oom_score_adj", []byte("0"), 0644); err != nil { + // ensure tasks do not inherit Nomad agent oom_score_adj value + if err := e.setOomAdj(); err != nil { return nil, err } @@ -250,6 +250,14 @@ func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) { _ = ed.Write("cpuset.cpus", cpusetCpus) } +func (e *UniversalExecutor) setOomAdj() error { + // children should not inherit Nomad agent oom_score_adj value + // + // /proc/self/oom_score_adj should work on both cgroups v1 and v2 systems + // range is -1000 to 1000; 0 is the default + return os.WriteFile("/proc/self/oom_score_adj", []byte("0"), 0644) +} + func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 { cpuShares := command.Resources.LinuxResources.CPUShares cpuWeight := cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares)) From a4cc36530f90b2f503685527f8f4f8ce0cb6505d Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 2 Jan 2024 17:30:52 +0000 Subject: [PATCH 3/5] e2e: add test for raw_exec oom adjust score --- e2e/rawexec/doc.go | 5 +++++ e2e/rawexec/input/oomadj.hcl | 32 ++++++++++++++++++++++++++++++++ e2e/rawexec/rawexec_test.go | 29 +++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 e2e/rawexec/doc.go create mode 100644 e2e/rawexec/input/oomadj.hcl create mode 100644 e2e/rawexec/rawexec_test.go diff --git a/e2e/rawexec/doc.go b/e2e/rawexec/doc.go new file mode 100644 index 00000000000..dac71294eef --- /dev/null +++ b/e2e/rawexec/doc.go @@ -0,0 +1,5 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +// Package rawexec tests the raw_exec task driver. +package rawexec diff --git a/e2e/rawexec/input/oomadj.hcl b/e2e/rawexec/input/oomadj.hcl new file mode 100644 index 00000000000..bc6abe51547 --- /dev/null +++ b/e2e/rawexec/input/oomadj.hcl @@ -0,0 +1,32 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +job "oomadj" { + type = "batch" + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "group" { + + reschedule { + attempts = 0 + unlimited = false + } + + restart { + attempts = 0 + mode = "fail" + } + + task "cat" { + driver = "raw_exec" + config { + command = "cat" + args = ["/proc/self/oom_score_adj"] + } + } + } +} diff --git a/e2e/rawexec/rawexec_test.go b/e2e/rawexec/rawexec_test.go new file mode 100644 index 00000000000..8907e797e16 --- /dev/null +++ b/e2e/rawexec/rawexec_test.go @@ -0,0 +1,29 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package rawexec + +import ( + "testing" + + "github.com/hashicorp/nomad/e2e/v3/cluster3" + "github.com/hashicorp/nomad/e2e/v3/jobs3" + "github.com/shoenig/test/must" +) + +func TestRawExec(t *testing.T) { + cluster3.Establish(t, + cluster3.Leader(), + cluster3.LinuxClients(1), + ) + + t.Run("testOomAdj", testOomAdj) +} + +func testOomAdj(t *testing.T) { + job, cleanup := jobs3.Submit(t, "./input/oomadj.hcl") + t.Cleanup(cleanup) + + logs := job.TaskLogs("group", "cat") + must.StrContains(t, logs.Stdout, "0") +} From b5f8363e0c0b290767e055aa7c795bc3f57265e0 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 2 Jan 2024 18:07:58 +0000 Subject: [PATCH 4/5] e2e: set oom score adjust to -999 --- e2e/terraform/etc/nomad.d/nomad-client.service | 1 + 1 file changed, 1 insertion(+) diff --git a/e2e/terraform/etc/nomad.d/nomad-client.service b/e2e/terraform/etc/nomad.d/nomad-client.service index e37b995c09e..ef6a95e14a2 100644 --- a/e2e/terraform/etc/nomad.d/nomad-client.service +++ b/e2e/terraform/etc/nomad.d/nomad-client.service @@ -17,6 +17,7 @@ LimitNPROC=infinity TasksMax=infinity Restart=on-failure RestartSec=2 +OOMScoreAdjust=-999 [Install] WantedBy=multi-user.target From 62ac11030fd4605b8c0ca4045c932bbd534d59e6 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 2 Jan 2024 18:51:06 +0000 Subject: [PATCH 5/5] cl: add cl --- .changelog/19515.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/19515.txt diff --git a/.changelog/19515.txt b/.changelog/19515.txt new file mode 100644 index 00000000000..0237b257b9f --- /dev/null +++ b/.changelog/19515.txt @@ -0,0 +1,3 @@ +```release-note:bug +rawexec: Fixed a bug where oom_score_adj would be inherited from Nomad client +```