Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

drivers/executor: set oom_score_adj for raw_exec #19515

Merged
merged 5 commits into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/19515.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
rawexec: Fixed a bug where oom_score_adj would be inherited from Nomad client
```
14 changes: 14 additions & 0 deletions drivers/shared/executor/executor_universal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package executor

import (
"fmt"
"os"
"os/exec"
"strconv"
"syscall"
Expand Down Expand Up @@ -112,6 +113,11 @@ func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) {
func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) {
cgroup := command.StatsCgroup()

// ensure tasks do not inherit Nomad agent oom_score_adj value
if err := e.setOomAdj(); err != nil {
return nil, err
}

// cgCleanup will be called after the task has been launched
// v1: remove the executor process from the task's cgroups
// v2: let go of the file descriptor of the task's cgroup
Expand Down Expand Up @@ -244,6 +250,14 @@ func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) {
_ = ed.Write("cpuset.cpus", cpusetCpus)
}

func (e *UniversalExecutor) setOomAdj() error {
// children should not inherit Nomad agent oom_score_adj value
//
// /proc/self/oom_score_adj should work on both cgroups v1 and v2 systems
// range is -1000 to 1000; 0 is the default
return os.WriteFile("/proc/self/oom_score_adj", []byte("0"), 0644)
}

func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 {
cpuShares := command.Resources.LinuxResources.CPUShares
cpuWeight := cgroups.ConvertCPUSharesToCgroupV2Value(uint64(cpuShares))
Expand Down
5 changes: 5 additions & 0 deletions e2e/rawexec/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

// Package rawexec tests the raw_exec task driver.
package rawexec
32 changes: 32 additions & 0 deletions e2e/rawexec/input/oomadj.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

job "oomadj" {
type = "batch"

constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "group" {

reschedule {
attempts = 0
unlimited = false
}

restart {
attempts = 0
mode = "fail"
}

task "cat" {
driver = "raw_exec"
config {
command = "cat"
args = ["/proc/self/oom_score_adj"]
}
}
}
}
29 changes: 29 additions & 0 deletions e2e/rawexec/rawexec_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package rawexec

import (
"testing"

"github.com/hashicorp/nomad/e2e/v3/cluster3"
"github.com/hashicorp/nomad/e2e/v3/jobs3"
"github.com/shoenig/test/must"
)

func TestRawExec(t *testing.T) {
cluster3.Establish(t,
cluster3.Leader(),
cluster3.LinuxClients(1),
)

t.Run("testOomAdj", testOomAdj)
}

func testOomAdj(t *testing.T) {
job, cleanup := jobs3.Submit(t, "./input/oomadj.hcl")
t.Cleanup(cleanup)

logs := job.TaskLogs("group", "cat")
must.StrContains(t, logs.Stdout, "0")
}
1 change: 1 addition & 0 deletions e2e/terraform/etc/nomad.d/nomad-client.service
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ LimitNPROC=infinity
TasksMax=infinity
Restart=on-failure
RestartSec=2
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
Loading