Skip to content

Commit

Permalink
drivers/raw_exec: enable configuring raw_exec task to have no memory …
Browse files Browse the repository at this point in the history
…limit (#19670)

* drivers/raw_exec: enable configuring raw_exec task to have no memory limit

This PR makes it possible to configure a raw_exec task to not have an
upper memory limit, which is how the driver would behave pre-1.7.

This is done by setting memory_max = -1. The cluster (or node pool) must
have memory oversubscription enabled.

* cl: add cl
  • Loading branch information
shoenig committed Jan 11, 2024
1 parent d502865 commit 80e6ecb
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 7 deletions.
3 changes: 3 additions & 0 deletions .changelog/19670.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
drivers: Enable configuring a raw_exec task to not have an upper memory limit
```
36 changes: 30 additions & 6 deletions drivers/shared/executor/executor_universal_linux.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

//go:build linux

package executor

import (
Expand All @@ -20,6 +22,12 @@ import (
"golang.org/x/sys/unix"
)

// memoryNoLimit is the sentinel value of memory_max that tells the raw_exec
// driver not to enforce any maximum memory limit on the task.
const memoryNoLimit = -1

// setCmdUser takes a user id as a string and looks up the user, and sets the command
// to execute as that user.
func setCmdUser(cmd *exec.Cmd, userid string) error {
Expand Down Expand Up @@ -226,7 +234,11 @@ func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) {
// write memory cgroup files
memHard, memSoft := e.computeMemory(command)
ed := cgroupslib.OpenPath(cgroup)
_ = ed.Write("memory.max", strconv.FormatInt(memHard, 10))
if memHard == memoryNoLimit {
_ = ed.Write("memory.max", "max")
} else {
_ = ed.Write("memory.max", strconv.FormatInt(memHard, 10))
}
if memSoft > 0 {
ed = cgroupslib.OpenPath(cgroup)
_ = ed.Write("memory.low", strconv.FormatInt(memSoft, 10))
Expand Down Expand Up @@ -264,17 +276,29 @@ func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 {
return cpuWeight
}

// mbToBytes converts n megabytes into bytes, where one megabyte is counted as
// 1024 * 1024 bytes.
func mbToBytes(n int64) int64 {
	const bytesPerMB int64 = 1024 * 1024
	return bytesPerMB * n
}

// computeMemory returns the hard and soft memory limits for the task, in that
// order. Both inputs are read from the command's NomadResources.Memory
// (MemoryMaxMB and MemoryMB) and limits are converted to bytes before being
// returned. When MemoryMaxMB equals memoryNoLimit the hard value returned is
// the memoryNoLimit sentinel itself rather than a byte count.
//
// NOTE(review): this listing appears to interleave pre-change and post-change
// lines of a diff (the bare `if memHard <= 0 {`, the `memSoft = 0` assignment,
// and the manual byte conversions after the switch) - confirm against the
// committed file before relying on the exact statement sequence.
func (*UniversalExecutor) computeMemory(command *ExecCommand) (int64, int64) {
mem := command.Resources.NomadResources.Memory
memHard, memSoft := mem.MemoryMaxMB, mem.MemoryMB
if memHard <= 0 {

switch memHard {
case 0:
// typical case where 'memory' is the hard limit; no soft limit is
// reported (the second return value is 0)
memHard = mem.MemoryMB
memSoft = 0
return mbToBytes(memHard), 0
case memoryNoLimit:
// special oversub case where 'memory' is soft limit and there is no
// hard limit - helping re-create old raw_exec behavior
return memoryNoLimit, mbToBytes(memSoft)
default:
// typical oversub case where 'memory' is soft limit and 'memory_max'
// is hard limit
return mbToBytes(memHard), mbToBytes(memSoft)
}
memHardBytes := memHard * 1024 * 1024
memSoftBytes := memSoft * 1024 * 1024
return memHardBytes, memSoftBytes
}

// withNetworkIsolation calls the passed function the network namespace `spec`
Expand Down
68 changes: 68 additions & 0 deletions drivers/shared/executor/executor_universal_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

//go:build linux

package executor

import (
"fmt"
"testing"

"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/shoenig/test/must"
)

// Test_computeMemory checks the hard and soft byte limits that computeMemory
// derives from a task's 'memory' and 'memory_max' values (both given in MB).
func Test_computeMemory(t *testing.T) {
	type testCase struct {
		memory    int64
		memoryMax int64
		expSoft   int64
		expHard   int64
	}

	cases := []testCase{
		// typical case: only 'memory' is set, and it becomes the hard limit
		// with no soft limit
		{memory: 100, memoryMax: 0, expSoft: 0, expHard: mbToBytes(100)},

		// oversub case: 'memory' is the soft limit and 'memory_max' is the
		// hard limit
		{memory: 100, memoryMax: 200, expSoft: mbToBytes(100), expHard: mbToBytes(200)},

		// special oversub case: 'memory_max' is -1, meaning there should be
		// no hard limit (i.e. -1 / max) while 'memory' stays the soft limit
		{memory: 100, memoryMax: memoryNoLimit, expSoft: mbToBytes(100), expHard: memoryNoLimit},
	}

	for _, tc := range cases {
		resources := &drivers.Resources{
			NomadResources: &structs.AllocatedTaskResources{
				Memory: structs.AllocatedMemoryResources{
					MemoryMB:    tc.memory,
					MemoryMaxMB: tc.memoryMax,
				},
			},
		}
		command := &ExecCommand{Resources: resources}

		t.Run(fmt.Sprintf("(%d,%d)", tc.memory, tc.memoryMax), func(t *testing.T) {
			// computeMemory does not use its receiver, so a nil executor is
			// sufficient here
			var executor *UniversalExecutor
			hard, soft := executor.computeMemory(command)
			must.Eq(t, tc.expSoft, soft)
			must.Eq(t, tc.expHard, hard)
		})
	}
}
38 changes: 38 additions & 0 deletions e2e/rawexec/input/oversubmax.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# oversubmax is a batch job with a single raw_exec task that sets
# memory_max = -1 and prints the memory.low and memory.max files of the
# cgroup the task process is running in.
job "oversubmax" {
type = "batch"

# only place the job on Linux nodes
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "group" {
# no reschedule attempts
reschedule {
attempts = 0
unlimited = false
}

# no restart attempts; the task fails instead
restart {
attempts = 0
mode = "fail"
}

task "cat" {
driver = "raw_exec"

# resolve the task's own cgroup path from /proc/self/cgroup (third
# ':'-separated field) and cat its memory.low and memory.max files
config {
command = "bash"
args = ["-c", "cat /sys/fs/cgroup/$(cat /proc/self/cgroup | cut -d':' -f3)/memory.{low,max}"]
}

resources {
cpu = 100
memory = 64
memory_max = -1 # unlimited
}
}
}
}
21 changes: 21 additions & 0 deletions e2e/rawexec/rawexec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package rawexec

import (
"regexp"
"testing"

"github.com/hashicorp/nomad/e2e/v3/cluster3"
Expand All @@ -18,6 +19,8 @@ func TestRawExec(t *testing.T) {
)

t.Run("testOomAdj", testOomAdj)
t.Run("testOversubMemory", testOversubMemory)
t.Run("testOversubMemoryUnlimited", testOversubMemoryUnlimited)
}

func testOomAdj(t *testing.T) {
Expand All @@ -27,3 +30,21 @@ func testOomAdj(t *testing.T) {
logs := job.TaskLogs("group", "cat")
must.StrContains(t, logs.Stdout, "0")
}

// testOversubMemory submits the oversub job and asserts that the output of
// the "cat" task contains 134217728, i.e. 128 * 1024 * 1024 bytes.
func testOversubMemory(t *testing.T) {
	submitted, stop := jobs3.Submit(t, "./input/oversub.hcl")
	t.Cleanup(stop)

	stdout := submitted.TaskLogs("group", "cat").Stdout
	must.StrContains(t, stdout, "134217728") // 128 mb memory_max
}

// testOversubMemoryUnlimited submits the oversubmax job and asserts that the
// "cat" task printed 67108864 (64 * 1024 * 1024 bytes) followed by "max".
func testOversubMemoryUnlimited(t *testing.T) {
	submitted, stop := jobs3.Submit(t, "./input/oversubmax.hcl")
	t.Cleanup(stop)

	// the task prints memory.low first and memory.max second
	expected := regexp.MustCompile(`67108864\s+max`)
	stdout := submitted.TaskLogs("group", "cat").Stdout
	must.RegexMatch(t, expected, stdout)
}
10 changes: 9 additions & 1 deletion nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -2466,6 +2466,12 @@ func (r *Resources) DiskInBytes() int64 {
return int64(r.DiskMB * BytesInMegabyte)
}

// memoryNoLimit is the sentinel value that marks a memory maximum as having
// no upper hard limit.
const memoryNoLimit = -1

func (r *Resources) Validate() error {
var mErr multierror.Error

Expand All @@ -2488,7 +2494,9 @@ func (r *Resources) Validate() error {
}
}

if r.MemoryMaxMB != 0 && r.MemoryMaxMB < r.MemoryMB {
// ensure memory_max is greater than or equal to memory, unless it is set to
// 0 or -1 which are both sentinel values
if (r.MemoryMaxMB != 0 && r.MemoryMaxMB != memoryNoLimit) && r.MemoryMaxMB < r.MemoryMB {
mErr.Errors = append(mErr.Errors, fmt.Errorf("MemoryMaxMB value (%d) should be larger than MemoryMB value (%d)", r.MemoryMaxMB, r.MemoryMB))
}

Expand Down
8 changes: 8 additions & 0 deletions nomad/structs/structs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2139,6 +2139,14 @@ func TestTask_Validate_Resources(t *testing.T) {
},
err: "MemoryMaxMB value (10) should be larger than MemoryMB value (200",
},
{
name: "memory max no limit",
res: &Resources{
CPU: 100,
MemoryMB: 200,
MemoryMaxMB: -1,
},
},
}

for i := range cases {
Expand Down
12 changes: 12 additions & 0 deletions website/content/docs/drivers/raw_exec.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,18 @@ properly. Nomad will not leak any processes if cgroups are being used to
manage the process tree. Cgroups are used on Linux when Nomad is being run with
appropriate privileges, and the cgroup system is mounted.

If the cluster is configured with memory oversubscription enabled, a task using
the `raw_exec` driver can be configured to have no maximum memory limit by
setting `memory_max = -1`.

```hcl
resources {
cpu = 500
memory = 128
memory_max = -1 # no limit
}
```


[hardening]: /nomad/docs/install/production/requirements#user-permissions
[plugin-options]: #plugin-options
Expand Down

0 comments on commit 80e6ecb

Please sign in to comment.