From 83c7d89a9d22b44ea5368a25c1b0c0bb821b4ee3 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 9 Aug 2024 09:39:04 -0400 Subject: [PATCH] cgroupslib: allow initial controller check with delegated cgroups v2 During Nomad client initialization with cgroups v2, we assert that the required cgroup controllers are available in the root `cgroup.subtree_control` file by idempotently writing to the file. But if Nomad is running with delegated cgroups, this will fail file permission checks even if the subtree control file already has the controllers we need. Update the initialization to first check if the controllers are missing before attempting to write to the file. This allows cgroup delegation so long as the cluster administrator has pre-created a Nomad-owned cgroups tree and set the `Delegate` option in a systemd override. If not, initialization fails in the existing way. Although this is one small step along the way to supporting a rootless Nomad client, running Nomad as non-root is still unsupported. I've intentionally not documented setting up cgroup delegation in this PR, as this PR is insufficient by itself to have a secure and properly working rootless Nomad client. 
Ref: https://github.com/hashicorp/nomad/issues/18211 Ref: https://github.com/hashicorp/nomad/issues/13669 --- .changelog/23803.txt | 3 +++ client/lib/cgroupslib/init.go | 10 ++++++--- client/lib/cgroupslib/mount.go | 35 +++++++++++++++++++++++------ client/lib/cgroupslib/mount_test.go | 11 +++++++-- 4 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 .changelog/23803.txt diff --git a/.changelog/23803.txt b/.changelog/23803.txt new file mode 100644 index 00000000000..c352bea35ac --- /dev/null +++ b/.changelog/23803.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cgroups: Allow clients with delegated cgroups check that required cgroup v2 controllers exist +``` diff --git a/client/lib/cgroupslib/init.go b/client/lib/cgroupslib/init.go index ec089dabd9f..181697e4854 100644 --- a/client/lib/cgroupslib/init.go +++ b/client/lib/cgroupslib/init.go @@ -133,9 +133,13 @@ func Init(log hclog.Logger, cores string) error { // // configuring root cgroup (/sys/fs/cgroup) // - - if err := writeCG(activation, subtreeFile); err != nil { - return fmt.Errorf("failed to create nomad cgroup: %w", err) + // clients with delegated cgroups typically won't be able to write to + // the subtree file, but that's ok so long as the required controllers + // are activated + if !functionalCgroups2(subtreeFile) { + if err := writeCG(activation, subtreeFile); err != nil { + return fmt.Errorf("failed to create nomad cgroup: %w", err) + } } // diff --git a/client/lib/cgroupslib/mount.go b/client/lib/cgroupslib/mount.go index ba18585c867..4042546ab89 100644 --- a/client/lib/cgroupslib/mount.go +++ b/client/lib/cgroupslib/mount.go @@ -11,15 +11,17 @@ import ( "os" "path/filepath" "strings" + "syscall" "github.com/hashicorp/go-set/v2" ) +// detect tries to detect which cgroups version we have by looking at the mount +// and whether Nomad owns the cgroup. +// - For cgroups v1 this requires root. +// - For cgroups v2 we look for root or whether we're the owner of the slice. 
+// - All other cases, including any file permission errors, return OFF. func detect() Mode { - if os.Geteuid() > 0 { - return OFF - } - f, err := os.Open("/proc/self/mountinfo") if err != nil { return OFF @@ -29,14 +31,33 @@ func detect() Mode { }() mode := scan(f) - if mode == CG2 && !functionalCgroups2() { + + if mode == CG1 && os.Geteuid() > 0 { return OFF } + + if mode == CG2 { + if !functionalCgroups2("cgroup.controllers") { + return OFF + } + uid := os.Geteuid() + if uid > 0 { + // allow for cgroup delegation if we own the slice + cgPath := filepathCG("nomad.slice") + fi, err := os.Stat(cgPath) + if err != nil { + return OFF + } + if uid != int(fi.Sys().(*syscall.Stat_t).Uid) { + return OFF + } + } + } + return mode } -func functionalCgroups2() bool { - const controllersFile = "cgroup.controllers" +func functionalCgroups2(controllersFile string) bool { requiredCgroup2Controllers := []string{"cpuset", "cpu", "io", "memory", "pids"} controllersRootPath := filepath.Join(root, controllersFile) diff --git a/client/lib/cgroupslib/mount_test.go b/client/lib/cgroupslib/mount_test.go index b2c547fafb0..a4bbe7740b7 100644 --- a/client/lib/cgroupslib/mount_test.go +++ b/client/lib/cgroupslib/mount_test.go @@ -6,6 +6,7 @@ package cgroupslib import ( + "os" "strings" "testing" @@ -62,6 +63,12 @@ func Test_scan(t *testing.T) { func TestGetMode(t *testing.T) { mode := GetMode() - ok := mode == CG1 || mode == CG2 - must.True(t, ok) + if os.Geteuid() == 0 { + ok := mode == CG1 || mode == CG2 + must.True(t, ok) + } else { + // note: we can't test with a delegated cgroup without overwriting the + // package level root + must.Eq(t, OFF, mode) + } }