Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support running podman under a root v2 cgroup #14308

Merged
merged 1 commit into from
May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3108,7 +3108,7 @@ func (c *Container) getOCICgroupPath() (string, error) {
case c.config.NoCgroups:
return "", nil
case c.config.CgroupsMode == cgroupSplit:
selfCgroup, err := utils.GetOwnCgroup()
selfCgroup, err := utils.GetOwnCgroupDisallowRoot()
if err != nil {
return "", err
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/specgen/generate/validate.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package generate

import (
"io/ioutil"
"os"
"path/filepath"

Expand Down Expand Up @@ -166,6 +167,14 @@ func verifyContainerResourcesCgroupV2(s *specgen.SpecGenerator) ([]string, error
if err != nil {
return warnings, err
}

if own == "/" {
// If running under the root cgroup, try to create or reuse a "probe" cgroup to read memory values
own = "podman_probe"
_ = os.MkdirAll(filepath.Join("/sys/fs/cgroup", own), 0o755)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it fine to leak the cgroup here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seemed reasonable to me to leak since it's one global value for the system, the memory overhead is tiny, and it avoids the race of a cleanup/recreate. IIRC we leak in other cases (libpod_parent), and I assume for similar reasons. Additionally, in the case where it is used, there is likely no systemd running, which would otherwise have created many more cgroups than the single probe group used here. If it's a concern we could overload libpod_parent by pre-creating it, but keeping it isolated eliminates any future conflict.

_ = ioutil.WriteFile("/sys/fs/cgroup/cgroup.subtree_control", []byte("+memory"), 0o644)
}

memoryMax := filepath.Join("/sys/fs/cgroup", own, "memory.max")
memorySwapMax := filepath.Join("/sys/fs/cgroup", own, "memory.swap.max")
_, errMemoryMax := os.Stat(memoryMax)
Expand Down
Empty file added utils/testdata/cgroup.empty
Empty file.
1 change: 1 addition & 0 deletions utils/testdata/cgroup.other
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0::/other
1 change: 1 addition & 0 deletions utils/testdata/cgroup.root
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0::/
14 changes: 9 additions & 5 deletions utils/utils_supported.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ func RunUnderSystemdScope(pid int, slice string, unitName string) error {
return nil
}

func getCgroupProcess(procFile string) (string, error) {
func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
f, err := os.Open(procFile)
if err != nil {
return "", err
}
defer f.Close()

scanner := bufio.NewScanner(f)
cgroup := "/"
cgroup := ""
for scanner.Scan() {
line := scanner.Text()
parts := strings.SplitN(line, ":", 3)
Expand All @@ -87,20 +87,24 @@ func getCgroupProcess(procFile string) (string, error) {
cgroup = parts[2]
}
}
if cgroup == "/" {
if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
return "", errors.Errorf("could not find cgroup mount in %q", procFile)
}
return cgroup, nil
}

// GetOwnCgroup returns the cgroup for the current process.
func GetOwnCgroup() (string, error) {
return getCgroupProcess("/proc/self/cgroup")
return getCgroupProcess("/proc/self/cgroup", true)
}

// GetOwnCgroupDisallowRoot returns the cgroup for the current process, read
// from /proc/self/cgroup. Unlike GetOwnCgroup, it returns an error when the
// process is in the root cgroup ("/").
func GetOwnCgroupDisallowRoot() (string, error) {
return getCgroupProcess("/proc/self/cgroup", false)
}

// GetCgroupProcess returns the cgroup for the specified process.
func GetCgroupProcess(pid int) (string, error) {
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid))
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid), true)
}

// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
Expand Down
26 changes: 26 additions & 0 deletions utils/utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//go:build linux || darwin
// +build linux darwin

package utils

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestCgroupProcess exercises getCgroupProcess against the fixture files in
// testdata, covering the root cgroup, a non-root cgroup, and an empty file.
func TestCgroupProcess(t *testing.T) {
// The root fixture ("0::/") is accepted when allowRoot is true.
val, err := getCgroupProcess("testdata/cgroup.root", true)
assert.Nil(t, err)
assert.Equal(t, "/", val)

// The same root fixture must be rejected when allowRoot is false.
_, err = getCgroupProcess("testdata/cgroup.root", false)
assert.NotNil(t, err)

// A non-root fixture ("0::/other") yields "/other".
val, err = getCgroupProcess("testdata/cgroup.other", true)
assert.Nil(t, err)
assert.Equal(t, "/other", val)

// An empty file contains no cgroup entry, which is an error even when
// allowRoot is true.
_, err = getCgroupProcess("testdata/cgroup.empty", true)
assert.NotNil(t, err)
}
4 changes: 4 additions & 0 deletions utils/utils_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ func GetOwnCgroup() (string, error) {
return "", errors.New("not implemented for windows")
}

// GetOwnCgroupDisallowRoot is not implemented on Windows; it always returns
// an empty string and an error.
func GetOwnCgroupDisallowRoot() (string, error) {
return "", errors.New("not implemented for windows")
}

// GetCgroupProcess is not implemented on Windows; it ignores pid and always
// returns an empty string and an error.
func GetCgroupProcess(pid int) (string, error) {
return "", errors.New("not implemented for windows")
}