diff --git a/.travis.yml b/.travis.yml index 3c2e39b751b..4df240df638 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,6 +38,8 @@ matrix: - sudo ssh default -t 'cd /vagrant && sudo make localintegration RUNC_USE_SYSTEMD=yes' # same setup but with fs2 driver instead of systemd - sudo ssh default -t 'cd /vagrant && sudo make localintegration' + # rootless + - sudo ssh default -t 'cd /vagrant && sudo make localrootlessintegration' allow_failures: - go: tip diff --git a/Vagrantfile b/Vagrantfile index 444d077077e..22d06091112 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -19,5 +19,7 @@ install podman make golang-go libseccomp-devel bats jq ts run EOF dnf clean all + # Add a user for rootless tests + useradd -u2000 -m -d/home/rootless -s/bin/bash rootless SHELL end diff --git a/libcontainer/cgroups/fs2/create.go b/libcontainer/cgroups/fs2/create.go index 0cc2c1f61f2..c416a970861 100644 --- a/libcontainer/cgroups/fs2/create.go +++ b/libcontainer/cgroups/fs2/create.go @@ -65,7 +65,7 @@ func neededControllers(cgroup *configs.Cgroup) ([]string, error) { // CreateCgroupPath creates cgroupv2 path, enabling all the // needed controllers in the process. -func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) { +func CreateCgroupPath(path string, c *configs.Cgroup, rootless bool) (Err error) { if !strings.HasPrefix(path, UnifiedMountpoint) { return fmt.Errorf("invalid cgroup path %s", path) } @@ -79,6 +79,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) { elements := strings.Split(path, "/") elements = elements[3:] current := "/sys/fs" +elementLoop: for i, e := range elements { current = filepath.Join(current, e) if i > 0 { @@ -95,13 +96,25 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) { } }() } + if rootless { + // Write cgroup.type explicitly. + // Otherwise ENOTSUP may happen. + cgType := filepath.Join(current, "cgroup.type") + _ = ioutil.WriteFile(cgType, []byte("threaded"), 0755) + } } // enable needed controllers if i < len(elements)-1 { file := filepath.Join(current, "cgroup.subtree_control") if err := ioutil.WriteFile(file, []byte(allCtrs), 0755); err != nil { - // XXX: we can enable _some_ controllers doing it one-by one - // instead of erroring out -- does it makes sense to do so? + if rootless { + // try write one by one + for _, ctr := range ctrs { + _ = ioutil.WriteFile(file, []byte(ctr), 0755) + } + // ignore unwritable files + continue elementLoop + } return err } } diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 93a2aa59d1a..6e402611241 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -53,7 +53,10 @@ func (m *manager) getControllers() error { file := filepath.Join(m.dirPath, "cgroup.controllers") data, err := ioutil.ReadFile(file) - if err != nil && !m.rootless { + if err != nil { + if m.rootless && m.config.Path == "" { + return nil + } return err } fields := strings.Fields(string(data)) @@ -66,10 +69,23 @@ func (m *manager) getControllers() error { } func (m *manager) Apply(pid int) error { - if err := CreateCgroupPath(m.dirPath, m.config); err != nil { + if err := CreateCgroupPath(m.dirPath, m.config, m.rootless); err != nil { + // Related tests: + // - "runc create (no limits + no cgrouppath + no permission) succeeds" + // - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error" + // - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if m.rootless { + if m.config.Path == "" { + cl, clErr := neededControllers(m.config) + if clErr == nil && len(cl) == 0 { + return nil + } + return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups") + } + } return err } - if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless { + if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil { return err } return nil @@ -199,7 +215,11 @@ func (m *manager) Set(container *configs.Config) error { } } // devices (since kernel 4.15, pseudo-controller) - if err := setDevices(m.dirPath, container.Cgroups); err != nil { + // + // When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work. + // However, errors from other subsystems are not ignored. + // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if err := setDevices(m.dirPath, container.Cgroups); err != nil && !m.rootless { errs = append(errs, err) } // cpuset (since kernel 5.0) @@ -218,7 +238,7 @@ func (m *manager) Set(container *configs.Config) error { if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil { errs = append(errs, err) } - if len(errs) > 0 && !m.rootless { + if len(errs) > 0 { return errors.Errorf("error while setting cgroup v2: %+v", errs) } m.config = container.Cgroups diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index 130fefb645e..1572b39dadf 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -151,7 +151,7 @@ func (m *unifiedManager) Apply(pid int) error { if err != nil { return err } - if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil { + if err := fs2.CreateCgroupPath(m.path, m.cgroups, m.rootless); err != nil { return err } return nil diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 6abb2ed6b02..31a82336c61 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -95,7 +95,7 @@ EOF runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions [ "$status" -eq 1 ] - [[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]] + [[ ${lines[1]} == *"rootless needs no limits + no cgrouppath when no permission is granted for cgroups"* ]] || [[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]] } @test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" { diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 61ce69376c8..2a5d7363618 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -68,6 +68,10 @@ EOF # check that initial values were properly set check_cgroup_value "cpuset.cpus" 0 + if [[ "$CGROUP_UNIFIED" = "no" ]] && grep memory "$CGROUP_PATH/cgroup.controllers"; then +# This happen on containerized environment because "echo + memory > /sys/fs/cgroup/cgroup.subtree_control" fails with EINVAL + skip "memory controller not available" + fi check_cgroup_value $MEM_LIMIT 33554432 check_cgroup_value $MEM_RESERVE 25165824 check_cgroup_value "pids.max" 20 diff --git a/tests/rootless.sh b/tests/rootless.sh index d6092d042cb..b56af47a987 100755 --- a/tests/rootless.sh +++ b/tests/rootless.sh @@ -82,6 +82,24 @@ function enable_cgroup() { # handling. [[ "$cg" == "cpuset" ]] && chown rootless:rootless "$CGROUP_MOUNT/$cg$CGROUP_PATH/cpuset."{cpus,mems} done + # cgroup v2 + if [[ -e "$CGROUP_MOUNT/cgroup.controllers" ]]; then + # Enable controllers. Some controller (e.g. memory) may fail. + for f in $(cat "$CGROUP_MOUNT/cgroup.controllers"); do echo +$f > "$CGROUP_MOUNT/cgroup.subtree_control"; done + # Create the cgroup. + mkdir -p "$CGROUP_MOUNT/$CGROUP_PATH" + # chown/chmod dir + cgroup.subtree_control + cgroup.procs + parent's cgroup.procs. + # See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#delegation-containment + chown root:rootless "$CGROUP_MOUNT/$CGROUP_PATH" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.subtree_control" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.procs" "$CGROUP_MOUNT/cgroup.procs" + chmod g+rw "$CGROUP_MOUNT/$CGROUP_PATH" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.subtree_control" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.procs" "$CGROUP_MOUNT/cgroup.procs" + # Fix up cgroup.type. + echo threaded > "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.type" + # Make sure cgroup.type doesn't contain "invalid". Otherwise write ops will fail with ENOTSUP. + # See http://man7.org/linux/man-pages/man7/cgroups.7.html + if grep invalid "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.type"; then + exit 1 + fi + fi } function disable_cgroup() {