Skip to content

Commit

Permalink
Disable rootless mode except RootlessCgMgr when executed as the root …
Browse files Browse the repository at this point in the history
…in userns

This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.

`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.

`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)

When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.

This PR does not have any impact on CLI flags and `state.json`.

Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
  For runc-in-userns, `runc spec`  without `--rootless` should work, when sufficient numbers of
  UID/GID are mapped.

Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
  (`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
  This allows unprivileged users to allow execute runc as the root in userns, without mounting writable `/run/runc`.

Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.

Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed Aug 24, 2018
1 parent 459bfae commit 3334b2e
Show file tree
Hide file tree
Showing 21 changed files with 211 additions and 180 deletions.
11 changes: 5 additions & 6 deletions checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ package main

import (
"fmt"
"os"
"strconv"
"strings"

"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"

"golang.org/x/sys/unix"
Expand Down Expand Up @@ -44,12 +47,8 @@ checkpointed.`,
return err
}
// XXX: Currently this is untested with rootless containers.
rootless, err := isRootless(context)
if err != nil {
return err
}
if rootless {
return fmt.Errorf("runc checkpoint requires root")
if os.Geteuid() != 0 || system.RunningInUserNS() {
logrus.Warn("runc checkpoint is untested with rootless containers")
}

container, err := getContainer(context)
Expand Down
10 changes: 8 additions & 2 deletions libcontainer/cgroups/fs/apply_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type subsystem interface {
type Manager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Rootless bool
Rootless bool // ignore permission-related errors
Paths map[string]string
}

Expand Down Expand Up @@ -174,7 +174,7 @@ func (m *Manager) Apply(pid int) (err error) {
m.Paths[sys.Name()] = p

if err := sys.Apply(d); err != nil {
// In the case of rootless, where an explicit cgroup path hasn't
// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
// been set, we don't bail on error in case of permission problems.
// Cases where limits have been set (and we couldn't create our own
// cgroup) are handled by Set.
Expand Down Expand Up @@ -236,6 +236,12 @@ func (m *Manager) Set(container *configs.Config) error {
for _, sys := range subsystems {
path := paths[sys.Name()]
if err := sys.Set(path, container.Cgroups); err != nil {
if m.Rootless && sys.Name() == "devices" {
continue
}
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
// However, errors from other subsystems are not ignored.
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
if path == "" {
// We never created a path for this cgroup, so we cannot set
// limits for it (though we have already tried at this point).
Expand Down
13 changes: 10 additions & 3 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,19 @@ type Config struct {
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`

// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`

// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
// to limit the resources (e.g., L3 cache) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`

// RootlessEUID is set when the runc was launched with non-zero EUID.
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
// When RootlessEUID is set, runc creates a new userns for the container.
// (config.json needs to contain userns settings)
RootlessEUID bool `json:"rootless_euid,omitempty"`

// RootlessCgroups is set when unlikely to have the full access to cgroups.
// When RootlessCgroups is set, cgroups errors are ignored.
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}

type Hooks struct {
Expand Down
53 changes: 13 additions & 40 deletions libcontainer/configs/validate/rootless.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,18 @@ package validate

import (
"fmt"
"os"
"reflect"
"strings"

"github.com/opencontainers/runc/libcontainer/configs"
)

var (
geteuid = os.Geteuid
getegid = os.Getegid
)

func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
// rootlessEUID makes sure that the config can be applied when runc
// is being executed as a non-root user (euid != 0) in the current user namespace.
func (v *ConfigValidator) rootlessEUID(config *configs.Config) error {
if err := rootlessEUIDMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
if err := rootlessEUIDMount(config); err != nil {
return err
}

Expand All @@ -38,46 +33,24 @@ func hasIDMapping(id int, mappings []configs.IDMap) bool {
return false
}

func rootlessMappings(config *configs.Config) error {
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
if len(config.UidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}
if len(config.GidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one GID mapping")
}
func rootlessEUIDMappings(config *configs.Config) error {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless container requires user namespaces")
}

return nil
}

// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
if len(config.UidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}

// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())

// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
if len(config.GidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one GID mapping")
}

return nil
}

// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
func rootlessEUIDMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
Expand Down
44 changes: 20 additions & 24 deletions libcontainer/configs/validate/rootless_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,82 +6,78 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)

func init() {
geteuid = func() int { return 1337 }
getegid = func() int { return 7331 }
}

func rootlessConfig() *configs.Config {
func rootlessEUIDConfig() *configs.Config {
return &configs.Config{
Rootfs: "/var",
Rootless: true,
Rootfs: "/var",
RootlessEUID: true,
RootlessCgroups: true,
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUSER},
},
),
UidMappings: []configs.IDMap{
{
HostID: geteuid(),
HostID: 1337,
ContainerID: 0,
Size: 1,
},
},
GidMappings: []configs.IDMap{
{
HostID: getegid(),
HostID: 7331,
ContainerID: 0,
Size: 1,
},
},
}
}

func TestValidateRootless(t *testing.T) {
func TestValidateRootlessEUID(t *testing.T) {
validator := New()

config := rootlessConfig()
config := rootlessEUIDConfig()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}

/* rootlessMappings() */
/* rootlessEUIDMappings */

func TestValidateRootlessUserns(t *testing.T) {
func TestValidateRootlessEUIDUserns(t *testing.T) {
validator := New()

config := rootlessConfig()
config := rootlessEUIDConfig()
config.Namespaces = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if user namespaces not set")
}
}

func TestValidateRootlessMappingUid(t *testing.T) {
func TestValidateRootlessEUIDMappingUid(t *testing.T) {
validator := New()

config := rootlessConfig()
config := rootlessEUIDConfig()
config.UidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no uid mappings provided")
}
}

func TestValidateRootlessMappingGid(t *testing.T) {
func TestValidateNonZeroEUIDMappingGid(t *testing.T) {
validator := New()

config := rootlessConfig()
config := rootlessEUIDConfig()
config.GidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no gid mappings provided")
}
}

/* rootlessMount() */
/* rootlessEUIDMount() */

func TestValidateRootlessMountUid(t *testing.T) {
config := rootlessConfig()
func TestValidateRootlessEUIDMountUid(t *testing.T) {
config := rootlessEUIDConfig()
validator := New()

config.Mounts = []*configs.Mount{
Expand Down Expand Up @@ -119,8 +115,8 @@ func TestValidateRootlessMountUid(t *testing.T) {
}
}

func TestValidateRootlessMountGid(t *testing.T) {
config := rootlessConfig()
func TestValidateRootlessEUIDMountGid(t *testing.T) {
config := rootlessEUIDConfig()
validator := New()

config.Mounts = []*configs.Mount{
Expand Down
4 changes: 2 additions & 2 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.intelrdt(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
if config.RootlessEUID {
if err := v.rootlessEUID(config); err != nil {
return err
}
}
Expand Down
34 changes: 17 additions & 17 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ type State struct {

// Platform specific fields below here

// Specifies if the container was started under the rootless mode.
// Specified if the container was started under the rootless mode.
// Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups
Rootless bool `json:"rootless"`

// Path to all the cgroups setup for a container. Key is cgroup subsystem name
Expand Down Expand Up @@ -545,7 +546,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
PassedFilesCount: len(process.ExtraFiles),
ContainerId: c.ID(),
NoNewPrivileges: c.config.NoNewPrivileges,
Rootless: c.config.Rootless,
RootlessEUID: c.config.RootlessEUID,
RootlessCgroups: c.config.RootlessCgroups,
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
Expand Down Expand Up @@ -613,16 +615,16 @@ func (c *linuxContainer) Resume() error {

func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
// XXX(cyphar): This requires cgroups.
if c.config.Rootless {
return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
if c.config.RootlessCgroups {
logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
}
return notifyOnOOM(c.cgroupManager.GetPaths())
}

func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
// XXX(cyphar): This requires cgroups.
if c.config.Rootless {
return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
if c.config.RootlessCgroups {
logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
}
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
}
Expand Down Expand Up @@ -866,12 +868,11 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()

// Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
// (CLI prints a warning)
// TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
// support for doing unprivileged dumps, but the setup of
// rootless containers might make this complicated.
if c.config.Rootless {
return fmt.Errorf("cannot checkpoint a rootless container")
}

// criu 1.5.2 => 10502
if err := c.checkCriuVersion(10502); err != nil {
Expand Down Expand Up @@ -1105,11 +1106,10 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {

var extraFiles []*os.File

// Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
// (CLI prints a warning)
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
// support for unprivileged restore at the moment.
if c.config.Rootless {
return fmt.Errorf("cannot restore a rootless container")
}

// criu 1.5.2 => 10502
if err := c.checkCriuVersion(10502); err != nil {
Expand Down Expand Up @@ -1717,7 +1717,7 @@ func (c *linuxContainer) currentState() (*State, error) {
InitProcessStartTime: startTime,
Created: c.created,
},
Rootless: c.config.Rootless,
Rootless: c.config.RootlessEUID && c.config.RootlessCgroups,
CgroupPaths: c.cgroupManager.GetPaths(),
IntelRdtPath: intelRdtPath,
NamespacePaths: make(map[configs.NamespaceType]string),
Expand Down Expand Up @@ -1818,7 +1818,7 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
if !joinExistingUser {
// write uid mappings
if len(c.config.UidMappings) > 0 {
if c.config.Rootless && c.newuidmapPath != "" {
if c.config.RootlessEUID && c.newuidmapPath != "" {
r.AddData(&Bytemsg{
Type: UidmapPathAttr,
Value: []byte(c.newuidmapPath),
Expand All @@ -1844,7 +1844,7 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Type: GidmapAttr,
Value: b,
})
if c.config.Rootless && c.newgidmapPath != "" {
if c.config.RootlessEUID && c.newgidmapPath != "" {
r.AddData(&Bytemsg{
Type: GidmapPathAttr,
Value: []byte(c.newgidmapPath),
Expand All @@ -1869,8 +1869,8 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na

// write rootless
r.AddData(&Boolmsg{
Type: RootlessAttr,
Value: c.config.Rootless,
Type: RootlessEUIDAttr,
Value: c.config.RootlessEUID,
})

return bytes.NewReader(r.Serialize()), nil
Expand Down
Loading

0 comments on commit 3334b2e

Please sign in to comment.