From 06f789cf26774dd64cb2a9cc0b3c6a6ff832733b Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 5 Jul 2018 15:28:21 +0900 Subject: [PATCH] Disable rootless mode except RootlessCgMgr when executed as the root in userns This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and `RootlessCgroups bool`, so as to make "runc-in-userns" more compatible with "rootful" runc. `RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in the current user namespace. `RootlessEUID` is almost identical to the former `Rootless` except for cgroups handling. `RootlessCgroups` denotes that runc is unlikely to have full access to cgroups. `RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace. Otherwise `RootlessCgroups` is set to true. (Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well) When runc is executed as the root (euid == 0) in a user namespace (e.g. by Docker-in-LXD, Podman, Usernetes), `RootlessEUID` is set to false but `RootlessCgroups` is set to true. So, "runc-in-userns" behaves almost the same as "rootful" runc except that cgroups errors are ignored. This PR does not have any impact on CLI flags and `state.json`. Note about CLI: * Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`. * Now `runc spec --rootless` is only required when `RootlessEUID` is set to true. For runc-in-userns, `runc spec` without `--rootless` should work, when a sufficient number of UIDs/GIDs are mapped. Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`): * `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility. (`/run/runc` is used) * If runc is executed as the root (euid == 0) in a user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`. 
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`. Note about `state.json`: * `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`. Signed-off-by: Akihiro Suda --- checkpoint.go | 11 ++-- libcontainer/cgroups/fs/apply_raw.go | 10 +++- libcontainer/configs/config.go | 13 ++++- libcontainer/configs/validate/rootless.go | 53 +++++------------ .../configs/validate/rootless_test.go | 44 +++++++------- libcontainer/configs/validate/validator.go | 4 +- libcontainer/container_linux.go | 51 +++++++++-------- libcontainer/factory_linux.go | 2 +- libcontainer/init_linux.go | 7 ++- libcontainer/message_linux.go | 20 +++---- libcontainer/nsenter/nsexec.c | 12 ++-- libcontainer/process_linux.go | 21 +++---- libcontainer/specconv/example.go | 2 +- libcontainer/specconv/spec_linux.go | 33 +++++------ libcontainer/specconv/spec_linux_test.go | 5 +- main.go | 11 +--- pause.go | 19 ++++++- ps.go | 8 +-- restore.go | 11 ++-- rootless_linux.go | 57 +++++++++++++++++++ spec.go | 6 +- utils_linux.go | 29 ++-------- 22 files changed, 231 insertions(+), 198 deletions(-) create mode 100644 rootless_linux.go diff --git a/checkpoint.go b/checkpoint.go index 67c767a4f46..fb086186a8f 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -4,11 +4,14 @@ package main import ( "fmt" + "os" "strconv" "strings" "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" "github.com/urfave/cli" "golang.org/x/sys/unix" @@ -44,12 +47,8 @@ checkpointed.`, return err } // XXX: Currently this is untested with rootless containers. 
- rootless, err := isRootless(context) - if err != nil { - return err - } - if rootless { - return fmt.Errorf("runc checkpoint requires root") + if os.Geteuid() != 0 || system.RunningInUserNS() { + logrus.Warn("runc checkpoint is untested with rootless containers") } container, err := getContainer(context) diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go index 09b96de5d1f..74bfcf94f0a 100644 --- a/libcontainer/cgroups/fs/apply_raw.go +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -65,7 +65,7 @@ type subsystem interface { type Manager struct { mu sync.Mutex Cgroups *configs.Cgroup - Rootless bool + Rootless bool // ignore permission-related errors Paths map[string]string } @@ -174,7 +174,7 @@ func (m *Manager) Apply(pid int) (err error) { m.Paths[sys.Name()] = p if err := sys.Apply(d); err != nil { - // In the case of rootless, where an explicit cgroup path hasn't + // In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't // been set, we don't bail on error in case of permission problems. // Cases where limits have been set (and we couldn't create our own // cgroup) are handled by Set. @@ -236,6 +236,12 @@ func (m *Manager) Set(container *configs.Config) error { for _, sys := range subsystems { path := paths[sys.Name()] if err := sys.Set(path, container.Cgroups); err != nil { + if m.Rootless && sys.Name() == "devices" { + continue + } + // When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work. + // However, errors from other subsystems are not ignored. + // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" if path == "" { // We never created a path for this cgroup, so we cannot set // limits for it (though we have already tried at this point). 
diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index b1c4762fe20..f6d1f134dbf 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -186,12 +186,19 @@ type Config struct { // callers keyring in this case. NoNewKeyring bool `json:"no_new_keyring"` - // Rootless specifies whether the container is a rootless container. - Rootless bool `json:"rootless"` - // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into // to limit the resources (e.g., L3 cache) the container has available IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` + + // RootlessEUID is set when the runc was launched with non-zero EUID. + // Note that RootlessEUID is set to false when launched with EUID=0 in userns. + // When RootlessEUID is set, runc creates a new userns for the container. + // (config.json needs to contain userns settings) + RootlessEUID bool `json:"rootless_euid,omitempty"` + + // RootlessCgroups is set when unlikely to have the full access to cgroups. + // When RootlessCgroups is set, cgroups errors are ignored. + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` } type Hooks struct { diff --git a/libcontainer/configs/validate/rootless.go b/libcontainer/configs/validate/rootless.go index 8c3954ce251..393d9e81ee9 100644 --- a/libcontainer/configs/validate/rootless.go +++ b/libcontainer/configs/validate/rootless.go @@ -2,23 +2,18 @@ package validate import ( "fmt" - "os" - "reflect" "strings" "github.com/opencontainers/runc/libcontainer/configs" ) -var ( - geteuid = os.Geteuid - getegid = os.Getegid -) - -func (v *ConfigValidator) rootless(config *configs.Config) error { - if err := rootlessMappings(config); err != nil { +// rootlessEUID makes sure that the config can be applied when runc +// is being executed as a non-root user (euid != 0) in the current user namespace. 
+func (v *ConfigValidator) rootlessEUID(config *configs.Config) error { + if err := rootlessEUIDMappings(config); err != nil { return err } - if err := rootlessMount(config); err != nil { + if err := rootlessEUIDMount(config); err != nil { return err } @@ -38,46 +33,24 @@ func hasIDMapping(id int, mappings []configs.IDMap) bool { return false } -func rootlessMappings(config *configs.Config) error { - if euid := geteuid(); euid != 0 { - if !config.Namespaces.Contains(configs.NEWUSER) { - return fmt.Errorf("rootless containers require user namespaces") - } - if len(config.UidMappings) == 0 { - return fmt.Errorf("rootless containers requires at least one UID mapping") - } - if len(config.GidMappings) == 0 { - return fmt.Errorf("rootless containers requires at least one GID mapping") - } +func rootlessEUIDMappings(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWUSER) { + return fmt.Errorf("rootless container requires user namespaces") } - return nil -} - -// cgroup verifies that the user isn't trying to set any cgroup limits or paths. -func rootlessCgroup(config *configs.Config) error { - // Nothing set at all. - if config.Cgroups == nil || config.Cgroups.Resources == nil { - return nil + if len(config.UidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one UID mapping") } - - // Used for comparing to the zero value. - left := reflect.ValueOf(*config.Cgroups.Resources) - right := reflect.Zero(left.Type()) - - // This is all we need to do, since specconv won't add cgroup options in - // rootless mode. - if !reflect.DeepEqual(left.Interface(), right.Interface()) { - return fmt.Errorf("cannot specify resource limits in rootless container") + if len(config.GidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one GID mapping") } - return nil } // mount verifies that the user isn't trying to set up any mounts they don't have // the rights to do. 
In addition, it makes sure that no mount has a `uid=` or // `gid=` option that doesn't resolve to root. -func rootlessMount(config *configs.Config) error { +func rootlessEUIDMount(config *configs.Config) error { // XXX: We could whitelist allowed devices at this point, but I'm not // convinced that's a good idea. The kernel is the best arbiter of // access control. diff --git a/libcontainer/configs/validate/rootless_test.go b/libcontainer/configs/validate/rootless_test.go index bfb3eef39b5..59d15575dd7 100644 --- a/libcontainer/configs/validate/rootless_test.go +++ b/libcontainer/configs/validate/rootless_test.go @@ -6,15 +6,11 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) -func init() { - geteuid = func() int { return 1337 } - getegid = func() int { return 7331 } -} - -func rootlessConfig() *configs.Config { +func rootlessEUIDConfig() *configs.Config { return &configs.Config{ - Rootfs: "/var", - Rootless: true, + Rootfs: "/var", + RootlessEUID: true, + RootlessCgroups: true, Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWUSER}, @@ -22,14 +18,14 @@ func rootlessConfig() *configs.Config { ), UidMappings: []configs.IDMap{ { - HostID: geteuid(), + HostID: 1337, ContainerID: 0, Size: 1, }, }, GidMappings: []configs.IDMap{ { - HostID: getegid(), + HostID: 7331, ContainerID: 0, Size: 1, }, @@ -37,51 +33,51 @@ func rootlessConfig() *configs.Config { } } -func TestValidateRootless(t *testing.T) { +func TestValidateRootlessEUID(t *testing.T) { validator := New() - config := rootlessConfig() + config := rootlessEUIDConfig() if err := validator.Validate(config); err != nil { t.Errorf("Expected error to not occur: %+v", err) } } -/* rootlessMappings() */ +/* rootlessEUIDMappings */ -func TestValidateRootlessUserns(t *testing.T) { +func TestValidateRootlessEUIDUserns(t *testing.T) { validator := New() - config := rootlessConfig() + config := rootlessEUIDConfig() config.Namespaces = nil if err := validator.Validate(config); err == 
nil { t.Errorf("Expected error to occur if user namespaces not set") } } -func TestValidateRootlessMappingUid(t *testing.T) { +func TestValidateRootlessEUIDMappingUid(t *testing.T) { validator := New() - config := rootlessConfig() + config := rootlessEUIDConfig() config.UidMappings = nil if err := validator.Validate(config); err == nil { t.Errorf("Expected error to occur if no uid mappings provided") } } -func TestValidateRootlessMappingGid(t *testing.T) { +func TestValidateNonZeroEUIDMappingGid(t *testing.T) { validator := New() - config := rootlessConfig() + config := rootlessEUIDConfig() config.GidMappings = nil if err := validator.Validate(config); err == nil { t.Errorf("Expected error to occur if no gid mappings provided") } } -/* rootlessMount() */ +/* rootlessEUIDMount() */ -func TestValidateRootlessMountUid(t *testing.T) { - config := rootlessConfig() +func TestValidateRootlessEUIDMountUid(t *testing.T) { + config := rootlessEUIDConfig() validator := New() config.Mounts = []*configs.Mount{ @@ -119,8 +115,8 @@ func TestValidateRootlessMountUid(t *testing.T) { } } -func TestValidateRootlessMountGid(t *testing.T) { - config := rootlessConfig() +func TestValidateRootlessEUIDMountGid(t *testing.T) { + config := rootlessEUIDConfig() validator := New() config.Mounts = []*configs.Mount{ diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index b36e553d207..fc7b01e4356 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -44,8 +44,8 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { if err := v.intelrdt(config); err != nil { return err } - if config.Rootless { - if err := v.rootless(config); err != nil { + if config.RootlessEUID { + if err := v.rootlessEUID(config); err != nil { return err } } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 9f551fd096f..3c1bbcc7f48 100644 --- a/libcontainer/container_linux.go 
+++ b/libcontainer/container_linux.go @@ -59,7 +59,8 @@ type State struct { // Platform specific fields below here - // Specifies if the container was started under the rootless mode. + // Specified if the container was started under the rootless mode. + // Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups Rootless bool `json:"rootless"` // Path to all the cgroups setup for a container. Key is cgroup subsystem name @@ -522,14 +523,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, return nil, err } return &setnsProcess{ - cmd: cmd, - cgroupPaths: c.cgroupManager.GetPaths(), - intelRdtPath: state.IntelRdtPath, - childPipe: childPipe, - parentPipe: parentPipe, - config: c.newInitConfig(p), - process: p, - bootstrapData: data, + cmd: cmd, + cgroupPaths: c.cgroupManager.GetPaths(), + rootlessCgroups: c.config.RootlessCgroups, + intelRdtPath: state.IntelRdtPath, + childPipe: childPipe, + parentPipe: parentPipe, + config: c.newInitConfig(p), + process: p, + bootstrapData: data, }, nil } @@ -545,7 +547,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { PassedFilesCount: len(process.ExtraFiles), ContainerId: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, - Rootless: c.config.Rootless, + RootlessEUID: c.config.RootlessEUID, + RootlessCgroups: c.config.RootlessCgroups, AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, @@ -613,16 +616,16 @@ func (c *linuxContainer) Resume() error { func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { // XXX(cyphar): This requires cgroups. 
- if c.config.Rootless { - return nil, fmt.Errorf("cannot get OOM notifications from rootless container") + if c.config.RootlessCgroups { + logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups") } return notifyOnOOM(c.cgroupManager.GetPaths()) } func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { // XXX(cyphar): This requires cgroups. - if c.config.Rootless { - return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container") + if c.config.RootlessCgroups { + logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups") } return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) } @@ -866,12 +869,11 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() + // Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). + // (CLI prints a warning) // TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has // support for doing unprivileged dumps, but the setup of // rootless containers might make this complicated. - if c.config.Rootless { - return fmt.Errorf("cannot checkpoint a rootless container") - } // criu 1.5.2 => 10502 if err := c.checkCriuVersion(10502); err != nil { @@ -1105,11 +1107,10 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { var extraFiles []*os.File + // Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). + // (CLI prints a warning) // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have // support for unprivileged restore at the moment. 
- if c.config.Rootless { - return fmt.Errorf("cannot restore a rootless container") - } // criu 1.5.2 => 10502 if err := c.checkCriuVersion(10502); err != nil { @@ -1717,7 +1718,7 @@ func (c *linuxContainer) currentState() (*State, error) { InitProcessStartTime: startTime, Created: c.created, }, - Rootless: c.config.Rootless, + Rootless: c.config.RootlessEUID && c.config.RootlessCgroups, CgroupPaths: c.cgroupManager.GetPaths(), IntelRdtPath: intelRdtPath, NamespacePaths: make(map[configs.NamespaceType]string), @@ -1818,7 +1819,7 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { - if c.config.Rootless && c.newuidmapPath != "" { + if c.config.RootlessEUID && c.newuidmapPath != "" { r.AddData(&Bytemsg{ Type: UidmapPathAttr, Value: []byte(c.newuidmapPath), @@ -1844,7 +1845,7 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Type: GidmapAttr, Value: b, }) - if c.config.Rootless && c.newgidmapPath != "" { + if c.config.RootlessEUID && c.newgidmapPath != "" { r.AddData(&Bytemsg{ Type: GidmapPathAttr, Value: []byte(c.newgidmapPath), @@ -1869,8 +1870,8 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na // write rootless r.AddData(&Boolmsg{ - Type: RootlessAttr, - Value: c.config.Rootless, + Type: RootlessEUIDAttr, + Value: c.config.RootlessEUID, }) return bytes.NewReader(r.Serialize()), nil diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 612ccd74b98..4f65b91857c 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -77,7 +77,7 @@ func Cgroupfs(l *LinuxFactory) error { // containers that use the native cgroups filesystem implementation to create // and manage cgroups. 
The difference between RootlessCgroupfs and Cgroupfs is // that RootlessCgroupfs can transparently handle permission errors that occur -// during rootless container setup (while still allowing cgroup usage if +// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if // they've been set up properly). func RootlessCgroupfs(l *LinuxFactory) error { l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index b19f00ed969..7743d4a4fde 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -65,7 +65,8 @@ type initConfig struct { CreateConsole bool `json:"create_console"` ConsoleWidth uint16 `json:"console_width"` ConsoleHeight uint16 `json:"console_height"` - Rootless bool `json:"rootless"` + RootlessEUID bool `json:"rootless_euid,omitempty"` + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` } type initer interface { @@ -283,7 +284,7 @@ func setupUser(config *initConfig) error { return fmt.Errorf("cannot set gid to unmapped user in user namespace") } - if config.Rootless { + if config.RootlessEUID { // We cannot set any additional groups in a rootless container and thus // we bail if the user asked us to do so. TODO: We currently can't do // this check earlier, but if libcontainer.Process.User was typesafe @@ -303,7 +304,7 @@ func setupUser(config *initConfig) error { // There's nothing we can do about /etc/group entries, so we silently // ignore setting groups here (since the user didn't explicitly ask us to // set the group). - if !config.Rootless { + if !config.RootlessEUID { suppGroups := append(execUser.Sgids, addGroups...) 
if err := unix.Setgroups(suppGroups); err != nil { return err diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index ed7f986df8d..1d4f5033aa2 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -10,16 +10,16 @@ import ( // list of known message types we want to send to bootstrap program // The number is randomly chosen to not conflict with known netlink types const ( - InitMsg uint16 = 62000 - CloneFlagsAttr uint16 = 27281 - NsPathsAttr uint16 = 27282 - UidmapAttr uint16 = 27283 - GidmapAttr uint16 = 27284 - SetgroupAttr uint16 = 27285 - OomScoreAdjAttr uint16 = 27286 - RootlessAttr uint16 = 27287 - UidmapPathAttr uint16 = 27288 - GidmapPathAttr uint16 = 27289 + InitMsg uint16 = 62000 + CloneFlagsAttr uint16 = 27281 + NsPathsAttr uint16 = 27282 + UidmapAttr uint16 = 27283 + GidmapAttr uint16 = 27284 + SetgroupAttr uint16 = 27285 + OomScoreAdjAttr uint16 = 27286 + RootlessEUIDAttr uint16 = 27287 + UidmapPathAttr uint16 = 27288 + GidmapPathAttr uint16 = 27289 ) type Int32msg struct { diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index a4cd1399d9e..bc7461fa3de 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -82,7 +82,7 @@ struct nlconfig_t { uint8_t is_setgroup; /* Rootless container settings. 
*/ - uint8_t is_rootless; + uint8_t is_rootless_euid; /* boolean */ char *uidmappath; size_t uidmappath_len; char *gidmappath; @@ -100,7 +100,7 @@ struct nlconfig_t { #define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 -#define ROOTLESS_ATTR 27287 +#define ROOTLESS_EUID_ATTR 27287 #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 @@ -419,8 +419,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; - case ROOTLESS_ATTR: - config->is_rootless = readint8(current); + case ROOTLESS_EUID_ATTR: + config->is_rootless_euid = readint8(current); /* boolean */ break; case OOM_SCORE_ADJ_ATTR: config->oom_score_adj = current; @@ -687,7 +687,7 @@ void nsexec(void) * newuidmap/newgidmap shall be used. */ - if (config.is_rootless && !config.is_setgroup) + if (config.is_rootless_euid && !config.is_setgroup) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ @@ -953,7 +953,7 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (!config.is_rootless && config.is_setgroup) { + if (!config.is_rootless_euid && config.is_setgroup) { if (setgroups(0, NULL) < 0) bail("setgroups failed"); } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 58980b0594e..78a13fcde63 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -46,15 +46,16 @@ type parentProcess interface { } type setnsProcess struct { - cmd *exec.Cmd - parentPipe *os.File - childPipe *os.File - cgroupPaths map[string]string - intelRdtPath string - config *initConfig - fds []string - process *Process - bootstrapData io.Reader + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + cgroupPaths map[string]string + rootlessCgroups bool + intelRdtPath string + config *initConfig + fds []string + process *Process + bootstrapData io.Reader } func (p *setnsProcess) startTime() (uint64, error) { @@ -86,7 +87,7 @@ func (p 
*setnsProcess) start() (err error) { return newSystemErrorWithCause(err, "executing setns process") } if len(p.cgroupPaths) > 0 { - if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { + if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go index c113b337f34..827ca9e781d 100644 --- a/libcontainer/specconv/example.go +++ b/libcontainer/specconv/example.go @@ -156,7 +156,7 @@ func Example() *specs.Spec { } // ToRootless converts the given spec file into one that should work with -// rootless containers, by removing incompatible options and adding others that +// rootless containers (euid != 0), by removing incompatible options and adding others that // are needed. func ToRootless(spec *specs.Spec) { var namespaces []specs.LinuxNamespace diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 7951497efa3..089b9a740e3 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -148,7 +148,8 @@ type CreateOpts struct { NoPivotRoot bool NoNewKeyring bool Spec *specs.Spec - Rootless bool + RootlessEUID bool + RootlessCgroups bool } // CreateLibcontainerConfig creates a new libcontainer configuration from a @@ -176,13 +177,14 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { labels = append(labels, fmt.Sprintf("%s=%s", k, v)) } config := &configs.Config{ - Rootfs: rootfsPath, - NoPivotRoot: opts.NoPivotRoot, - Readonlyfs: spec.Root.Readonly, - Hostname: spec.Hostname, - Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), - NoNewKeyring: opts.NoNewKeyring, - Rootless: opts.Rootless, + Rootfs: rootfsPath, + NoPivotRoot: opts.NoPivotRoot, + Readonlyfs: spec.Root.Readonly, + Hostname: spec.Hostname, + Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), + NoNewKeyring: 
opts.NoNewKeyring, + RootlessEUID: opts.RootlessEUID, + RootlessCgroups: opts.RootlessCgroups, } exists := false @@ -332,12 +334,9 @@ func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { c.Path = myCgroupPath } - // In rootless containers, any attempt to make cgroup changes will fail. - // libcontainer will validate this and we shouldn't add any cgroup options - // the user didn't specify. - if !opts.Rootless { - c.Resources.AllowedDevices = allowedDevices - } + // In rootless containers, any attempt to make cgroup changes is likely to fail. + // libcontainer will validate this but ignores the error. + c.Resources.AllowedDevices = allowedDevices if spec.Linux != nil { r := spec.Linux.Resources if r == nil { @@ -490,10 +489,8 @@ func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { } } } - if !opts.Rootless { - // append the default allowed devices to the end of the list - c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) - } + // append the default allowed devices to the end of the list + c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) 
return c, nil } diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index 98ffdbf989d..96a8480f18e 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -417,7 +417,7 @@ func TestDupNamespaces(t *testing.T) { } } -func TestRootlessSpecconvValidate(t *testing.T) { +func TestNonZeroEUIDCompatibleSpecconvValidate(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } @@ -430,7 +430,8 @@ func TestRootlessSpecconvValidate(t *testing.T) { CgroupName: "ContainerID", UseSystemdCgroup: false, Spec: spec, - Rootless: true, + RootlessEUID: true, + RootlessCgroups: true, } config, err := CreateLibcontainerConfig(opts) diff --git a/main.go b/main.go index 278399a560d..072447d17da 100644 --- a/main.go +++ b/main.go @@ -63,13 +63,8 @@ func main() { app.Version = strings.Join(v, "\n") root := "/run/runc" - rootless, err := isRootless(nil) - if err != nil { - fatal(err) - } - if rootless { - runtimeDir := os.Getenv("XDG_RUNTIME_DIR") - if runtimeDir != "" { + if shouldHonorXDGRuntimeDir() { + if runtimeDir := os.Getenv("XDG_RUNTIME_DIR"); runtimeDir != "" { root = runtimeDir + "/runc" // According to the XDG specification, we need to set anything in // XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get @@ -115,7 +110,7 @@ func main() { cli.StringFlag{ Name: "rootless", Value: "auto", - Usage: "enable rootless mode ('true', 'false', or 'auto')", + Usage: "ignore cgroup permission errors ('true', 'false', or 'auto')", }, } app.Commands = []cli.Command{ diff --git a/pause.go b/pause.go index 3b98dbbbf3a..4e3f8c9a5fe 100644 --- a/pause.go +++ b/pause.go @@ -2,7 +2,10 @@ package main -import "github.com/urfave/cli" +import ( + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) var pauseCommand = cli.Command{ Name: "pause", @@ -18,6 +21,13 @@ Use runc list to identiy instances of containers and their current 
status.`, if err := checkArgs(context, 1, exactArgs); err != nil { return err } + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return err + } + if rootlessCg { + logrus.Warnf("runc pause may fail if you don't have the full access to cgroups") + } container, err := getContainer(context) if err != nil { return err @@ -44,6 +54,13 @@ Use runc list to identiy instances of containers and their current status.`, if err := checkArgs(context, 1, exactArgs); err != nil { return err } + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return err + } + if rootlessCg { + logrus.Warn("runc resume may fail if you don't have the full access to cgroups") + } container, err := getContainer(context) if err != nil { return err diff --git a/ps.go b/ps.go index eec9d5f569c..e7f635f4f49 100644 --- a/ps.go +++ b/ps.go @@ -10,6 +10,7 @@ import ( "strconv" "strings" + "github.com/sirupsen/logrus" "github.com/urfave/cli" ) @@ -28,13 +29,12 @@ var psCommand = cli.Command{ if err := checkArgs(context, 1, minArgs); err != nil { return err } - // XXX: Currently not supported with rootless containers. - rootless, err := isRootless(context) + rootlessCg, err := shouldUseRootlessCgroupManager(context) if err != nil { return err } - if rootless { - return fmt.Errorf("runc ps requires root") + if rootlessCg { + logrus.Warn("runc ps may fail if you don't have the full access to cgroups") } container, err := getContainer(context) diff --git a/restore.go b/restore.go index 134fa06fdc9..0a08cd12267 100644 --- a/restore.go +++ b/restore.go @@ -3,10 +3,11 @@ package main import ( - "fmt" "os" "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/sirupsen/logrus" "github.com/urfave/cli" ) @@ -96,12 +97,8 @@ using the runc checkpoint command.`, return err } // XXX: Currently this is untested with rootless containers. 
- rootless, err := isRootless(context) - if err != nil { - return err - } - if rootless { - return fmt.Errorf("runc restore requires root") + if os.Geteuid() != 0 || system.RunningInUserNS() { + logrus.Warn("runc checkpoint is untested with rootless containers") } spec, err := setupSpec(context) diff --git a/rootless_linux.go b/rootless_linux.go new file mode 100644 index 00000000000..c281682f57a --- /dev/null +++ b/rootless_linux.go @@ -0,0 +1,57 @@ +// +build linux + +package main + +import ( + "os" + + "github.com/opencontainers/runc/libcontainer/system" + "github.com/urfave/cli" +) + +func shouldUseRootlessCgroupManager(context *cli.Context) (bool, error) { + if context != nil { + b, err := parseBoolOrAuto(context.GlobalString("rootless")) + if err != nil { + return false, err + } + if b != nil { + return *b, nil + } + // nil b stands for "auto detect" + } + if context.GlobalBool("systemd-cgroup") { + return false, nil + } + if os.Geteuid() != 0 { + return true, nil + } + if !system.RunningInUserNS() { + // euid == 0 , in the initial ns (i.e. the real root) + return false, nil + } + // euid = 0, in a userns. + // As we are unaware of cgroups path, we can't determine whether we have the full + // access to the cgroups path. + // Either way, we can safely decide to use the rootless cgroups manager. + return true, nil +} + +func shouldHonorXDGRuntimeDir() bool { + if os.Getenv("XDG_RUNTIME_DIR") == "" { + return false + } + if os.Geteuid() != 0 { + return true + } + if !system.RunningInUserNS() { + // euid == 0 , in the initial ns (i.e. the real root) + // in this case, we should use /run/runc and ignore + // $XDG_RUNTIME_DIR (e.g. /run/user/0) for backward + // compatibility. + return false + } + // euid = 0, in a userns. + u, ok := os.LookupEnv("USER") + return !ok || u != "root" +} diff --git a/spec.go b/spec.go index 26e9754ef19..bdafa744201 100644 --- a/spec.go +++ b/spec.go @@ -61,7 +61,11 @@ container on your host. 
Alternatively, you can start a rootless container, which has the ability to run without root privileges. For this to work, the specification file needs to be adjusted accordingly. You can pass the parameter --rootless to this command to -generate a proper rootless spec file.`, +generate a proper rootless spec file. + +Note that --rootless is not needed when you execute runc as the root in a user namespace +created by an unprivileged user. +`, Flags: []cli.Flag{ cli.StringFlag{ Name: "bundle, b", diff --git a/utils_linux.go b/utils_linux.go index c6a3489737c..07f3a216974 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -16,7 +16,6 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/specconv" - "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runtime-spec/specs-go" @@ -39,11 +38,11 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { // We default to cgroupfs, and can only use systemd if the system is a // systemd box. cgroupManager := libcontainer.Cgroupfs - rootless, err := isRootless(context) + rootlessCg, err := shouldUseRootlessCgroupManager(context) if err != nil { return nil, err } - if rootless { + if rootlessCg { cgroupManager = libcontainer.RootlessCgroupfs } if context.GlobalBool("systemd-cgroup") { @@ -226,27 +225,8 @@ func createPidFile(path string, process *libcontainer.Process) error { return os.Rename(tmpName, path) } -func isRootless(context *cli.Context) (bool, error) { - if context != nil { - b, err := parseBoolOrAuto(context.GlobalString("rootless")) - if err != nil { - return false, err - } - if b != nil { - return *b, nil - } - // nil b stands for "auto detect" - } - // Even if os.Geteuid() == 0, it might still require rootless mode, - // especially when running within userns. - // So we use system.GetParentNSeuid() here. 
- // - // TODO(AkihiroSuda): how to support nested userns? - return system.GetParentNSeuid() != 0 || system.RunningInUserNS(), nil -} - func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) { - rootless, err := isRootless(context) + rootlessCg, err := shouldUseRootlessCgroupManager(context) if err != nil { return nil, err } @@ -256,7 +236,8 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcont NoPivotRoot: context.Bool("no-pivot"), NoNewKeyring: context.Bool("no-new-keyring"), Spec: spec, - Rootless: rootless, + RootlessEUID: os.Geteuid() != 0, + RootlessCgroups: rootlessCg, }) if err != nil { return nil, err