Skip to content

Commit

Permalink
userns: do not use an intermediate mount namespace
Browse files Browse the repository at this point in the history
We have an issue in the current implementation where the cleanup
process is not able to umount the storage as it is running in a
separate namespace.

Simplify the implementation for user namespaces by not using an
intermediate mount namespace.  For doing it, we need to relax the
permissions on the parent directories and allow browsing
them. Containers that are running without a user namespace, will still
maintain mode 0700 on their directory.

Signed-off-by: Giuseppe Scrivano <[email protected]>
  • Loading branch information
giuseppe committed Mar 25, 2019
1 parent 4e555f0 commit 0b8fc41
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 138 deletions.
12 changes: 1 addition & 11 deletions libpod/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,12 @@ type ContainerState struct {
ConfigPath string `json:"configPath,omitempty"`
// RunDir is a per-boot directory for container content
RunDir string `json:"runDir,omitempty"`
// DestinationRunDir is where the files in RunDir will be accessible for the container.
// It is different than RunDir when using userNS
DestinationRunDir string `json:"destinationRunDir,omitempty"`
// Mounted indicates whether the container's storage has been mounted
// for use
Mounted bool `json:"mounted,omitempty"`
// Mountpoint contains the path to the container's mounted storage as given
// by containers/storage. It can be different than RealMountpoint when
// usernamespaces are used
// by containers/storage.
Mountpoint string `json:"mountPoint,omitempty"`
// RealMountpoint contains the path to the container's mounted storage
RealMountpoint string `json:"realMountPoint,omitempty"`
// StartedTime is the time the container was started
StartedTime time.Time `json:"startedTime,omitempty"`
// FinishedTime is the time the container finished executing
Expand Down Expand Up @@ -187,10 +181,6 @@ type ContainerState struct {
// the path of the file on disk outside the container
BindMounts map[string]string `json:"bindMounts,omitempty"`

// UserNSRoot is the directory used as root for the container when using
// user namespaces.
UserNSRoot string `json:"userNSRoot,omitempty"`

// ExtensionStageHooks holds hooks which will be executed by libpod
// and not delegated to the OCI runtime.
ExtensionStageHooks map[string][]spec.Hook `json:"extensionStageHooks,omitempty"`
Expand Down
47 changes: 10 additions & 37 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,34 +310,19 @@ func (c *Container) setupStorage(ctx context.Context) error {
}

if !rootless.IsRootless() && (len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0) {
info, err := os.Stat(c.runtime.config.TmpDir)
if err != nil {
return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.TmpDir)
}
if err := os.Chmod(c.runtime.config.TmpDir, info.Mode()|0111); err != nil {
return errors.Wrapf(err, "cannot chmod `%s`", c.runtime.config.TmpDir)
}
root := filepath.Join(c.runtime.config.TmpDir, "containers-root", c.ID())
if err := os.MkdirAll(root, 0755); err != nil {
return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID())
}
if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil {
if err := os.Chown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
return err
}
c.state.UserNSRoot, err = filepath.EvalSymlinks(root)
if err != nil {
return errors.Wrapf(err, "failed to eval symlinks for %s", root)

if err := os.Chown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
return err
}
}

c.config.ProcessLabel = containerInfo.ProcessLabel
c.config.MountLabel = containerInfo.MountLabel
c.config.StaticDir = containerInfo.Dir
c.state.RunDir = containerInfo.RunDir
c.state.DestinationRunDir = c.state.RunDir
if c.state.UserNSRoot != "" {
c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir")
}

// Set the default Entrypoint and Command
if containerInfo.Config != nil {
Expand Down Expand Up @@ -372,12 +357,6 @@ func (c *Container) teardownStorage() error {
return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID())
}

if c.state.UserNSRoot != "" {
if err := os.RemoveAll(c.state.UserNSRoot); err != nil {
return errors.Wrapf(err, "error removing userns root %q", c.state.UserNSRoot)
}
}

if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil {
// If the container has already been removed, warn but do not
// error - we wanted it gone, it is already gone.
Expand Down Expand Up @@ -432,6 +411,7 @@ func (c *Container) refresh() error {
if err != nil {
return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID())
}
c.state.RunDir = dir

if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
info, err := os.Stat(c.runtime.config.TmpDir)
Expand All @@ -448,16 +428,6 @@ func (c *Container) refresh() error {
if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil {
return err
}
c.state.UserNSRoot, err = filepath.EvalSymlinks(root)
if err != nil {
return errors.Wrapf(err, "failed to eval symlinks for %s", root)
}
}

c.state.RunDir = dir
c.state.DestinationRunDir = c.state.RunDir
if c.state.UserNSRoot != "" {
c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir")
}

// We need to pick up a new lock
Expand Down Expand Up @@ -1260,7 +1230,7 @@ func (c *Container) writeStringToRundir(destFile, output string) (string, error)
return "", err
}

return filepath.Join(c.state.DestinationRunDir, destFile), nil
return filepath.Join(c.state.RunDir, destFile), nil
}

// appendStringToRundir appends the provided string to the runtimedir file
Expand All @@ -1277,7 +1247,7 @@ func (c *Container) appendStringToRundir(destFile, output string) (string, error
return "", errors.Wrapf(err, "unable to write %s", destFileName)
}

return filepath.Join(c.state.DestinationRunDir, destFile), nil
return filepath.Join(c.state.RunDir, destFile), nil
}

// Save OCI spec to disk, replacing any existing specs for the container
Expand Down Expand Up @@ -1410,6 +1380,9 @@ func (c *Container) mount() (string, error) {
if err != nil {
return "", errors.Wrapf(err, "error resolving storage path for container %s", c.ID())
}
if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil {
return "", errors.Wrapf(err, "cannot chown %s to %d:%d", mountPoint, c.RootUID(), c.RootGID())
}
return mountPoint, nil
}

Expand Down
25 changes: 3 additions & 22 deletions libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/containers/libpod/pkg/lookup"
"github.com/containers/libpod/pkg/resolvconf"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/storage/pkg/idtools"
"github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -99,11 +98,6 @@ func (c *Container) prepare() (err error) {
// Finish up mountStorage
c.state.Mounted = true
c.state.Mountpoint = mountPoint
if c.state.UserNSRoot == "" {
c.state.RealMountpoint = c.state.Mountpoint
} else {
c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint")
}

logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
}()
Expand Down Expand Up @@ -220,13 +214,6 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
}
m.Options = options

// If we are using a user namespace, we will use an intermediate
// directory to bind mount volumes
if c.state.UserNSRoot != "" && strings.HasPrefix(m.Source, c.runtime.config.VolumePath) {
newSourceDir := filepath.Join(c.state.UserNSRoot, "volumes")
m.Source = strings.Replace(m.Source, c.runtime.config.VolumePath, newSourceDir, 1)
}
}

g.SetProcessSelinuxLabel(c.ProcessLabel())
Expand Down Expand Up @@ -313,13 +300,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
}

if c.config.Rootfs == "" {
if err := idtools.MkdirAllAs(c.state.RealMountpoint, 0700, c.RootUID(), c.RootGID()); err != nil {
return nil, err
}
}

g.SetRootPath(c.state.RealMountpoint)
g.SetRootPath(c.state.Mountpoint)
g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))

Expand Down Expand Up @@ -807,7 +788,7 @@ func (c *Container) makeBindMounts() error {
}

// Add Secret Mounts
secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.DestinationRunDir, c.RootUID(), c.RootGID())
secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.RunDir, c.RootUID(), c.RootGID())
for _, mount := range secretMounts {
if _, ok := c.state.BindMounts[mount.Destination]; !ok {
c.state.BindMounts[mount.Destination] = mount.Source
Expand Down Expand Up @@ -894,7 +875,7 @@ func (c *Container) generateResolvConf() (string, error) {
return "", err
}

return filepath.Join(c.state.DestinationRunDir, "resolv.conf"), nil
return filepath.Join(c.state.RunDir, "resolv.conf"), nil
}

// generateHosts creates a containers hosts file
Expand Down
91 changes: 28 additions & 63 deletions libpod/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@
package libpod

import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"sync"
"syscall"

"github.com/containerd/cgroups"
"github.com/containers/libpod/utils"
"github.com/containers/storage/pkg/idtools"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
Expand Down Expand Up @@ -62,72 +59,40 @@ func newPipe() (parent *os.File, child *os.File, err error) {
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}

// CreateContainer creates a container in the OCI runtime
// TODO terminal support for container
// Presently just ignoring conmon opts related to it
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
if ctr.state.UserNSRoot == "" {
// no need of an intermediate mount ns
return r.createOCIContainer(ctr, cgroupParent, restoreOptions)
}
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
runtime.LockOSThread()

var fd *os.File
fd, err = os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
// makeAccessible changes the path permission and each parent directory to have --x--x--x
func makeAccessible(path string, uid, gid int) error {
for ; path != "/"; path = filepath.Dir(path) {
st, err := os.Stat(path)
if err != nil {
return
}
defer fd.Close()

// create a new mountns on the current thread
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
return
}
defer unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS)

// don't spread our mounts around
err = unix.Mount("/", "/", "none", unix.MS_REC|unix.MS_SLAVE, "")
if err != nil {
return
}
err = unix.Mount(ctr.state.Mountpoint, ctr.state.RealMountpoint, "none", unix.MS_BIND, "")
if err != nil {
return
if os.IsNotExist(err) {
return nil
}
return err
}
if err := idtools.MkdirAllAs(ctr.state.DestinationRunDir, 0700, ctr.RootUID(), ctr.RootGID()); err != nil {
return
if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
continue
}

err = unix.Mount(ctr.state.RunDir, ctr.state.DestinationRunDir, "none", unix.MS_BIND, "")
if err != nil {
return
if st.Mode()&0111 != 0111 {
if err := os.Chmod(path, os.FileMode(st.Mode()|0111)); err != nil {
return err
}
}
}
return nil
}

if ctr.state.UserNSRoot != "" {
_, err := os.Stat(ctr.runtime.config.VolumePath)
if err != nil && !os.IsNotExist(err) {
return
}
if err == nil {
volumesTarget := filepath.Join(ctr.state.UserNSRoot, "volumes")
if err := idtools.MkdirAs(volumesTarget, 0700, ctr.RootUID(), ctr.RootGID()); err != nil {
return
}
if err = unix.Mount(ctr.runtime.config.VolumePath, volumesTarget, "none", unix.MS_BIND, ""); err != nil {
return
}
// CreateContainer creates a container in the OCI runtime
// TODO terminal support for container
// Presently just ignoring conmon opts related to it
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
if len(ctr.config.IDMappings.UIDMap) != 0 || len(ctr.config.IDMappings.GIDMap) != 0 {
for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.VolumePath} {
if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
return err
}
}

err = r.createOCIContainer(ctr, cgroupParent, restoreOptions)
}()
wg.Wait()

return err
}
return r.createOCIContainer(ctr, cgroupParent, restoreOptions)
}

func rpmVersion(path string) string {
Expand Down
6 changes: 1 addition & 5 deletions libpod/runtime_ctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,7 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
}

if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" {
if ctr.state.UserNSRoot == "" {
ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
} else {
ctr.config.ShmDir = filepath.Join(ctr.state.UserNSRoot, "shm")
}
ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil {
if !os.IsExist(err) {
return nil, errors.Wrapf(err, "unable to create shm %q dir", ctr.config.ShmDir)
Expand Down

0 comments on commit 0b8fc41

Please sign in to comment.