Skip to content

Commit

Permalink
rootless: new function to join existing conmon processes
Browse files Browse the repository at this point in the history
Move the logic for joining existing namespaces down to the rootless
package.  In main_local we still retrieve the list of conmon pid files
and pass it to the rootless package.

In addition, create a temporary user namespace for reading these
files, as the unprivileged user might not have enough privileges to
read the conmon pid file, for example when the container runs with a
different uidmap and root in the container is a different user than
the rootless user.

Closes: containers#3187

Signed-off-by: Giuseppe Scrivano <[email protected]>
  • Loading branch information
giuseppe committed May 25, 2019
1 parent ce26aa7 commit ee11f3b
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 74 deletions.
44 changes: 10 additions & 34 deletions cmd/podman/main_local.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@ package main

import (
"context"
"io/ioutil"
"log/syslog"
"os"
"runtime/pprof"
"strconv"
"strings"
"syscall"

Expand Down Expand Up @@ -120,18 +118,10 @@ func setupRootless(cmd *cobra.Command, args []string) error {
return errors.Wrapf(err, "could not get pause process pid file path")
}

data, err := ioutil.ReadFile(pausePidPath)
if err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath)
}
if err == nil {
pausePid, err := strconv.Atoi(string(data))
if err != nil {
return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath)
}
became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "")
if _, err := os.Stat(pausePidPath); err == nil {
became, ret, err := rootless.TryJoinFromFilePaths("", false, []string{pausePidPath})
if err != nil {
logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath)
logrus.Errorf("cannot join pause process. You may need to remove %s and stop all containers", pausePidPath)
logrus.Errorf("you can use `system migrate` to recreate the pause process")
logrus.Errorf(err.Error())
os.Exit(1)
Expand All @@ -154,28 +144,13 @@ func setupRootless(cmd *cobra.Command, args []string) error {
logrus.Errorf(err.Error())
os.Exit(1)
}
var became bool
var ret int
if len(ctrs) == 0 {
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
} else {
for _, ctr := range ctrs {
data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile)
if err != nil {
logrus.Errorf(err.Error())
continue
}
conmonPid, err := strconv.Atoi(string(data))
if err != nil {
logrus.Errorf(err.Error())
continue
}
became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath)
if err == nil {
break
}
}

paths := []string{}
for _, ctr := range ctrs {
paths = append(paths, ctr.Config().ConmonPidFile)
}

became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
if err != nil {
logrus.Errorf(err.Error())
os.Exit(1)
Expand All @@ -185,6 +160,7 @@ func setupRootless(cmd *cobra.Command, args []string) error {
}
return nil
}

func setRLimits() error {
rlimits := new(syscall.Rlimit)
rlimits.Cur = 1048576
Expand Down
113 changes: 83 additions & 30 deletions pkg/rootless/rootless_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,26 @@ syscall_clone (unsigned long flags, void *child_stack)
#endif
}

/*
 * Wait for a child process and translate its wait status into an exit code.
 *
 * PID is handed to waitpid() unchanged, so -1 waits for any child.
 * OPTIONS is the waitpid() options mask (0, WNOHANG, ...); it used to be
 * accepted but ignored, it is now forwarded to waitpid().
 *
 * Returns the child's exit status if it exited, 128 + the signal number if
 * it was killed by a signal, and -1 if waitpid() failed, if no child has
 * changed state yet (only possible with WNOHANG), or if the reported state
 * is neither an exit nor a termination by signal.
 */
int
reexec_in_user_namespace_wait (int pid, int options)
{
  pid_t p;
  int status;

  /* Retry when the call is interrupted by a signal handler.  */
  do
    p = waitpid (pid, &status, options);
  while (p < 0 && errno == EINTR);

  /* p == 0 means WNOHANG was given and nothing changed state: status was
     not written, so it must not be inspected.  */
  if (p <= 0)
    return -1;

  if (WIFEXITED (status))
    return WEXITSTATUS (status);
  if (WIFSIGNALED (status))
    return 128 + WTERMSIG (status);
  return -1;
}

static int
create_pause_process (const char *pause_pid_file_path, char **argv)
{
Expand All @@ -369,6 +389,8 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
while (r < 0 && errno == EINTR);
close (p[0]);

reexec_in_user_namespace_wait(r, 0);

return r == 1 && b == '0' ? 0 : -1;
}
else
Expand Down Expand Up @@ -573,8 +595,51 @@ check_proc_sys_userns_file (const char *path)
}
}

/*
 * Copy the whole content of the file FILE_TO_READ to the already open
 * descriptor OUTFD, in chunks of at most 512 bytes.
 *
 * Reads and writes interrupted by a signal (EINTR) are retried, and short
 * writes are continued until the current chunk is fully written.  OUTFD is
 * left open; the source file is always closed before returning.
 *
 * Returns 0 on success, or the negative value reported by the failing
 * open(), read() or write() call.
 */
static int
copy_file_to_fd (const char *file_to_read, int outfd)
{
  char chunk[512];
  int result = 0;
  int src;

  src = open (file_to_read, O_RDONLY);
  if (src < 0)
    return src;

  while (result == 0)
    {
      ssize_t nread, done;

      do
        nread = read (src, chunk, sizeof chunk);
      while (nread < 0 && errno == EINTR);

      if (nread < 0)
        {
          result = nread;
          break;
        }

      /* End of file: everything has been copied.  */
      if (nread == 0)
        break;

      /* Push the chunk out, resuming after every partial write.  */
      for (done = 0; done < nread;)
        {
          ssize_t nwritten;

          do
            nwritten = write (outfd, chunk + done, nread - done);
          while (nwritten < 0 && errno == EINTR);

          if (nwritten < 0)
            {
              result = nwritten;
              break;
            }
          done += nwritten;
        }
    }

  close (src);
  return result;
}

int
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
{
int ret;
pid_t pid;
Expand All @@ -598,11 +663,11 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
listen_pid = getenv("LISTEN_PID");
listen_fds = getenv("LISTEN_FDS");

if (listen_pid != NULL && listen_fds != NULL) {
if (strtol(listen_pid, NULL, 10) == getpid()) {
do_socket_activation = true;
if (listen_pid != NULL && listen_fds != NULL)
{
if (strtol(listen_pid, NULL, 10) == getpid())
do_socket_activation = true;
}
}

sprintf (uid, "%d", geteuid ());
sprintf (gid, "%d", getegid ());
Expand Down Expand Up @@ -658,11 +723,12 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
_exit (EXIT_FAILURE);
}

if (do_socket_activation) {
char s[32];
sprintf (s, "%d", getpid());
setenv ("LISTEN_PID", s, true);
}
if (do_socket_activation)
{
char s[32];
sprintf (s, "%d", getpid());
setenv ("LISTEN_PID", s, true);
}

setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1);
setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1);
Expand Down Expand Up @@ -721,27 +787,14 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
_exit (EXIT_FAILURE);
}

if (file_to_read && file_to_read[0])
{
ret = copy_file_to_fd (file_to_read, outputfd);
close (outputfd);
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}

execvp (argv[0], argv);

_exit (EXIT_FAILURE);
}

/*
 * Block until the child PID terminates and convert its wait status into
 * an exit code.  PID is passed to waitpid() as is.
 *
 * Returns the exit status for a normal exit, 128 + the signal number when
 * the child was terminated by a signal, and -1 when waitpid() fails or the
 * status is neither of the two.
 */
int
reexec_in_user_namespace_wait (int pid)
{
  int wstatus;
  pid_t reaped;

  for (;;)
    {
      reaped = waitpid (pid, &wstatus, 0);
      if (reaped >= 0)
        break;
      /* Only an interrupted call is worth retrying.  */
      if (errno != EINTR)
        return -1;
    }

  if (WIFEXITED (wstatus))
    return WEXITSTATUS (wstatus);

  return WIFSIGNALED (wstatus) ? 128 + WTERMSIG (wstatus) : -1;
}
112 changes: 102 additions & 10 deletions pkg/rootless/rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ import (
#include <stdlib.h>
extern uid_t rootless_uid();
extern uid_t rootless_gid();
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
extern int reexec_in_user_namespace_wait(int pid);
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
*/
import "C"
Expand Down Expand Up @@ -226,19 +226,15 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, errors.Errorf("cannot re-exec process")
}

ret := C.reexec_in_user_namespace_wait(pidC)
ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("error waiting for the re-exec process")
}

return true, int(ret), nil
}

// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser()
Expand All @@ -249,6 +245,13 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid))

cFileToRead := C.CString(fileToRead)
defer C.free(unsafe.Pointer(cFileToRead))
var fileOutputFD C.int
if fileOutput != nil {
fileOutputFD = C.int(fileOutput.Fd())
}

runtime.LockOSThread()
defer runtime.UnlockOSThread()

Expand All @@ -262,7 +265,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
defer w.Close()
defer w.Write([]byte("0"))

pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
pid := int(pidC)
if pid < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
Expand Down Expand Up @@ -328,6 +331,10 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return false, -1, errors.Wrapf(err, "read from sync pipe")
}

if fileOutput != nil {
return true, 0, nil
}

if b[0] == '2' {
// We have lost the race for writing the PID file, as probably another
// process created a namespace and wrote the PID.
Expand Down Expand Up @@ -368,10 +375,95 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
}
}()

ret := C.reexec_in_user_namespace_wait(pidC)
ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("error waiting for the re-exec process")
}

return true, int(ret), nil
}

// BecomeRootInUserNS re-executes podman inside a new user namespace.
// The first result reports whether podman was re-executed, the second is the
// exit code of the re-executed podman process. When a re-exec happened, the
// caller is expected to propagate that exit code.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
	// An empty fileToRead and a nil fileOutput: nothing is copied out of
	// the new namespace.
	became, ret, err := becomeRootInUserNS(pausePid, "", nil)
	return became, ret, err
}

// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
//
// If paths is empty it falls back to BecomeRootInUserNS(pausePidPath).
// Otherwise the files are tried in order: the first one that yields a
// positive pid is handed to JoinUserAndMountNS together with pausePidPath and
// the result of that call is returned as is (no other file is tried if the
// join itself fails). If no file yields a pid, the error for the last file
// tried is returned.
//
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
	if len(paths) == 0 {
		return BecomeRootInUserNS(pausePidPath)
	}

	var lastErr error
	for _, path := range paths {
		var (
			pid int
			err error
		)
		if needNewNamespace {
			pid, err = readPidFileInNewUserNS(path)
		} else {
			pid, err = readPidFile(path)
		}
		if err != nil {
			// Remember why this file was unusable and try the next one.
			lastErr = err
			continue
		}
		return JoinUserAndMountNS(uint(pid), pausePidPath)
	}
	// paths is not empty, so every iteration recorded an error.
	return false, 0, lastErr
}

// readPidFile reads the pid stored in the file at path directly.
func readPidFile(path string) (int, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}
	return parsePidFileData(data, path)
}

// readPidFileInNewUserNS reads the pid stored in the file at path from a
// child process running in a temporary user namespace, which sends the file
// content back over a socket pair.
func readPidFileInNewUserNS(path string) (int, error) {
	fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
	if err != nil {
		return 0, err
	}
	r, w := os.NewFile(uintptr(fds[0]), "read file"), os.NewFile(uintptr(fds[1]), "write file")

	if _, _, err := becomeRootInUserNS("", path, w); err != nil {
		w.Close()
		r.Close()
		return 0, err
	}

	// The child holds its own copy of the write end; ours is not needed.
	w.Close()
	defer func() {
		// Close the read end before reaping so a child still writing
		// cannot keep us waiting on it.
		r.Close()
		C.reexec_in_user_namespace_wait(-1, 0)
	}()

	b := make([]byte, 32)
	n, err := r.Read(b)
	if err != nil {
		return 0, errors.Wrapf(err, "cannot read %s", path)
	}
	return parsePidFileData(b[:n], path)
}

// parsePidFileData converts the content of the pid file at path into a pid
// and rejects values that cannot identify a process.
func parsePidFileData(data []byte, path string) (int, error) {
	pid, err := strconv.Atoi(string(data))
	if err != nil {
		return 0, errors.Wrapf(err, "cannot parse file %s", path)
	}
	if pid <= 0 {
		return 0, errors.Errorf("invalid pid %d in file %s", pid, path)
	}
	return pid, nil
}
Loading

0 comments on commit ee11f3b

Please sign in to comment.