From 2550ded989efc889da1785b07865815b7e1b9415 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 12 Feb 2020 14:57:58 +0100 Subject: [PATCH 1/2] rootlessport: fix potential hang write to the error pipe only in case of an error. Otherwise we may end up in a race condition in the select statement below as the read from errChan happens before initComplete and the function returns immediately nil. Closes: https://github.com/containers/libpod/issues/5182 Signed-off-by: Giuseppe Scrivano --- pkg/rootlessport/rootlessport_linux.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pkg/rootlessport/rootlessport_linux.go b/pkg/rootlessport/rootlessport_linux.go index 3e678d33a3..acdf3198bd 100644 --- a/pkg/rootlessport/rootlessport_linux.go +++ b/pkg/rootlessport/rootlessport_linux.go @@ -122,6 +122,7 @@ func parent() error { logrus.WithError(driverErr).Warn("parent driver exited") } errCh <- driverErr + close(errCh) }() opaque := driver.OpaqueForChild() logrus.Infof("opaque=%+v", opaque) @@ -142,7 +143,7 @@ func parent() error { }() // reexec the child process in the child netns - cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid())) + cmd := exec.Command("/proc/self/exe") cmd.Args = []string{reexecChildKey} cmd.Stdin = childQuitR cmd.Stdout = &logrusWriter{prefix: "child"} @@ -164,12 +165,19 @@ func parent() error { logrus.Info("waiting for initComplete") // wait for the child to connect to the parent - select { - case <-initComplete: - logrus.Infof("initComplete is closed; parent and child established the communication channel") - case err := <-errCh: - return err +outer: + for { + select { + case <-initComplete: + logrus.Infof("initComplete is closed; parent and child established the communication channel") + break outer + case err := <-errCh: + if err != nil { + return err + } + } } + defer func() { logrus.Info("stopping parent driver") quit <- struct{}{} From 5b69e7f2ef3ad0bd73ad6ec4d24c033da6456ef2 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 12 Feb 2020 18:39:30 +0100 Subject: [PATCH 2/2] rootlessport: drop Pdeathsig in favor of Kill there is a race condition where the child process is immediately killed: [pid 2576752] arch_prctl(0x3001 /* ARCH_??? */, 0x7ffdf612f170) = -1 EINVAL (Invalid argument) [pid 2576752] access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) [pid 2576752] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=2576742, si_uid=0} --- [pid 2576752] +++ killed by SIGTERM +++ this happens because the parent process here really means the "parent thread". Since there is no way of running it on the main thread, let's skip this functionality altogether and use kill(2). Signed-off-by: Giuseppe Scrivano --- pkg/rootlessport/rootlessport_linux.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/rootlessport/rootlessport_linux.go b/pkg/rootlessport/rootlessport_linux.go index acdf3198bd..2b51f4e09a 100644 --- a/pkg/rootlessport/rootlessport_linux.go +++ b/pkg/rootlessport/rootlessport_linux.go @@ -149,9 +149,6 @@ func parent() error { cmd.Stdout = &logrusWriter{prefix: "child"} cmd.Stderr = cmd.Stdout cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON)) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGTERM, - } childNS, err := ns.GetNS(cfg.NetNSPath) if err != nil { return err @@ -163,6 +160,12 @@ func parent() error { return err } + defer func() { + if err := syscall.Kill(cmd.Process.Pid, syscall.SIGTERM); err != nil { + logrus.WithError(err).Warn("kill child process") + } + }() + logrus.Info("waiting for initComplete") // wait for the child to connect to the parent outer: