From 63e06acfe60ad52d7258e477ee111c2a800e39b7 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Wed, 18 Aug 2021 20:59:37 +0200 Subject: [PATCH 1/2] fix rootlessport flake When the rootlessport process is started the stdout/stderr are attached to the podman process. However once everything is setup podman exits and when the rootlessport process tries to write to stdout it will fail with SIGPIPE. The code handles this signal and puts /dev/null to stdout and stderr but this is not robust. I do not understand the exact cause but sometimes the process is still killed by SIGPIPE. Either go lost the signal or the process got already killed before the goroutine could handle it. Instead of handling SIGPIPE just set /dev/null to stdout and stderr before podman exits. With this there should be no race and no way to run into SIGPIPE errors. [NO TESTS NEEDED] Fixes #11248 Signed-off-by: Paul Holzinger --- pkg/rootlessport/rootlessport_linux.go | 35 +++++++------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/pkg/rootlessport/rootlessport_linux.go b/pkg/rootlessport/rootlessport_linux.go index ede216bfe0..9a2f93f8e6 100644 --- a/pkg/rootlessport/rootlessport_linux.go +++ b/pkg/rootlessport/rootlessport_linux.go @@ -20,7 +20,6 @@ import ( "net" "os" "os/exec" - "os/signal" "path/filepath" "github.com/containernetworking/plugins/pkg/ns" @@ -106,30 +105,6 @@ func parent() error { return err } - exitC := make(chan os.Signal, 1) - defer close(exitC) - - go func() { - sigC := make(chan os.Signal, 1) - signal.Notify(sigC, unix.SIGPIPE) - defer func() { - signal.Stop(sigC) - close(sigC) - }() - - select { - case s := <-sigC: - if s == unix.SIGPIPE { - if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil { - unix.Dup2(int(f.Fd()), 1) // nolint:errcheck - unix.Dup2(int(f.Fd()), 2) // nolint:errcheck - f.Close() - } - } - case <-exitC: - } - }() - socketDir := filepath.Join(cfg.TmpDir, "rp") err = os.MkdirAll(socketDir, 0700) if err != nil { @@ -251,8 +226,16 @@ outer: go serve(socket, driver) } - // write and close ReadyFD (convention is same as slirp4netns --ready-fd) logrus.Info("ready") + + // https://github.com/containers/podman/issues/11248 + // Copy /dev/null to stdout and stderr to prevent SIGPIPE errors + if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil { + unix.Dup2(int(f.Fd()), 1) // nolint:errcheck + unix.Dup2(int(f.Fd()), 2) // nolint:errcheck + f.Close() + } + // write and close ReadyFD (convention is same as slirp4netns --ready-fd) if _, err := readyW.Write([]byte("1")); err != nil { return err } From 15fff7d918a782875ab130a6d96665aedc0ba967 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Mon, 16 Aug 2021 11:03:16 +0200 Subject: [PATCH 2/2] Fix rootless cni dns without systemd stub resolver When a host uses systemd-resolved but not the resolved stub resolver the following symlinks are created: `/etc/resolv.conf` -> `/run/systemd/resolve/stub-resolv.conf` -> `/run/systemd/resolve/resolv.conf`. Because the code uses filepath.EvalSymlinks we put the new resolv.conf to `/run/systemd/resolve/resolv.conf` but the `/run/systemd/resolve/stub-resolv.conf` link does not exists in the mount ns. To fix this we will walk the symlinks manually until we reach the first one under `/run` and use this for the resolv.conf file destination. This fixes a regression which was introduced in e73d4829900c. Fixes #11222 Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 24 ++++++++++++++++++++---- test/system/500-networking.bats | 3 +++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 8e9b5997c4..7eef04ddf1 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -173,11 +173,27 @@ func (r *RootlessCNI) Do(toRun func() error) error { // the link target will be available in the mount ns. // see: https://github.com/containers/podman/issues/10855 resolvePath := "/etc/resolv.conf" - resolvePath, err = filepath.EvalSymlinks(resolvePath) - if err != nil { - return err + for i := 0; i < 255; i++ { + // Do not use filepath.EvalSymlinks, we only want the first symlink under /run. + // If /etc/resolv.conf has more than one symlink under /run, e.g. + // -> /run/systemd/resolve/stub-resolv.conf -> /run/systemd/resolve/resolv.conf + // we would put the netns resolv.conf file to the last path. However this will + // break dns because the second link does not exists in the mount ns. + // see https://github.com/containers/podman/issues/11222 + link, err := os.Readlink(resolvePath) + if err != nil { + // if there is no symlink exit + break + } + resolvePath = filepath.Join(filepath.Dir(resolvePath), link) + if strings.HasPrefix(resolvePath, "/run/") { + break + } + if i == 254 { + return errors.New("too many symlinks while resolving /etc/resolv.conf") + } } - logrus.Debugf("The actual path of /etc/resolv.conf on the host is %q", resolvePath) + logrus.Debugf("The path of /etc/resolv.conf in the mount ns is %q", resolvePath) // When /etc/resolv.conf on the host is a symlink to /run/systemd/resolve/stub-resolv.conf, // we have to mount an empty filesystem on /run/systemd/resolve in the child namespace, // so as to isolate the directory from the host mount namespace. diff --git a/test/system/500-networking.bats b/test/system/500-networking.bats index 6ffee7eafc..3ebe45e639 100644 --- a/test/system/500-networking.bats +++ b/test/system/500-networking.bats @@ -210,6 +210,9 @@ load helpers $IMAGE nc -l -n -v -p $myport cid="$output" + # check that dns is working inside the container + run_podman exec $cid nslookup google.com + # emit random string, and check it teststring=$(random_string 30) echo "$teststring" | nc 127.0.0.1 $myport