From 3ff47748dea946775b8f4ec8e1c644c2efb3950b Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Wed, 1 Dec 2021 18:55:08 +0100 Subject: [PATCH] Fix possible rootless netns cleanup race rootlessNetNS.Cleanup() has an issue with how it detects if cleanup is needed, reading the container state is not good ebough because containers are first stopped and than cleanup will be called. So at one time two containers could wait for cleanup but the second one will fail because the first one triggered already the cleanup thus making rootless netns unavailable for the second container resulting in an teardown error. Instead of checking the container state we need to check the netns state. Secondly, podman unshare --rootless-netns should not do the cleanup. This causes more issues than it is worth fixing. Users also might want to use this to setup the namespace in a special way. If unshare also cleans this up right away we cannot do this. [NO NEW TESTS NEEDED] Fixes #12459 Signed-off-by: Paul Holzinger --- docs/source/markdown/podman-unshare.1.md | 5 ++--- libpod/networking_linux.go | 26 ++++++++++++------------ pkg/domain/infra/abi/system.go | 7 +++++-- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/docs/source/markdown/podman-unshare.1.md b/docs/source/markdown/podman-unshare.1.md index 5676b1be77..01393a862d 100644 --- a/docs/source/markdown/podman-unshare.1.md +++ b/docs/source/markdown/podman-unshare.1.md @@ -35,7 +35,6 @@ Print usage statement Join the rootless network namespace used for CNI and netavark networking. It can be used to connect to a rootless container via IP address (bridge networking). This is otherwise not possible from the host network namespace. -_Note: Using this option with more than one unshare session can have unexpected results._ ## Exit Codes @@ -57,13 +56,13 @@ the exit codes follow the `chroot` standard, see below: **127** Executing a _contained command_ and the _command_ cannot be found - $ podman run busybox foo; echo $? + $ podman unshare foo; echo $? Error: fork/exec /usr/bin/bogus: no such file or directory 127 **Exit code** _contained command_ exit code - $ podman run busybox /bin/sh -c 'exit 3'; echo $? + $ podman unshare /bin/sh -c 'exit 3'; echo $? 3 ## EXAMPLE diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 7d1214183f..b734b9c95e 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -322,17 +322,14 @@ func (r *RootlessNetNS) Do(toRun func() error) error { // Cleanup the rootless network namespace if needed. // It checks if we have running containers with the bridge network mode. -// Cleanup() will try to lock RootlessNetNS, therefore you have to call -// it with an unlocked lock. +// Cleanup() expects that r.Lock is locked func (r *RootlessNetNS) Cleanup(runtime *Runtime) error { _, err := os.Stat(r.dir) if os.IsNotExist(err) { // the directory does not exists no need for cleanup return nil } - r.Lock.Lock() - defer r.Lock.Unlock() - running := func(c *Container) bool { + activeNetns := func(c *Container) bool { // no bridge => no need to check if !c.config.NetMode.IsBridge() { return false @@ -352,15 +349,18 @@ func (r *RootlessNetNS) Cleanup(runtime *Runtime) error { return false } - state := c.state.State - return state == define.ContainerStateRunning + // only check for an active netns, we cannot use the container state + // because not running does not mean that the netns does not need cleanup + // only if the netns is empty we know that we do not need cleanup + return c.state.NetNS != nil } - ctrs, err := runtime.GetContainersWithoutLock(running) + ctrs, err := runtime.GetContainersWithoutLock(activeNetns) if err != nil { return err } - // no cleanup if we found containers - if len(ctrs) > 0 { + // no cleanup if we found no other containers with a netns + // we will always find one container (the container cleanup that is currently calling us) + if len(ctrs) > 1 { return nil } logrus.Debug("Cleaning up rootless network namespace") @@ -809,10 +809,10 @@ func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error { if rootlessNetNS != nil { // execute the cni setup in the rootless net ns err = rootlessNetNS.Do(tearDownPod) - rootlessNetNS.Lock.Unlock() - if err == nil { - err = rootlessNetNS.Cleanup(r) + if cerr := rootlessNetNS.Cleanup(r); cerr != nil { + logrus.WithError(err).Error("failed to cleanup rootless netns") } + rootlessNetNS.Lock.Unlock() } else { err = tearDownPod() } diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go index 7da7754f29..e6c9d850b5 100644 --- a/pkg/domain/infra/abi/system.go +++ b/pkg/domain/infra/abi/system.go @@ -365,9 +365,12 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e if err != nil { return err } - // make sure to unlock, unshare can run for a long time + // Make sure to unlock, unshare can run for a long time. rootlessNetNS.Lock.Unlock() - defer rootlessNetNS.Cleanup(ic.Libpod) + // We do not want to cleanup the netns after unshare. + // The problem is that we cannot know if we need to cleanup and + // secondly unshare should allow user to setup the namespace with + // special things, e.g. potentially macvlan or something like that. return rootlessNetNS.Do(unshare) } return unshare()