From 2051e54e013103ed36403f751e1467eaacac7d90 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Mon, 3 Apr 2023 15:56:39 +0200 Subject: [PATCH] rootless netns: recover from invalid netns I made a change in c/common[1] to prevent duplicates in netns names. This now causes a problem in podman[2] where the rootless netns will no longer work after the netns got invalid but the underlying path still exists. AFAICT this happens when the podman pause process got killed and we are now in a different user namespace. While I do not know what causes this, this commit should make it at least possible to recover from this situation automatically as it used to be before[1]. The problem with that is that containers started before it will not be able to talk to containers started after this. A restart of the previous container will fix it but this was also the case before. [NO NEW TESTS NEEDED] [1] https://github.com/containers/common/pull/1381 [2] https://github.com/containers/podman/issues/17903#issuecomment-1494169843 Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index d7d1f4bc74..89eb6bbb08 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -365,15 +365,26 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { netnsName := fmt.Sprintf("%s-%x", rootlessNetNsName, hash[:10]) path := filepath.Join(nsDir, netnsName) - ns, err := ns.GetNS(path) + nsReference, err := ns.GetNS(path) if err != nil { if !new { // return an error if we could not get the namespace and should no create one return nil, fmt.Errorf("getting rootless network namespace: %w", err) } + + // When the netns is not valid but the file exists we have to remove it first, + // https://github.com/containers/common/pull/1381 changed the behavior from + // NewNSWithName() so it will now error when the file already exists. 
+ // https://github.com/containers/podman/issues/17903#issuecomment-1494329622 + if errors.As(err, &ns.NSPathNotNSErr{}) { + logrus.Infof("rootless netns is no longer valid: %v", err) + // ignore errors, if something is wrong NewNSWithName() will fail below anyway + _ = os.Remove(path) + } + // create a new namespace logrus.Debugf("creating rootless network namespace with name %q", netnsName) - ns, err = netns.NewNSWithName(netnsName) + nsReference, err = netns.NewNSWithName(netnsName) if err != nil { return nil, fmt.Errorf("creating rootless network namespace: %w", err) } @@ -408,7 +419,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { } // Note we do not use --exit-fd, we kill this process by pid cmdArgs = append(cmdArgs, "-c", "-r", "3") - cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0") + cmdArgs = append(cmdArgs, "--netns-type=path", nsReference.Path(), "tap0") cmd := exec.Command(path, cmdArgs...) logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) @@ -540,7 +551,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { // Important set rootlessNetNS as last step. // Do not return any errors after this. rootlessNetNS = &RootlessNetNS{ - ns: ns, + ns: nsReference, dir: rootlessNetNsDir, Lock: lock, }