From d222a392cdf2928fe7a05fc98cde0524a99aebc5 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Apr 2023 14:50:56 +0200 Subject: [PATCH] rootless: support joining containers that use host ns The problem right now is that --ns container: syntax causes us to add the namespace path to the spec which means the runtime will try to call setns on that. This works fine for private namespaces but when the host namespace is used by the container a rootless user is not allowed to join that namespace so the setns call will return with permission denied. The fix is to effectively switch the container to the `host` mode instead of `container:` when the mentioned container used the host ns. I tried to fix this deep into the libpod call when we assign these namespaces but the problem is that this does not work correctly because these namespaces require much more setup. Mainly different kinds of mount points to work correctly. We already have similar work-arounds in place for pods because they also need this. For some reason this does not work with the user namespace, I don't know why and I don't think it is really needed so I left this out just to get at least the rest working. The original issue only reported this for the network namespace. Fixes #18027 Signed-off-by: Paul Holzinger --- pkg/specgen/generate/namespaces.go | 45 +++++++++++++++++++++---- test/system/195-run-namesapces.bats | 52 +++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 test/system/195-run-namesapces.bats diff --git a/pkg/specgen/generate/namespaces.go b/pkg/specgen/generate/namespaces.go index ff5154cec5..4a3d451b94 100644 --- a/pkg/specgen/generate/namespaces.go +++ b/pkg/specgen/generate/namespaces.go @@ -136,7 +136,13 @@ func namespaceOptions(s *specgen.SpecGenerator, rt *libpod.Runtime, pod *libpod. 
if err != nil { return nil, fmt.Errorf("looking up container to share pid namespace with: %w", err) } - toReturn = append(toReturn, libpod.WithPIDNSFrom(pidCtr)) + if rootless.IsRootless() && pidCtr.NamespaceMode(spec.PIDNamespace, pidCtr.ConfigNoCopy().Spec) == host { + // Treat this the same as host, the problem is the runtime tries to do a + // setns call and this will fail when it is the host ns as rootless user. + s.PidNS.NSMode = specgen.Host + } else { + toReturn = append(toReturn, libpod.WithPIDNSFrom(pidCtr)) + } } // IPC @@ -158,9 +164,16 @@ func namespaceOptions(s *specgen.SpecGenerator, rt *libpod.Runtime, pod *libpod. if ipcCtr.ConfigNoCopy().NoShmShare { return nil, fmt.Errorf("joining IPC of container %s is not allowed: non-shareable IPC (hint: use IpcMode:shareable for the donor container)", ipcCtr.ID()) } - toReturn = append(toReturn, libpod.WithIPCNSFrom(ipcCtr)) - if !ipcCtr.ConfigNoCopy().NoShm { - toReturn = append(toReturn, libpod.WithShmDir(ipcCtr.ShmDir())) + if rootless.IsRootless() && ipcCtr.NamespaceMode(spec.IPCNamespace, ipcCtr.ConfigNoCopy().Spec) == host { + // Treat this the same as host, the problem is the runtime tries to do a + // setns call and this will fail when it is the host ns as rootless user. + s.IpcNS.NSMode = specgen.Host + toReturn = append(toReturn, libpod.WithShmDir("/dev/shm")) + } else { + toReturn = append(toReturn, libpod.WithIPCNSFrom(ipcCtr)) + if !ipcCtr.ConfigNoCopy().NoShm { + toReturn = append(toReturn, libpod.WithShmDir(ipcCtr.ShmDir())) + } } case specgen.None: toReturn = append(toReturn, libpod.WithNoShm(true)) @@ -187,7 +200,13 @@ func namespaceOptions(s *specgen.SpecGenerator, rt *libpod.Runtime, pod *libpod. 
if err != nil { return nil, fmt.Errorf("looking up container to share uts namespace with: %w", err) } - toReturn = append(toReturn, libpod.WithUTSNSFrom(utsCtr)) + if rootless.IsRootless() && utsCtr.NamespaceMode(spec.UTSNamespace, utsCtr.ConfigNoCopy().Spec) == host { + // Treat this the same as host, the problem is the runtime tries to do a + // setns call and this will fail when it is the host ns as rootless user. + s.UtsNS.NSMode = specgen.Host + } else { + toReturn = append(toReturn, libpod.WithUTSNSFrom(utsCtr)) + } } // User @@ -257,7 +276,13 @@ func namespaceOptions(s *specgen.SpecGenerator, rt *libpod.Runtime, pod *libpod. if err != nil { return nil, fmt.Errorf("looking up container to share cgroup namespace with: %w", err) } - toReturn = append(toReturn, libpod.WithCgroupNSFrom(cgroupCtr)) + if rootless.IsRootless() && cgroupCtr.NamespaceMode(spec.CgroupNamespace, cgroupCtr.ConfigNoCopy().Spec) == host { + // Treat this the same as host, the problem is the runtime tries to do a + // setns call and this will fail when it is the host ns as rootless user. + s.CgroupNS.NSMode = specgen.Host + } else { + toReturn = append(toReturn, libpod.WithCgroupNSFrom(cgroupCtr)) + } } if s.CgroupParent != "" { @@ -285,7 +310,13 @@ func namespaceOptions(s *specgen.SpecGenerator, rt *libpod.Runtime, pod *libpod. if err != nil { return nil, fmt.Errorf("looking up container to share net namespace with: %w", err) } - toReturn = append(toReturn, libpod.WithNetNSFrom(netCtr)) + if rootless.IsRootless() && netCtr.NamespaceMode(spec.NetworkNamespace, netCtr.ConfigNoCopy().Spec) == host { + // Treat this the same as host, the problem is the runtime tries to do a + // setns call and this will fail when it is the host ns as rootless user. 
+ s.NetNS.NSMode = specgen.Host + } else { + toReturn = append(toReturn, libpod.WithNetNSFrom(netCtr)) + } case specgen.Slirp: portMappings, expose, err := createPortMappings(s, imageData) if err != nil { diff --git a/test/system/195-run-namesapces.bats b/test/system/195-run-namesapces.bats new file mode 100644 index 0000000000..17c30cc818 --- /dev/null +++ b/test/system/195-run-namesapces.bats @@ -0,0 +1,52 @@ +#!/usr/bin/env bats -*- bats -*- +# +# Tests for the namespace options +# + +load helpers + +@test "podman test all namespaces" { + # format is nsname | option name + tests=" +cgroup | cgroupns +ipc | ipc +net | network +pid | pid +uts | uts +" + + for nstype in private host; do + while read name option; do + local cname="c_${name}_$(random_string)" + # ipc is special, private does not allow joining from another container. + # Instead we must use "shareable". + local type=$nstype + if [ "$name" = "ipc" ] && [ "$type" = "private" ]; then + type="shareable" + fi + + run_podman run --name $cname --$option $type -d $IMAGE sh -c \ + "readlink /proc/self/ns/$name; sleep inf" + + run_podman run --rm --$option container:$cname $IMAGE readlink /proc/self/ns/$name + con2_ns="$output" + + run readlink /proc/self/ns/$name + host_ns="$output" + + run_podman logs $cname + con1_ns="$output" + + assert "$con1_ns" == "$con2_ns" "($name) namespace matches (type: $type)" + local matcher="==" + if [[ "$type" != "host" ]]; then + matcher="!=" + fi + assert "$con1_ns" $matcher "$host_ns" "expected host namespace to ($matcher) (type: $type)" + + run_podman rm -f -t0 $cname + done < <(parse_table "$tests") + done +} + +# vim: filetype=sh