From 54b588c07d05858c9bbc523eeff0badb85d53f76 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 18 Feb 2021 13:51:27 +0100 Subject: [PATCH 01/13] rootless cni without infra container Instead of creating an extra container, create a network and mount namespace inside the podman user namespace. This ns is used for rootless cni operations. This helps to align the rootless and rootful network code path. If we run as rootless we just have to set up an extra net ns and initialize slirp4netns in it. The ocicni lib will be called in that net ns. This design allows easier maintenance, no extra container with pause processes, support for rootless cni with --uidmap and possibly more. The biggest problem is backwards compatibility. I don't think live migration can be possible. If the user reboots or restarts all cni containers everything should work as expected again. The user is left with the rootless-cni-infra container and image but this can safely be removed. To make the existing cni configs work we need to execute the cni plugins in an extra mount namespace. This ensures that we can safely mount over /run and /var which have to be writeable for the cni plugins without removing access to these files by the main podman process. One caveat is that we need to keep the netns files at `XDG_RUNTIME_DIR/netns` accessible. `XDG_RUNTIME_DIR/rootless-cni/{run,var}` will be mounted to `/{run,var}`. To ensure that we keep the netns directory we bind mount this relative to the new root location, e.g. XDG_RUNTIME_DIR/rootless-cni/run/user/1000/netns before we mount the run directory. The run directory is mounted recursively; this makes the netns directory at the same path accessible as before. This also allows iptables-legacy to work because /run/xtables.lock is now writeable. 
Signed-off-by: Paul Holzinger --- libpod/container_internal_linux.go | 6 +- libpod/network/create.go | 6 +- libpod/networking_linux.go | 488 ++++++++++++++++++++++------- libpod/runtime.go | 10 +- pkg/netns/netns_linux.go | 26 +- test/system/500-networking.bats | 7 - 6 files changed, 396 insertions(+), 147 deletions(-) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index a136fb72d8..d167bf188b 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -92,11 +92,7 @@ func (c *Container) prepare() error { // Set up network namespace if not already set up noNetNS := c.state.NetNS == nil if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS { - if rootless.IsRootless() && len(c.config.Networks) > 0 { - netNS, networkStatus, createNetNSErr = AllocRootlessCNI(context.Background(), c) - } else { - netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c) - } + netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c) if createNetNSErr != nil { return } diff --git a/libpod/network/create.go b/libpod/network/create.go index 1a5aa82fcf..4fe9b445f9 100644 --- a/libpod/network/create.go +++ b/libpod/network/create.go @@ -11,7 +11,6 @@ import ( "github.com/containernetworking/cni/pkg/version" "github.com/containers/common/pkg/config" "github.com/containers/podman/v3/pkg/domain/entities" - "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/podman/v3/pkg/util" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -223,9 +222,8 @@ func createBridge(name string, options entities.NetworkCreateOptions, runtimeCon plugins = append(plugins, NewPortMapPlugin()) plugins = append(plugins, NewFirewallPlugin()) plugins = append(plugins, NewTuningPlugin()) - // if we find the dnsname plugin or are rootless, we add configuration for it - // the rootless-cni-infra container has the dnsname plugin always installed - if (HasDNSNamePlugin(runtimeConfig.Network.CNIPluginDirs) || 
rootless.IsRootless()) && !options.DisableDNS { + // if we find the dnsname plugin we add configuration for it + if HasDNSNamePlugin(runtimeConfig.Network.CNIPluginDirs) && !options.DisableDNS { if options.Internal { logrus.Warnf("dnsname and --internal networks are incompatible. dnsname plugin not configured for network %s", name) } else { diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 8bf532f66c..aac02d8cc9 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -4,7 +4,6 @@ package libpod import ( "bytes" - "context" "crypto/rand" "fmt" "io" @@ -29,7 +28,9 @@ import ( "github.com/containers/podman/v3/pkg/netns" "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/podman/v3/pkg/rootlessport" + "github.com/containers/podman/v3/pkg/util" "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" @@ -46,6 +47,9 @@ const ( // slirp4netnsMTU the default MTU override slirp4netnsMTU = 65520 + + // rootlessCNINSName is the file name for the rootless network namespace bind mount + rootlessCNINSName = "rootless-cni-ns" ) // Get an OCICNI network config @@ -102,8 +106,222 @@ func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, port return ctrNetwork } +type rootlessCNI struct { + ns ns.NetNS + dir string +} + +func (r *rootlessCNI) Do(toRun func() error) error { + err := r.ns.Do(func(_ ns.NetNS) error { + // before we can run the given function + // we have to setup all mounts correctly + + // create a new mount namespace + // this should happen inside the netns thread + err := unix.Unshare(unix.CLONE_NEWNS) + if err != nil { + return errors.Wrapf(err, "cannot create a new mount namespace") + } + + netNsDir, err := netns.GetNSRunDir() + if err != nil { + return errors.Wrap(err, "could not get network namespace directory") + } + newNetNsDir := filepath.Join(r.dir, 
netNsDir) + // mount the netns into the new run to keep them accessible + // otherwise cni setup will fail because it cannot access the netns files + err = unix.Mount(netNsDir, newNetNsDir, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "") + if err != nil { + return errors.Wrap(err, "failed to mount netns directory for rootless cni") + } + + // also keep /run/systemd if it exists + // many files are symlinked into this dir, for example systemd-resolved links + // /etc/resolv.conf but the dnsname plugin needs access to this file + runSystemd := "/run/systemd" + _, err = os.Stat(runSystemd) + if err == nil { + newRunSystemd := filepath.Join(r.dir, runSystemd[1:]) + err = unix.Mount(runSystemd, newRunSystemd, "none", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return errors.Wrap(err, "failed to mount /run/systemd directory for rootless cni") + } + } + + // cni plugins need access to /var and /run + runDir := filepath.Join(r.dir, "run") + varDir := filepath.Join(r.dir, "var") + // make sure to mount var first + err = unix.Mount(varDir, "/var", "none", unix.MS_BIND, "") + if err != nil { + return errors.Wrap(err, "failed to mount /var for rootless cni") + } + // recursive mount to keep the netns mount + err = unix.Mount(runDir, "/run", "none", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return errors.Wrap(err, "failed to mount /run for rootless cni") + } + + // run the given function in the correct namespace + err = toRun() + return err + }) + return err +} + +// getRootlessCNINetNs returns the rootless cni object. If create is set to true +// the rootless cni namespace will be created if it does not exists already. 
+func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { + var rootlessCNINS *rootlessCNI + if rootless.IsRootless() { + runDir, err := util.GetRuntimeDir() + if err != nil { + return nil, err + } + cniDir := filepath.Join(runDir, "rootless-cni") + + nsDir, err := netns.GetNSRunDir() + if err != nil { + return nil, err + } + path := filepath.Join(nsDir, rootlessCNINSName) + ns, err := ns.GetNS(path) + if err != nil { + if new { + // create a new namespace + logrus.Debug("creating rootless cni network namespace") + ns, err = netns.NewNSWithName(rootlessCNINSName) + if err != nil { + return nil, errors.Wrap(err, "error creating rootless cni network namespace") + } + + // setup slirp4netns here + path := r.config.Engine.NetworkCmdPath + if path == "" { + var err error + path, err = exec.LookPath("slirp4netns") + if err != nil { + return nil, err + } + } + + syncR, syncW, err := os.Pipe() + if err != nil { + return nil, errors.Wrapf(err, "failed to open pipe") + } + defer errorhandling.CloseQuiet(syncR) + defer errorhandling.CloseQuiet(syncW) + + netOptions, err := parseSlirp4netnsNetworkOptions(r, nil) + if err != nil { + return nil, err + } + slirpFeatures, err := checkSlirpFlags(path) + if err != nil { + return nil, errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err) + } + cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures) + if err != nil { + return nil, err + } + // the slirp4netns arguments being passed are describes as follows: + // from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns + // -c, --configure Brings up the tap interface + // -e, --exit-fd=FD specify the FD for terminating slirp4netns + // -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished + cmdArgs = append(cmdArgs, "-c", "-r", "3") + cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0") + + cmd := exec.Command(path, cmdArgs...) 
+ logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + // workaround for https://github.com/rootless-containers/slirp4netns/pull/153 + if !netOptions.noPivotRoot && slirpFeatures.HasEnableSandbox { + cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS + cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS + } + + // Leak one end of the pipe in slirp4netns + cmd.ExtraFiles = append(cmd.ExtraFiles, syncW) + + logPath := filepath.Join(r.config.Engine.TmpDir, "slirp4netns-rootless-cni.log") + logFile, err := os.Create(logPath) + if err != nil { + return nil, errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath) + } + defer logFile.Close() + // Unlink immediately the file so we won't need to worry about cleaning it up later. + // It is still accessible through the open fd logFile. + if err := os.Remove(logPath); err != nil { + return nil, errors.Wrapf(err, "delete file %s", logPath) + } + cmd.Stdout = logFile + cmd.Stderr = logFile + if err := cmd.Start(); err != nil { + return nil, errors.Wrapf(err, "failed to start slirp4netns process") + } + defer func() { + if err := cmd.Process.Release(); err != nil { + logrus.Errorf("unable to release command process: %q", err) + } + }() + + if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil { + return nil, err + } + + // create cni directories to store files + // they will be bind mounted to the correct location in a extra mount ns + err = os.MkdirAll(filepath.Join(cniDir, "var"), 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni var directory") + } + runDir := filepath.Join(cniDir, "run") + err = os.MkdirAll(runDir, 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni run directory") + } + // relabel the new run directory to the iptables /run label + // this is important, otherwise the iptables command will fail + err = label.Relabel(runDir, 
"system_u:object_r:iptables_var_run_t:s0", false) + if err != nil { + return nil, errors.Wrap(err, "could not create relabel rootless-cni run directory") + } + // create systemd run directory + err = os.MkdirAll(filepath.Join(runDir, "systemd"), 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni systemd directory") + } + // create the directory for the netns files at the same location + // relative to the rootless-cni location + err = os.MkdirAll(filepath.Join(cniDir, nsDir), 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni netns directory") + } + } else { + // return a error if we could not get the namespace and should no create one + return nil, errors.Wrap(err, "error getting rootless cni network namespace") + } + } + + rootlessCNINS = &rootlessCNI{ + ns: ns, + dir: cniDir, + } + } + return rootlessCNINS, nil +} + // Create and configure a new network namespace for a container func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Result, error) { + rootlessCNINS, err := r.getRootlessCNINetNs(true) + if err != nil { + return nil, err + } + var requestedIP net.IP if ctr.requestedIP != nil { requestedIP = ctr.requestedIP @@ -147,17 +365,31 @@ func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Re podNetwork.Aliases = aliases } - results, err := r.netPlugin.SetUpPod(podNetwork) - if err != nil { - return nil, errors.Wrapf(err, "error configuring network namespace for container %s", ctr.ID()) - } - defer func() { + var results []ocicni.NetResult + setUpPod := func() error { + results, err = r.netPlugin.SetUpPod(podNetwork) if err != nil { - if err2 := r.netPlugin.TearDownPod(podNetwork); err2 != nil { - logrus.Errorf("Error tearing down partially created network namespace for container %s: %v", ctr.ID(), err2) - } + return errors.Wrapf(err, "error configuring network namespace for container %s", ctr.ID()) } - }() + defer func() { + if err != nil { 
+ if err2 := r.netPlugin.TearDownPod(podNetwork); err2 != nil { + logrus.Errorf("Error tearing down partially created network namespace for container %s: %v", ctr.ID(), err2) + } + } + }() + return nil + } + // rootlessCNINS is nil if we are root + if rootlessCNINS != nil { + // execute the cni setup in the rootless net ns + err = rootlessCNINS.Do(setUpPod) + } else { + err = setUpPod() + } + if err != nil { + return nil, err + } networkStatus := make([]*cnitypes.Result, 0) for idx, r := range results { @@ -192,7 +424,7 @@ func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result, logrus.Debugf("Made network namespace at %s for container %s", ctrNS.Path(), ctr.ID()) networkStatus := []*cnitypes.Result{} - if !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() { + if !ctr.config.NetMode.IsSlirp4netns() { networkStatus, err = r.configureNetNS(ctr, ctrNS) } return ctrNS, networkStatus, err @@ -221,6 +453,17 @@ type slirp4netnsCmd struct { Args slirp4netnsCmdArg `json:"arguments"` } +type slirp4netnsNetworkOptions struct { + cidr string + disableHostLoopback bool + enableIPv6 bool + isSlirpHostForward bool + noPivotRoot bool + mtu int + outboundAddr string + outboundAddr6 string +} + func checkSlirpFlags(path string) (*slirpFeatures, error) { cmd := exec.Command(path, "--help") out, err := cmd.CombinedOutput() @@ -256,163 +499,171 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) error { return nil } -// setupSlirp4netns can be called in rootful as well as in rootless -func (r *Runtime) setupSlirp4netns(ctr *Container) error { - path := r.config.Engine.NetworkCmdPath - slirpOptions := r.config.Engine.NetworkCmdOptions - noPivotRoot := r.config.Engine.NoPivotRoot - if path == "" { - var err error - path, err = exec.LookPath("slirp4netns") - if err != nil { - logrus.Errorf("could not find slirp4netns, the network namespace won't be configured: %v", err) - return nil - } - } - - syncR, syncW, err := os.Pipe() - if err != nil { - 
return errors.Wrapf(err, "failed to open pipe") +func parseSlirp4netnsNetworkOptions(r *Runtime, extraOptions []string) (*slirp4netnsNetworkOptions, error) { + slirpOptions := append(r.config.Engine.NetworkCmdOptions, extraOptions...) + slirp4netnsOpts := &slirp4netnsNetworkOptions{ + // overwrite defaults + disableHostLoopback: true, + mtu: slirp4netnsMTU, + noPivotRoot: r.config.Engine.NoPivotRoot, } - defer errorhandling.CloseQuiet(syncR) - defer errorhandling.CloseQuiet(syncW) - - havePortMapping := len(ctr.Config().PortMappings) > 0 - logPath := filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID)) - - cidr := "" - isSlirpHostForward := false - disableHostLoopback := true - enableIPv6 := false - outboundAddr := "" - outboundAddr6 := "" - mtu := slirp4netnsMTU - - if ctr.config.NetworkOptions != nil { - slirpOptions = append(slirpOptions, ctr.config.NetworkOptions["slirp4netns"]...) - } - for _, o := range slirpOptions { parts := strings.SplitN(o, "=", 2) if len(parts) < 2 { - return errors.Errorf("unknown option for slirp4netns: %q", o) + return nil, errors.Errorf("unknown option for slirp4netns: %q", o) } option, value := parts[0], parts[1] switch option { case "cidr": ipv4, _, err := net.ParseCIDR(value) if err != nil || ipv4.To4() == nil { - return errors.Errorf("invalid cidr %q", value) + return nil, errors.Errorf("invalid cidr %q", value) } - cidr = value + slirp4netnsOpts.cidr = value case "port_handler": switch value { case "slirp4netns": - isSlirpHostForward = true + slirp4netnsOpts.isSlirpHostForward = true case "rootlesskit": - isSlirpHostForward = false + slirp4netnsOpts.isSlirpHostForward = false default: - return errors.Errorf("unknown port_handler for slirp4netns: %q", value) + return nil, errors.Errorf("unknown port_handler for slirp4netns: %q", value) } case "allow_host_loopback": switch value { case "true": - disableHostLoopback = false + slirp4netnsOpts.disableHostLoopback = false case "false": - 
disableHostLoopback = true + slirp4netnsOpts.disableHostLoopback = true default: - return errors.Errorf("invalid value of allow_host_loopback for slirp4netns: %q", value) + return nil, errors.Errorf("invalid value of allow_host_loopback for slirp4netns: %q", value) } case "enable_ipv6": switch value { case "true": - enableIPv6 = true + slirp4netnsOpts.enableIPv6 = true case "false": - enableIPv6 = false + slirp4netnsOpts.enableIPv6 = false default: - return errors.Errorf("invalid value of enable_ipv6 for slirp4netns: %q", value) + return nil, errors.Errorf("invalid value of enable_ipv6 for slirp4netns: %q", value) } case "outbound_addr": ipv4 := net.ParseIP(value) if ipv4 == nil || ipv4.To4() == nil { _, err := net.InterfaceByName(value) if err != nil { - return errors.Errorf("invalid outbound_addr %q", value) + return nil, errors.Errorf("invalid outbound_addr %q", value) } } - outboundAddr = value + slirp4netnsOpts.outboundAddr = value case "outbound_addr6": ipv6 := net.ParseIP(value) if ipv6 == nil || ipv6.To4() != nil { _, err := net.InterfaceByName(value) if err != nil { - return errors.Errorf("invalid outbound_addr6: %q", value) + return nil, errors.Errorf("invalid outbound_addr6: %q", value) } } - outboundAddr6 = value + slirp4netnsOpts.outboundAddr6 = value case "mtu": - mtu, err = strconv.Atoi(value) - if mtu < 68 || err != nil { - return errors.Errorf("invalid mtu %q", value) + var err error + slirp4netnsOpts.mtu, err = strconv.Atoi(value) + if slirp4netnsOpts.mtu < 68 || err != nil { + return nil, errors.Errorf("invalid mtu %q", value) } default: - return errors.Errorf("unknown option for slirp4netns: %q", o) + return nil, errors.Errorf("unknown option for slirp4netns: %q", o) } } + return slirp4netnsOpts, nil +} +func createBasicSlirp4netnsCmdArgs(options *slirp4netnsNetworkOptions, features *slirpFeatures) ([]string, error) { cmdArgs := []string{} - slirpFeatures, err := checkSlirpFlags(path) - if err != nil { - return errors.Wrapf(err, "error checking 
slirp4netns binary %s: %q", path, err) - } - if disableHostLoopback && slirpFeatures.HasDisableHostLoopback { + if options.disableHostLoopback && features.HasDisableHostLoopback { cmdArgs = append(cmdArgs, "--disable-host-loopback") } - if mtu > -1 && slirpFeatures.HasMTU { - cmdArgs = append(cmdArgs, fmt.Sprintf("--mtu=%d", mtu)) + if options.mtu > -1 && features.HasMTU { + cmdArgs = append(cmdArgs, fmt.Sprintf("--mtu=%d", options.mtu)) } - if !noPivotRoot && slirpFeatures.HasEnableSandbox { + if !options.noPivotRoot && features.HasEnableSandbox { cmdArgs = append(cmdArgs, "--enable-sandbox") } - if slirpFeatures.HasEnableSeccomp { + if features.HasEnableSeccomp { cmdArgs = append(cmdArgs, "--enable-seccomp") } - if cidr != "" { - if !slirpFeatures.HasCIDR { - return errors.Errorf("cidr not supported") + if options.cidr != "" { + if !features.HasCIDR { + return nil, errors.Errorf("cidr not supported") } - cmdArgs = append(cmdArgs, fmt.Sprintf("--cidr=%s", cidr)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--cidr=%s", options.cidr)) } - if enableIPv6 { - if !slirpFeatures.HasIPv6 { - return errors.Errorf("enable_ipv6 not supported") + if options.enableIPv6 { + if !features.HasIPv6 { + return nil, errors.Errorf("enable_ipv6 not supported") } cmdArgs = append(cmdArgs, "--enable-ipv6") } - if outboundAddr != "" { - if !slirpFeatures.HasOutboundAddr { - return errors.Errorf("outbound_addr not supported") + if options.outboundAddr != "" { + if !features.HasOutboundAddr { + return nil, errors.Errorf("outbound_addr not supported") } - cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr=%s", outboundAddr)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr=%s", options.outboundAddr)) } - if outboundAddr6 != "" { - if !slirpFeatures.HasOutboundAddr || !slirpFeatures.HasIPv6 { - return errors.Errorf("outbound_addr6 not supported") + if options.outboundAddr6 != "" { + if !features.HasOutboundAddr || !features.HasIPv6 { + return nil, errors.Errorf("outbound_addr6 not 
supported") } - if !enableIPv6 { - return errors.Errorf("enable_ipv6=true is required for outbound_addr6") + if !options.enableIPv6 { + return nil, errors.Errorf("enable_ipv6=true is required for outbound_addr6") } - cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr6=%s", outboundAddr6)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr6=%s", options.outboundAddr6)) } - var apiSocket string - if havePortMapping && isSlirpHostForward { - apiSocket = filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID)) - cmdArgs = append(cmdArgs, "--api-socket", apiSocket) + return cmdArgs, nil +} + +// setupSlirp4netns can be called in rootful as well as in rootless +func (r *Runtime) setupSlirp4netns(ctr *Container) error { + path := r.config.Engine.NetworkCmdPath + if path == "" { + var err error + path, err = exec.LookPath("slirp4netns") + if err != nil { + logrus.Errorf("could not find slirp4netns, the network namespace won't be configured: %v", err) + return nil + } + } + + syncR, syncW, err := os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to open pipe") + } + defer errorhandling.CloseQuiet(syncR) + defer errorhandling.CloseQuiet(syncW) + + havePortMapping := len(ctr.Config().PortMappings) > 0 + logPath := filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID)) + + ctrNetworkSlipOpts := []string{} + if ctr.config.NetworkOptions != nil { + ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, ctr.config.NetworkOptions["slirp4netns"]...) 
+ } + netOptions, err := parseSlirp4netnsNetworkOptions(r, ctrNetworkSlipOpts) + if err != nil { + return err + } + slirpFeatures, err := checkSlirpFlags(path) + if err != nil { + return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err) + } + cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures) + if err != nil { + return err } // the slirp4netns arguments being passed are describes as follows: @@ -421,6 +672,12 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error { // -e, --exit-fd=FD specify the FD for terminating slirp4netns // -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4") + + var apiSocket string + if havePortMapping && netOptions.isSlirpHostForward { + apiSocket = filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID)) + cmdArgs = append(cmdArgs, "--api-socket", apiSocket) + } netnsPath := "" if !ctr.config.PostConfigureNetNS { ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() @@ -444,7 +701,7 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error { } // workaround for https://github.com/rootless-containers/slirp4netns/pull/153 - if !noPivotRoot && slirpFeatures.HasEnableSandbox { + if !netOptions.noPivotRoot && slirpFeatures.HasEnableSandbox { cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS } @@ -478,7 +735,7 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error { } if havePortMapping { - if isSlirpHostForward { + if netOptions.isSlirpHostForward { return r.setupRootlessPortMappingViaSlirp(ctr, cmd, apiSocket) } return r.setupRootlessPortMappingViaRLK(ctr, netnsPath) @@ -789,8 +1046,11 @@ func (r *Runtime) teardownCNI(ctr *Container) error { return err } - // rootless containers do not use the CNI plugin directly - if !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 { + if 
!ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 { + rootlessCNINS, err := r.getRootlessCNINetNs(false) + if err != nil { + return err + } var requestedIP net.IP if ctr.requestedIP != nil { requestedIP = ctr.requestedIP @@ -811,9 +1071,21 @@ func (r *Runtime) teardownCNI(ctr *Container) error { podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctr.state.NetNS.Path(), networks, ctr.config.PortMappings, requestedIP, requestedMAC, ctr.state.NetInterfaceDescriptions) - if err := r.netPlugin.TearDownPod(podNetwork); err != nil { - return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID()) + tearDownPod := func() error { + if err := r.netPlugin.TearDownPod(podNetwork); err != nil { + return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID()) + } + return nil + } + + // rootlessCNINS is nil if we are root + if rootlessCNINS != nil { + // execute the cni setup in the rootless net ns + err = rootlessCNINS.Do(tearDownPod) + } else { + err = tearDownPod() } + return err } return nil } @@ -824,18 +1096,6 @@ func (r *Runtime) teardownNetNS(ctr *Container) error { return err } - networks, _, err := ctr.networks() - if err != nil { - return err - } - - // CNI-in-slirp4netns - if rootless.IsRootless() && len(networks) != 0 { - if err := DeallocRootlessCNI(context.Background(), ctr); err != nil { - return errors.Wrapf(err, "error tearing down CNI-in-slirp4netns for container %s", ctr.ID()) - } - } - // First unmount the namespace if err := netns.UnmountNS(ctr.state.NetNS); err != nil { return errors.Wrapf(err, "error unmounting network namespace for container %s", ctr.ID()) diff --git a/libpod/runtime.go b/libpod/runtime.go index 201482c659..d4bb691ef7 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -436,13 +436,11 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) { } // Set up the CNI net plugin - if !rootless.IsRootless() { - netPlugin, err := 
ocicni.InitCNI(runtime.config.Network.DefaultNetwork, runtime.config.Network.NetworkConfigDir, runtime.config.Network.CNIPluginDirs...) - if err != nil { - return errors.Wrapf(err, "error configuring CNI network plugin") - } - runtime.netPlugin = netPlugin + netPlugin, err := ocicni.InitCNI(runtime.config.Network.DefaultNetwork, runtime.config.Network.NetworkConfigDir, runtime.config.Network.CNIPluginDirs...) + if err != nil { + return errors.Wrapf(err, "error configuring CNI network plugin") } + runtime.netPlugin = netPlugin // We now need to see if the system has restarted // We check for the presence of a file in our tmp directory to verify this diff --git a/pkg/netns/netns_linux.go b/pkg/netns/netns_linux.go index 0b7d1782c9..ecefb65ffd 100644 --- a/pkg/netns/netns_linux.go +++ b/pkg/netns/netns_linux.go @@ -35,9 +35,9 @@ import ( "golang.org/x/sys/unix" ) -// get NSRunDir returns the dir of where to create the netNS. When running +// GetNSRunDir returns the dir of where to create the netNS. When running // rootless, it needs to be at a location writable by user. -func getNSRunDir() (string, error) { +func GetNSRunDir() (string, error) { if rootless.IsRootless() { rootlessDir, err := util.GetRuntimeDir() if err != nil { @@ -51,15 +51,21 @@ func getNSRunDir() (string, error) { // NewNS creates a new persistent (bind-mounted) network namespace and returns // an object representing that namespace, without switching to it. 
func NewNS() (ns.NetNS, error) { - nsRunDir, err := getNSRunDir() + b := make([]byte, 16) + _, err := rand.Reader.Read(b) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to generate random netns name: %v", err) } + nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) + return NewNSWithName(nsName) +} - b := make([]byte, 16) - _, err = rand.Reader.Read(b) +// NewNSWithName creates a new persistent (bind-mounted) network namespace and returns +// an object representing that namespace, without switching to it. +func NewNSWithName(name string) (ns.NetNS, error) { + nsRunDir, err := GetNSRunDir() if err != nil { - return nil, fmt.Errorf("failed to generate random netns name: %v", err) + return nil, err } // Create the directory for mounting network namespaces @@ -93,10 +99,8 @@ func NewNS() (ns.NetNS, error) { } } - nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) - // create an empty file at the mount point - nsPath := path.Join(nsRunDir, nsName) + nsPath := path.Join(nsRunDir, name) mountPointFd, err := os.Create(nsPath) if err != nil { return nil, err @@ -177,7 +181,7 @@ func NewNS() (ns.NetNS, error) { // UnmountNS unmounts the NS held by the netns object func UnmountNS(ns ns.NetNS) error { - nsRunDir, err := getNSRunDir() + nsRunDir, err := GetNSRunDir() if err != nil { return err } diff --git a/test/system/500-networking.bats b/test/system/500-networking.bats index 4868ad6a0f..804dd46b12 100644 --- a/test/system/500-networking.bats +++ b/test/system/500-networking.bats @@ -143,13 +143,6 @@ load helpers run_podman network rm $mynetname run_podman 1 network rm $mynetname - - # rootless CNI leaves behind an image pulled by SHA, hence with no tag. - # Remove it if present; we can only remove it by ID. 
- run_podman images --format '{{.Id}}' rootless-cni-infra - if [ -n "$output" ]; then - run_podman rmi $output - fi } @test "podman network reload" { From 00b2ec5e6f8ad332411271df1bdd968493cab2c2 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 18 Feb 2021 14:53:53 +0100 Subject: [PATCH 02/13] Add rootless support for cni and --uidmap This is supported with the new rootless cni logic. Signed-off-by: Paul Holzinger --- libpod/container_internal.go | 4 +--- libpod/networking_linux.go | 7 +++++-- pkg/specgen/generate/namespaces.go | 3 --- test/e2e/run_networking_test.go | 20 ++++++++++++-------- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 106e2569b7..a53027ab23 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -966,9 +966,7 @@ func (c *Container) completeNetworkSetup() error { if err := c.syncContainer(); err != nil { return err } - if rootless.IsRootless() { - return c.runtime.setupRootlessNetNS(c) - } else if c.config.NetMode.IsSlirp4netns() { + if c.config.NetMode.IsSlirp4netns() { return c.runtime.setupSlirp4netns(c) } if err := c.runtime.setupNetNS(c); err != nil { diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index aac02d8cc9..1bfb82cdba 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -972,8 +972,11 @@ func (r *Runtime) setupNetNS(ctr *Container) error { if _, err := rand.Reader.Read(b); err != nil { return errors.Wrapf(err, "failed to generate random netns name") } - - nsPath := fmt.Sprintf("/run/netns/cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) + nsPath, err := netns.GetNSRunDir() + if err != nil { + return err + } + nsPath = filepath.Join(nsPath, fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])) if err := os.MkdirAll(filepath.Dir(nsPath), 0711); err != nil { return err diff --git a/pkg/specgen/generate/namespaces.go 
b/pkg/specgen/generate/namespaces.go index b87375a925..845dfdad77 100644 --- a/pkg/specgen/generate/namespaces.go +++ b/pkg/specgen/generate/namespaces.go @@ -236,9 +236,6 @@ func namespaceOptions(ctx context.Context, s *specgen.SpecGenerator, rt *libpod. case specgen.Private: fallthrough case specgen.Bridge: - if postConfigureNetNS && rootless.IsRootless() { - return nil, errors.New("CNI networks not supported with user namespaces") - } portMappings, err := createPortMappings(ctx, s, img) if err != nil { return nil, err diff --git a/test/e2e/run_networking_test.go b/test/e2e/run_networking_test.go index 0e6e636bc3..43eb8fe4e8 100644 --- a/test/e2e/run_networking_test.go +++ b/test/e2e/run_networking_test.go @@ -641,22 +641,26 @@ var _ = Describe("Podman run networking", func() { Expect(run.OutputToString()).To(ContainSubstring(ipAddr)) }) - It("podman rootless fails custom CNI network with --uidmap", func() { - SkipIfNotRootless("The configuration works with rootless") - + It("podman cni network works across user ns", func() { netName := stringid.GenerateNonCryptoID() create := podmanTest.Podman([]string{"network", "create", netName}) create.WaitWithDefaultTimeout() Expect(create.ExitCode()).To(BeZero()) defer podmanTest.removeCNINetwork(netName) - run := podmanTest.Podman([]string{"run", "--rm", "--net", netName, "--uidmap", "0:1:4096", ALPINE, "true"}) + name := "nc-server" + run := podmanTest.Podman([]string{"run", "-d", "--name", name, "--net", netName, ALPINE, "nc", "-l", "-p", "8080"}) + run.WaitWithDefaultTimeout() + Expect(run.ExitCode()).To(Equal(0)) + + run = podmanTest.Podman([]string{"run", "--rm", "--net", netName, "--uidmap", "0:1:4096", ALPINE, "sh", "-c", fmt.Sprintf("echo podman | nc -w 1 %s.dns.podman 8080", name)}) run.WaitWithDefaultTimeout() - Expect(run.ExitCode()).To(Equal(125)) + Expect(run.ExitCode()).To(Equal(0)) - remove := podmanTest.Podman([]string{"network", "rm", netName}) - remove.WaitWithDefaultTimeout() - 
Expect(remove.ExitCode()).To(BeZero()) + log := podmanTest.Podman([]string{"logs", name}) + log.WaitWithDefaultTimeout() + Expect(log.ExitCode()).To(Equal(0)) + Expect(log.OutputToString()).To(Equal("podman")) }) It("podman run with new:pod and static-ip", func() { From 0743ead71289cf6198a14fddf071972df9b6a332 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 18 Feb 2021 23:37:03 +0100 Subject: [PATCH 03/13] Fix pod infra container cni network setup For rootless users the infra container used the slirp4netns net mode even when bridge was requested. We can support bridge networking for rootless users so we have to allow this. The default is not changed. Signed-off-by: Paul Holzinger --- libpod/runtime_pod_infra_linux.go | 2 +- pkg/specgen/generate/pod_create.go | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/libpod/runtime_pod_infra_linux.go b/libpod/runtime_pod_infra_linux.go index 0a09e40eae..1ae375ed98 100644 --- a/libpod/runtime_pod_infra_linux.go +++ b/libpod/runtime_pod_infra_linux.go @@ -104,7 +104,7 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm default: // Since user namespace sharing is not implemented, we only need to check if it's rootless netmode := "bridge" - if isRootless || p.config.InfraContainer.Slirp4netns { + if p.config.InfraContainer.Slirp4netns { netmode = "slirp4netns" if len(p.config.InfraContainer.NetworkOptions) != 0 { options = append(options, WithNetworkOptions(p.config.InfraContainer.NetworkOptions)) diff --git a/pkg/specgen/generate/pod_create.go b/pkg/specgen/generate/pod_create.go index 5d7bf19308..20151f016b 100644 --- a/pkg/specgen/generate/pod_create.go +++ b/pkg/specgen/generate/pod_create.go @@ -4,6 +4,7 @@ import ( "context" "github.com/containers/podman/v3/libpod" + "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/podman/v3/pkg/specgen" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -94,8 +95,19 @@ func
createPodOptions(p *specgen.PodSpecGenerator, rt *libpod.Runtime) ([]libpod } switch p.NetNS.NSMode { - case specgen.Bridge, specgen.Default, "": - logrus.Debugf("Pod using default network mode") + case specgen.Default, "": + if p.NoInfra { + logrus.Debugf("No networking because the infra container is missing") + break + } + if rootless.IsRootless() { + logrus.Debugf("Pod will use slirp4netns") + options = append(options, libpod.WithPodSlirp4netns(p.NetworkOptions)) + } else { + logrus.Debugf("Pod using bridge network mode") + } + case specgen.Bridge: + logrus.Debugf("Pod using bridge network mode") case specgen.Host: logrus.Debugf("Pod will use host networking") options = append(options, libpod.WithPodHostNetwork()) From 94e67ba9a27537a6d08122831e3b1b8d574f531e Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Sun, 21 Feb 2021 00:07:42 +0100 Subject: [PATCH 04/13] Move slirp4netns functions into an extra file This should make maintenance easier. Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 520 ----------------------------- libpod/networking_slirp4netns.go | 538 +++++++++++++++++++++++++++++++ 2 files changed, 538 insertions(+), 520 deletions(-) create mode 100644 libpod/networking_slirp4netns.go diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 1bfb82cdba..2877427068 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -3,18 +3,14 @@ package libpod import ( - "bytes" "crypto/rand" "fmt" - "io" - "io/ioutil" "net" "os" "os/exec" "path/filepath" "regexp" "sort" - "strconv" "strings" "syscall" "time" @@ -27,7 +23,6 @@ import ( "github.com/containers/podman/v3/pkg/errorhandling" "github.com/containers/podman/v3/pkg/netns" "github.com/containers/podman/v3/pkg/rootless" - "github.com/containers/podman/v3/pkg/rootlessport" "github.com/containers/podman/v3/pkg/util" "github.com/cri-o/ocicni/pkg/ocicni" "github.com/opencontainers/selinux/go-selinux/label" @@ -430,57 +425,6 @@ func (r *Runtime) 
createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result, return ctrNS, networkStatus, err } -type slirpFeatures struct { - HasDisableHostLoopback bool - HasMTU bool - HasEnableSandbox bool - HasEnableSeccomp bool - HasCIDR bool - HasOutboundAddr bool - HasIPv6 bool -} - -type slirp4netnsCmdArg struct { - Proto string `json:"proto,omitempty"` - HostAddr string `json:"host_addr"` - HostPort int32 `json:"host_port"` - GuestAddr string `json:"guest_addr"` - GuestPort int32 `json:"guest_port"` -} - -type slirp4netnsCmd struct { - Execute string `json:"execute"` - Args slirp4netnsCmdArg `json:"arguments"` -} - -type slirp4netnsNetworkOptions struct { - cidr string - disableHostLoopback bool - enableIPv6 bool - isSlirpHostForward bool - noPivotRoot bool - mtu int - outboundAddr string - outboundAddr6 string -} - -func checkSlirpFlags(path string) (*slirpFeatures, error) { - cmd := exec.Command(path, "--help") - out, err := cmd.CombinedOutput() - if err != nil { - return nil, errors.Wrapf(err, "slirp4netns %q", out) - } - return &slirpFeatures{ - HasDisableHostLoopback: strings.Contains(string(out), "--disable-host-loopback"), - HasMTU: strings.Contains(string(out), "--mtu"), - HasEnableSandbox: strings.Contains(string(out), "--enable-sandbox"), - HasEnableSeccomp: strings.Contains(string(out), "--enable-seccomp"), - HasCIDR: strings.Contains(string(out), "--cidr"), - HasOutboundAddr: strings.Contains(string(out), "--outbound-addr"), - HasIPv6: strings.Contains(string(out), "--enable-ipv6"), - }, nil -} - // Configure the network namespace for a rootless container func (r *Runtime) setupRootlessNetNS(ctr *Container) error { if ctr.config.NetMode.IsSlirp4netns() { @@ -499,470 +443,6 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) error { return nil } -func parseSlirp4netnsNetworkOptions(r *Runtime, extraOptions []string) (*slirp4netnsNetworkOptions, error) { - slirpOptions := append(r.config.Engine.NetworkCmdOptions, extraOptions...) 
- slirp4netnsOpts := &slirp4netnsNetworkOptions{ - // overwrite defaults - disableHostLoopback: true, - mtu: slirp4netnsMTU, - noPivotRoot: r.config.Engine.NoPivotRoot, - } - for _, o := range slirpOptions { - parts := strings.SplitN(o, "=", 2) - if len(parts) < 2 { - return nil, errors.Errorf("unknown option for slirp4netns: %q", o) - } - option, value := parts[0], parts[1] - switch option { - case "cidr": - ipv4, _, err := net.ParseCIDR(value) - if err != nil || ipv4.To4() == nil { - return nil, errors.Errorf("invalid cidr %q", value) - } - slirp4netnsOpts.cidr = value - case "port_handler": - switch value { - case "slirp4netns": - slirp4netnsOpts.isSlirpHostForward = true - case "rootlesskit": - slirp4netnsOpts.isSlirpHostForward = false - default: - return nil, errors.Errorf("unknown port_handler for slirp4netns: %q", value) - } - case "allow_host_loopback": - switch value { - case "true": - slirp4netnsOpts.disableHostLoopback = false - case "false": - slirp4netnsOpts.disableHostLoopback = true - default: - return nil, errors.Errorf("invalid value of allow_host_loopback for slirp4netns: %q", value) - } - case "enable_ipv6": - switch value { - case "true": - slirp4netnsOpts.enableIPv6 = true - case "false": - slirp4netnsOpts.enableIPv6 = false - default: - return nil, errors.Errorf("invalid value of enable_ipv6 for slirp4netns: %q", value) - } - case "outbound_addr": - ipv4 := net.ParseIP(value) - if ipv4 == nil || ipv4.To4() == nil { - _, err := net.InterfaceByName(value) - if err != nil { - return nil, errors.Errorf("invalid outbound_addr %q", value) - } - } - slirp4netnsOpts.outboundAddr = value - case "outbound_addr6": - ipv6 := net.ParseIP(value) - if ipv6 == nil || ipv6.To4() != nil { - _, err := net.InterfaceByName(value) - if err != nil { - return nil, errors.Errorf("invalid outbound_addr6: %q", value) - } - } - slirp4netnsOpts.outboundAddr6 = value - case "mtu": - var err error - slirp4netnsOpts.mtu, err = strconv.Atoi(value) - if slirp4netnsOpts.mtu < 
68 || err != nil { - return nil, errors.Errorf("invalid mtu %q", value) - } - default: - return nil, errors.Errorf("unknown option for slirp4netns: %q", o) - } - } - return slirp4netnsOpts, nil -} - -func createBasicSlirp4netnsCmdArgs(options *slirp4netnsNetworkOptions, features *slirpFeatures) ([]string, error) { - cmdArgs := []string{} - if options.disableHostLoopback && features.HasDisableHostLoopback { - cmdArgs = append(cmdArgs, "--disable-host-loopback") - } - if options.mtu > -1 && features.HasMTU { - cmdArgs = append(cmdArgs, fmt.Sprintf("--mtu=%d", options.mtu)) - } - if !options.noPivotRoot && features.HasEnableSandbox { - cmdArgs = append(cmdArgs, "--enable-sandbox") - } - if features.HasEnableSeccomp { - cmdArgs = append(cmdArgs, "--enable-seccomp") - } - - if options.cidr != "" { - if !features.HasCIDR { - return nil, errors.Errorf("cidr not supported") - } - cmdArgs = append(cmdArgs, fmt.Sprintf("--cidr=%s", options.cidr)) - } - - if options.enableIPv6 { - if !features.HasIPv6 { - return nil, errors.Errorf("enable_ipv6 not supported") - } - cmdArgs = append(cmdArgs, "--enable-ipv6") - } - - if options.outboundAddr != "" { - if !features.HasOutboundAddr { - return nil, errors.Errorf("outbound_addr not supported") - } - cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr=%s", options.outboundAddr)) - } - - if options.outboundAddr6 != "" { - if !features.HasOutboundAddr || !features.HasIPv6 { - return nil, errors.Errorf("outbound_addr6 not supported") - } - if !options.enableIPv6 { - return nil, errors.Errorf("enable_ipv6=true is required for outbound_addr6") - } - cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr6=%s", options.outboundAddr6)) - } - - return cmdArgs, nil -} - -// setupSlirp4netns can be called in rootful as well as in rootless -func (r *Runtime) setupSlirp4netns(ctr *Container) error { - path := r.config.Engine.NetworkCmdPath - if path == "" { - var err error - path, err = exec.LookPath("slirp4netns") - if err != nil { - 
logrus.Errorf("could not find slirp4netns, the network namespace won't be configured: %v", err) - return nil - } - } - - syncR, syncW, err := os.Pipe() - if err != nil { - return errors.Wrapf(err, "failed to open pipe") - } - defer errorhandling.CloseQuiet(syncR) - defer errorhandling.CloseQuiet(syncW) - - havePortMapping := len(ctr.Config().PortMappings) > 0 - logPath := filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID)) - - ctrNetworkSlipOpts := []string{} - if ctr.config.NetworkOptions != nil { - ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, ctr.config.NetworkOptions["slirp4netns"]...) - } - netOptions, err := parseSlirp4netnsNetworkOptions(r, ctrNetworkSlipOpts) - if err != nil { - return err - } - slirpFeatures, err := checkSlirpFlags(path) - if err != nil { - return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err) - } - cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures) - if err != nil { - return err - } - - // the slirp4netns arguments being passed are describes as follows: - // from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns - // -c, --configure Brings up the tap interface - // -e, --exit-fd=FD specify the FD for terminating slirp4netns - // -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished - cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4") - - var apiSocket string - if havePortMapping && netOptions.isSlirpHostForward { - apiSocket = filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID)) - cmdArgs = append(cmdArgs, "--api-socket", apiSocket) - } - netnsPath := "" - if !ctr.config.PostConfigureNetNS { - ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() - if err != nil { - return errors.Wrapf(err, "failed to create rootless network sync pipe") - } - netnsPath = ctr.state.NetNS.Path() - cmdArgs = append(cmdArgs, "--netns-type=path", 
netnsPath, "tap0") - } else { - defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) - defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) - netnsPath = fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID) - // we don't use --netns-path here (unavailable for slirp4netns < v0.4) - cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0") - } - - cmd := exec.Command(path, cmdArgs...) - logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - - // workaround for https://github.com/rootless-containers/slirp4netns/pull/153 - if !netOptions.noPivotRoot && slirpFeatures.HasEnableSandbox { - cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS - cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS - } - - // Leak one end of the pipe in slirp4netns, the other will be sent to conmon - cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncR, syncW) - - logFile, err := os.Create(logPath) - if err != nil { - return errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath) - } - defer logFile.Close() - // Unlink immediately the file so we won't need to worry about cleaning it up later. - // It is still accessible through the open fd logFile. 
- if err := os.Remove(logPath); err != nil { - return errors.Wrapf(err, "delete file %s", logPath) - } - cmd.Stdout = logFile - cmd.Stderr = logFile - if err := cmd.Start(); err != nil { - return errors.Wrapf(err, "failed to start slirp4netns process") - } - defer func() { - if err := cmd.Process.Release(); err != nil { - logrus.Errorf("unable to release command process: %q", err) - } - }() - - if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil { - return err - } - - if havePortMapping { - if netOptions.isSlirpHostForward { - return r.setupRootlessPortMappingViaSlirp(ctr, cmd, apiSocket) - } - return r.setupRootlessPortMappingViaRLK(ctr, netnsPath) - } - return nil -} - -func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout time.Duration) error { - prog := filepath.Base(cmd.Path) - if len(cmd.Args) > 0 { - prog = cmd.Args[0] - } - b := make([]byte, 16) - for { - if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil { - return errors.Wrapf(err, "error setting %s pipe timeout", prog) - } - // FIXME: return err as soon as proc exits, without waiting for timeout - if _, err := syncR.Read(b); err == nil { - break - } else { - if os.IsTimeout(err) { - // Check if the process is still running. 
- var status syscall.WaitStatus - pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil) - if err != nil { - return errors.Wrapf(err, "failed to read %s process status", prog) - } - if pid != cmd.Process.Pid { - continue - } - if status.Exited() { - // Seek at the beginning of the file and read all its content - if _, err := logFile.Seek(0, 0); err != nil { - logrus.Errorf("could not seek log file: %q", err) - } - logContent, err := ioutil.ReadAll(logFile) - if err != nil { - return errors.Wrapf(err, "%s failed", prog) - } - return errors.Errorf("%s failed: %q", prog, logContent) - } - if status.Signaled() { - return errors.Errorf("%s killed by signal", prog) - } - continue - } - return errors.Wrapf(err, "failed to read from %s sync pipe", prog) - } - } - return nil -} - -func (r *Runtime) setupRootlessPortMappingViaRLK(ctr *Container, netnsPath string) error { - syncR, syncW, err := os.Pipe() - if err != nil { - return errors.Wrapf(err, "failed to open pipe") - } - defer errorhandling.CloseQuiet(syncR) - defer errorhandling.CloseQuiet(syncW) - - logPath := filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("rootlessport-%s.log", ctr.config.ID)) - logFile, err := os.Create(logPath) - if err != nil { - return errors.Wrapf(err, "failed to open rootlessport log file %s", logPath) - } - defer logFile.Close() - // Unlink immediately the file so we won't need to worry about cleaning it up later. - // It is still accessible through the open fd logFile. 
- if err := os.Remove(logPath); err != nil { - return errors.Wrapf(err, "delete file %s", logPath) - } - - if !ctr.config.PostConfigureNetNS { - ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe() - if err != nil { - return errors.Wrapf(err, "failed to create rootless port sync pipe") - } - } - - childIP := slirp4netnsIP -outer: - for _, r := range ctr.state.NetworkStatus { - for _, i := range r.IPs { - ipv4 := i.Address.IP.To4() - if ipv4 != nil { - childIP = ipv4.String() - break outer - } - } - } - - cfg := rootlessport.Config{ - Mappings: ctr.config.PortMappings, - NetNSPath: netnsPath, - ExitFD: 3, - ReadyFD: 4, - TmpDir: ctr.runtime.config.Engine.TmpDir, - ChildIP: childIP, - } - cfgJSON, err := json.Marshal(cfg) - if err != nil { - return err - } - cfgR := bytes.NewReader(cfgJSON) - var stdout bytes.Buffer - cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid())) - cmd.Args = []string{rootlessport.ReexecKey} - // Leak one end of the pipe in rootlessport process, the other will be sent to conmon - - if ctr.rootlessPortSyncR != nil { - defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR) - } - - cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncR, syncW) - cmd.Stdin = cfgR - // stdout is for human-readable error, stderr is for debug log - cmd.Stdout = &stdout - cmd.Stderr = io.MultiWriter(logFile, &logrusDebugWriter{"rootlessport: "}) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - if err := cmd.Start(); err != nil { - return errors.Wrapf(err, "failed to start rootlessport process") - } - defer func() { - if err := cmd.Process.Release(); err != nil { - logrus.Errorf("unable to release rootlessport process: %q", err) - } - }() - if err := waitForSync(syncR, cmd, logFile, 3*time.Second); err != nil { - stdoutStr := stdout.String() - if stdoutStr != "" { - // err contains full debug log and too verbose, so return stdoutStr - logrus.Debug(err) - return errors.Errorf("rootlessport " + strings.TrimSuffix(stdoutStr, "\n")) 
- } - return err - } - logrus.Debug("rootlessport is ready") - return nil -} - -func (r *Runtime) setupRootlessPortMappingViaSlirp(ctr *Container, cmd *exec.Cmd, apiSocket string) (err error) { - const pidWaitTimeout = 60 * time.Second - chWait := make(chan error) - go func() { - interval := 25 * time.Millisecond - for i := time.Duration(0); i < pidWaitTimeout; i += interval { - // Check if the process is still running. - var status syscall.WaitStatus - pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil) - if err != nil { - break - } - if pid != cmd.Process.Pid { - continue - } - if status.Exited() || status.Signaled() { - chWait <- fmt.Errorf("slirp4netns exited with status %d", status.ExitStatus()) - } - time.Sleep(interval) - } - }() - defer close(chWait) - - // wait that API socket file appears before trying to use it. - if _, err := WaitForFile(apiSocket, chWait, pidWaitTimeout); err != nil { - return errors.Wrapf(err, "waiting for slirp4nets to create the api socket file %s", apiSocket) - } - - // for each port we want to add we need to open a connection to the slirp4netns control socket - // and send the add_hostfwd command. - for _, i := range ctr.config.PortMappings { - conn, err := net.Dial("unix", apiSocket) - if err != nil { - return errors.Wrapf(err, "cannot open connection to %s", apiSocket) - } - defer func() { - if err := conn.Close(); err != nil { - logrus.Errorf("unable to close connection: %q", err) - } - }() - hostIP := i.HostIP - if hostIP == "" { - hostIP = "0.0.0.0" - } - apiCmd := slirp4netnsCmd{ - Execute: "add_hostfwd", - Args: slirp4netnsCmdArg{ - Proto: i.Protocol, - HostAddr: hostIP, - HostPort: i.HostPort, - GuestPort: i.ContainerPort, - }, - } - // create the JSON payload and send it. Mark the end of request shutting down writes - // to the socket, as requested by slirp4netns. 
- data, err := json.Marshal(&apiCmd) - if err != nil { - return errors.Wrapf(err, "cannot marshal JSON for slirp4netns") - } - if _, err := conn.Write([]byte(fmt.Sprintf("%s\n", data))); err != nil { - return errors.Wrapf(err, "cannot write to control socket %s", apiSocket) - } - if err := conn.(*net.UnixConn).CloseWrite(); err != nil { - return errors.Wrapf(err, "cannot shutdown the socket %s", apiSocket) - } - buf := make([]byte, 2048) - readLength, err := conn.Read(buf) - if err != nil { - return errors.Wrapf(err, "cannot read from control socket %s", apiSocket) - } - // if there is no 'error' key in the received JSON data, then the operation was - // successful. - var y map[string]interface{} - if err := json.Unmarshal(buf[0:readLength], &y); err != nil { - return errors.Wrapf(err, "error parsing error status from slirp4netns") - } - if e, found := y["error"]; found { - return errors.Errorf("error from slirp4netns while setting up port redirection: %v", e) - } - } - logrus.Debug("slirp4netns port-forwarding setup via add_hostfwd is ready") - return nil -} - // Configure the network namespace using the container process func (r *Runtime) setupNetNS(ctr *Container) error { nsProcess := fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID) diff --git a/libpod/networking_slirp4netns.go b/libpod/networking_slirp4netns.go new file mode 100644 index 0000000000..72ab3c919b --- /dev/null +++ b/libpod/networking_slirp4netns.go @@ -0,0 +1,538 @@ +// +build linux + +package libpod + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "github.com/containers/podman/v3/pkg/errorhandling" + "github.com/containers/podman/v3/pkg/rootlessport" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +type slirpFeatures struct { + HasDisableHostLoopback bool + HasMTU bool + HasEnableSandbox bool + HasEnableSeccomp bool + HasCIDR bool + HasOutboundAddr bool + HasIPv6 bool +} + +type 
slirp4netnsCmdArg struct { + Proto string `json:"proto,omitempty"` + HostAddr string `json:"host_addr"` + HostPort int32 `json:"host_port"` + GuestAddr string `json:"guest_addr"` + GuestPort int32 `json:"guest_port"` +} + +type slirp4netnsCmd struct { + Execute string `json:"execute"` + Args slirp4netnsCmdArg `json:"arguments"` +} + +type slirp4netnsNetworkOptions struct { + cidr string + disableHostLoopback bool + enableIPv6 bool + isSlirpHostForward bool + noPivotRoot bool + mtu int + outboundAddr string + outboundAddr6 string +} + +func checkSlirpFlags(path string) (*slirpFeatures, error) { + cmd := exec.Command(path, "--help") + out, err := cmd.CombinedOutput() + if err != nil { + return nil, errors.Wrapf(err, "slirp4netns %q", out) + } + return &slirpFeatures{ + HasDisableHostLoopback: strings.Contains(string(out), "--disable-host-loopback"), + HasMTU: strings.Contains(string(out), "--mtu"), + HasEnableSandbox: strings.Contains(string(out), "--enable-sandbox"), + HasEnableSeccomp: strings.Contains(string(out), "--enable-seccomp"), + HasCIDR: strings.Contains(string(out), "--cidr"), + HasOutboundAddr: strings.Contains(string(out), "--outbound-addr"), + HasIPv6: strings.Contains(string(out), "--enable-ipv6"), + }, nil +} + +func parseSlirp4netnsNetworkOptions(r *Runtime, extraOptions []string) (*slirp4netnsNetworkOptions, error) { + slirpOptions := append(r.config.Engine.NetworkCmdOptions, extraOptions...) 
+ slirp4netnsOpts := &slirp4netnsNetworkOptions{ + // overwrite defaults + disableHostLoopback: true, + mtu: slirp4netnsMTU, + noPivotRoot: r.config.Engine.NoPivotRoot, + } + for _, o := range slirpOptions { + parts := strings.SplitN(o, "=", 2) + if len(parts) < 2 { + return nil, errors.Errorf("unknown option for slirp4netns: %q", o) + } + option, value := parts[0], parts[1] + switch option { + case "cidr": + ipv4, _, err := net.ParseCIDR(value) + if err != nil || ipv4.To4() == nil { + return nil, errors.Errorf("invalid cidr %q", value) + } + slirp4netnsOpts.cidr = value + case "port_handler": + switch value { + case "slirp4netns": + slirp4netnsOpts.isSlirpHostForward = true + case "rootlesskit": + slirp4netnsOpts.isSlirpHostForward = false + default: + return nil, errors.Errorf("unknown port_handler for slirp4netns: %q", value) + } + case "allow_host_loopback": + switch value { + case "true": + slirp4netnsOpts.disableHostLoopback = false + case "false": + slirp4netnsOpts.disableHostLoopback = true + default: + return nil, errors.Errorf("invalid value of allow_host_loopback for slirp4netns: %q", value) + } + case "enable_ipv6": + switch value { + case "true": + slirp4netnsOpts.enableIPv6 = true + case "false": + slirp4netnsOpts.enableIPv6 = false + default: + return nil, errors.Errorf("invalid value of enable_ipv6 for slirp4netns: %q", value) + } + case "outbound_addr": + ipv4 := net.ParseIP(value) + if ipv4 == nil || ipv4.To4() == nil { + _, err := net.InterfaceByName(value) + if err != nil { + return nil, errors.Errorf("invalid outbound_addr %q", value) + } + } + slirp4netnsOpts.outboundAddr = value + case "outbound_addr6": + ipv6 := net.ParseIP(value) + if ipv6 == nil || ipv6.To4() != nil { + _, err := net.InterfaceByName(value) + if err != nil { + return nil, errors.Errorf("invalid outbound_addr6: %q", value) + } + } + slirp4netnsOpts.outboundAddr6 = value + case "mtu": + var err error + slirp4netnsOpts.mtu, err = strconv.Atoi(value) + if slirp4netnsOpts.mtu < 
68 || err != nil { + return nil, errors.Errorf("invalid mtu %q", value) + } + default: + return nil, errors.Errorf("unknown option for slirp4netns: %q", o) + } + } + return slirp4netnsOpts, nil +} + +func createBasicSlirp4netnsCmdArgs(options *slirp4netnsNetworkOptions, features *slirpFeatures) ([]string, error) { + cmdArgs := []string{} + if options.disableHostLoopback && features.HasDisableHostLoopback { + cmdArgs = append(cmdArgs, "--disable-host-loopback") + } + if options.mtu > -1 && features.HasMTU { + cmdArgs = append(cmdArgs, fmt.Sprintf("--mtu=%d", options.mtu)) + } + if !options.noPivotRoot && features.HasEnableSandbox { + cmdArgs = append(cmdArgs, "--enable-sandbox") + } + if features.HasEnableSeccomp { + cmdArgs = append(cmdArgs, "--enable-seccomp") + } + + if options.cidr != "" { + if !features.HasCIDR { + return nil, errors.Errorf("cidr not supported") + } + cmdArgs = append(cmdArgs, fmt.Sprintf("--cidr=%s", options.cidr)) + } + + if options.enableIPv6 { + if !features.HasIPv6 { + return nil, errors.Errorf("enable_ipv6 not supported") + } + cmdArgs = append(cmdArgs, "--enable-ipv6") + } + + if options.outboundAddr != "" { + if !features.HasOutboundAddr { + return nil, errors.Errorf("outbound_addr not supported") + } + cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr=%s", options.outboundAddr)) + } + + if options.outboundAddr6 != "" { + if !features.HasOutboundAddr || !features.HasIPv6 { + return nil, errors.Errorf("outbound_addr6 not supported") + } + if !options.enableIPv6 { + return nil, errors.Errorf("enable_ipv6=true is required for outbound_addr6") + } + cmdArgs = append(cmdArgs, fmt.Sprintf("--outbound-addr6=%s", options.outboundAddr6)) + } + + return cmdArgs, nil +} + +// setupSlirp4netns can be called in rootful as well as in rootless +func (r *Runtime) setupSlirp4netns(ctr *Container) error { + path := r.config.Engine.NetworkCmdPath + if path == "" { + var err error + path, err = exec.LookPath("slirp4netns") + if err != nil { + 
logrus.Errorf("could not find slirp4netns, the network namespace won't be configured: %v", err) + return nil + } + } + + syncR, syncW, err := os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to open pipe") + } + defer errorhandling.CloseQuiet(syncR) + defer errorhandling.CloseQuiet(syncW) + + havePortMapping := len(ctr.Config().PortMappings) > 0 + logPath := filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID)) + + ctrNetworkSlipOpts := []string{} + if ctr.config.NetworkOptions != nil { + ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, ctr.config.NetworkOptions["slirp4netns"]...) + } + netOptions, err := parseSlirp4netnsNetworkOptions(r, ctrNetworkSlipOpts) + if err != nil { + return err + } + slirpFeatures, err := checkSlirpFlags(path) + if err != nil { + return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err) + } + cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures) + if err != nil { + return err + } + + // the slirp4netns arguments being passed are describes as follows: + // from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns + // -c, --configure Brings up the tap interface + // -e, --exit-fd=FD specify the FD for terminating slirp4netns + // -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished + cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4") + + var apiSocket string + if havePortMapping && netOptions.isSlirpHostForward { + apiSocket = filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID)) + cmdArgs = append(cmdArgs, "--api-socket", apiSocket) + } + netnsPath := "" + if !ctr.config.PostConfigureNetNS { + ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to create rootless network sync pipe") + } + netnsPath = ctr.state.NetNS.Path() + cmdArgs = append(cmdArgs, "--netns-type=path", 
netnsPath, "tap0") + } else { + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) + netnsPath = fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID) + // we don't use --netns-path here (unavailable for slirp4netns < v0.4) + cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0") + } + + cmd := exec.Command(path, cmdArgs...) + logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + // workaround for https://github.com/rootless-containers/slirp4netns/pull/153 + if !netOptions.noPivotRoot && slirpFeatures.HasEnableSandbox { + cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS + cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS + } + + // Leak one end of the pipe in slirp4netns, the other will be sent to conmon + cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncR, syncW) + + logFile, err := os.Create(logPath) + if err != nil { + return errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath) + } + defer logFile.Close() + // Unlink immediately the file so we won't need to worry about cleaning it up later. + // It is still accessible through the open fd logFile. 
+ if err := os.Remove(logPath); err != nil { + return errors.Wrapf(err, "delete file %s", logPath) + } + cmd.Stdout = logFile + cmd.Stderr = logFile + if err := cmd.Start(); err != nil { + return errors.Wrapf(err, "failed to start slirp4netns process") + } + defer func() { + if err := cmd.Process.Release(); err != nil { + logrus.Errorf("unable to release command process: %q", err) + } + }() + + if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil { + return err + } + + if havePortMapping { + if netOptions.isSlirpHostForward { + return r.setupRootlessPortMappingViaSlirp(ctr, cmd, apiSocket) + } + return r.setupRootlessPortMappingViaRLK(ctr, netnsPath) + } + return nil +} + +func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout time.Duration) error { + prog := filepath.Base(cmd.Path) + if len(cmd.Args) > 0 { + prog = cmd.Args[0] + } + b := make([]byte, 16) + for { + if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil { + return errors.Wrapf(err, "error setting %s pipe timeout", prog) + } + // FIXME: return err as soon as proc exits, without waiting for timeout + if _, err := syncR.Read(b); err == nil { + break + } else { + if os.IsTimeout(err) { + // Check if the process is still running. 
+ var status syscall.WaitStatus + pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil) + if err != nil { + return errors.Wrapf(err, "failed to read %s process status", prog) + } + if pid != cmd.Process.Pid { + continue + } + if status.Exited() { + // Seek at the beginning of the file and read all its content + if _, err := logFile.Seek(0, 0); err != nil { + logrus.Errorf("could not seek log file: %q", err) + } + logContent, err := ioutil.ReadAll(logFile) + if err != nil { + return errors.Wrapf(err, "%s failed", prog) + } + return errors.Errorf("%s failed: %q", prog, logContent) + } + if status.Signaled() { + return errors.Errorf("%s killed by signal", prog) + } + continue + } + return errors.Wrapf(err, "failed to read from %s sync pipe", prog) + } + } + return nil +} + +func (r *Runtime) setupRootlessPortMappingViaRLK(ctr *Container, netnsPath string) error { + syncR, syncW, err := os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to open pipe") + } + defer errorhandling.CloseQuiet(syncR) + defer errorhandling.CloseQuiet(syncW) + + logPath := filepath.Join(ctr.runtime.config.Engine.TmpDir, fmt.Sprintf("rootlessport-%s.log", ctr.config.ID)) + logFile, err := os.Create(logPath) + if err != nil { + return errors.Wrapf(err, "failed to open rootlessport log file %s", logPath) + } + defer logFile.Close() + // Unlink immediately the file so we won't need to worry about cleaning it up later. + // It is still accessible through the open fd logFile. 
+ if err := os.Remove(logPath); err != nil { + return errors.Wrapf(err, "delete file %s", logPath) + } + + if !ctr.config.PostConfigureNetNS { + ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to create rootless port sync pipe") + } + } + + childIP := slirp4netnsIP +outer: + for _, r := range ctr.state.NetworkStatus { + for _, i := range r.IPs { + ipv4 := i.Address.IP.To4() + if ipv4 != nil { + childIP = ipv4.String() + break outer + } + } + } + + cfg := rootlessport.Config{ + Mappings: ctr.config.PortMappings, + NetNSPath: netnsPath, + ExitFD: 3, + ReadyFD: 4, + TmpDir: ctr.runtime.config.Engine.TmpDir, + ChildIP: childIP, + } + cfgJSON, err := json.Marshal(cfg) + if err != nil { + return err + } + cfgR := bytes.NewReader(cfgJSON) + var stdout bytes.Buffer + cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid())) + cmd.Args = []string{rootlessport.ReexecKey} + // Leak one end of the pipe in rootlessport process, the other will be sent to conmon + + if ctr.rootlessPortSyncR != nil { + defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR) + } + + cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncR, syncW) + cmd.Stdin = cfgR + // stdout is for human-readable error, stderr is for debug log + cmd.Stdout = &stdout + cmd.Stderr = io.MultiWriter(logFile, &logrusDebugWriter{"rootlessport: "}) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + if err := cmd.Start(); err != nil { + return errors.Wrapf(err, "failed to start rootlessport process") + } + defer func() { + if err := cmd.Process.Release(); err != nil { + logrus.Errorf("unable to release rootlessport process: %q", err) + } + }() + if err := waitForSync(syncR, cmd, logFile, 3*time.Second); err != nil { + stdoutStr := stdout.String() + if stdoutStr != "" { + // err contains full debug log and too verbose, so return stdoutStr + logrus.Debug(err) + return errors.Errorf("rootlessport " + strings.TrimSuffix(stdoutStr, "\n")) 
+ } + return err + } + logrus.Debug("rootlessport is ready") + return nil +} + +func (r *Runtime) setupRootlessPortMappingViaSlirp(ctr *Container, cmd *exec.Cmd, apiSocket string) (err error) { + const pidWaitTimeout = 60 * time.Second + chWait := make(chan error) + go func() { + interval := 25 * time.Millisecond + for i := time.Duration(0); i < pidWaitTimeout; i += interval { + // Check if the process is still running. + var status syscall.WaitStatus + pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil) + if err != nil { + break + } + if pid != cmd.Process.Pid { + continue + } + if status.Exited() || status.Signaled() { + chWait <- fmt.Errorf("slirp4netns exited with status %d", status.ExitStatus()) + } + time.Sleep(interval) + } + }() + defer close(chWait) + + // wait that API socket file appears before trying to use it. + if _, err := WaitForFile(apiSocket, chWait, pidWaitTimeout); err != nil { + return errors.Wrapf(err, "waiting for slirp4nets to create the api socket file %s", apiSocket) + } + + // for each port we want to add we need to open a connection to the slirp4netns control socket + // and send the add_hostfwd command. + for _, i := range ctr.config.PortMappings { + conn, err := net.Dial("unix", apiSocket) + if err != nil { + return errors.Wrapf(err, "cannot open connection to %s", apiSocket) + } + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("unable to close connection: %q", err) + } + }() + hostIP := i.HostIP + if hostIP == "" { + hostIP = "0.0.0.0" + } + apiCmd := slirp4netnsCmd{ + Execute: "add_hostfwd", + Args: slirp4netnsCmdArg{ + Proto: i.Protocol, + HostAddr: hostIP, + HostPort: i.HostPort, + GuestPort: i.ContainerPort, + }, + } + // create the JSON payload and send it. Mark the end of request shutting down writes + // to the socket, as requested by slirp4netns. 
+ data, err := json.Marshal(&apiCmd) + if err != nil { + return errors.Wrapf(err, "cannot marshal JSON for slirp4netns") + } + if _, err := conn.Write([]byte(fmt.Sprintf("%s\n", data))); err != nil { + return errors.Wrapf(err, "cannot write to control socket %s", apiSocket) + } + if err := conn.(*net.UnixConn).CloseWrite(); err != nil { + return errors.Wrapf(err, "cannot shutdown the socket %s", apiSocket) + } + buf := make([]byte, 2048) + readLength, err := conn.Read(buf) + if err != nil { + return errors.Wrapf(err, "cannot read from control socket %s", apiSocket) + } + // if there is no 'error' key in the received JSON data, then the operation was + // successful. + var y map[string]interface{} + if err := json.Unmarshal(buf[0:readLength], &y); err != nil { + return errors.Wrapf(err, "error parsing error status from slirp4netns") + } + if e, found := y["error"]; found { + return errors.Errorf("error from slirp4netns while setting up port redirection: %v", e) + } + } + logrus.Debug("slirp4netns port-forwarding setup via add_hostfwd is ready") + return nil +} From 294c90b05e068badb759b7618b4d156a75f7fb69 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Sun, 21 Feb 2021 00:13:42 +0100 Subject: [PATCH 05/13] Enable rootless network connect/disconnect With the new rootless cni, supporting network connect/disconnect is easy. Combine common steps into extra functions to prevent code duplication. 
Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 102 ++++++++++---------- test/e2e/network_connect_disconnect_test.go | 25 +++-- 2 files changed, 62 insertions(+), 65 deletions(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 2877427068..a5056d8341 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -310,13 +310,36 @@ func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { return rootlessCNINS, nil } -// Create and configure a new network namespace for a container -func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Result, error) { +// setUpOCICNIPod will set up the cni networks, on error it will also tear down the cni +// networks. If rootless it will join/create the rootless cni namespace. +func (r *Runtime) setUpOCICNIPod(podNetwork ocicni.PodNetwork) ([]ocicni.NetResult, error) { rootlessCNINS, err := r.getRootlessCNINetNs(true) if err != nil { return nil, err } + var results []ocicni.NetResult + setUpPod := func() error { + results, err = r.netPlugin.SetUpPod(podNetwork) + if err != nil { + if err2 := r.netPlugin.TearDownPod(podNetwork); err2 != nil { + logrus.Errorf("Error tearing down partially created network namespace for container %s: %v", podNetwork.ID, err2) + } + return errors.Wrapf(err, "error configuring network namespace for container %s", podNetwork.ID) + } + return nil + } + // rootlessCNINS is nil if we are root + if rootlessCNINS != nil { + // execute the cni setup in the rootless net ns + err = rootlessCNINS.Do(setUpPod) + } else { + err = setUpPod() + } + return results, err +} +// Create and configure a new network namespace for a container +func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Result, error) { var requestedIP net.IP if ctr.requestedIP != nil { requestedIP = ctr.requestedIP @@ -360,28 +383,7 @@ func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Re podNetwork.Aliases = 
aliases } - var results []ocicni.NetResult - setUpPod := func() error { - results, err = r.netPlugin.SetUpPod(podNetwork) - if err != nil { - return errors.Wrapf(err, "error configuring network namespace for container %s", ctr.ID()) - } - defer func() { - if err != nil { - if err2 := r.netPlugin.TearDownPod(podNetwork); err2 != nil { - logrus.Errorf("Error tearing down partially created network namespace for container %s: %v", ctr.ID(), err2) - } - } - }() - return nil - } - // rootlessCNINS is nil if we are root - if rootlessCNINS != nil { - // execute the cni setup in the rootless net ns - err = rootlessCNINS.Do(setUpPod) - } else { - err = setUpPod() - } + results, err := r.setUpOCICNIPod(podNetwork) if err != nil { return nil, err } @@ -514,6 +516,28 @@ func (r *Runtime) closeNetNS(ctr *Container) error { return nil } +// Tear down a container's CNI network configuration and joins the +// rootless net ns as rootless user +func (r *Runtime) teardownOCICNIPod(podNetwork ocicni.PodNetwork) error { + rootlessCNINS, err := r.getRootlessCNINetNs(false) + if err != nil { + return err + } + tearDownPod := func() error { + err := r.netPlugin.TearDownPod(podNetwork) + return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", podNetwork.ID) + } + + // rootlessCNINS is nil if we are root + if rootlessCNINS != nil { + // execute the cni setup in the rootless net ns + err = rootlessCNINS.Do(tearDownPod) + } else { + err = tearDownPod() + } + return err +} + // Tear down a container's CNI network configuration, but do not tear down the // namespace itself. 
func (r *Runtime) teardownCNI(ctr *Container) error { @@ -530,10 +554,6 @@ func (r *Runtime) teardownCNI(ctr *Container) error { } if !ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 { - rootlessCNINS, err := r.getRootlessCNINetNs(false) - if err != nil { - return err - } var requestedIP net.IP if ctr.requestedIP != nil { requestedIP = ctr.requestedIP @@ -553,21 +573,7 @@ func (r *Runtime) teardownCNI(ctr *Container) error { } podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctr.state.NetNS.Path(), networks, ctr.config.PortMappings, requestedIP, requestedMAC, ctr.state.NetInterfaceDescriptions) - - tearDownPod := func() error { - if err := r.netPlugin.TearDownPod(podNetwork); err != nil { - return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID()) - } - return nil - } - - // rootlessCNINS is nil if we are root - if rootlessCNINS != nil { - // execute the cni setup in the rootless net ns - err = rootlessCNINS.Do(tearDownPod) - } else { - err = tearDownPod() - } + err = r.teardownOCICNIPod(podNetwork) return err } return nil @@ -920,7 +926,7 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) erro } podConfig := c.runtime.getPodNetwork(c.ID(), c.Name(), c.state.NetNS.Path(), []string{netName}, c.config.PortMappings, nil, nil, c.state.NetInterfaceDescriptions) - if err := c.runtime.netPlugin.TearDownPod(podConfig); err != nil { + if err := c.runtime.teardownOCICNIPod(podConfig); err != nil { return err } @@ -984,7 +990,7 @@ func (c *Container) NetworkConnect(nameOrID, netName string, aliases []string) e podConfig := c.runtime.getPodNetwork(c.ID(), c.Name(), c.state.NetNS.Path(), []string{netName}, c.config.PortMappings, nil, nil, c.state.NetInterfaceDescriptions) podConfig.Aliases = make(map[string][]string, 1) podConfig.Aliases[netName] = aliases - results, err := c.runtime.netPlugin.SetUpPod(podConfig) + results, err := c.runtime.setUpOCICNIPod(podConfig) if err != nil { return err } 
@@ -1031,9 +1037,6 @@ func (c *Container) NetworkConnect(nameOrID, netName string, aliases []string) e // DisconnectContainerFromNetwork removes a container from its CNI network func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error { - if rootless.IsRootless() { - return errors.New("network connect is not enabled for rootless containers") - } ctr, err := r.LookupContainer(nameOrID) if err != nil { return err @@ -1043,9 +1046,6 @@ func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force // ConnectContainerToNetwork connects a container to a CNI network func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, aliases []string) error { - if rootless.IsRootless() { - return errors.New("network disconnect is not enabled for rootless containers") - } ctr, err := r.LookupContainer(nameOrID) if err != nil { return err diff --git a/test/e2e/network_connect_disconnect_test.go b/test/e2e/network_connect_disconnect_test.go index e9a7b421f0..6974c76143 100644 --- a/test/e2e/network_connect_disconnect_test.go +++ b/test/e2e/network_connect_disconnect_test.go @@ -33,14 +33,12 @@ var _ = Describe("Podman network connect and disconnect", func() { }) It("bad network name in disconnect should result in error", func() { - SkipIfRootless("network connect and disconnect are only rootful") dis := podmanTest.Podman([]string{"network", "disconnect", "foobar", "test"}) dis.WaitWithDefaultTimeout() Expect(dis.ExitCode()).ToNot(BeZero()) }) It("bad container name in network disconnect should result in error", func() { - SkipIfRootless("network connect and disconnect are only rootful") netName := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName}) session.WaitWithDefaultTimeout() @@ -72,7 +70,6 @@ var _ = Describe("Podman network connect and disconnect", func() { }) It("podman network disconnect", func() { - SkipIfRootless("network connect and disconnect are 
only rootful") netName := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName}) session.WaitWithDefaultTimeout() @@ -102,14 +99,12 @@ var _ = Describe("Podman network connect and disconnect", func() { }) It("bad network name in connect should result in error", func() { - SkipIfRootless("network connect and disconnect are only rootful") dis := podmanTest.Podman([]string{"network", "connect", "foobar", "test"}) dis.WaitWithDefaultTimeout() Expect(dis.ExitCode()).ToNot(BeZero()) }) It("bad container name in network connect should result in error", func() { - SkipIfRootless("network connect and disconnect are only rootful") netName := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName}) session.WaitWithDefaultTimeout() @@ -141,7 +136,6 @@ var _ = Describe("Podman network connect and disconnect", func() { }) It("podman connect on a container that already is connected to the network should error", func() { - SkipIfRootless("network connect and disconnect are only rootful") netName := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName}) session.WaitWithDefaultTimeout() @@ -159,7 +153,6 @@ var _ = Describe("Podman network connect and disconnect", func() { It("podman network connect", func() { SkipIfRemote("This requires a pending PR to be merged before it will work") - SkipIfRootless("network connect and disconnect are only rootful") netName := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName}) session.WaitWithDefaultTimeout() @@ -203,18 +196,23 @@ var _ = Describe("Podman network connect and disconnect", func() { }) It("podman network connect when not running", func() { - SkipIfRootless("network connect and disconnect are only rootful") - netName := "aliasTest" + stringid.GenerateNonCryptoID() - session := 
podmanTest.Podman([]string{"network", "create", netName}) + netName1 := "connect1" + stringid.GenerateNonCryptoID() + session := podmanTest.Podman([]string{"network", "create", netName1}) session.WaitWithDefaultTimeout() Expect(session.ExitCode()).To(BeZero()) - defer podmanTest.removeCNINetwork(netName) + defer podmanTest.removeCNINetwork(netName1) - ctr := podmanTest.Podman([]string{"create", "--name", "test", ALPINE, "top"}) + netName2 := "connect2" + stringid.GenerateNonCryptoID() + session = podmanTest.Podman([]string{"network", "create", netName2}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(BeZero()) + defer podmanTest.removeCNINetwork(netName2) + + ctr := podmanTest.Podman([]string{"create", "--name", "test", "--network", netName1, ALPINE, "top"}) ctr.WaitWithDefaultTimeout() Expect(ctr.ExitCode()).To(BeZero()) - dis := podmanTest.Podman([]string{"network", "connect", netName, "test"}) + dis := podmanTest.Podman([]string{"network", "connect", netName2, "test"}) dis.WaitWithDefaultTimeout() Expect(dis.ExitCode()).To(BeZero()) @@ -286,7 +284,6 @@ var _ = Describe("Podman network connect and disconnect", func() { }) It("podman network disconnect when not running", func() { - SkipIfRootless("network connect and disconnect are only rootful") netName1 := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName1}) session.WaitWithDefaultTimeout() From 8627de28bc685b6150db170ef373a9296ed09b03 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 23 Feb 2021 13:56:29 +0100 Subject: [PATCH 06/13] Fix dnsname test Signed-off-by: Paul Holzinger --- test/e2e/run_networking_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/run_networking_test.go b/test/e2e/run_networking_test.go index 43eb8fe4e8..4c66e2823c 100644 --- a/test/e2e/run_networking_test.go +++ b/test/e2e/run_networking_test.go @@ -766,7 +766,7 @@ var _ = Describe("Podman run networking", func() { 
Expect(session.ExitCode()).To(Equal(1)) Expect(session.ErrorToString()).To(ContainSubstring("can't resolve 'con1'")) - session = podmanTest.Podman([]string{"run", "--name", "con4", "--network", net, ALPINE, "nslookup", pod2}) + session = podmanTest.Podman([]string{"run", "--name", "con4", "--network", net, ALPINE, "nslookup", pod2 + ".dns.podman"}) session.WaitWithDefaultTimeout() Expect(session.ExitCode()).To(BeZero()) }) From db19224b6dc35ac59062013256c73364eb024a99 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 23 Feb 2021 15:16:15 +0100 Subject: [PATCH 07/13] Only use rootless RLK when the container has ports Do not invoke the rootlesskit port forwarder when the container has no ports. Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index a5056d8341..d927b17d8d 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -436,7 +436,7 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) error { if err != nil { return err } - if len(networks) > 0 { + if len(networks) > 0 && len(ctr.config.PortMappings) > 0 { // set up port forwarder for CNI-in-slirp4netns netnsPath := ctr.state.NetNS.Path() // TODO: support slirp4netns port forwarder as well From d7e003f362bea73ca08ef224dba8c38543a2e953 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Wed, 17 Mar 2021 09:30:55 +0100 Subject: [PATCH 08/13] Remove unused rootless-cni-infra container files Signed-off-by: Paul Holzinger --- contrib/rootless-cni-infra/Containerfile | 36 -- contrib/rootless-cni-infra/README.md | 25 -- contrib/rootless-cni-infra/rootless-cni-infra | 181 --------- libpod/networking_linux.go | 12 + libpod/rootless_cni_linux.go | 372 ------------------ 5 files changed, 12 insertions(+), 614 deletions(-) delete mode 100644 contrib/rootless-cni-infra/Containerfile delete mode 100644 contrib/rootless-cni-infra/README.md delete mode 100755 
contrib/rootless-cni-infra/rootless-cni-infra delete mode 100644 libpod/rootless_cni_linux.go diff --git a/contrib/rootless-cni-infra/Containerfile b/contrib/rootless-cni-infra/Containerfile deleted file mode 100644 index 4324f39d2b..0000000000 --- a/contrib/rootless-cni-infra/Containerfile +++ /dev/null @@ -1,36 +0,0 @@ -ARG GOLANG_VERSION=1.15 -ARG ALPINE_VERSION=3.12 -ARG CNI_VERSION=v0.8.0 -ARG CNI_PLUGINS_VERSION=v0.8.7 -ARG DNSNAME_VERSION=v1.1.1 - -FROM golang:${GOLANG_VERSION}-alpine${ALPINE_VERSION} AS golang-base -RUN apk add --no-cache git - -FROM golang-base AS cnitool -RUN git clone https://github.com/containernetworking/cni /go/src/github.com/containernetworking/cni -WORKDIR /go/src/github.com/containernetworking/cni -ARG CNI_VERSION -RUN git checkout ${CNI_VERSION} -RUN go build -o /cnitool ./cnitool - -FROM golang-base AS dnsname -RUN git clone https://github.com/containers/dnsname /go/src/github.com/containers/dnsname -WORKDIR /go/src/github.com/containers/dnsname -ARG DNSNAME_VERSION -RUN git checkout ${DNSNAME_VERSION} -RUN go build -o /dnsname ./plugins/meta/dnsname - -FROM alpine:${ALPINE_VERSION} -RUN apk add --no-cache curl dnsmasq iptables ip6tables iproute2 -ARG TARGETARCH -ARG CNI_PLUGINS_VERSION -RUN mkdir -p /opt/cni/bin && \ - curl -fsSL https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH}-${CNI_PLUGINS_VERSION}.tgz | tar xz -C /opt/cni/bin -COPY --from=cnitool /cnitool /usr/local/bin -COPY --from=dnsname /dnsname /opt/cni/bin -COPY rootless-cni-infra /usr/local/bin -ENV CNI_PATH=/opt/cni/bin -CMD ["sleep", "infinity"] - -ENV ROOTLESS_CNI_INFRA_VERSION=5 diff --git a/contrib/rootless-cni-infra/README.md b/contrib/rootless-cni-infra/README.md deleted file mode 100644 index c43b4cf491..0000000000 --- a/contrib/rootless-cni-infra/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# rootless-cni-infra - -Infra container for CNI-in-slirp4netns. 
- -## How it works - -When a CNI network is specified for `podman run` in rootless mode, Podman launches the `rootless-cni-infra` container to execute CNI plugins inside slirp4netns. - -The infra container is created per user, by executing an equivalent of: -`podman run -d --name rootless-cni-infra --pid=host --privileged -v $HOME/.config/cni/net.d:/etc/cni/net.d rootless-cni-infra`. -The infra container is automatically deleted when no CNI network is in use. - -Podman then allocates a CNI netns in the infra container, by executing an equivalent of: -`podman exec rootless-cni-infra rootless-cni-infra alloc $CONTAINER_ID $NETWORK_NAME $POD_NAME`. - -The allocated netns is deallocated when the container is being removed, by executing an equivalent of: -`podman exec rootless-cni-infra rootless-cni-infra dealloc $CONTAINER_ID $NETWORK_NAME`. - -The container images live on `quay.io/libpod/rootless-cni-infra`. The tags have the format `$version-$architecture`. Please make sure to increase the version number in the Containerfile (i.e., `ROOTLESS_CNI_INFRA_VERSION`) when applying changes to this directory. After committing the changes, upload the image(s) with the corresponding tag. 
- -## Directory layout - -* `/run/rootless-cni-infra/${CONTAINER_ID}/pid`: PID of the `sleep infinity` process that corresponds to the allocated netns -* `/run/rootless-cni-infra/${CONTAINER_ID}/attached/${NETWORK_NAME}`: CNI result -* `/run/rootless-cni-infra/${CONTAINER_ID}/attached-args/${NETWORK_NAME}`: CNI args diff --git a/contrib/rootless-cni-infra/rootless-cni-infra b/contrib/rootless-cni-infra/rootless-cni-infra deleted file mode 100755 index cceb8d817a..0000000000 --- a/contrib/rootless-cni-infra/rootless-cni-infra +++ /dev/null @@ -1,181 +0,0 @@ -#!/bin/sh -set -eu - -ARG0="$0" -BASE="/run/rootless-cni-infra" - -wait_unshare_net() { - pid="$1" - # NOTE: busybox shell doesn't support the `for ((i=0; i < $MAX; i++)); do foo; done` statement - i=0 - while :; do - if [ "$(readlink /proc/self/ns/net)" != "$(readlink /proc/${pid}/ns/net)" ]; then - break - fi - sleep 0.1 - if [ $i -ge 10 ]; then - echo >&2 "/proc/${pid}/ns/net cannot be unshared" - exit 1 - fi - i=$((i + 1)) - done -} - -# CLI subcommand: "alloc $CONTAINER_ID $NETWORK_NAME $POD_NAME $IP $MAC $CAP_ARGS" -cmd_entrypoint_alloc() { - if [ "$#" -ne 6 ]; then - echo >&2 "Usage: $ARG0 alloc CONTAINER_ID NETWORK_NAME POD_NAME IP MAC CAP_ARGS" - exit 1 - fi - - ID="$1" - NET="$2" - K8S_POD_NAME="$3" - IP="$4" - MAC="$5" - CAP_ARGS="$6" - - dir="${BASE}/${ID}" - mkdir -p "${dir}/attached" "${dir}/attached-args" - - pid="" - if [ -f "${dir}/pid" ]; then - pid=$(cat "${dir}/pid") - else - unshare -n sleep infinity & - pid="$!" 
- wait_unshare_net "${pid}" - echo "${pid}" >"${dir}/pid" - nsenter -t "${pid}" -n ip link set lo up - fi - CNI_ARGS="IgnoreUnknown=1;K8S_POD_NAME=${K8S_POD_NAME}" - if [ "$IP" ]; then - CNI_ARGS="$CNI_ARGS;IP=${IP}" - fi - if [ "$MAC" ]; then - CNI_ARGS="$CNI_ARGS;MAC=${MAC}" - fi - if [ "$CAP_ARGS" ]; then - CAP_ARGS="$CAP_ARGS" - fi - nwcount=$(find "${dir}/attached" -type f | wc -l) - CNI_IFNAME="eth${nwcount}" - export CNI_ARGS CNI_IFNAME CAP_ARGS - cnitool add "${NET}" "/proc/${pid}/ns/net" >"${dir}/attached/${NET}" - echo "${CNI_ARGS}" >"${dir}/attached-args/${NET}" - - # return the result - ns="/proc/${pid}/ns/net" - echo "{\"ns\":\"${ns}\"}" -} - -# CLI subcommand: "dealloc $CONTAINER_ID $NETWORK_NAME" -cmd_entrypoint_dealloc() { - if [ "$#" -ne 2 ]; then - echo >&2 "Usage: $ARG0 dealloc CONTAINER_ID NETWORK_NAME" - exit 1 - fi - - ID=$1 - NET=$2 - - dir="${BASE}/${ID}" - if [ ! -f "${dir}/pid" ]; then - exit 0 - fi - pid=$(cat "${dir}/pid") - if [ -f "${dir}/attached-args/${NET}" ]; then - CNI_ARGS=$(cat "${dir}/attached-args/${NET}") - export CNI_ARGS - fi - cnitool del "${NET}" "/proc/${pid}/ns/net" - rm -f "${dir}/attached/${NET}" "${dir}/attached-args/${NET}" - - nwcount=$(find "${dir}/attached" -type f | wc -l) - if [ "${nwcount}" = 0 ]; then - kill -9 "${pid}" - rm -rf "${dir}" - fi - - # return empty json - echo "{}" -} - -# CLI subcommand: "is-idle" -cmd_entrypoint_is_idle() { - if [ ! 
-d ${BASE} ]; then - echo '{"idle": true}' - elif [ -z "$(ls -1 ${BASE})" ]; then - echo '{"idle": true}' - else - echo '{"idle": false}' - fi -} - -# CLI subcommand: "print-cni-result $CONTAINER_ID $NETWORK_NAME" -cmd_entrypoint_print_cni_result() { - if [ "$#" -ne 2 ]; then - echo >&2 "Usage: $ARG0 print-cni-result CONTAINER_ID NETWORK_NAME" - exit 1 - fi - - ID=$1 - NET=$2 - - # the result shall be CNI JSON - cat "${BASE}/${ID}/attached/${NET}" -} - -# CLI subcommand: "print-netns-path $CONTAINER_ID" -cmd_entrypoint_print_netns_path() { - if [ "$#" -ne 1 ]; then - echo >&2 "Usage: $ARG0 print-netns-path CONTAINER_ID" - exit 1 - fi - - ID=$1 - - pid=$(cat "${BASE}/${ID}/pid") - path="/proc/${pid}/ns/net" - - # return the result - echo "{\"path\":\"${path}\"}" -} - -# CLI subcommand: "help" -cmd_entrypoint_help() { - echo "Usage: ${ARG0} COMMAND" - echo - echo "Rootless CNI Infra container" - echo - echo "Commands:" - echo " alloc Allocate a netns" - echo " dealloc Deallocate a netns" - echo " is-idle Print whether the infra container is idle" - echo " print-cni-result Print CNI result" - echo " print-netns-path Print netns path" - echo " help Print help" - echo " version Print version" -} - -# CLI subcommand: "version" -cmd_entrypoint_version() { - echo "{\"version\": \"${ROOTLESS_CNI_INFRA_VERSION}\"}" -} - -# parse args -command="${1:-}" -if [ -z "$command" ]; then - echo >&2 "No command was specified. Run \`${ARG0} help\` to see the usage." - exit 1 -fi - -command_func=$(echo "cmd_entrypoint_${command}" | sed -e "s/-/_/g") -if ! command -v "${command_func}" >/dev/null 2>&1; then - echo >&2 "Unknown command: ${command}. Run \`${ARG0} help\` to see the usage." 
- exit 1 -fi - -# start the command func -shift -"${command_func}" "$@" diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index d927b17d8d..2734f77599 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -338,6 +338,18 @@ func (r *Runtime) setUpOCICNIPod(podNetwork ocicni.PodNetwork) ([]ocicni.NetResu return results, err } +// getCNIPodName return the pod name (hostname) used by CNI and the dnsname plugin. +// If we are in the pod network namespace use the pod name otherwise the container name +func getCNIPodName(c *Container) string { + if c.config.NetMode.IsPod() || c.IsInfra() { + pod, err := c.runtime.GetPod(c.PodID()) + if err == nil { + return pod.Name() + } + } + return c.Name() +} + // Create and configure a new network namespace for a container func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Result, error) { var requestedIP net.IP diff --git a/libpod/rootless_cni_linux.go b/libpod/rootless_cni_linux.go deleted file mode 100644 index df690e914f..0000000000 --- a/libpod/rootless_cni_linux.go +++ /dev/null @@ -1,372 +0,0 @@ -// +build linux - -package libpod - -import ( - "bytes" - "context" - "io" - "path/filepath" - "runtime" - - cnitypes "github.com/containernetworking/cni/pkg/types/current" - "github.com/containernetworking/plugins/pkg/ns" - "github.com/containers/podman/v3/libpod/define" - "github.com/containers/podman/v3/libpod/image" - "github.com/containers/podman/v3/pkg/env" - "github.com/containers/podman/v3/pkg/util" - "github.com/containers/storage/pkg/lockfile" - "github.com/hashicorp/go-multierror" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -// Built from ../contrib/rootless-cni-infra. 
-var rootlessCNIInfraImage = map[string]string{ - "amd64": "quay.io/libpod/rootless-cni-infra@sha256:adf352454666f7ce9ca3e1098448b5ee18f89c4516471ec99447ec9ece917f36", // 5-amd64 -} - -const ( - rootlessCNIInfraContainerNamespace = "podman-system" - rootlessCNIInfraContainerName = "rootless-cni-infra" -) - -// AllocRootlessCNI allocates a CNI netns inside the rootless CNI infra container. -// Locks "rootless-cni-infra.lck". -// -// When the infra container is not running, it is created. -// -// AllocRootlessCNI does not lock c. c should be already locked. -func AllocRootlessCNI(ctx context.Context, c *Container) (ns.NetNS, []*cnitypes.Result, error) { - networks, _, err := c.networks() - if err != nil { - return nil, nil, err - } - if len(networks) == 0 { - return nil, nil, errors.New("rootless CNI networking requires that the container has joined at least one CNI network") - } - l, err := getRootlessCNIInfraLock(c.runtime) - if err != nil { - return nil, nil, err - } - l.Lock() - defer l.Unlock() - infra, err := ensureRootlessCNIInfraContainerRunning(ctx, c.runtime) - if err != nil { - return nil, nil, err - } - k8sPodName := getCNIPodName(c) // passed to CNI as K8S_POD_NAME - ip := "" - if c.config.StaticIP != nil { - ip = c.config.StaticIP.String() - } - mac := "" - if c.config.StaticMAC != nil { - mac = c.config.StaticMAC.String() - } - aliases, err := c.runtime.state.GetAllNetworkAliases(c) - if err != nil { - return nil, nil, err - } - capArgs := "" - // add network aliases json encoded as capabilityArgs for cni - if len(aliases) > 0 { - capabilityArgs := make(map[string]interface{}) - capabilityArgs["aliases"] = aliases - b, err := json.Marshal(capabilityArgs) - if err != nil { - return nil, nil, err - } - capArgs = string(b) - } - - cniResults := make([]*cnitypes.Result, len(networks)) - for i, nw := range networks { - cniRes, err := rootlessCNIInfraCallAlloc(infra, c.ID(), nw, k8sPodName, ip, mac, capArgs) - if err != nil { - return nil, nil, err - } - 
cniResults[i] = cniRes - } - nsObj, err := rootlessCNIInfraGetNS(infra, c.ID()) - if err != nil { - return nil, nil, err - } - logrus.Debugf("rootless CNI: container %q will join %q", c.ID(), nsObj.Path()) - return nsObj, cniResults, nil -} - -// DeallocRootlessCNI deallocates a CNI netns inside the rootless CNI infra container. -// Locks "rootless-cni-infra.lck". -// -// When the infra container is no longer needed, it is removed. -// -// DeallocRootlessCNI does not lock c. c should be already locked. -func DeallocRootlessCNI(ctx context.Context, c *Container) error { - networks, _, err := c.networks() - if err != nil { - return err - } - if len(networks) == 0 { - return errors.New("rootless CNI networking requires that the container has joined at least one CNI network") - } - l, err := getRootlessCNIInfraLock(c.runtime) - if err != nil { - return err - } - l.Lock() - defer l.Unlock() - infra, _ := getRootlessCNIInfraContainer(c.runtime) - if infra == nil { - return nil - } - var errs *multierror.Error - for _, nw := range networks { - err := rootlessCNIInfraCallDealloc(infra, c.ID(), nw) - if err != nil { - errs = multierror.Append(errs, err) - } - } - if isIdle, err := rootlessCNIInfraIsIdle(infra); isIdle || err != nil { - if err != nil { - logrus.Warn(err) - } - logrus.Debugf("rootless CNI: removing infra container %q", infra.ID()) - infra.lock.Lock() - defer infra.lock.Unlock() - if err := c.runtime.removeContainer(ctx, infra, true, false, true); err != nil { - return err - } - logrus.Debugf("rootless CNI: removed infra container %q", infra.ID()) - } - return errs.ErrorOrNil() -} - -func getRootlessCNIInfraLock(r *Runtime) (lockfile.Locker, error) { - fname := filepath.Join(r.config.Engine.TmpDir, "rootless-cni-infra.lck") - return lockfile.GetLockfile(fname) -} - -// getCNIPodName return the pod name (hostname) used by CNI and the dnsname plugin. 
-// If we are in the pod network namespace use the pod name otherwise the container name -func getCNIPodName(c *Container) string { - if c.config.NetMode.IsPod() || c.IsInfra() { - pod, err := c.runtime.GetPod(c.PodID()) - if err == nil { - return pod.Name() - } - } - return c.Name() -} - -func rootlessCNIInfraCallAlloc(infra *Container, id, nw, k8sPodName, ip, mac, capArgs string) (*cnitypes.Result, error) { - logrus.Debugf("rootless CNI: alloc %q, %q, %q, %q, %q, %q", id, nw, k8sPodName, ip, mac, capArgs) - var err error - - _, err = rootlessCNIInfraExec(infra, "alloc", id, nw, k8sPodName, ip, mac, capArgs) - if err != nil { - return nil, err - } - cniResStr, err := rootlessCNIInfraExec(infra, "print-cni-result", id, nw) - if err != nil { - return nil, err - } - var cniRes cnitypes.Result - if err := json.Unmarshal([]byte(cniResStr), &cniRes); err != nil { - return nil, errors.Wrapf(err, "unmarshaling as cnitypes.Result: %q", cniResStr) - } - return &cniRes, nil -} - -func rootlessCNIInfraCallDealloc(infra *Container, id, nw string) error { - logrus.Debugf("rootless CNI: dealloc %q, %q", id, nw) - _, err := rootlessCNIInfraExec(infra, "dealloc", id, nw) - return err -} - -func rootlessCNIInfraIsIdle(infra *Container) (bool, error) { - type isIdle struct { - Idle bool `json:"idle"` - } - resStr, err := rootlessCNIInfraExec(infra, "is-idle") - if err != nil { - return false, err - } - var res isIdle - if err := json.Unmarshal([]byte(resStr), &res); err != nil { - return false, errors.Wrapf(err, "unmarshaling as isIdle: %q", resStr) - } - return res.Idle, nil -} - -func rootlessCNIInfraGetNS(infra *Container, id string) (ns.NetNS, error) { - type printNetnsPath struct { - Path string `json:"path"` - } - resStr, err := rootlessCNIInfraExec(infra, "print-netns-path", id) - if err != nil { - return nil, err - } - var res printNetnsPath - if err := json.Unmarshal([]byte(resStr), &res); err != nil { - return nil, errors.Wrapf(err, "unmarshaling as printNetnsPath: %q", 
resStr) - } - nsObj, err := ns.GetNS(res.Path) - if err != nil { - return nil, err - } - return nsObj, nil -} - -func getRootlessCNIInfraContainer(r *Runtime) (*Container, error) { - containers, err := r.GetContainersWithoutLock(func(c *Container) bool { - return c.Namespace() == rootlessCNIInfraContainerNamespace && - c.Name() == rootlessCNIInfraContainerName - }) - if err != nil { - return nil, err - } - if len(containers) == 0 { - return nil, nil - } - return containers[0], nil -} - -func ensureRootlessCNIInfraContainerRunning(ctx context.Context, r *Runtime) (*Container, error) { - c, err := getRootlessCNIInfraContainer(r) - if err != nil { - return nil, err - } - if c == nil { - return startRootlessCNIInfraContainer(ctx, r) - } - st, err := c.ContainerState() - if err != nil { - return nil, err - } - if st.State == define.ContainerStateRunning { - logrus.Debugf("rootless CNI: infra container %q is already running", c.ID()) - return c, nil - } - logrus.Debugf("rootless CNI: infra container %q is %q, being started", c.ID(), st.State) - if err := c.initAndStart(ctx); err != nil { - return nil, err - } - logrus.Debugf("rootless CNI: infra container %q is running", c.ID()) - return c, nil -} - -func startRootlessCNIInfraContainer(ctx context.Context, r *Runtime) (*Container, error) { - imageName, ok := rootlessCNIInfraImage[runtime.GOARCH] - if !ok { - return nil, errors.Errorf("cannot find rootless-podman-network-sandbox image for %s", runtime.GOARCH) - } - logrus.Debugf("rootless CNI: ensuring image %q to exist", imageName) - newImage, err := r.ImageRuntime().New(ctx, imageName, "", "", nil, nil, - image.SigningOptions{}, nil, util.PullImageMissing, nil) - if err != nil { - return nil, err - } - logrus.Debugf("rootless CNI: image %q is ready", imageName) - - g, err := generate.New("linux") - if err != nil { - return nil, err - } - g.SetupPrivileged(true) - // Set --pid=host for ease of propagating "/proc/PID/ns/net" string - if err := 
g.RemoveLinuxNamespace(string(spec.PIDNamespace)); err != nil { - return nil, err - } - g.RemoveMount("/proc") - procMount := spec.Mount{ - Destination: "/proc", - Type: "bind", - Source: "/proc", - Options: []string{"rbind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(procMount) - // Mount CNI networks - etcCNINetD := spec.Mount{ - Destination: "/etc/cni/net.d", - Type: "bind", - Source: r.config.Network.NetworkConfigDir, - Options: []string{"ro", "bind"}, - } - g.AddMount(etcCNINetD) - - inspectData, err := newImage.Inspect(ctx) - if err != nil { - return nil, err - } - imageEnv, err := env.ParseSlice(inspectData.Config.Env) - if err != nil { - return nil, err - } - for k, v := range imageEnv { - g.AddProcessEnv(k, v) - } - if len(inspectData.Config.Cmd) == 0 { - return nil, errors.Errorf("rootless CNI infra image %q has no command specified", imageName) - } - g.SetProcessArgs(inspectData.Config.Cmd) - - var options []CtrCreateOption - options = append(options, WithRootFSFromImage(newImage.ID(), imageName, imageName)) - options = append(options, WithCtrNamespace(rootlessCNIInfraContainerNamespace)) - options = append(options, WithName(rootlessCNIInfraContainerName)) - options = append(options, WithPrivileged(true)) - options = append(options, WithSecLabels([]string{"disable"})) - options = append(options, WithRestartPolicy("always")) - options = append(options, WithNetNS(nil, false, "slirp4netns", nil)) - c, err := r.NewContainer(ctx, g.Config, options...) 
- if err != nil { - return nil, err - } - logrus.Debugf("rootless CNI infra container %q is created, now being started", c.ID()) - if err := c.initAndStart(ctx); err != nil { - return nil, err - } - logrus.Debugf("rootless CNI: infra container %q is running", c.ID()) - - return c, nil -} - -func rootlessCNIInfraExec(c *Container, args ...string) (string, error) { - cmd := "rootless-cni-infra" - var ( - outB bytes.Buffer - errB bytes.Buffer - streams define.AttachStreams - config ExecConfig - ) - streams.OutputStream = &nopWriteCloser{Writer: &outB} - streams.ErrorStream = &nopWriteCloser{Writer: &errB} - streams.AttachOutput = true - streams.AttachError = true - config.Command = append([]string{cmd}, args...) - config.Privileged = true - logrus.Debugf("rootlessCNIInfraExec: c.ID()=%s, config=%+v, streams=%v, begin", - c.ID(), config, streams) - code, err := c.Exec(&config, &streams, nil) - logrus.Debugf("rootlessCNIInfraExec: c.ID()=%s, config=%+v, streams=%v, end (code=%d, err=%v)", - c.ID(), config, streams, code, err) - if err != nil { - return "", err - } - if code != 0 { - return "", errors.Errorf("command %s %v in container %s failed with status %d, stdout=%q, stderr=%q", - cmd, args, c.ID(), code, outB.String(), errB.String()) - } - return outB.String(), nil -} - -type nopWriteCloser struct { - io.Writer -} - -func (nwc *nopWriteCloser) Close() error { - return nil -} From 954d92082534ac775761558b455408aca2246e36 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Sun, 21 Mar 2021 17:32:40 +0100 Subject: [PATCH 09/13] Make the docker-compose test work rootless Make sure the DOCKER_SOCK location is accessible by the user when run rootless. Also set the DOCKER_HOST env var to ensure docker-compose will use the non default location. Cleanup steps such as `rm` or `umount` must be run inside podman unshare otherwise they can fail due to missing privileges. Change the curl test to use --retry-all-errors otherwise the tests will flake.
The web server inside the container will return http code 500 sometimes, most likely because it is not fully ready to accept connections. With --retry-all-errors curl will retry instead of failing and thus the test will work. Signed-off-by: Paul Holzinger --- test/compose/test-compose | 45 ++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/test/compose/test-compose b/test/compose/test-compose index 9558fbf58b..3cda8514e8 100755 --- a/test/compose/test-compose +++ b/test/compose/test-compose @@ -13,7 +13,8 @@ TEST_ROOTDIR=$(realpath $(dirname $0)) # Podman executable PODMAN_BIN=$(realpath $TEST_ROOTDIR/../../bin)/podman -# Local path to docker socket (we will add the unix:/ prefix when we need it) +# Local path to docker socket with unix prefix +# The path will be changed for rootless users DOCKER_SOCK=/var/run/docker.sock # END stuff you can but probably shouldn't customize @@ -40,6 +41,13 @@ echo 0 >$failures_file ############################################################################### # BEGIN infrastructure code - the helper functions used in tests themselves +################# +# is_rootless # Check if we run as normal user +################# +function is_rootless() { + [ "$(id -u)" -ne 0 ] +} + ######### # die # Exit error with a message to stderr ######### @@ -155,7 +163,7 @@ function test_port() { local op="$2" # '=' or '~' local expect="$3" # what to expect from curl output - local actual=$(curl --retry 5 --retry-connrefused -s http://127.0.0.1:$port/) + local actual=$(curl --retry 10 --retry-all-errors -s http://127.0.0.1:$port/) local curl_rc=$? if [ $curl_rc -ne 0 ]; then _show_ok 0 "$testname - curl failed with status $curl_rc" @@ -179,7 +187,12 @@ function start_service() { test -x $PODMAN_BIN || die "Not found: $PODMAN_BIN" # FIXME: use ${testname} subdir but we can't: 50-char limit in runroot - rm -rf $WORKDIR/{root,runroot,cni} + if ! 
is_rootless; then + rm -rf $WORKDIR/{root,runroot,cni} + else + $PODMAN_BIN unshare rm -rf $WORKDIR/{root,runroot,cni} + fi + rm -f $DOCKER_SOCK mkdir --mode 0755 $WORKDIR/{root,runroot,cni} chcon --reference=/var/lib/containers $WORKDIR/root cp /etc/cni/net.d/*podman*conflist $WORKDIR/cni/ @@ -190,7 +203,7 @@ function start_service() { --cgroup-manager=systemd \ --cni-config-dir $WORKDIR/cni \ system service \ - --time 0 unix:/$DOCKER_SOCK \ + --time 0 unix://$DOCKER_SOCK \ &> $WORKDIR/server.log & service_pid=$! @@ -239,6 +252,14 @@ done ############################################################################### # BEGIN entry handler (subtest invoker) +# When rootless use a socket path accessible by the rootless user +if is_rootless; then + DOCKER_SOCK="$WORKDIR/docker.sock" + DOCKER_HOST="unix://$DOCKER_SOCK" + # export DOCKER_HOST docker-compose will use it + export DOCKER_HOST +fi + # Identify the tests to run. If called with args, use those as globs. tests_to_run=() if [ -n "$*" ]; then @@ -322,7 +343,11 @@ for t in ${tests_to_run[@]}; do wait $service_pid # FIXME: otherwise we get EBUSY - umount $WORKDIR/root/overlay &>/dev/null + if ! is_rootless; then + umount $WORKDIR/root/overlay &>/dev/null + else + $PODMAN_BIN unshare umount $WORKDIR/root/overlay &>/dev/null + fi # FIXME: run 'podman ps'? # rm -rf $WORKDIR/${testname} @@ -336,9 +361,13 @@ done test_count=$(<$testcounter_file) failure_count=$(<$failures_file) -#if [ -z "$PODMAN_TESTS_KEEP_WORKDIR" ]; then -# rm -rf $WORKDIR -#fi +if [ -z "$PODMAN_TESTS_KEEP_WORKDIR" ]; then + if ! is_rootless; then + rm -rf $WORKDIR + else + $PODMAN_BIN unshare rm -rf $WORKDIR + fi +fi echo "1..${test_count}" From 676fcb722f31f3f8eedb017bee828002d23b68f8 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Sun, 21 Mar 2021 18:23:31 +0100 Subject: [PATCH 10/13] Add new docker-compose test for two networks Also fix the tests so we can use the podman function with the output. 
Signed-off-by: Paul Holzinger --- test/compose/mount_and_label/tests.sh | 3 ++- test/compose/test-compose | 7 ++++--- test/compose/two_networks/Readme.md | 8 ++++++++ test/compose/two_networks/docker-compose.yml | 11 +++++++++++ test/compose/two_networks/tests.sh | 7 +++++++ 5 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 test/compose/two_networks/Readme.md create mode 100644 test/compose/two_networks/docker-compose.yml create mode 100644 test/compose/two_networks/tests.sh diff --git a/test/compose/mount_and_label/tests.sh b/test/compose/mount_and_label/tests.sh index 07ff089b53..fa929bed6d 100644 --- a/test/compose/mount_and_label/tests.sh +++ b/test/compose/mount_and_label/tests.sh @@ -1,4 +1,5 @@ # -*- bash -*- test_port 5000 = "Podman rulez!" -podman container inspect -l --format '{{.Config.Labels}}' | grep "the_best" +podman container inspect -l --format '{{.Config.Labels}}' +like "$output" "io.podman:the_best" "$testname : Container label is set" diff --git a/test/compose/test-compose b/test/compose/test-compose index 3cda8514e8..704c71a9fa 100755 --- a/test/compose/test-compose +++ b/test/compose/test-compose @@ -224,10 +224,11 @@ function start_service() { ############ function podman() { echo "\$ podman $*" >>$WORKDIR/output.log - $PODMAN_BIN \ + output=$($PODMAN_BIN \ --root $WORKDIR/root \ --runroot $WORKDIR/runroot \ - "$@" >>$WORKDIR/output.log 2>&1 + "$@") + echo -n "$output" >>$WORKDIR/output.log } ################### @@ -329,7 +330,7 @@ for t in ${tests_to_run[@]}; do fi # Done. Clean up. - docker-compose down &> $logfile + docker-compose down &>> $logfile rc=$? if [[ $rc -eq 0 ]]; then _show_ok 1 "$testname - down" diff --git a/test/compose/two_networks/Readme.md b/test/compose/two_networks/Readme.md new file mode 100644 index 0000000000..471004f7df --- /dev/null +++ b/test/compose/two_networks/Readme.md @@ -0,0 +1,8 @@ +two networks +=============== + +This test checks that we can create containers with more than one network. 
+ +Validation +------------ +* podman container inspect two_networks_con1_1 --format '{{len .NetworkSettings.Networks}}' shows 2 diff --git a/test/compose/two_networks/docker-compose.yml b/test/compose/two_networks/docker-compose.yml new file mode 100644 index 0000000000..686396cccf --- /dev/null +++ b/test/compose/two_networks/docker-compose.yml @@ -0,0 +1,11 @@ +version: '3' +services: + con1: + image: alpine + command: top + networks: + - net1 + - net2 +networks: + net1: + net2: diff --git a/test/compose/two_networks/tests.sh b/test/compose/two_networks/tests.sh new file mode 100644 index 0000000000..1cc88aa5f6 --- /dev/null +++ b/test/compose/two_networks/tests.sh @@ -0,0 +1,7 @@ +# -*- bash -*- + +podman container inspect two_networks_con1_1 --format '{{len .NetworkSettings.Networks}}' +is "$output" "2" "$testname : Container is connected to both networks" +podman container inspect two_networks_con1_1 --format '{{.NetworkSettings.Networks}}' +like "$output" "two_networks_net1" "$testname : First network name exists" +like "$output" "two_networks_net2" "$testname : Second network name exists" From 6cd807e3b7ce52ecfdfc07d0a04fc99a88b4dd28 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Mon, 22 Mar 2021 15:30:38 +0100 Subject: [PATCH 11/13] Cleanup the rootless cni namespace Delete the network namespace and kill the slirp4netns process when it is no longer needed.
Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 102 +++++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 2734f77599..5c43ebb8b9 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -5,12 +5,14 @@ package libpod import ( "crypto/rand" "fmt" + "io/ioutil" "net" "os" "os/exec" "path/filepath" "regexp" "sort" + "strconv" "strings" "syscall" "time" @@ -24,6 +26,7 @@ import ( "github.com/containers/podman/v3/pkg/netns" "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/podman/v3/pkg/util" + "github.com/containers/storage/pkg/lockfile" "github.com/cri-o/ocicni/pkg/ocicni" "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" @@ -102,8 +105,9 @@ func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, port } type rootlessCNI struct { - ns ns.NetNS - dir string + ns ns.NetNS + dir string + lock lockfile.Locker } func (r *rootlessCNI) Do(toRun func() error) error { @@ -164,6 +168,66 @@ func (r *rootlessCNI) Do(toRun func() error) error { return err } +// cleanup the rootless cni namespace if needed +// check if we have running containers with the bridge network mode +func (r *rootlessCNI) cleanup(runtime *Runtime) error { + r.lock.Lock() + defer r.lock.Unlock() + running := func(c *Container) bool { + // we cannot use c.state() because it will try to lock the container + // using c.state.State directly should be good enough for this use case + state := c.state.State + return state == define.ContainerStateRunning + } + ctrs, err := runtime.GetContainersWithoutLock(running) + if err != nil { + return err + } + cleanup := true + for _, ctr := range ctrs { + if ctr.config.NetMode.IsBridge() { + cleanup = false + } + } + if cleanup { + // make sure the the cni results (cache) dir is empty + // libpod instances with another root dir are not covered by the check above + // this 
allows several libpod instances to use the same rootless cni ns + contents, err := ioutil.ReadDir(filepath.Join(r.dir, "var/lib/cni/results")) + if (err == nil && len(contents) == 0) || os.IsNotExist(err) { + logrus.Debug("Cleaning up rootless cni namespace") + err = netns.UnmountNS(r.ns) + if err != nil { + return err + } + // make the following errors not fatal + err = r.ns.Close() + if err != nil { + logrus.Error(err) + } + b, err := ioutil.ReadFile(filepath.Join(r.dir, "rootless-cni-slirp4netns.pid")) + if err == nil { + var i int + i, err = strconv.Atoi(string(b)) + if err == nil { + // kill the slirp process so we do not leak it + err = syscall.Kill(i, syscall.SIGTERM) + } + } + if err != nil { + logrus.Errorf("failed to kill slirp4netns process: %s", err) + } + err = os.RemoveAll(r.dir) + if err != nil { + logrus.Error(err) + } + } else if err != nil && !os.IsNotExist(err) { + logrus.Errorf("could not read rootless cni directory, skipping cleanup: %s", err) + } + } + return nil +} + // getRootlessCNINetNs returns the rootless cni object. If create is set to true // the rootless cni namespace will be created if it does not exists already. 
func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { @@ -174,6 +238,18 @@ func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { return nil, err } cniDir := filepath.Join(runDir, "rootless-cni") + err = os.MkdirAll(cniDir, 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni directory") + } + + lfile := filepath.Join(cniDir, "rootless-cni.lck") + lock, err := lockfile.GetLockfile(lfile) + if err != nil { + return nil, errors.Wrap(err, "failed to get rootless-cni lockfile") + } + lock.Lock() + defer lock.Unlock() nsDir, err := netns.GetNSRunDir() if err != nil { @@ -219,11 +295,7 @@ func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { if err != nil { return nil, err } - // the slirp4netns arguments being passed are describes as follows: - // from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns - // -c, --configure Brings up the tap interface - // -e, --exit-fd=FD specify the FD for terminating slirp4netns - // -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished + // Note we do not use --exit-fd, we kill this process by pid cmdArgs = append(cmdArgs, "-c", "-r", "3") cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0") @@ -258,6 +330,14 @@ func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { if err := cmd.Start(); err != nil { return nil, errors.Wrapf(err, "failed to start slirp4netns process") } + // create pid file for the slirp4netns process + // this is need to kill the process in the cleanup + pid := strconv.Itoa(cmd.Process.Pid) + err = ioutil.WriteFile(filepath.Join(cniDir, "rootless-cni-slirp4netns.pid"), []byte(pid), 0700) + if err != nil { + errors.Wrap(err, "unable to write rootless-cni slirp4netns pid file") + } + defer func() { if err := cmd.Process.Release(); err != nil { logrus.Errorf("unable to release command process: %q", err) @@ -303,8 +383,9 @@ func (r *Runtime) 
getRootlessCNINetNs(new bool) (*rootlessCNI, error) { } rootlessCNINS = &rootlessCNI{ - ns: ns, - dir: cniDir, + ns: ns, + dir: cniDir, + lock: lock, } } return rootlessCNINS, nil @@ -544,6 +625,9 @@ func (r *Runtime) teardownOCICNIPod(podNetwork ocicni.PodNetwork) error { if rootlessCNINS != nil { // execute the cni setup in the rootless net ns err = rootlessCNINS.Do(tearDownPod) + if err == nil { + err = rootlessCNINS.cleanup(r) + } } else { err = tearDownPod() } From 973807092d10406c039ab4b376f2fd74f456be1d Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 26 Mar 2021 10:41:01 +0100 Subject: [PATCH 12/13] Use the slirp4netns dns in the rootless cni ns If a user only has a local dns server in the resolv.conf file the dns resolution will fail. Instead we create a new resolv.conf which will use the slirp4netns dns. Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 5c43ebb8b9..157c854311 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -24,6 +24,7 @@ import ( "github.com/containers/podman/v3/libpod/network" "github.com/containers/podman/v3/pkg/errorhandling" "github.com/containers/podman/v3/pkg/netns" + "github.com/containers/podman/v3/pkg/resolvconf" "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/podman/v3/pkg/util" "github.com/containers/storage/pkg/lockfile" @@ -134,9 +135,14 @@ func (r *rootlessCNI) Do(toRun func() error) error { return errors.Wrap(err, "failed to mount netns directory for rootless cni") } + // mount resolv.conf to make use of the host dns + err = unix.Mount(filepath.Join(r.dir, "resolv.conf"), "/etc/resolv.conf", "none", unix.MS_BIND, "") + if err != nil { + return errors.Wrap(err, "failed to mount resolv.conf for rootless cni") + } + // also keep /run/systemd if it exists - // many files are symlinked into this dir,
for example systemd-resolved links - // /etc/resolv.conf but the dnsname plugin needs access to this file + // many files are symlinked into this dir, for example /dev/log runSystemd := "/run/systemd" _, err = os.Stat(runSystemd) if err == nil { @@ -348,6 +354,29 @@ func (r *Runtime) getRootlessCNINetNs(new bool) (*rootlessCNI, error) { return nil, err } + // build a new resolv.conf file which uses the slirp4netns dns server address + resolveIP := slirp4netnsDNS + if netOptions.cidr != "" { + _, cidr, err := net.ParseCIDR(netOptions.cidr) + if err != nil { + return nil, errors.Wrap(err, "failed to parse slirp4netns cidr") + } + // the slirp dns ip is always the third ip in the subnet + cidr.IP[len(cidr.IP)-1] = cidr.IP[len(cidr.IP)-1] + 3 + resolveIP = cidr.IP.String() + } + conf, err := resolvconf.Get() + if err != nil { + return nil, err + } + searchDomains := resolvconf.GetSearchDomains(conf.Content) + dnsOptions := resolvconf.GetOptions(conf.Content) + + _, err = resolvconf.Build(filepath.Join(cniDir, "resolv.conf"), []string{resolveIP}, searchDomains, dnsOptions) + if err != nil { + return nil, errors.Wrap(err, "failed to create rootless cni resolv.conf") + } + // create cni directories to store files // they will be bind mounted to the correct location in a extra mount ns err = os.MkdirAll(filepath.Join(cniDir, "var"), 0700) From d1e32dc6c6ccb24786cd095befcde761062442a0 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 1 Apr 2021 00:32:56 +0200 Subject: [PATCH 13/13] Add rootless docker-compose test to the CI Signed-off-by: Paul Holzinger --- .cirrus.yml | 7 ++++++- contrib/cirrus/runner.sh | 25 +++++++++++++------------ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index e09db2a813..074f2f4e0e 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -440,7 +440,7 @@ apiv2_test_task: time_script: '$SCRIPT_BASE/logcollector.sh time' compose_test_task: - name: "compose test on $DISTRO_NV" + name: "compose test on 
$DISTRO_NV ($PRIV_NAME)" alias: compose_test only_if: *not_docs skip: *tags @@ -450,6 +450,11 @@ compose_test_task: env: <<: *stdenvars TEST_FLAVOR: compose + matrix: + - env: + PRIV_NAME: root + - env: + PRIV_NAME: rootless clone_script: *noop # Comes from cache gopath_cache: *ro_gopath_cache setup_script: *setup diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh index 507d22e138..f52e107cc9 100755 --- a/contrib/cirrus/runner.sh +++ b/contrib/cirrus/runner.sh @@ -288,18 +288,6 @@ dotest() { exec_container # does not return fi; - # shellcheck disable=SC2154 - if [[ "$PRIV_NAME" == "rootless" ]] && [[ "$UID" -eq 0 ]]; then - req_env_vars ROOTLESS_USER - msg "Re-executing runner through ssh as user '$ROOTLESS_USER'" - msg "************************************************************" - set -x - exec ssh $ROOTLESS_USER@localhost \ - -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ - -o CheckHostIP=no $GOSRC/$SCRIPT_BASE/runner.sh - # does not return - fi - # containers/automation sets this to 0 for its dbg() function # but the e2e integration tests are also sensitive to it. unset DEBUG @@ -340,6 +328,19 @@ msg "************************************************************" ((${SETUP_ENVIRONMENT:-0})) || \ die "Expecting setup_environment.sh to have completed successfully" +# shellcheck disable=SC2154 +if [[ "$PRIV_NAME" == "rootless" ]] && [[ "$UID" -eq 0 ]]; then + req_env_vars ROOTLESS_USER + msg "Re-executing runner through ssh as user '$ROOTLESS_USER'" + msg "************************************************************" + set -x + exec ssh $ROOTLESS_USER@localhost \ + -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + -o CheckHostIP=no $GOSRC/$SCRIPT_BASE/runner.sh + # Does not return! +fi +# else: not running rootless, do nothing special + cd "${GOSRC}/" handler="_run_${TEST_FLAVOR}"