Skip to content

Commit

Permalink
rootless: use RootlessKit port forwarder
Browse files Browse the repository at this point in the history
RootlessKit port forwarder has a lot of advantages over the slirp4netns port forwarder:

* Very high throughput.
  Benchmark result on Travis: socat: 5.2 Gbps, slirp4netns: 8.3 Gbps, RootlessKit: 27.3 Gbps
  (https://travis-ci.org/rootless-containers/rootlesskit/builds/597056377)

* Connections from the host are treated as 127.0.0.1 rather than 10.0.2.2 in the namespace.
  No UDP issue (containers#4586)

* No tcp_rmem issue (containers#4537)

* Probably works with IPv6. Even if not, it is trivial to support IPv6.  (containers#4311)

* Easily extensible for future support of SCTP

* Easily extensible for future support of `lxc-user-nic` SUID network

RootlessKit port forwarder has been already adopted as the default port forwarder by Rootless Docker/Moby,
and no issue has been reported AFAIK.

As the port forwarder is imported as a Go package, no `rootlesskit` binary is required for Podman.

Fix containers#4586
May-fix containers#4559
Fix containers#4537
May-fix containers#4311

See https://github.com/rootless-containers/rootlesskit/blob/v0.7.0/pkg/port/builtin/builtin.go

Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed Jan 8, 2020
1 parent c41fd09 commit da7595a
Show file tree
Hide file tree
Showing 19 changed files with 1,528 additions and 161 deletions.
4 changes: 1 addition & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ require (
github.com/containernetworking/cni v0.7.2-0.20190904153231-83439463f784
github.com/containernetworking/plugins v0.8.2
github.com/containers/buildah v1.12.0
github.com/containers/conmon v2.0.2+incompatible // indirect
github.com/containers/image/v5 v5.1.0
github.com/containers/psgo v1.4.0
github.com/containers/storage v1.15.4
Expand All @@ -37,7 +36,6 @@ require (
github.com/hashicorp/go-multierror v1.0.0
github.com/hpcloud/tail v1.0.0
github.com/json-iterator/go v1.1.8
github.com/mattn/go-isatty v0.0.8 // indirect
github.com/mrunalp/fileutils v0.0.0-20171103030105-7d4729fb3618
github.com/onsi/ginkgo v1.10.3
github.com/onsi/gomega v1.7.1
Expand All @@ -51,13 +49,13 @@ require (
github.com/pkg/errors v0.8.1
github.com/pkg/profile v1.4.0
github.com/pmezard/go-difflib v1.0.0
github.com/rootless-containers/rootlesskit v0.7.1
github.com/seccomp/containers-golang v0.0.0-20190312124753-8ca8945ccf5f
github.com/sirupsen/logrus v1.4.2
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.4.0
github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
github.com/uber-go/atomic v1.4.0 // indirect
github.com/uber/jaeger-client-go v2.20.1+incompatible
github.com/uber/jaeger-lib v0.0.0-20190122222657-d036253de8f5 // indirect
github.com/varlink/go v0.0.0-20190502142041-0f1d566d194b
Expand Down
66 changes: 10 additions & 56 deletions go.sum

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions libpod/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ type Container struct {
rootlessSlirpSyncR *os.File
rootlessSlirpSyncW *os.File

rootlessPortSyncR *os.File
rootlessPortSyncW *os.File

// A restored container should have the same IP address as before
// being checkpointed. If requestedIP is set it will be used instead
// of config.StaticIP.
Expand Down
205 changes: 103 additions & 102 deletions libpod/networking_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
package libpod

import (
"bytes"
"crypto/rand"
"fmt"
"io"
"io/ioutil"
"net"
"os"
Expand All @@ -20,6 +22,7 @@ import (
"github.com/containers/libpod/pkg/errorhandling"
"github.com/containers/libpod/pkg/netns"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/rootlessport"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -151,19 +154,6 @@ func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result,
return ctrNS, networkStatus, err
}

type slirp4netnsCmdArg struct {
Proto string `json:"proto,omitempty"`
HostAddr string `json:"host_addr"`
HostPort int32 `json:"host_port"`
GuestAddr string `json:"guest_addr"`
GuestPort int32 `json:"guest_port"`
}

type slirp4netnsCmd struct {
Execute string `json:"execute"`
Args slirp4netnsCmdArg `json:"arguments"`
}

func checkSlirpFlags(path string) (bool, bool, bool, error) {
cmd := exec.Command(path, "--help")
out, err := cmd.CombinedOutput()
Expand Down Expand Up @@ -194,13 +184,9 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
defer errorhandling.CloseQuiet(syncW)

havePortMapping := len(ctr.Config().PortMappings) > 0
apiSocket := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID))
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID))

cmdArgs := []string{}
if havePortMapping {
cmdArgs = append(cmdArgs, "--api-socket", apiSocket)
}
dhp, mtu, sandbox, err := checkSlirpFlags(path)
if err != nil {
return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err)
Expand All @@ -221,15 +207,19 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
// -e, --exit-fd=FD specify the FD for terminating slirp4netns
// -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished
cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4")
netnsPath := ""
if !ctr.config.PostConfigureNetNS {
ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless network sync pipe")
}
cmdArgs = append(cmdArgs, "--netns-type=path", ctr.state.NetNS.Path(), "tap0")
netnsPath = ctr.state.NetNS.Path()
cmdArgs = append(cmdArgs, "--netns-type=path", netnsPath, "tap0")
} else {
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
netnsPath = fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID)
// we don't use --netns-path here (unavailable for slirp4netns < v0.4)
cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0")
}

Expand Down Expand Up @@ -269,11 +259,27 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
}
}()

if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil {
return err
}

if havePortMapping {
return r.setupRootlessPortMapping(ctr, netnsPath)
}
return nil
}

func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout time.Duration) error {
prog := filepath.Base(cmd.Path)
if len(cmd.Args) > 0 {
prog = cmd.Args[0]
}
b := make([]byte, 16)
for {
if err := syncR.SetDeadline(time.Now().Add(1 * time.Second)); err != nil {
return errors.Wrapf(err, "error setting slirp4netns pipe timeout")
if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil {
return errors.Wrapf(err, "error setting %s pipe timeout", prog)
}
// FIXME: return err as soon as proc exits, without waiting for timeout
if _, err := syncR.Read(b); err == nil {
break
} else {
Expand All @@ -282,7 +288,7 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
var status syscall.WaitStatus
pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
if err != nil {
return errors.Wrapf(err, "failed to read slirp4netns process status")
return errors.Wrapf(err, "failed to read %s process status", prog)
}
if pid != cmd.Process.Pid {
continue
Expand All @@ -294,100 +300,86 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
}
logContent, err := ioutil.ReadAll(logFile)
if err != nil {
return errors.Wrapf(err, "slirp4netns failed")
return errors.Wrapf(err, "%s failed", prog)
}
return errors.Errorf("slirp4netns failed: %q", logContent)
return errors.Errorf("%s failed: %q", prog, logContent)
}
if status.Signaled() {
return errors.New("slirp4netns killed by signal")
return errors.Errorf("%s killed by signal", prog)
}
continue
}
return errors.Wrapf(err, "failed to read from slirp4netns sync pipe")
return errors.Wrapf(err, "failed to read from %s sync pipe", prog)
}
}
return nil
}

if havePortMapping {
const pidWaitTimeout = 60 * time.Second
chWait := make(chan error)
go func() {
interval := 25 * time.Millisecond
for i := time.Duration(0); i < pidWaitTimeout; i += interval {
// Check if the process is still running.
var status syscall.WaitStatus
pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
if err != nil {
break
}
if pid != cmd.Process.Pid {
continue
}
if status.Exited() || status.Signaled() {
chWait <- fmt.Errorf("slirp4netns exited with status %d", status.ExitStatus())
}
time.Sleep(interval)
}
}()
defer close(chWait)
func (r *Runtime) setupRootlessPortMapping(ctr *Container, netnsPath string) (err error) {
syncR, syncW, err := os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to open pipe")
}
defer errorhandling.CloseQuiet(syncR)
defer errorhandling.CloseQuiet(syncW)

// wait that API socket file appears before trying to use it.
if _, err := WaitForFile(apiSocket, chWait, pidWaitTimeout); err != nil {
return errors.Wrapf(err, "waiting for slirp4nets to create the api socket file %s", apiSocket)
}
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("rootlessport-%s.log", ctr.config.ID))
logFile, err := os.Create(logPath)
if err != nil {
return errors.Wrapf(err, "failed to open rootlessport log file %s", logPath)
}
defer logFile.Close()
// Unlink immediately the file so we won't need to worry about cleaning it up later.
// It is still accessible through the open fd logFile.
if err := os.Remove(logPath); err != nil {
return errors.Wrapf(err, "delete file %s", logPath)
}

// for each port we want to add we need to open a connection to the slirp4netns control socket
// and send the add_hostfwd command.
for _, i := range ctr.config.PortMappings {
conn, err := net.Dial("unix", apiSocket)
if err != nil {
return errors.Wrapf(err, "cannot open connection to %s", apiSocket)
}
defer func() {
if err := conn.Close(); err != nil {
logrus.Errorf("unable to close connection: %q", err)
}
}()
hostIP := i.HostIP
if hostIP == "" {
hostIP = "0.0.0.0"
}
cmd := slirp4netnsCmd{
Execute: "add_hostfwd",
Args: slirp4netnsCmdArg{
Proto: i.Protocol,
HostAddr: hostIP,
HostPort: i.HostPort,
GuestPort: i.ContainerPort,
},
}
// create the JSON payload and send it. Mark the end of request shutting down writes
// to the socket, as requested by slirp4netns.
data, err := json.Marshal(&cmd)
if err != nil {
return errors.Wrapf(err, "cannot marshal JSON for slirp4netns")
}
if _, err := conn.Write([]byte(fmt.Sprintf("%s\n", data))); err != nil {
return errors.Wrapf(err, "cannot write to control socket %s", apiSocket)
}
if err := conn.(*net.UnixConn).CloseWrite(); err != nil {
return errors.Wrapf(err, "cannot shutdown the socket %s", apiSocket)
}
buf := make([]byte, 2048)
readLength, err := conn.Read(buf)
if err != nil {
return errors.Wrapf(err, "cannot read from control socket %s", apiSocket)
}
// if there is no 'error' key in the received JSON data, then the operation was
// successful.
var y map[string]interface{}
if err := json.Unmarshal(buf[0:readLength], &y); err != nil {
return errors.Wrapf(err, "error parsing error status from slirp4netns")
}
if e, found := y["error"]; found {
return errors.Errorf("error from slirp4netns while setting up port redirection: %v", e)
}
ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless port sync pipe")
}
cfg := rootlessport.Config{
Mappings: ctr.config.PortMappings,
NetNSPath: netnsPath,
ExitFD: 3,
ReadyFD: 4,
}
cfgJSON, err := json.Marshal(cfg)
if err != nil {
return err
}
cfgR := bytes.NewReader(cfgJSON)
var stdout bytes.Buffer
cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid()))
cmd.Args = []string{rootlessport.ReexecKey}
// Leak one end of the pipe in rootlessport process, the other will be sent to conmon
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncR, syncW)
cmd.Stdin = cfgR
// stdout is for human-readable error, stderr is for debug log
cmd.Stdout = &stdout
cmd.Stderr = io.MultiWriter(logFile, &logrusDebugWriter{"rootlessport: "})
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "failed to start rootlessport process")
}
defer func() {
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release rootlessport process: %q", err)
}
}()
if err := waitForSync(syncR, cmd, logFile, 3*time.Second); err != nil {
stdoutStr := stdout.String()
if stdoutStr != "" {
// err contains full debug log and too verbose, so return stdoutStr
logrus.Debug(err)
return errors.Errorf("failed to expose ports via rootlessport: %q", stdoutStr)
}
return err
}
logrus.Debug("rootlessport is ready")
return nil
}

Expand Down Expand Up @@ -587,3 +579,12 @@ func (c *Container) getContainerNetworkInfo(data *InspectContainerData) *Inspect
}
return data
}

type logrusDebugWriter struct {
prefix string
}

func (w *logrusDebugWriter) Write(p []byte) (int, error) {
logrus.Debugf("%s%s", w.prefix, string(p))
return len(p), nil
}
9 changes: 9 additions & 0 deletions libpod/oci_conmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,15 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
// Leak one end in conmon, the other one will be leaked into slirp4netns
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)

if ctr.rootlessPortSyncR != nil {
defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR)
}
if ctr.rootlessPortSyncW != nil {
defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
// Leak one end in conmon, the other one will be leaked into rootlessport
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
}
}

err = startCommandGivenSelinux(cmd)
Expand Down
Loading

0 comments on commit da7595a

Please sign in to comment.