Skip to content

Commit

Permalink
[WIP] [Carry 2535] rootless: support detach-netns mode
Browse files Browse the repository at this point in the history
> [!CAUTION]
> WIP.
>
> - `nerdctl rm` hangs for while:
> ```
> WARN[0026] failed to remove container state dir /home/suda/.local/share/nerdctl/1935db59/containers/default/8fcdf0e2516e1b372d2e53a78aa82ab871e38b3479cf8205c43a994c56803f78  error="<nil>"
> ```
>
> - `--net=host` fails:
> ```
> error during container init: error mounting "sysfs" to rootfs at "/sys": mount src=sysfs, dst=/sys, dstFD=/proc/self/fd/7, flags=0xf: operation not permitted: unknown
> ```

When RootlessKit v2.0 (rootless-containers/rootlesskit PR 379) is
installed, `containerd-rootless.sh` launches it with `--detach-netns`
so as to run the daemon in the host network namespace.

This will enable:
- Accelerated (and deflaked) `nerdctl pull`, `nerdctl push`, `nerdctl build`, etc
- Proper support for `nerdctl pull 127.0.0.1:.../...`
- Proper support for `nerdctl run --net=host`

Replaces Fahed Dorgaa's PR 2535

Co-authored-by: fahed dorgaa <[email protected]>
Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda and fahedouch committed Jan 1, 2024
1 parent ff2fe3b commit fb193e0
Show file tree
Hide file tree
Showing 10 changed files with 153 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ARG STARGZ_SNAPSHOTTER_VERSION=v0.15.1
# Extra deps: Encryption
ARG IMGCRYPT_VERSION=v1.1.9
# Extra deps: Rootless
ARG ROOTLESSKIT_VERSION=v1.1.1
ARG ROOTLESSKIT_VERSION=v2.0.0-alpha.2
ARG SLIRP4NETNS_VERSION=v1.2.2
# Extra deps: bypass4netns
ARG BYPASS4NETNS_VERSION=v0.3.0
Expand Down
6 changes: 6 additions & 0 deletions Dockerfile.d/SHA256SUMS.d/rootlesskit-v2.0.0-alpha.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
7c8c07c159aef32b5e68f5b8dc508dc422499744f61fa916c246bfae16a0d85c rootlesskit-aarch64.tar.gz
683ba2c34bfa4a3477f9c50508a233c94b71a6eeaa0ee080abd1ebc2c09a8b9c rootlesskit-armv7l.tar.gz
844b6297021d914be9f266e341ff77da4b98f43074504fe9cc020ae5c61a791d rootlesskit-ppc64le.tar.gz
d317e9c519d862508d6659083f9e1773573e899aa6e48e89d121211e5e823b6a rootlesskit-riscv64.tar.gz
720b425c608f8ab4326354582bc92825031d9d8c40865df155c2c7cb8368f115 rootlesskit-s390x.tar.gz
d29edd2e3d903974754edb14b251ef19bfa9317e6626436fac760d1213879e8d rootlesskit-x86_64.tar.gz
7 changes: 6 additions & 1 deletion extras/rootless/containerd-rootless-setuptool.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,12 @@ cmd_entrypoint_check() {
cmd_entrypoint_nsenter() {
# Runs the given command ("$@") inside the namespaces of the RootlessKit child
# process, whose PID is read from $XDG_RUNTIME_DIR/containerd-rootless/child_pid.
# The current working directory and credentials are preserved by nsenter.
# No need to call init()
pid=$(cat "$XDG_RUNTIME_DIR/containerd-rootless/child_pid")
# NOTE(review): this unconditional exec (always passing -n) looks like the
# pre-change line of the diff hunk. If it is really present together with the
# lines below, the detach-netns handling that follows is unreachable — confirm.
exec nsenter --no-fork --wd="$(pwd)" --preserve-credentials -m -n -U -t "$pid" -- "$@"
# n holds the optional network-namespace flag for nsenter; empty means "do not enter the netns".
n=""
# If RootlessKit is running with `--detach-netns` mode, we do NOT enter the detached netns here
if [ ! -e "$XDG_RUNTIME_DIR/containerd-rootless/netns" ]; then
n="-n"
fi
# $n is intentionally unquoted so that an empty value expands to no argument at all.
exec nsenter --no-fork --wd="$(pwd)" --preserve-credentials -m $n -U -t "$pid" -- "$@"
}

show_systemd_error() {
Expand Down
3 changes: 3 additions & 0 deletions extras/rootless/containerd-rootless.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ if [ -z $_CONTAINERD_ROOTLESS_CHILD ]; then
export _CONTAINERD_ROOTLESS_SELINUX
fi
fi
# Enable detach-netns mode when the installed RootlessKit advertises the flag in its --help output.
if rootlesskit --help | grep -qw -- "--detach-netns"; then
# The value must be quoted: unquoted, `VAR=--detach-netns $VAR` is parsed as a
# temporary assignment followed by a command, so any user-supplied flags would
# be executed as a command and the assignment would not persist.
# The `:+` expansion adds the separating space only when user flags are present.
CONTAINERD_ROOTLESS_ROOTLESSKIT_FLAGS="--detach-netns${CONTAINERD_ROOTLESS_ROOTLESSKIT_FLAGS:+ $CONTAINERD_ROOTLESS_ROOTLESSKIT_FLAGS}"
fi
# Re-exec the script via RootlessKit, so as to create unprivileged {user,mount,network} namespaces.
#
# --copy-up allows removing/creating files in the directories by creating tmpfs and symlinks
Expand Down
22 changes: 22 additions & 0 deletions pkg/cmd/container/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
"github.com/containerd/nerdctl/pkg/namestore"
"github.com/containerd/nerdctl/pkg/platformutil"
"github.com/containerd/nerdctl/pkg/referenceutil"
"github.com/containerd/nerdctl/pkg/rootlessutil"
"github.com/containerd/nerdctl/pkg/strutil"
dockercliopts "github.com/docker/cli/opts"
dockeropts "github.com/docker/docker/opts"
Expand Down Expand Up @@ -418,6 +419,27 @@ func GenerateLogURI(dataStore string) (*url.URL, error) {
}

func withNerdctlOCIHook(cmd string, args []string) (oci.SpecOpts, error) {
detachedNetNS, err := rootlessutil.DetachedNetNS()
if err != nil {
return nil, fmt.Errorf("failed to check whether RootlessKit is running with --detach-netns: %w", err)
}
if detachedNetNS != "" {
// Rewrite {cmd, args} if RootlessKit is running with --detach-netns, so that the hook can gain
// CAP_NET_ADMIN in the namespaces.
// - Old:
// - cmd: "/usr/local/bin/nerdctl"
// - args: {"--data-root=/foo"}
// - New:
// - cmd: "/usr/bin/nsenter"
// - args: {"-n/run/user/1000/containerd-rootless/netns", "--", "/usr/local/bin/nerdctl", "--data-root=/foo"}
oldCmd, oldArgs := cmd, args
cmd, err = exec.LookPath("nsenter")
if err != nil {
return nil, err
}
args = append([]string{"-n" + detachedNetNS, "--", oldCmd}, oldArgs...)
}

args = append([]string{cmd}, append(args, "internal", "oci-hook")...)
return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
if s.Hooks == nil {
Expand Down
33 changes: 33 additions & 0 deletions pkg/containerutil/container_network_manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ package containerutil
import (
"context"
"errors"
"fmt"
"io/fs"
"path/filepath"

"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/oci"
"github.com/containerd/log"
"github.com/containerd/nerdctl/pkg/api/types"
Expand All @@ -32,6 +34,7 @@ import (
"github.com/containerd/nerdctl/pkg/netutil"
"github.com/containerd/nerdctl/pkg/resolvconf"
"github.com/containerd/nerdctl/pkg/rootlessutil"
"github.com/opencontainers/runtime-spec/specs-go"
)

// Verifies that the internal network settings are correct.
Expand Down Expand Up @@ -121,6 +124,16 @@ func (m *cniNetworkManager) ContainerNetworkingOpts(_ context.Context, container
}
}

if rootlessutil.IsRootless() && m.netNs == nil {
detachedNetNS, err := rootlessutil.DetachedNetNS()
if err != nil {
return nil, nil, fmt.Errorf("failed to check whether RootlessKit is running with --detach-netns: %w", err)
}
if detachedNetNS != "" {
opts = append(opts, withRootlessKitDetachedNetNS(detachedNetNS, stateDir))
}
}

return opts, cOpts, nil
}

Expand Down Expand Up @@ -169,3 +182,23 @@ func (m *cniNetworkManager) buildResolvConf(resolvConfPath string) error {
_, err = resolvconf.Build(resolvConfPath, append(slirp4Dns, nameServers...), searchDomains, dnsOptions)
return err
}

// withRootlessKitDetachedNetNS returns a spec option that gives the container
// a network namespace nested inside RootlessKit's detached netns.
//
// It is meant to be applied only when all of the following hold:
//   - nerdctl is running in rootless mode
//   - RootlessKit runs in detach-netns mode (available since RootlessKit v2.0)
//   - the network manager is not configured to none/host/container mode
//
// For every network namespace entry in the spec that has no path yet, a new
// netns is created at <containerStateDir>/netns via netutil.NewNestedNetNS and
// the entry is pointed at it.
func withRootlessKitDetachedNetNS(detachedNetNS, containerStateDir string) func(context.Context, oci.Client, *containers.Container, *oci.Spec) error {
	return func(_ context.Context, _ oci.Client, _ *containers.Container, spec *oci.Spec) error {
		// The slice shares its backing array with the spec, so writes below update the spec in place.
		namespaces := spec.Linux.Namespaces
		for idx := 0; idx < len(namespaces); idx++ {
			// Only network namespaces that still need to be created are of interest.
			if namespaces[idx].Type != specs.NetworkNamespace || namespaces[idx].Path != "" {
				continue
			}
			nestedPath := filepath.Join(containerStateDir, "netns")
			if err := netutil.NewNestedNetNS(detachedNetNS, nestedPath); err != nil {
				return fmt.Errorf("failed to nest a new netns %q inside %q: %w", nestedPath, detachedNetNS, err)
			}
			namespaces[idx].Path = nestedPath
		}
		return nil
	}
}
38 changes: 38 additions & 0 deletions pkg/netutil/netutil_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package netutil

import (
"fmt"
"os"
"os/exec"
)

// NewNestedNetNS creates a new netns that is nested in the parent.
//
// parent is the path of an existing network namespace and newChild is the path
// at which the new namespace gets bind-mounted. newChild is first created as
// an empty file (mode 0400) to serve as the mount point; then
// `nsenter -n<parent> -- unshare -n -- mount --bind /proc/self/ns/net <newChild>`
// is executed.
//
// On failure of the external command the placeholder file is removed again
// (best effort) so that no stale mount point is left behind, and the returned
// error carries the command line and its combined output.
func NewNestedNetNS(parent, newChild string) error {
	if err := os.WriteFile(newChild, nil, 0400); err != nil {
		return err
	}
	// this is hard (not impossible though) to reimplement in Go: https://github.com/cloudflare/slirpnetstack/commit/d7766a8a77f0093d3cb7a94bd0ccbe3f67d411ba
	cmd := exec.Command("nsenter", "-n"+parent, "--",
		"unshare", "-n", "--", "mount", "--bind", "/proc/self/ns/net", newChild)
	out, err := cmd.CombinedOutput()
	if err != nil {
		// Best-effort cleanup: the removal error is deliberately ignored because
		// the command failure is the error the caller needs to see.
		_ = os.Remove(newChild)
		return fmt.Errorf("failed to execute %v: %w (out=%q)", cmd.Args, err, string(out))
	}
	return nil
}
15 changes: 14 additions & 1 deletion pkg/rootlessutil/parent_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@ func ParentMain(hostGatewayIP string) error {
return err
}

detachedNetNSPath, err := detachedNetNS(stateDir)
if err != nil {
return err
}
detachNetNSMode := detachedNetNSPath != ""
log.L.Debugf("RootlessKit detach-netns mode: %v", detachNetNSMode)
if err != nil {
return err
}

wd, err := os.Getwd()
if err != nil {
return err
Expand All @@ -92,10 +102,13 @@ func ParentMain(hostGatewayIP string) error {
"-r/", // root dir (busybox nsenter wants this to be explicitly specified),
"-w" + wd, // work dir
"--preserve-credentials",
"-m", "-n", "-U",
"-m", "-U",
"-t", strconv.Itoa(childPid),
"-F", // no fork
}
if !detachNetNSMode {
args = append(args, "-n")
}
args = append(args, os.Args...)
log.L.Debugf("rootless parent main: executing %q with %v", arg0, args)

Expand Down
26 changes: 26 additions & 0 deletions pkg/rootlessutil/rootlessutil_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package rootlessutil

import (
"errors"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -80,3 +81,28 @@ func RootlessContainredSockAddress() (string, error) {
}
return filepath.Join(fmt.Sprintf("/proc/%d/root/run/containerd/containerd.sock", childPid)), nil
}

// DetachedNetNS reports the path of RootlessKit's detached network namespace.
//
// A non-empty path is returned only when running rootless and the "netns"
// entry exists in the RootlessKit state directory, i.e. RootlessKit was
// started in --detach-netns mode. In every other non-failing case the result
// is "" with a nil error.
func DetachedNetNS() (string, error) {
	if rootless := IsRootless(); !rootless {
		// Nothing to look up outside of rootless mode.
		return "", nil
	}
	dir, stateErr := RootlessKitStateDir()
	if stateErr != nil {
		return "", stateErr
	}
	netnsPath, lookupErr := detachedNetNS(dir)
	return netnsPath, lookupErr
}

// detachedNetNS checks for the "netns" entry inside stateDir.
//
// It returns the joined path when the entry exists, "" with a nil error when
// it does not exist, and "" together with the os.Stat error for any other
// failure.
func detachedNetNS(stateDir string) (string, error) {
	p := filepath.Join(stateDir, "netns")
	if _, err := os.Stat(p); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// Absence of the entry means "not in detach-netns mode" and is not an error.
			return "", nil
		}
		return "", err
	}
	return p, nil
}
4 changes: 4 additions & 0 deletions pkg/rootlessutil/rootlessutil_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ func ParentMain(hostGatewayIP string) error {
func RootlessContainredSockAddress() (string, error) {
return "", fmt.Errorf("cannot inspect RootlessKit state on non-Linux hosts")
}

// DetachedNetNS is the stub used on non-Linux hosts: it always returns an
// empty path and a nil error.
func DetachedNetNS() (string, error) {
	const noDetachedNetNS = ""
	return noDetachedNetNS, nil
}

0 comments on commit fb193e0

Please sign in to comment.