From da48dd912c48f125ff2d1ae1b1378b41122eec6b Mon Sep 17 00:00:00 2001 From: Etienne Perot Date: Wed, 30 Oct 2024 22:11:34 -0700 Subject: [PATCH] runsc: When mounting a new procfs fails, fall back to recursive bind-mount. As part of sandbox startup, `runsc` needs to set up a chroot environment with a minimal working `procfs` filesystem mounted within. However, doing so from within a container (as applications like Dangerzone do) may fail, because in the container runtime's default configuration, some paths of the procfs filesystem visible from within the container may be obstructed. This prevents mounting new unobstructed instances of `procfs`. This change detects this case and falls back to the previous behavior of using a recursive bind-mount of `/proc` in such a case. The obstructed subdirectories of procfs are preserved in this case, which is fine because we only need a very minimal subset of `procfs` to actually work. Additionally, `runsc` actually only needs a few kernel parameter files and `/proc/self` in order to work. So this change sets up a `tmpfs` mount that contains just those files, with the kernel parameter files being plainly copied and `/proc/self` being a symlink to the one present in the mounted view of `procfs` (regardless of which mounting method was used). The `runtime_in_docker` test will continuously verify that this fallback mechanism works to avoid similar breakage in the future. Credits to @avagin for figuring out this solution. Fixes #10944. PiperOrigin-RevId: 691672104 --- runsc/cmd/chroot.go | 62 ++++++++++++++++++++++++++++-- test/e2e/runtime_in_docker_test.go | 12 ------ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index 00f0cf0a58..a1373f88c3 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -82,6 +82,63 @@ func copyFile(dst, src string) error { return err } +// setupMinimalProcfs creates a minimal procfs-like tree at `${chroot}/proc`. +func setupMinimalProcfs(chroot string) error { + // We can't always directly mount procfs because it may be obstructed + // by submounts within it. See https://gvisor.dev/issue/10944. + // All we really need from procfs is /proc/self and a few kernel + // parameter files, which are typically not obstructed. + // So we create a tmpfs at /proc and manually copy the kernel parameter + // files into it. Then, to get /proc/self, we mount either a new + // instance of procfs (if possible), or a recursive bind mount of the + // procfs we do have access to (which still contains the obstructed + // submounts but /proc/self is not obstructed), and we symlink + // our /proc/self to the one in that mount. + procRoot := filepath.Join(chroot, "/proc") + if err := os.Mkdir(procRoot, 0755); err != nil { + return fmt.Errorf("error creating /proc in chroot: %v", err) + } + if err := specutils.SafeMount("runsc-proc", procRoot, "tmpfs", + unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil { + return fmt.Errorf("error mounting tmpfs in /proc: %v", err) + } + for _, d := range []string{ + "/proc/sys", + "/proc/sys/kernel", + "/proc/sys/vm", + } { + if err := os.Mkdir(filepath.Join(chroot, d), 0755); err != nil { + return fmt.Errorf("error creating directory %q: %v", filepath.Join(chroot, d), err) + } + } + for _, f := range []string{ + "/proc/sys/vm/mmap_min_addr", + "/proc/sys/kernel/cap_last_cap", + } { + if err := copyFile(filepath.Join(chroot, f), f); err != nil { + return fmt.Errorf("failed to copy %q -> %q: %w", f, filepath.Join(chroot, f), err) + } + } + flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) + procSubmountDir := "sandbox-proc" + if newProcfsErr := mountInChroot(chroot, "proc", "/proc/"+procSubmountDir, "proc", flags); newProcfsErr != nil { + log.Debugf("Unable to mount a new instance of the procfs file system at %q (%v); trying a recursive bind mount instead.", filepath.Join(procRoot, procSubmountDir), newProcfsErr) + procSubmountDir = "host-proc" + if bindErr := mountInChroot(chroot, "/proc", "/proc/"+procSubmountDir, "bind", + unix.MS_BIND|unix.MS_REC|flags); bindErr != nil { + return fmt.Errorf("error recursively bind-mounting proc at %q (%w) after also failing to mount a new procfs instance there (%v)", filepath.Join(procRoot, procSubmountDir), bindErr, newProcfsErr) + } + log.Debugf("Successfully mounted a recursive bind mount of procfs at %q; continuing.", filepath.Join(procRoot, procSubmountDir)) + } + if err := os.Symlink(procSubmountDir+"/self", filepath.Join(procRoot, "self")); err != nil { + return fmt.Errorf("error creating symlink %q -> %q: %w", filepath.Join(procRoot, "self"), procSubmountDir+"/self", err) + } + if err := os.Chmod(procRoot, 0o111); err != nil { + return fmt.Errorf("error chmodding %q: %v", procRoot, err) + } + return nil +} + // setUpChroot creates an empty directory with runsc mounted at /runsc and proc // mounted at /proc. func setUpChroot(spec *specs.Spec, conf *config.Config) error { @@ -109,9 +166,8 @@ func setUpChroot(spec *specs.Spec, conf *config.Config) error { log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) } - flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) - if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { - return fmt.Errorf("error mounting proc in chroot: %v", err) + if err := setupMinimalProcfs(chroot); err != nil { + return fmt.Errorf("error setting up minimal procfs in chroot %q: %v", chroot, err) } if err := tpuProxyUpdateChroot("/", chroot, spec, conf); err != nil { diff --git a/test/e2e/runtime_in_docker_test.go b/test/e2e/runtime_in_docker_test.go index 97c7e5aa96..9924c2c1d2 100644 --- a/test/e2e/runtime_in_docker_test.go +++ b/test/e2e/runtime_in_docker_test.go @@ -74,18 +74,6 @@ func (test testVariant) run(ctx context.Context, logger testutil.Logger, runscPa ReadOnly: false, }) } - // Mount an unobstructed view of procfs at /proc2 so that the runtime - // can mount a fresh procfs. - // TODO(gvisor.dev/issue/10944): Remove this once issue is fixed. - opts.Mounts = append(opts.Mounts, mount.Mount{ - Type: mount.TypeBind, - Source: "/proc", - Target: "/proc2", - ReadOnly: false, - BindOptions: &mount.BindOptions{ - NonRecursive: true, - }, - }) const wantMessage = "It became a jumble of words, a litany, almost a kind of glossolalia." args := []string{ "/runtime",