From 50fd9993c9041690be24fd6a15899f616ea1457c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 24 Oct 2024 01:16:44 +0000 Subject: [PATCH] gofer: open volumes from the initial userns when gofer can't open them from its user namespace. The gofer namespace can have custom user/group mappings that can prevent it from accessing volumes. This change mirrors the behaviour of runc. If we can't open a volume from the gofer userns, we try to open it from the initial userns. For that, we create a small rpc server that the gofer communicates with. Signed-off-by: Andrei Vagin --- runsc/cmd/gofer.go | 35 ++++++++++-- runsc/container/BUILD | 3 + runsc/container/container.go | 22 ++++++++ runsc/container/gofer_rpc.go | 103 +++++++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 5 deletions(-) create mode 100644 runsc/container/gofer_rpc.go diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 08cdc76c4b..27cdcb2351 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -32,9 +32,11 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/tpuproxy/vfio" "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/cmd/util" "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/fsgofer" "gvisor.dev/gvisor/runsc/fsgofer/filter" @@ -91,6 +93,7 @@ type Gofer struct { specFD int mountsFD int + rpcFD int profileFDs profile.FDArgs syncFDs goferSyncFDs stopProfiling func() @@ -123,6 +126,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) { f.IntVar(&g.devIoFD, "dev-io-fd", -1, "optional FD to connect /dev gofer server") f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).") + f.IntVar(&g.rpcFD, "rpc-fd", -1, "RPC file descriptor.") // Add 
synchronization FD flags. g.syncFDs.setFlags(f) @@ -153,8 +157,16 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomm g.syncFDs.syncNVProxy() g.syncFDs.syncUsernsForRootless() + rpcClntSock, err := unet.NewSocket(g.rpcFD) + if err != nil { + util.Fatalf("creating rpc socket: %v", err) + } + + rpcClnt := urpc.NewClient(rpcClntSock) + defer rpcClnt.Close() + if g.setUpRoot { - if err := g.setupRootFS(spec, conf); err != nil { + if err := g.setupRootFS(spec, conf, rpcClnt); err != nil { util.Fatalf("Error setting up root FS: %v", err) } if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { @@ -162,6 +174,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomm defer cleanupUnmounter() } } + rpcClnt.Close() if g.applyCaps { overrides := g.syncFDs.flags() overrides["apply-caps"] = "false" @@ -369,7 +382,7 @@ func (g *Gofer) writeMounts(mounts []specs.Mount) error { // It is protected by selinux rules. const procFDBindMount = "/proc/fs" -func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error { +func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config, rpcClnt *urpc.Client) error { // Convert all shared mounts into slaves to be sure that nothing will be // propagated outside of our namespace. procPath := "/proc" @@ -437,7 +450,7 @@ func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error { } // Replace the current spec, with the clean spec with symlinks resolved. - if err := g.setupMounts(conf, spec.Mounts, root, procPath); err != nil { + if err := g.setupMounts(conf, spec.Mounts, root, procPath, rpcClnt); err != nil { util.Fatalf("error setting up FS: %v", err) } @@ -487,7 +500,7 @@ func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error { // setupMounts bind mounts all mounts specified in the spec in their correct // location inside root. It will resolve relative paths and symlinks. It also // creates directories as needed. 
-func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error { +func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string, rpcClnt *urpc.Client) error { mountIdx := 1 // First index is for rootfs. for _, m := range mounts { if !specutils.IsGoferMount(m) { @@ -511,7 +524,19 @@ func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, pro } log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) - if err := specutils.SafeSetupAndMount(m.Source, dst, m.Type, flags, procPath); err != nil { + src := m.Source + var fd *os.File + if err := unix.Access(src, unix.R_OK); err != nil { + var res container.OpenMountResult + if err := rpcClnt.Call("goferRPC.OpenMount", &m, &res); err != nil { + return fmt.Errorf("opening %s: %w", m.Source, err) + } + fd = res.Files[0] + src = fmt.Sprintf("%s/self/fd/%d", procPath, fd.Fd()) + } + err = specutils.SafeSetupAndMount(src, dst, m.Type, flags, procPath) + fd.Close() + if err != nil { return fmt.Errorf("mounting %+v: %v", m, err) } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 135a918cbb..ca7b6473c5 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -9,6 +9,7 @@ go_library( name = "container", srcs = [ "container.go", + "gofer_rpc.go", "hook.go", "state_file.go", "status.go", @@ -28,6 +29,8 @@ go_library( "//pkg/sighandling", "//pkg/state/statefile", "//pkg/sync", + "//pkg/unet", + "//pkg/urpc", "//runsc/boot", "//runsc/cgroup", "//runsc/config", diff --git a/runsc/container/container.go b/runsc/container/container.go index a87c48c865..123fa452c6 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -41,6 +41,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sighandling" "gvisor.dev/gvisor/pkg/state/statefile" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/cgroup" 
"gvisor.dev/gvisor/runsc/config" @@ -1272,6 +1274,25 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu } donations.DonateAndClose("mounts-fd", mountsGofer) + rpcServ, rpcClnt, err := unet.SocketPair(false) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to create an rpc socket pair: %w", err) + } + rpcClntFD, _ := rpcClnt.Release() + donations.DonateAndClose("rpc-fd", os.NewFile(uintptr(rpcClntFD), "gofer-rpc")) + rpcPidCh := make(chan int, 1) + defer close(rpcPidCh) + go func() { + pid := <-rpcPidCh + if pid == 0 { + rpcServ.Close() + return + } + s := urpc.NewServer() + s.Register(&goferRPC{goferPID: pid}) + s.StartHandling(rpcServ) + }() + // Count the number of mounts that needs an IO file. ioFileCount := 0 for _, cfg := range c.GoferMountConfs { @@ -1370,6 +1391,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu log.Infof("Gofer started, PID: %d", cmd.Process.Pid) c.GoferPid = cmd.Process.Pid c.goferIsChild = true + rpcPidCh <- cmd.Process.Pid // Set up and synchronize rootless mode userns mappings. if rootlessEUID { diff --git a/runsc/container/gofer_rpc.go b/runsc/container/gofer_rpc.go new file mode 100644 index 0000000000..9898ee56b4 --- /dev/null +++ b/runsc/container/gofer_rpc.go @@ -0,0 +1,103 @@ +// Copyright 2024 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package container + +import ( + "fmt" + "os" + "runtime" + "sync" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/urpc" +) + +type openMountRequest struct { + mount *specs.Mount + result *OpenMountResult + done chan error +} + +type goferRPC struct { + mu sync.Mutex + openMountRequests chan *openMountRequest + goferPID int +} + +// OpenMountResult is the result of the goferRPC.OpenMount call. +type OpenMountResult struct { + urpc.FilePayload +} + +func (rpc *goferRPC) handleRequest(req *openMountRequest) { + defer close(req.done) + fd, err := os.OpenFile(req.mount.Source, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + req.done <- err + return + } + req.result.Files = []*os.File{fd} +} + +func (rpc *goferRPC) openMountLoop() error { + if err := unix.Unshare(unix.CLONE_FS); err != nil { + return fmt.Errorf("open mount thread: %w", err) + } + nsFd, err := os.Open(fmt.Sprintf("/proc/%d/ns/mnt", rpc.goferPID)) + if err != nil { + return fmt.Errorf("open mount thread: open container mntns: %w", err) + } + defer nsFd.Close() + if err := unix.Setns(int(nsFd.Fd()), unix.CLONE_NEWNS); err != nil { + return fmt.Errorf("open mount thread: join container mntns: %w", err) + } + for req := range rpc.openMountRequests { + rpc.handleRequest(req) + } + return nil +} + +// OpenMount is a helper rpc call that a gofer process uses when it can't +// open/create a mount. +func (rpc *goferRPC) OpenMount(m *specs.Mount, res *OpenMountResult) error { + rpc.mu.Lock() + defer rpc.mu.Unlock() + + if rpc.openMountRequests == nil { + rpc.openMountRequests = make(chan *openMountRequest) + go func() { + // This goroutine holds the current thread forever. It + // never exits, because child processes can set + // PDEATHSIG. It can't serve other goroutines, because + // it does unshare CLONE_FS. 
+ runtime.LockOSThread() + if err := rpc.openMountLoop(); err != nil { + for req := range rpc.openMountRequests { + req.done <- err + } + } + panic("unreachable") + }() + } + req := openMountRequest{ + mount: m, + result: res, + done: make(chan error), + } + rpc.openMountRequests <- &req + err := <-req.done + return err +}