Skip to content

Commit

Permalink
Merge pull request from GHSA-xr7r-f8xq-vfvv
Browse files Browse the repository at this point in the history
fix GHSA-xr7r-f8xq-vfvv and harden fd leaks
  • Loading branch information
cyphar authored Jan 31, 2024
2 parents 8454bbb + d8edada commit 0212048
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 38 deletions.
31 changes: 16 additions & 15 deletions libcontainer/cgroups/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ var (
// TestMode is set to true by unit tests that need "fake" cgroupfs.
TestMode bool

cgroupFd int = -1
prepOnce sync.Once
prepErr error
resolveFlags uint64
cgroupRootHandle *os.File
prepOnce sync.Once
prepErr error
resolveFlags uint64
)

func prepareOpenat2() error {
prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH,
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
Expand All @@ -86,15 +86,16 @@ func prepareOpenat2() error {
}
return
}
file := os.NewFile(uintptr(fd), cgroupfsDir)

var st unix.Statfs_t
if err = unix.Fstatfs(fd, &st); err != nil {
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
logrus.Warnf("falling back to securejoin: %s", prepErr)
return
}

cgroupFd = fd

cgroupRootHandle = file
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
if st.Type == unix.CGROUP2_SUPER_MAGIC {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
Expand All @@ -121,29 +122,29 @@ func openFile(dir, file string, flags int) (*os.File, error) {
return openFallback(path, flags, mode)
}

fd, err := unix.Openat2(cgroupFd, relPath,
fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
&unix.OpenHow{
Resolve: resolveFlags,
Flags: uint64(flags) | unix.O_CLOEXEC,
Mode: uint64(mode),
})
if err != nil {
err = &os.PathError{Op: "openat2", Path: path, Err: err}
// Check if cgroupFd is still opened to cgroupfsDir
// Check if cgroupRootHandle is still opened to cgroupfsDir
// (happens when this package is incorrectly used
// across the chroot/pivot_root/mntns boundary, or
// when /sys/fs/cgroup is remounted).
//
// TODO: if such usage will ever be common, amend this
// to reopen cgroupFd and retry openat2.
fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(cgroupFd))
// to reopen cgroupRootHandle and retry openat2.
fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
defer closer()
fdDest, _ := os.Readlink(fdPath)
if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupFd
// Wrap the error so it is clear that cgroupRootHandle
// is opened to an unexpected/wrong directory.
err = fmt.Errorf("cgroupFd %d unexpectedly opened to %s != %s: %w",
cgroupFd, fdDest, cgroupfsDir, err)
err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
}
return nil, err
}
Expand Down
9 changes: 9 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,15 @@ func (c *Container) start(process *Process) (retErr error) {
}()
}

// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
// to make sure we don't leak any files into "runc init". Any files to be
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
// runtime and thus their O_CLOEXEC flag will be cleared. This is some
// additional protection against attacks like CVE-2024-21626, by making
// sure we never leak files to "runc init" we didn't intend to.
if err := utils.CloseExecFrom(3); err != nil {
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
}
if err := parent.start(); err != nil {
return fmt.Errorf("unable to start container process: %w", err)
}
Expand Down
33 changes: 32 additions & 1 deletion libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net"
"os"
"path/filepath"
"runtime"
"runtime/debug"
"strconv"
Expand Down Expand Up @@ -89,7 +90,7 @@ func Init() {
}
// Normally, StartInitialization() never returns, meaning
// if we are here, it had failed.
os.Exit(1)
os.Exit(255)
}

// Normally, this function does not return. If it returns, with or without an
Expand Down Expand Up @@ -274,6 +275,32 @@ func populateProcessEnvironment(env []string) error {
return nil
}

// verifyCwd ensures that the current directory is actually inside the mount
// namespace root of the current process.
func verifyCwd() error {
// getcwd(2) on Linux detects if cwd is outside of the rootfs of the
// current mount namespace root, and in that case prefixes "(unreachable)"
// to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
// when this happens and return ENOENT rather than returning a non-absolute
// path. In both cases we can therefore easily detect if we have an invalid
// cwd by checking the return value of getcwd(3). See getcwd(3) for more
// details, and CVE-2024-21626 for the security issue that motivated this
// check.
//
// We have to use unix.Getwd() here because os.Getwd() has a workaround for
// $PWD which involves doing stat(.), which can fail if the current
// directory is inaccessible to the container process.
if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
} else if err != nil {
return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
} else if !filepath.IsAbs(wd) {
// We shouldn't ever hit this, but check just in case.
return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
}
return nil
}

// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
Expand Down Expand Up @@ -332,6 +359,10 @@ func finalizeNamespace(config *initConfig) error {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
}
}
// Make sure our final working directory is inside the container.
if err := verifyCwd(); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return fmt.Errorf("unable to clear keep caps: %w", err)
}
Expand Down
20 changes: 10 additions & 10 deletions libcontainer/integration/seccomp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
libseccomp "github.com/seccomp/libseccomp-golang"
)

func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
func TestSeccompDenySyslogWithErrno(t *testing.T) {
if testing.Short() {
return
}
Expand All @@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Name: "syslog",
Action: configs.Errno,
ErrnoRet: &errnoRet,
},
Expand All @@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Args: []string{"dmesg"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Expand All @@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
}

if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
}

expected := "pwd: getcwd: No such process"
expected := "dmesg: klogctl: No such process"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}

func TestSeccompDenyGetcwd(t *testing.T) {
func TestSeccompDenySyslog(t *testing.T) {
if testing.Short() {
return
}
Expand All @@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Name: "syslog",
Action: configs.Errno,
},
},
Expand All @@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Args: []string{"dmesg"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Expand All @@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) {
}

if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
}

expected := "pwd: getcwd: Operation not permitted"
expected := "dmesg: klogctl: Operation not permitted"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
Expand Down
19 changes: 19 additions & 0 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

// linuxSetnsInit performs the container's initialization for running a new process
Expand Down Expand Up @@ -138,5 +139,23 @@ func (l *linuxSetnsInit) Init() error {
l.config.Args[0] = name
return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ())
}
// Close all file descriptors we are not passing to the container. This is
// necessary because the execve target could use internal runc fds as the
// execve path, potentially giving access to binary files from the host
// (which can then be opened by container processes, leading to container
// escapes). Note that because this operation will close any open file
// descriptors that are referenced by (*os.File) handles from underneath
// the Go runtime, we must not do any file operations after this point
// (otherwise the (*os.File) finaliser could close the wrong file). See
// CVE-2024-21626 for more information as to why this protection is
// necessary.
//
// This is not needed for runc-dmz, because the extra execve(2) step means
// that all O_CLOEXEC file descriptors have already been closed and thus
// the second execve(2) from runc-dmz cannot access internal file
// descriptors from runc.
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
return err
}
return system.Exec(name, l.config.Args, os.Environ())
}
18 changes: 18 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,5 +281,23 @@ func (l *linuxStandardInit) Init() error {
l.config.Args[0] = name
return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ())
}
// Close all file descriptors we are not passing to the container. This is
// necessary because the execve target could use internal runc fds as the
// execve path, potentially giving access to binary files from the host
// (which can then be opened by container processes, leading to container
// escapes). Note that because this operation will close any open file
// descriptors that are referenced by (*os.File) handles from underneath
// the Go runtime, we must not do any file operations after this point
// (otherwise the (*os.File) finaliser could close the wrong file). See
// CVE-2024-21626 for more information as to why this protection is
// necessary.
//
// This is not needed for runc-dmz, because the extra execve(2) step means
// that all O_CLOEXEC file descriptors have already been closed and thus
// the second execve(2) from runc-dmz cannot access internal file
// descriptors from runc.
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
return err
}
return system.Exec(name, l.config.Args, os.Environ())
}
74 changes: 62 additions & 12 deletions libcontainer/utils/utils_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"runtime"
"strconv"
"sync"
_ "unsafe" // for go:linkname

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -53,14 +54,11 @@ func haveCloseRangeCloexec() bool {
return haveCloseRangeCloexecBool
}

// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
// the process (except for those below the given fd value).
func CloseExecFrom(minFd int) error {
if haveCloseRangeCloexec() {
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
return os.NewSyscallError("close_range", err)
}
type fdFunc func(fd int)

// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
// the current process.
func fdRangeFrom(minFd int, fn fdFunc) error {
procSelfFd, closer := ProcThreadSelf("fd")
defer closer()

Expand Down Expand Up @@ -88,15 +86,67 @@ func CloseExecFrom(minFd int) error {
if fd < minFd {
continue
}
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
// this might fail are basically file descriptors that have already
// been closed (including and especially the one that was created when
// os.ReadDir did the "opendir" syscall).
unix.CloseOnExec(fd)
// Ignore the file descriptor we used for readdir, as it will be closed
// when we return.
if uintptr(fd) == fdDir.Fd() {
continue
}
// Run the closure.
fn(fd)
}
return nil
}

// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
// equal to minFd in the current process.
func CloseExecFrom(minFd int) error {
// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
if haveCloseRangeCloexec() {
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
return os.NewSyscallError("close_range", err)
}
// Otherwise, fall back to the standard loop.
return fdRangeFrom(minFd, unix.CloseOnExec)
}

//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor

// In order to make sure we do not close the internal epoll descriptors the Go
// runtime uses, we need to ensure that we skip descriptors that match
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
// unfortunately there's no other way to be sure we're only keeping the file
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive

// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
// current process, except for those critical to Go's runtime (such as the
// netpoll management descriptors).
//
// NOTE: That this function is incredibly dangerous to use in most Go code, as
// closing file descriptors from underneath *os.File handles can lead to very
// bad behaviour (the closed file descriptor can be re-used and then any
// *os.File operations would apply to the wrong file). This function is only
// intended to be called from the last stage of runc init.
func UnsafeCloseFrom(minFd int) error {
// We cannot use close_range(2) even if it is available, because we must
// not close some file descriptors.
return fdRangeFrom(minFd, func(fd int) {
if runtime_IsPollDescriptor(uintptr(fd)) {
// These are the Go runtimes internal netpoll file descriptors.
// These file descriptors are operated on deep in the Go scheduler,
// and closing those files from underneath Go can result in panics.
// There is no issue with keeping them because they are not
// executable and are not useful to an attacker anyway. Also we
// don't have any choice.
return
}
// There's nothing we can do about errors from close(2), and the
// only likely error to be seen is EBADF which indicates the fd was
// already closed (in which case, we got what we wanted).
_ = unix.Close(fd)
})
}

// NewSockPair returns a new SOCK_STREAM unix socket pair.
func NewSockPair(name string) (parent, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
Expand Down

0 comments on commit 0212048

Please sign in to comment.