From 5182c5b40c2e7afa7fedc524afaa7f2ff7007f8a Mon Sep 17 00:00:00 2001 From: Etienne Perot Date: Sun, 14 Apr 2024 20:49:27 -0700 Subject: [PATCH] Remove support for the /safezone volume. Per discussion on https://github.com/freedomofpress/dangerzone/pull/590, the need for this volume will soon go away. This makes gVisor integration much easier, because it removes the need to preserve file access and ownership of the files in this volume from within the gVisor sandbox. The `/sandboxed_entrypoint.sh` file is no longer necessary, and the `/entrypoint.py` file is massively simplified. This also allows the use of `--userns=nomap` in Podman. --- Dockerfile | 20 +- dangerzone/gvisor_wrapper/entrypoint.py | 295 +++--------------- .../gvisor_wrapper/sandboxed_entrypoint.sh | 29 -- dangerzone/isolation_provider/container.py | 1 + 4 files changed, 56 insertions(+), 289 deletions(-) delete mode 100755 dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh diff --git a/Dockerfile b/Dockerfile index 86b1af5b3..35574af53 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,7 +75,10 @@ COPY conversion /opt/dangerzone/dangerzone/conversion # Add the unprivileged user. # NOTE: A tmpfs will be mounted over /home/dangerzone directory, # so nothing within it from the image will be persisted. -RUN adduser -s /bin/true -h /home/dangerzone -D dangerzone +ARG DANGERZONE_UID=65042 +ARG DANGERZONE_GID=65042 +RUN addgroup -g "$DANGERZONE_GID" dangerzone && \ + adduser -u "$DANGERZONE_UID" -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone ########################################### # gVisor wrapper image @@ -87,19 +90,12 @@ RUN apk --no-cache -U upgrade && \ su-exec RUN mkdir --mode=0755 -p /dangerzone-image/rootfs COPY --from=dangerzone-image / /dangerzone-image/rootfs -RUN ARCH="$(uname -m)"; \ - URL="https://storage.googleapis.com/gvisor/releases/release/latest/${ARCH}"; \ - wget "${URL}/runsc" "${URL}/runsc.sha512" && \ +COPY gvisor_wrapper/entrypoint.py / +RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \ + wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \ sha512sum -c runsc.sha512 && \ rm -f runsc.sha512 && \ - chmod 555 runsc && \ + chmod 555 runsc /entrypoint.py && \ mv runsc /usr/bin/ -COPY gvisor_wrapper/entrypoint.py gvisor_wrapper/sandboxed_entrypoint.sh / -RUN mv sandboxed_entrypoint.sh /dangerzone-image/rootfs/sandboxed_entrypoint.sh && \ - chmod 555 /entrypoint.py /dangerzone-image/rootfs/sandboxed_entrypoint.sh && \ - mkdir -p /wrapped-safezone /var/run/runsc - -# /safezone is a directory through which Pixels to PDF receives files -VOLUME /safezone ENTRYPOINT ["/entrypoint.py"] diff --git a/dangerzone/gvisor_wrapper/entrypoint.py b/dangerzone/gvisor_wrapper/entrypoint.py index d056791d5..29d306efb 100755 --- a/dangerzone/gvisor_wrapper/entrypoint.py +++ b/dangerzone/gvisor_wrapper/entrypoint.py @@ -1,70 +1,15 @@ #!/usr/bin/python3 import argparse -import grp import json import os -import pwd -import random import shlex -import string import subprocess import sys -import time import typing # This script wraps the command-line arguments passed to it to run as an # unprivileged user in a gVisor sandbox. -# It is meant to work in both Docker and Podman, which differ in how this -# script is invokved. -# With Docker, the Docker daemon runs as root on the machine, which is -# likely a different user than the one running the Dangerzone application. -# With Podman, which we run in rootless mode, there is only one non-root user -# that is running Podman. In this case, we are UID 0 in the Podman-created -# user namespace, but this user maps to the user running the Dangerzone -# application. -# The script first tries to establish a "common denominator" setup between -# these two situations by checking the owner of the /safezone volume, which -# is mounted by Dangerzone and is owned by the user running this application. -# If this script is not running as this user (i.e. Docker in root mode), it -# re-executes itself as the user owning /safezone. This brings it to the same -# situation as Podman running in rootless mode: there is only one user mapped -# into this user namespace as UID 0, and that user is the person running the -# Dangerzone application on their machine. They do not have root in the -# initial user namespace. No other users are mapped in the user namespace -# we're in. -# However, we now have a second problem: we also want the application running -# within the sandbox to be running as a non-root user with minimal privileges. -# We cannot create a new user here, because such a user would be unmapped in -# the initial user namespace and any attempt to make it into a child user -# namespace (which starting a gVisor sandbox requires) would fail. -# Therefore, the only place where this new user can exist is within the -# gVisor sandbox. -# But now we have a new problem: This user will not have write access to the -# /safezone directory, and any file it does create would be mapped to a -# meaningless user on the host. -# So this script uses a two-volume approach. -# The /safezone directory on the host is mapped to the /host-safezone -# directory in the gVisor sandbox, while a new tmpfs volume is created -# as the sandbox's /safezone directory. -# Then, inside the sandbox right on startup, all files are moved from -# /host-safezone to /safezone and chown'd to the sandbox-only "dangerzone" -# user. Then, when the unprivileged command finishes running, all files -# in the sandbox's /safezone are chown'd back to the sandbox's root user -# (which corresponds to our root user, which in turn corresponds to the -# real user on the host running Dangerzone), and moved back to /host-safezone -# (which makes them show up in the /safezone volume of this container, which -# in turn means they are finally visible on the host). -# This approach is mostly transparent from the perspective of whoever is -# running this container, with the caveats that: -# - All documents in /safezone must fit in RAM, since they live in tmpfs. -# - The resulting documents are only visible to the host after the -# unprivileged command finishes running (as opposed to being available -# as conversion progresses). -# One alternative to this approach would be to only have the root user exist -# in the sandbox, and to use it directly. It would be possible to drop all -# capabilities from the OCI config below, but it does mean running as UID 0 -# within the sandbox. # Define flags. parser = argparse.ArgumentParser( @@ -77,14 +22,6 @@ parser.add_argument( "--pre_gvisor", action="store_true", help="Run command without gVisor wrapping" ) -parser.add_argument( - "--pre_new_userns", action="store_true", help="Run command before changing userns" -) -parser.add_argument( - "--pre_sandboxed_entrypoint", - action="store_true", - help="Run command in gVisor but without sandboxed_entrypoint.sh", -) parser.add_argument( "--gvisor_debug", action="store_true", help="Enable gVisor debug logging" ) @@ -144,174 +81,57 @@ parser_args.append("command") # To satisfy the parser's `command` argument. args = parser.parse_args(parser_args) -if args.pre_new_userns: - if args.gvisor_debug: - print( - "Executing command before userns switch:", - " ".join(shlex.quote(s) for s in wrapped_command), - file=sys.stderr, - ) - try: - os.execvp(wrapped_command[0], wrapped_command) - except Exception as e: - raise e.__class__("Process %s failed: %s" % (wrapped_command, e)) - else: - assert False, "This code should never be reachable" - -# Monkeypatch `os` module for things added in Python 3.12. -# This can go away once the python3 alpine package is updated to 3.12. -if "unshare" not in os.__dict__ or "CLONE_NEWUSER" not in os.__dict__: - import ctypes - - libc = ctypes.CDLL(None) - libc.unshare.argtypes = [ctypes.c_int] - get_errno_loc = libc.__errno_location - get_errno_loc.restype.restype = ctypes.POINTER(ctypes.c_int) # type: ignore[union-attr] - - def unshare_monkeypatch(flags: int) -> None: - rc = libc.unshare(flags) - if rc == -1: - raise Exception(os.strerror(get_errno_loc()[0])) - - os.unshare = unshare_monkeypatch # type: ignore[attr-defined] - os.CLONE_NEWUSER = 268435456 # type: ignore[attr-defined] - -# Check that we are running as the user that owns /safezone. -# If not, re-exec. -my_uid = os.getuid() -my_gid = os.getgid() -safezone_st = os.lstat("/safezone") - -if my_uid == 0 and (safezone_st.st_uid != my_uid or safezone_st.st_gid != my_gid): - # Need to switch into the user who owns the /safezone directory. - # This helps preserve the correct user permissions on Docker. - # The user and group for this UID/GID pair need to exist in the - # container too before we can use them; if they don't exist, - # create them. - # We use random group/user names in order to minimize risk of conflict - # with existing users in the container. - try: - group_name = grp.getgrgid(safezone_st.st_gid).gr_name - except KeyError: - add_group_argv = ( - "/usr/sbin/addgroup", - "-g", - str(safezone_st.st_gid), - "danger" - + "".join(random.choices(string.ascii_lowercase + string.digits, k=24)), - ) - if args.gvisor_debug: - print( - "Creating new group:", - " ".join(shlex.quote(s) for s in add_group_argv), - file=sys.stderr, - ) - subprocess.run(add_group_argv, check=True) - group_name = grp.getgrgid(safezone_st.st_gid).gr_name - try: - user_name = pwd.getpwuid(safezone_st.st_uid).pw_name - except KeyError: - add_user_argv = ( - "/usr/sbin/adduser", +# Find the UID/GID of who we should run as within the sandbox. +sandboxed_uid = int( + subprocess.check_output( + ( + "chroot", + "/dangerzone-image/rootfs", + "id", "-u", - str(safezone_st.st_uid), - "-s", - "/bin/true", - "-G", - group_name, - "-D", - "-H", - "danger" - + "".join(random.choices(string.ascii_lowercase + string.digits, k=24)), + "dangerzone", ) - if args.gvisor_debug: - print( - "Creating new user:", - " ".join(shlex.quote(s) for s in add_user_argv), - file=sys.stderr, - ) - subprocess.run(add_user_argv, check=True) - user_name = pwd.getpwuid(safezone_st.st_uid).pw_name - user_and_group = "%s:%s" % (user_name, group_name) - # Align permissions of rootfs and runsc state directory to the user we will - # run it as: - chown_argv = ( - "/bin/chown", - "-R", - user_and_group, - "/var/run/runsc", - "/wrapped-safezone", - "/dangerzone-image", ) - if args.gvisor_debug: - print( - "Setting permissions to sandbox user:", - " ".join(shlex.quote(s) for s in add_group_argv), - file=sys.stderr, - ) - subprocess.run(chown_argv, check=True) - - # Switch to target user. - su_exec_argv = ("su-exec", user_and_group) + tuple(sys.argv) - if args.gvisor_debug: - print( - "Re-executing as", - user_and_group, - "->", - " ".join(shlex.quote(s) for s in su_exec_argv), - file=sys.stderr, - ) - try: - os.execv("/sbin/su-exec", su_exec_argv) - except Exception as e: - raise e.__class__("su-exec %s failed: %s" % (sys.argv, e)) - else: - assert False, "This code should never be reachable" - -if my_uid != 0: - # If we are not UID 0, create a user namespace where we are mapped to it. - if args.gvisor_debug: - print( - "Current UID/GID is %d:%d; creating new user namespace..." - % (my_uid, my_gid), - file=sys.stderr, +) +assert sandboxed_uid != 0, "Unexpectedly read 0 as the sandboxed dangerzone UID" +sandboxed_gid = int( + subprocess.check_output( + ( + "chroot", + "/dangerzone-image/rootfs", + "id", + "-g", + "dangerzone", ) - os.unshare(os.CLONE_NEWUSER) # type: ignore[attr-defined] - with os.fdopen( - os.open("/proc/self/setgroups", flags=os.O_WRONLY), "wt" - ) as setgroups_fd: - setgroups_fd.write("deny") - with os.fdopen( - os.open("/proc/self/uid_map", flags=os.O_WRONLY), "wt" - ) as uid_map_fd: - uid_map_fd.write("0 %d 1" % (my_uid,)) - with os.fdopen( - os.open("/proc/self/gid_map", flags=os.O_WRONLY), "wt" - ) as gid_map_fd: - gid_map_fd.write("0 %d 1" % (my_gid,)) - # Re-exec. - if args.gvisor_debug: - print("Re-execing:", " ".join(shlex.quote(s) for s in sys.argv)) - try: - os.execvp(sys.argv[0], sys.argv) - except Exception as e: - raise e.__class__("Re-execing %s failed: %s" % (sys.argv, e)) - else: - assert False, "This code should never be reachable" + ) +) +assert sandboxed_gid != 0, "Unexpectedly read 0 as the sandboxed dangerzone GID" -# By this point, we are running as the same user that owns /safezone and -# that user is mapped to UID 0 in a dedicated user namespace. +# Wrap the command with `su-exec` to execute as the intended in-sandbox +# UID/GID, and execute `su-exec` as root. This requires the sandbox's initial +# process to have the CAP_SETUID and CAP_SETGID capabilities, but these are +# not inherited after exec. +# This can all be removed and simplified once gvisor.dev/issue/9918 is fixed. +gvisor_issue_9918_is_fixed = False +sandbox_capabilities = [] +if not gvisor_issue_9918_is_fixed and not args.pre_gvisor: + wrapped_command = [ + "su-exec", + "%d:%d" % (sandboxed_uid, sandboxed_gid), + ] + wrapped_command + sandboxed_uid = 0 + sandboxed_gid = 0 + sandbox_capabilities = ["CAP_SETUID", "CAP_SETGID"] # Build and write container OCI config. -oci_command = wrapped_command -if not args.pre_sandboxed_entrypoint: - oci_command = ["/sandboxed_entrypoint.sh"] + oci_command - oci_config: dict[str, typing.Any] = { "ociVersion": "1.0.0", "process": { - "user": {"uid": 0, "gid": 0}, - "args": oci_command, + "user": { + "uid": sandboxed_uid, + "gid": sandboxed_gid, + }, + "args": wrapped_command, "env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "PYTHONPATH=/opt/dangerzone", @@ -319,14 +139,10 @@ def unshare_monkeypatch(flags: int) -> None: ], "cwd": "/", "capabilities": { - # See the long comment above as to why this is needed. - # CAP_CHOWN is needed to chown the safezone files back and forth. - # CAP_SETUID and CAP_SETGID are required to switch to the - # unprivileged user. - "bounding": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], - "effective": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], - "inheritable": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], - "permitted": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], + "bounding": [], + "effective": sandbox_capabilities, + "inheritable": [], + "permitted": sandbox_capabilities, }, "rlimits": [ {"type": "RLIMIT_NOFILE", "hard": 4096, "soft": 4096}, @@ -340,21 +156,6 @@ def unshare_monkeypatch(flags: int) -> None: "type": "proc", "source": "proc", }, - # /safezone is a tmpfs which will be owned by the unprivileged user - # which lives only in the sandbox. See comment above. - { - "destination": "/safezone", - "type": "tmpfs", - "source": "tmpfs", - "options": ["nosuid", "noexec", "nodev"], - }, - # /host-safezone is where the host's /safezone is actually mounted. - { - "destination": "/host-safezone", - "type": "none", - "source": "/safezone", - "options": ["bind", "nosuid", "noexec", "nodev", "rw"], - }, { "destination": "/dev", "type": "tmpfs", @@ -408,7 +209,7 @@ def unshare_monkeypatch(flags: int) -> None: continue oci_config["process"]["env"].append("%s=%s" % (key, val)) if args.gvisor_debug: - print("Command inside gVisor sandbox:", oci_command, file=sys.stderr) + print("Command inside gVisor sandbox:", wrapped_command, file=sys.stderr) print("OCI config:", file=sys.stderr) json.dump(oci_config, sys.stderr, indent=2, sort_keys=True) # json.dump doesn't print a trailing newline, so print one here: @@ -450,9 +251,7 @@ def unshare_monkeypatch(flags: int) -> None: if args.gvisor_debug: print( - "Running", - runsc_binary, - "with command line:", + "Running gVisor with command line:", " ".join(shlex.quote(s) for s in runsc_argv), file=sys.stderr, ) diff --git a/dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh b/dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh deleted file mode 100755 index ea1721e00..000000000 --- a/dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -# This file runs within the gVisor sandbox. -# Read `entrypoint.py` for why this is needed. - -set -euo pipefail - -# Move files over from /host-safezone to /safezone. -if [[ "$(ls -1 /host-safezone | wc -l)" -gt 0 ]]; then - mv /host-safezone/* /safezone/ -fi -# chown them as the unprivileged user. -chown -R dangerzone:dangerzone /safezone - -# Run the unprivileged command. -set +e -su-exec dangerzone:dangerzone "$@" -retcode="$?" -set -e - -# Move files back from /safezone to /host-safezone. -if [[ -d /safezone ]] && [[ "$(ls -1 /safezone | wc -l)" -gt 0 ]]; then - # chown them back to the user that exists on the host. - chown -R root:root /safezone - mv /safezone/* /host-safezone/ -fi - -# Mirror the exit code of the unprivileged command. -exit "$retcode" diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 175686cf6..a2a6f4c72 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -56,6 +56,7 @@ def get_runtime_security_args() -> List[str]: if Container.get_runtime_name() == "podman": security_args = ["--log-driver", "none"] security_args += ["--security-opt", "no-new-privileges"] + security_args += ["--userns", "nomap"] else: security_args = ["--security-opt=no-new-privileges:true"] # Needed for running rootlesskit, which gVisor uses. cf: