From 43db64ecf96475f7f9c9d2a8d7125231a21936ee Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 23 Sep 2024 22:02:22 +0200 Subject: [PATCH] mkcomposefs: add --sandbox flag This commit adds a new --sandbox flag to mkcomposefs, enabling an attempt to isolate the process within a restricted environment. The sandbox limits the process's access to host resources, reducing potential attack surfaces. It is a best-effort attempt and does not guarantee full isolation. Signed-off-by: Giuseppe Scrivano --- ci/Containerfile.c9s-bootc | 2 +- hacking/installdeps.sh | 2 + man/mkcomposefs.md | 5 + tools/meson.build | 6 +- tools/mkcomposefs-sandbox.c | 210 ++++++++++++++++++++++++++++++++++++ tools/mkcomposefs-sandbox.h | 18 ++++ tools/mkcomposefs.c | 23 +++- 7 files changed, 262 insertions(+), 4 deletions(-) create mode 100644 tools/mkcomposefs-sandbox.c create mode 100644 tools/mkcomposefs-sandbox.h diff --git a/ci/Containerfile.c9s-bootc b/ci/Containerfile.c9s-bootc index 1297d6ac..27f1b031 100644 --- a/ci/Containerfile.c9s-bootc +++ b/ci/Containerfile.c9s-bootc @@ -1,6 +1,6 @@ FROM quay.io/centos/centos:stream9 as build RUN dnf -y install dnf-utils zstd && dnf config-manager --enable crb && dnf builddep -y composefs && \ - dnf -y install meson + dnf -y install meson libseccomp-devel libcap-devel COPY . /build WORKDIR /build RUN set -x; ls -al; meson setup target --prefix=/usr && meson compile -C target && \ diff --git a/hacking/installdeps.sh b/hacking/installdeps.sh index df7b792e..85fde985 100755 --- a/hacking/installdeps.sh +++ b/hacking/installdeps.sh @@ -17,6 +17,8 @@ PACKAGES=" \ python3 \ libcap2-bin \ meson \ + libseccomp-dev \ + libcap-dev \ " # Split required and optional packages based on input variable ALLOW_MISSING: diff --git a/man/mkcomposefs.md b/man/mkcomposefs.md index f0c45121..f5f673ed 100644 --- a/man/mkcomposefs.md +++ b/man/mkcomposefs.md @@ -74,6 +74,11 @@ will be a mountable composefs image. 
: Number of threads to be used to calculate the file digests and copy. Default thread count is the number of processors when *--threads* is not specified. +**\-\-sandbox** +: Run the process in a sandbox built from several Linux kernel features. + It is a best-effort attempt to limit what the process can access while it + is processing untrusted input. Only supported together with *--from-file*. + # FORMAT VERSIONING Composefs images are binary reproduceable, meaning that for a given diff --git a/tools/meson.build b/tools/meson.build index 309205bf..f1e4e5b5 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -1,10 +1,12 @@ libcomposefs_dep = declare_dependency(link_with : libcomposefs, include_directories : config_inc) +seccomp_dep = dependency('libseccomp', required : true) + thread_dep = dependency('threads') executable('mkcomposefs', - 'mkcomposefs.c', - dependencies : [libcomposefs_dep, thread_dep], + ['mkcomposefs.c', 'mkcomposefs-sandbox.c'], + dependencies : [libcomposefs_dep, thread_dep, seccomp_dep], install : true, ) diff --git a/tools/mkcomposefs-sandbox.c b/tools/mkcomposefs-sandbox.c new file mode 100644 index 00000000..129b1803 --- /dev/null +++ b/tools/mkcomposefs-sandbox.c @@ -0,0 +1,210 @@ +/* lcfs + Copyright (C) 2021-2024 Giuseppe Scrivano + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#define _GNU_SOURCE + +#include "config.h" + +#include "mkcomposefs-sandbox.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void do_seccomp_sandbox(void) +{ + scmp_filter_ctx ctx; + int ret; + size_t i; + int syscalls[] = { + SCMP_SYS(brk), SCMP_SYS(close), SCMP_SYS(exit), + SCMP_SYS(exit_group), SCMP_SYS(fstat), SCMP_SYS(lseek), + SCMP_SYS(mmap), SCMP_SYS(mremap), SCMP_SYS(munmap), + SCMP_SYS(newfstatat), SCMP_SYS(read), SCMP_SYS(readv), + SCMP_SYS(sysinfo), SCMP_SYS(write), SCMP_SYS(writev), + }; + + /* Use ENOSYS by default so that libraries can attempt a fallback syscall instead of failing immediately. */ + ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS)); + if (ctx == NULL) + err(EXIT_FAILURE, "seccomp_init"); + + for (i = 0; i < sizeof(syscalls) / sizeof(syscalls[0]); i++) { + ret = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0); + if (ret < 0) { + errno = -ret; + err(EXIT_FAILURE, "seccomp_rule_add"); + } + } + + ret = seccomp_load(ctx); + if (ret < 0) { + errno = -ret; + err(EXIT_FAILURE, "seccomp_load"); + } +} + +static int pivot_root(const char *new_root, const char *put_old) +{ + return syscall(__NR_pivot_root, new_root, put_old); +} + +static void do_namespace_sandbox(void) +{ + uid_t uid = geteuid(); + gid_t gid = getegid(); + int ret, fd; + int old_root; + char *cwd; + + ret = unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWUTS | + CLONE_NEWIPC | CLONE_NEWNET); + if (ret < 0) + return; /* best effort: carry on without namespace isolation */ + + fd = open("/proc/self/setgroups", O_WRONLY | O_CLOEXEC); + if (fd < 0) + err(EXIT_FAILURE, "open /proc/self/setgroups"); + ret = write(fd, "deny", 4); /* an unprivileged process must deny setgroups before it may write gid_map */ + if (ret < 0) + err(EXIT_FAILURE, "write to /proc/self/setgroups"); + close(fd); + + fd = open("/proc/self/gid_map", O_WRONLY | O_CLOEXEC); + if (fd < 0) + err(EXIT_FAILURE, "open /proc/self/gid_map"); + ret = dprintf(fd, "0 %u 1\n", gid); + if (ret < 0) + 
err(EXIT_FAILURE, "write to /proc/self/gid_map"); + close(fd); + + fd = open("/proc/self/uid_map", O_WRONLY | O_CLOEXEC); + if (fd < 0) + err(EXIT_FAILURE, "open /proc/self/uid_map"); + ret = dprintf(fd, "0 %u 1\n", uid); + if (ret < 0) + err(EXIT_FAILURE, "write to /proc/self/uid_map"); + close(fd); + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL); + if (ret < 0) + err(EXIT_FAILURE, "mount /"); + + cwd = get_current_dir_name(); + if (!cwd) + err(EXIT_FAILURE, "get_current_dir_name"); + + ret = mount(NULL, cwd, "tmpfs", 0, NULL); + if (ret < 0) + err(EXIT_FAILURE, "mount tmpfs"); + + old_root = open("/", O_PATH | O_DIRECTORY | O_CLOEXEC); + if (old_root < 0) + err(EXIT_FAILURE, "open /"); + + ret = chdir(cwd); + if (ret < 0) + err(EXIT_FAILURE, "chdir cwd"); + + free(cwd); + cwd = NULL; + + ret = pivot_root(".", "."); /* stacks the old root on top of the new root at "/" (see pivot_root(2)) */ + if (ret < 0) + err(EXIT_FAILURE, "pivot_root"); + + ret = fchdir(old_root); + if (ret < 0) + err(EXIT_FAILURE, "fchdir"); + close(old_root); + + ret = umount2(".", MNT_DETACH); + if (ret < 0) + err(EXIT_FAILURE, "umount2"); + + ret = chdir("/"); + if (ret < 0) + err(EXIT_FAILURE, "chdir /"); +} + +static void drop_caps(void) +{ + struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; + struct __user_cap_data_struct data[2] = { { 0 } }; + int ret, cap; + ret = prctl(PR_SET_KEEPCAPS, 0, 0, 0, 0); + if (ret < 0) + err(EXIT_FAILURE, "prctl(PR_SET_KEEPCAPS)"); + + for (cap = 0;; cap++) { + ret = prctl(PR_CAPBSET_DROP, cap, 0, 0, 0); + if (ret < 0 && errno != EINVAL) + err(EXIT_FAILURE, "prctl(PR_CAPBSET_DROP)"); + if (ret < 0) + break; /* EINVAL: cap is past the last valid capability */ + } + + ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0); + if (ret < 0 && errno != EINVAL) + err(EXIT_FAILURE, "prctl(PR_CAP_AMBIENT)"); + + ret = capset(&hdr, data); + if (ret < 0 && errno != EINVAL) + err(EXIT_FAILURE, "capset"); +} + +static void do_set_oom_score_adj(void) +{ + int fd, ret; + + fd = open("/proc/self/oom_score_adj", O_WRONLY); + if (fd < 0) + err(EXIT_FAILURE, 
"open /proc/self/oom_score_adj"); + + ret = write(fd, "1000", 4); /* per proc(5), 1000 is the maximum oom_score_adj: prefer this process as OOM victim */ + if (ret < 0) + err(EXIT_FAILURE, "write to /proc/self/oom_score_adj"); + + close(fd); +} + +void create_sandbox(void) +{ + do_set_oom_score_adj(); /* opens a /proc path, so it runs before do_namespace_sandbox() replaces the root */ + do_namespace_sandbox(); + drop_caps(); + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) + err(EXIT_FAILURE, "prctl(PR_SET_NO_NEW_PRIVS)"); + + do_seccomp_sandbox(); /* installed last: the steps above use syscalls that are not on the seccomp allow-list */ +} diff --git a/tools/mkcomposefs-sandbox.h b/tools/mkcomposefs-sandbox.h new file mode 100644 index 00000000..dc28b82f --- /dev/null +++ b/tools/mkcomposefs-sandbox.h @@ -0,0 +1,18 @@ +/* lcfs + Copyright (C) 2021-2024 Giuseppe Scrivano + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +void create_sandbox(void); diff --git a/tools/mkcomposefs.c b/tools/mkcomposefs.c index ae291fd5..30d993a8 100644 --- a/tools/mkcomposefs.c +++ b/tools/mkcomposefs.c @@ -23,6 +23,8 @@ #include "libcomposefs/lcfs-utils.h" #include "libcomposefs/lcfs-internal.h" +#include "mkcomposefs-sandbox.h" + #include #include #include @@ -70,6 +72,7 @@ static __attribute__((format(printf, 1, 2))) char *make_error(const char *fmt, . 
#define OPT_MIN_VERSION 114 #define OPT_THREADS 115 #define OPT_MAX_VERSION 116 +#define OPT_SANDBOX 117 static size_t split_at(const char **start, size_t *length, char split_char, bool *partial) @@ -1467,7 +1470,8 @@ static void usage(const char *argv0) " --from-file The source is a dump file, not a directory\n" " --min-version=N Use this minimal format version (default=%d)\n" " --max-version=N Use this maxium format version (default=%d)\n" - " --threads=N Use this to override the default number of threads used to calculate digest and copy files (default=%d)\n", + " --threads=N Use this to override the default number of threads used to calculate digest and copy files (default=%d)\n" + " --sandbox Sandbox the process before processing the input file\n", bin, LCFS_DEFAULT_VERSION_MIN, LCFS_DEFAULT_VERSION_MAX, get_cpu_count()); } @@ -1541,6 +1545,12 @@ int main(int argc, char **argv) flag: NULL, val: OPT_THREADS }, + { + name: "sandbox", + has_arg: no_argument, + flag: NULL, + val: OPT_SANDBOX + }, {}, }; struct lcfs_write_options_s options = { 0 }; @@ -1559,6 +1569,7 @@ int main(int argc, char **argv) FILE *out_file; char *failed_path; bool version_set = false; + bool use_sandbox = false; long min_version = 0; long max_version = 0; char *end; @@ -1624,6 +1635,9 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; + case OPT_SANDBOX: + use_sandbox = true; + break; case ':': fprintf(stderr, "option needs a value\n"); exit(EXIT_FAILURE); @@ -1698,6 +1712,9 @@ int main(int argc, char **argv) close_input = true; } + if (use_sandbox) + create_sandbox(); + char *err = NULL; root = tree_from_dump(input, &err); if (root == NULL) { @@ -1716,6 +1733,10 @@ int main(int argc, char **argv) buildflag_copy &= ~LCFS_BUILD_BY_DIGEST; buildflag_copy |= LCFS_BUILD_NO_INLINE; + if (use_sandbox) + errx(EXIT_FAILURE, + "the sandbox option is supported only with --from-file"); /* usage error: errno is not meaningful, hence errx */ + root = lcfs_build(AT_FDCWD, src_path, buildflag_copy, &failed_path); if (root == NULL) 
err(EXIT_FAILURE, "error accessing %s", failed_path);