From 4c506f4eede495981897de3f1c8701a169ed7702 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Thu, 30 Jun 2022 14:53:13 +0200 Subject: [PATCH] prepare-root: Use composefs as root filesystem This changes ostree-prepare-root to use the .ostree.cfs image as a composefs filesystem, instead of the checkout. Currently composefs is *always* used (if enabled at build time), but long term we want to somehow make this optional while still not lessening trust in the system by allowing an attacker to disable its use. The final layout when this is active is: / ro composefs mount /sysroot "real" root /etc rw bind mount to $deploydir/etc /var rw bind mount to $vardir In order for this to work, we need to change the way prepare-root works. Currently it works with a CWD of $deploydir, which is a bind mount (so it can be moved later). However, we can't mount the composefs at $deploydir, because then it will cover the etc dir from the underlying deploydir and then we can't bind mount it. Instead we change both codepaths to work with the destination /sysroot.tmp, leaving the "real" $deploydir as CWD. I.e. the extra bind mount is in /sysroot.tmp in the !use_composefs case, or the composefs mount in the use_composefs case. This is really not that different from before, as we had to temporarily use /sysroot.tmp at the end before anyway. A further note: I didn't test the overlayfs testcase, but the comment mentions that you can't mount overlayfs on top of a readonly mount. That seems incompatible with composefs. 
--- Makefile-switchroot.am | 13 +++- src/switchroot/ostree-prepare-root.c | 97 +++++++++++++++++----------- 2 files changed, 70 insertions(+), 40 deletions(-) diff --git a/Makefile-switchroot.am b/Makefile-switchroot.am index 104ec0cdf3..8c486e4935 100644 --- a/Makefile-switchroot.am +++ b/Makefile-switchroot.am @@ -27,7 +27,9 @@ ostree_prepare_root_SOURCES = \ src/switchroot/ostree-mount-util.h \ src/switchroot/ostree-prepare-root.c \ $(NULL) +ostree_prepare_root_CFLAGS = ostree_prepare_root_CPPFLAGS = $(AM_CPPFLAGS) +ostree_prepare_root_LDADD = if BUILDOPT_USE_STATIC_COMPILER # ostree-prepare-root can be used as init in a system without a populated /lib. @@ -43,10 +45,10 @@ if BUILDOPT_USE_STATIC_COMPILER ostree_boot_SCRIPTS += ostree-prepare-root ostree-prepare-root : $(ostree_prepare_root_SOURCES) - $(STATIC_COMPILER) -o $@ -static $(top_srcdir)/src/switchroot/ostree-prepare-root.c $(ostree_prepare_root_CPPFLAGS) $(AM_CFLAGS) $(DEFAULT_INCLUDES) -DOSTREE_PREPARE_ROOT_STATIC=1 + $(STATIC_COMPILER) -o $@ -static $(top_srcdir)/src/switchroot/ostree-prepare-root.c $(ostree_prepare_root_CPPFLAGS) $(AM_CFLAGS) $(DEFAULT_INCLUDES) $(OT_DEP_COMPOSEFS_CFLAGS) $(OT_DEP_COMPOSEFS_LIBS) -DOSTREE_PREPARE_ROOT_STATIC=1 else ostree_boot_PROGRAMS += ostree-prepare-root -ostree_prepare_root_CFLAGS = $(AM_CFLAGS) -Isrc/switchroot +ostree_prepare_root_CFLAGS += $(AM_CFLAGS) -Isrc/switchroot endif ostree_remount_SOURCES = \ @@ -56,9 +58,14 @@ ostree_remount_SOURCES = \ ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/libglnx ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libglnx.la +if USE_COMPOSEFS +ostree_prepare_root_CFLAGS += $(OT_DEP_COMPOSEFS_CFLAGS) +ostree_prepare_root_LDADD += $(OT_DEP_COMPOSEFS_LIBS) +endif + if BUILDOPT_SYSTEMD ostree_prepare_root_CPPFLAGS += -DHAVE_SYSTEMD=1 -ostree_prepare_root_LDADD = $(AM_LDFLAGS) $(LIBSYSTEMD_LIBS) +ostree_prepare_root_LDADD += $(AM_LDFLAGS) $(LIBSYSTEMD_LIBS) 
endif # This is the "new mode" of using a generator for /var; see diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index a5fbc8a810..2fd091c8b6 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -77,6 +77,10 @@ #define OSTREE_PREPARE_ROOT_DEPLOYMENT_MSG SD_ID128_MAKE(71,70,33,6a,73,ba,46,01,ba,d3,1a,f8,88,aa,0d,f7) #endif +#ifdef HAVE_COMPOSEFS +#include <libcomposefs/lcfs-mount.h> +#endif + #include "ostree-mount-util.h" static inline bool @@ -219,6 +223,11 @@ main(int argc, char *argv[]) /* Query the repository configuration - this is an operating system builder * choice. More info: https://github.com/ostreedev/ostree/pull/1767 */ +#ifdef HAVE_COMPOSEFS + const bool use_composefs = 1; /* TODO: Read this from some config, maybe kernel cmdline for trust? */ +#else + const bool use_composefs = 0; +#endif const bool sysroot_readonly = sysroot_is_configured_ro (root_arg); const bool sysroot_currently_writable = !path_is_on_readonly_fs (root_arg); #ifdef USE_LIBSYSTEMD @@ -239,16 +248,37 @@ main(int argc, char *argv[]) if (mount (NULL, "/", NULL, MS_REC | MS_PRIVATE | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to make \"/\" private mount"); - /* Make deploy_path a bind mount, so we can move it later */ - if (mount (deploy_path, deploy_path, NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to make initial bind mount %s", deploy_path); + if (mkdir ("/sysroot.tmp", 0755) < 0) + err (EXIT_FAILURE, "couldn't create temporary sysroot /sysroot.tmp"); - /* chdir to our new root. We need to do this after bind-mounting it over - * itself otherwise our cwd is still on the non-bind-mounted filesystem - * below. 
*/ + /* Run in the deploy_path dir so we can use relative paths below */ if (chdir (deploy_path) < 0) err (EXIT_FAILURE, "failed to chdir to deploy_path"); + /* We construct the new sysroot in /sysroot.tmp, which is either the composefs + mount or a bind mount of the deploy-dir */ + if (use_composefs) + { +#ifdef HAVE_COMPOSEFS + const char *objdirs[] = { "/sysroot/ostree/repo/objects" }; + struct lcfs_mount_options_s cfs_options = { + objdirs, 1, + }; + + if (lcfs_mount_image(".ostree.cfs", "/sysroot.tmp", &cfs_options) < 0) + err (EXIT_FAILURE, "Failed to mount composefs"); +#else + err (EXIT_FAILURE, "Composefs not supported"); +#endif + } + else + { + /* Make /sysroot.tmp a bind mount, so we can move it later */ + if (mount (deploy_path, "/sysroot.tmp", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to make initial bind mount %s", deploy_path); + } + + /* This will result in a system with /sysroot read-only. Thus, two additional * writable bind-mounts (for /etc and /var) are required later on. */ if (sysroot_readonly) @@ -274,7 +304,7 @@ main(int argc, char *argv[]) { if (snprintf (srcpath, sizeof(srcpath), "%s/boot", root_mountpoint) < 0) err (EXIT_FAILURE, "failed to assemble /boot path"); - if (mount (srcpath, "boot", NULL, MS_BIND | MS_SILENT, NULL) < 0) + if (mount (srcpath, "/sysroot.tmp/boot", NULL, MS_BIND | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to bind mount %s to boot", srcpath); } } @@ -282,13 +312,13 @@ main(int argc, char *argv[]) /* Prepare /etc. * No action required if sysroot is writable. Otherwise, a bind-mount for * the deployment needs to be created and remounted as read/write. */ - if (sysroot_readonly) + if (sysroot_readonly || use_composefs) { /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ - if (mount ("etc", "etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /etc bind-mount at %s", srcpath); - if (mount ("etc", "etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to make writable /etc bind-mount at %s", srcpath); + if (mount ("etc", "/sysroot.tmp/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); + if (mount ("/sysroot.tmp/etc", "/sysroot.tmp/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); } /* Prepare /usr. @@ -296,28 +326,28 @@ main(int argc, char *argv[]) if (lstat (".usr-ovl-work", &stbuf) == 0) { /* Do we have a persistent overlayfs for /usr? If so, mount it now. */ - const char usr_ovl_options[] = "lowerdir=usr,upperdir=.usr-ovl-upper,workdir=.usr-ovl-work"; + const char usr_ovl_options[] = "lowerdir=/sysroot.tmp/usr,upperdir=.usr-ovl-upper,workdir=.usr-ovl-work"; /* Except overlayfs barfs if we try to mount it on a read-only * filesystem. For this use case I think admins are going to be * okay if we remount the rootfs here, rather than waiting until * later boot and `systemd-remount-fs.service`. 
*/ - if (path_is_on_readonly_fs (".")) + if (path_is_on_readonly_fs ("/sysroot.tmp")) { - if (mount (".", ".", NULL, MS_REMOUNT | MS_SILENT, NULL) < 0) + if (mount ("/sysroot.tmp", "/sysroot.tmp", NULL, MS_REMOUNT | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to remount rootfs writable (for overlayfs)"); } - if (mount ("overlay", "usr", "overlay", MS_SILENT, usr_ovl_options) < 0) + if (mount ("overlay", "/sysroot.tmp/usr", "overlay", MS_SILENT, usr_ovl_options) < 0) err (EXIT_FAILURE, "failed to mount /usr overlayfs"); } - else + else if (!use_composefs) { - /* Otherwise, a read-only bind mount for /usr */ - if (mount ("usr", "usr", NULL, MS_BIND | MS_SILENT, NULL) < 0) + /* Otherwise, a read-only bind mount for /usr. (Not needed for composefs) */ + if (mount ("/sysroot.tmp/usr", "/sysroot.tmp/usr", NULL, MS_BIND | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr"); - if (mount ("usr", "usr", NULL, MS_BIND | MS_REMOUNT | MS_RDONLY | MS_SILENT, NULL) < 0) + if (mount ("/sysroot.tmp/usr", "/sysroot.tmp/usr", NULL, MS_BIND | MS_REMOUNT | MS_RDONLY | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr"); } @@ -350,7 +380,7 @@ main(int argc, char *argv[]) */ if (mount_var) { - if (mount ("../../var", "var", NULL, MS_BIND | MS_SILENT, NULL) < 0) + if (mount ("../../var", "/sysroot.tmp/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to bind mount ../../var to var"); } @@ -362,6 +392,9 @@ main(int argc, char *argv[]) if (!running_as_pid1) touch_run_ostree (); + if (chdir ("/sysroot.tmp") < 0) + err (EXIT_FAILURE, "failed to chdir to /sysroot.tmp"); + if (strcmp(root_mountpoint, "/") == 0) { /* pivot_root rotates two mount points around. In this instance . 
(the @@ -376,29 +409,19 @@ main(int argc, char *argv[]) else { /* In this instance typically we have our ready made-up up root at - * /sysroot/ostree/deploy/.../ (deploy_path) and the real rootfs at - * /sysroot (root_mountpoint). We want to end up with our made-up root at + * /sysroot.tmp and the real rootfs at /sysroot (root_mountpoint). + * We want to end up with our made-up root at * /sysroot/ and the real rootfs under /sysroot/sysroot as systemd will be * responsible for moving /sysroot to /. - * - * We need to do this in 3 moves to avoid trying to move /sysroot under - * itself: - * - * 1. /sysroot/ostree/deploy/... -> /sysroot.tmp - * 2. /sysroot -> /sysroot.tmp/sysroot - * 3. /sysroot.tmp -> /sysroot */ - if (mkdir ("/sysroot.tmp", 0755) < 0) - err (EXIT_FAILURE, "couldn't create temporary sysroot /sysroot.tmp"); - - if (mount (deploy_path, "/sysroot.tmp", NULL, MS_MOVE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to MS_MOVE '%s' to '/sysroot.tmp'", deploy_path); - if (mount (root_mountpoint, "sysroot", NULL, MS_MOVE | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to MS_MOVE '%s' to 'sysroot'", root_mountpoint); if (mount (".", root_mountpoint, NULL, MS_MOVE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to MS_MOVE %s to %s", deploy_path, root_mountpoint); + err (EXIT_FAILURE, "failed to MS_MOVE /sysroot.tmp to %s", root_mountpoint); + + if (chdir ("/sysroot") < 0) + err (EXIT_FAILURE, "failed to chdir to /sysroot"); if (rmdir ("/sysroot.tmp") < 0) err (EXIT_FAILURE, "couldn't remove temporary sysroot /sysroot.tmp");