From 4982306e67f79260b1319f4acde03d1eb11d5060 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 4 Jan 2024 11:14:39 -0500 Subject: [PATCH 1/2] lib/deploy: Round to block size in early prune space check When we estimate how much space a new bootcsum dir will use, we weren't accounting for the space overhead from files not using the last filesystem block completely. This doesn't matter much if counting a few files, but e.g. on FCOS aarch64, we include lots of small devicetree blobs in the bootfs. That loss can add up to enough for the `fallocate()` check to pass but copying still hitting `ENOSPC` later on. I think a better fix here is to change approach entirely and instead refactor `install_deployment_kernel()` so that we can call just the copying bits of it as part of the early prune logic. We'll get a more accurate assessment and it's not lost work since we won't need to recopy later on. Also this would not require having to keep in sync the estimator and the install bits. That said, this is blocking FCOS releases, so I went with a more tactical fix for now. Fixes: https://github.com/coreos/fedora-coreos-tracker/issues/1637 --- src/libostree/ostree-sysroot-deploy.c | 42 +++++++++++++++--------- src/libotutil/ot-fs-utils.c | 18 +++++++--- src/libotutil/ot-fs-utils.h | 4 +-- tests/kolainst/destructive/auto-prune.sh | 33 +++++++++++++++++-- 4 files changed, 73 insertions(+), 24 deletions(-) diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c index c7148208bc..d6f3dcb61f 100644 --- a/src/libostree/ostree-sysroot-deploy.c +++ b/src/libostree/ostree-sysroot-deploy.c @@ -2450,7 +2450,8 @@ write_deployments_finish (OstreeSysroot *self, GCancellable *cancellable, GError } static gboolean -add_file_size_if_nonnull (int dfd, const char *path, guint64 *inout_size, GError **error) +add_file_size_if_nonnull (int dfd, const char *path, guint64 blocksize, guint64 *inout_size, + GError **error) { if (path == NULL) return TRUE; @@ -2460,14 +2461,21 @@ add_file_size_if_nonnull (int dfd, const char *path, guint64 *inout_size, GError return FALSE; *inout_size += stbuf.st_size; + if (blocksize > 0) + { + off_t rem = stbuf.st_size % blocksize; + if (rem > 0) + *inout_size += blocksize - rem; + } + return TRUE; } /* calculates the total size of the bootcsum dir in /boot after we would copy * it. This reflects the logic in install_deployment_kernel(). */ static gboolean -get_kernel_layout_size (OstreeSysroot *self, OstreeDeployment *deployment, guint64 *out_size, - GCancellable *cancellable, GError **error) +get_kernel_layout_size (OstreeSysroot *self, OstreeDeployment *deployment, guint64 blocksize, + guint64 *out_size, GCancellable *cancellable, GError **error) { g_autofree char *deployment_dirpath = ostree_sysroot_get_deployment_dirpath (self, deployment); glnx_autofd int deployment_dfd = -1; @@ -2479,11 +2487,11 @@ get_kernel_layout_size (OstreeSysroot *self, OstreeDeployment *deployment, guint return FALSE; guint64 bootdir_size = 0; - if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->kernel_srcpath, + if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->kernel_srcpath, blocksize, &bootdir_size, error)) return FALSE; if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->initramfs_srcpath, - &bootdir_size, error)) + blocksize, &bootdir_size, error)) return FALSE; if (kernel_layout->devicetree_srcpath) { @@ -2491,22 +2499,22 @@ get_kernel_layout_size (OstreeSysroot *self, OstreeDeployment *deployment, guint if (kernel_layout->devicetree_namever) { if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->devicetree_srcpath, - &bootdir_size, error)) + blocksize, &bootdir_size, error)) return FALSE; } else { guint64 dirsize = 0; if (!ot_get_dir_size (kernel_layout->boot_dfd, kernel_layout->devicetree_srcpath, - &dirsize, cancellable, error)) + blocksize, &dirsize, cancellable, error)) return FALSE; bootdir_size += dirsize; } } if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->kernel_hmac_srcpath, - &bootdir_size, error)) + blocksize, &bootdir_size, error)) return FALSE; - if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->aboot_srcpath, + if (!add_file_size_if_nonnull (kernel_layout->boot_dfd, kernel_layout->aboot_srcpath, blocksize, &bootdir_size, error)) return FALSE; @@ -2583,6 +2591,11 @@ auto_early_prune_old_deployments (OstreeSysroot *self, GPtrArray *new_deployment g_autoptr (GHashTable) new_bootcsums = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL); + /* get bootfs block size */ + struct statvfs stvfsbuf; + if (TEMP_FAILURE_RETRY (fstatvfs (self->boot_fd, &stvfsbuf)) < 0) + return glnx_throw_errno_prefix (error, "fstatvfs(boot)"); + g_auto (GStrv) bootdirs = NULL; if (!_ostree_sysroot_list_all_boot_directories (self, &bootdirs, cancellable, error)) return glnx_prefix_error (error, "listing bootcsum directories in bootfs"); @@ -2597,7 +2610,8 @@ auto_early_prune_old_deployments (OstreeSysroot *self, GPtrArray *new_deployment guint64 bootdir_size; g_autofree char *ostree_bootdir = g_build_filename ("ostree", bootdir, NULL); - if (!ot_get_dir_size (self->boot_fd, ostree_bootdir, &bootdir_size, cancellable, error)) + if (!ot_get_dir_size (self->boot_fd, ostree_bootdir, stvfsbuf.f_bsize, &bootdir_size, + cancellable, error)) return FALSE; /* for our purposes of sizing bootcsums, it's highly unlikely we need a @@ -2609,10 +2623,7 @@ auto_early_prune_old_deployments (OstreeSysroot *self, GPtrArray *new_deployment * that users report it and we tweak this code to handle this. * * An alternative is working with the block size instead, which would - * be easier to handle. But ideally, `ot_get_dir_size` would be block - * size aware too for better accuracy, which is awkward since the - * function itself is generic over directories and doesn't consider - * e.g. mount points from different filesystems. */ + * be easier to handle. */ g_printerr ("bootcsum %s size exceeds %u; disabling auto-prune optimization\n", bootdir, G_MAXUINT); return TRUE; @@ -2640,7 +2651,8 @@ auto_early_prune_old_deployments (OstreeSysroot *self, GPtrArray *new_deployment } guint64 bootdir_size = 0; - if (!get_kernel_layout_size (self, deployment, &bootdir_size, cancellable, error)) + if (!get_kernel_layout_size (self, deployment, stvfsbuf.f_bsize, &bootdir_size, cancellable, + error)) return FALSE; /* see similar logic in previous loop */ diff --git a/src/libotutil/ot-fs-utils.c b/src/libotutil/ot-fs-utils.c index efbb2f207e..1e961a986c 100644 --- a/src/libotutil/ot-fs-utils.c +++ b/src/libotutil/ot-fs-utils.c @@ -227,11 +227,12 @@ ot_parse_file_by_line (const char *path, gboolean (*cb) (const char *, void *, G return TRUE; } -/* Calculate the size of the files contained in a directory. Symlinks are not - * followed. */ +/* Calculate the size of the files contained in a directory. Symlinks are + * not followed. If `blocksize` is nonzero, all sizes are rounded to its next + * multiple. */ gboolean -ot_get_dir_size (int dfd, const char *path, guint64 *out_size, GCancellable *cancellable, - GError **error) +ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size, + GCancellable *cancellable, GError **error) { g_auto (GLnxDirFdIterator) dfd_iter = { 0, @@ -256,11 +257,18 @@ ot_get_dir_size (int dfd, const char *path, guint64 *out_size, GCancellable *can return FALSE; *out_size += stbuf.st_size; + if (blocksize > 0) + { + off_t rem = stbuf.st_size % blocksize; + if (rem > 0) + *out_size += blocksize - rem; + } } else if (dent->d_type == DT_DIR) { guint64 subdir_size; - if (!ot_get_dir_size (dfd_iter.fd, dent->d_name, &subdir_size, cancellable, error)) + if (!ot_get_dir_size (dfd_iter.fd, dent->d_name, blocksize, &subdir_size, cancellable, + error)) return FALSE; *out_size += subdir_size; diff --git a/src/libotutil/ot-fs-utils.h b/src/libotutil/ot-fs-utils.h index b64adce247..7df79ba2a6 100644 --- a/src/libotutil/ot-fs-utils.h +++ b/src/libotutil/ot-fs-utils.h @@ -75,7 +75,7 @@ GBytes *ot_fd_readall_or_mmap (int fd, goffset offset, GError **error); gboolean ot_parse_file_by_line (const char *path, gboolean (*cb) (const char *, void *, GError **), void *cbdata, GCancellable *cancellable, GError **error); -gboolean ot_get_dir_size (int dfd, const char *path, guint64 *out_size, GCancellable *cancellable, - GError **error); +gboolean ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size, + GCancellable *cancellable, GError **error); G_END_DECLS diff --git a/tests/kolainst/destructive/auto-prune.sh b/tests/kolainst/destructive/auto-prune.sh index 1ec1534e52..5081275e07 100755 --- a/tests/kolainst/destructive/auto-prune.sh +++ b/tests/kolainst/destructive/auto-prune.sh @@ -106,6 +106,7 @@ rpm-ostree rollback # to not actually fit (because some filesystems like ext4 include reserved # overhead in their f_bfree count for some reason) will still trigger the auto- # prune logic. +# https://github.com/ostreedev/ostree/pull/2866 unconsume_bootfs_space @@ -114,10 +115,10 @@ unconsume_bootfs_space unshare -m bash -c \ "mount -o rw,remount /boot && \ cp /usr/lib/modules/`uname -r`/{vmlinuz,initramfs.img} /boot" -free_blocks=$(stat --file-system /boot -c '%f') +free_blocks_kernel_and_initrd=$(stat --file-system /boot -c '%f') unshare -m bash -c \ "mount -o rw,remount /boot && rm /boot/{vmlinuz,initramfs.img}" -consume_bootfs_space "$((free_blocks))" +consume_bootfs_space "$((free_blocks_kernel_and_initrd))" rpm-ostree rebase :modkernel1 # Disable auto-pruning to verify we reproduce the bug @@ -133,4 +134,32 @@ ostree admin finalize-staged |& tee out.txt assert_file_has_content out.txt "updating bootloader in two steps" rm out.txt +# Below, we test that the size estimator is blocksize aware. This catches the +# case where the dtb contains many small files such that there's a lot of wasted +# block space we need to account for. +# https://github.com/coreos/fedora-coreos-tracker/issues/1637 + +unconsume_bootfs_space + +mkdir -p rootfs/usr/lib/modules/`uname -r`/dtb +(set +x; for i in {1..10000}; do echo -n x > rootfs/usr/lib/modules/`uname -r`/dtb/$i; done) +ostree commit --base modkernel1 -P --tree=dir=rootfs -b modkernel3 + +# a naive estimator would think all those files just take 10000 bytes +consume_bootfs_space "$((free_blocks_kernel_and_initrd - 10000))" + +rpm-ostree rebase :modkernel3 +# Disable auto-pruning to verify we reproduce the bug +if OSTREE_SYSROOT_OPTS=no-early-prune ostree admin finalize-staged |& tee out.txt; then + assert_not_reached "successfully wrote kernel without auto-pruning" +fi +assert_file_has_content out.txt "No space left on device" +rm out.txt + +# now, try again but with (now default) auto-pruning enabled +rpm-ostree rebase :modkernel3 +ostree admin finalize-staged |& tee out.txt +assert_file_has_content out.txt "updating bootloader in two steps" +rm out.txt + echo "ok bootfs auto-prune" From cc5747a60561db0b6c8a534d1556605599192a03 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 4 Jan 2024 11:14:40 -0500 Subject: [PATCH 2/2] lib/deploy: Add safety margin in early prune space check There are a few things the estimator doesn't account for, e.g. writing the new BLS entries. Rather than trying to perfect it (since I think we should change approach entirely -- see previous commit message), just add a 1M margin to the space check. --- src/libostree/ostree-sysroot-deploy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c index d6f3dcb61f..5c941d5170 100644 --- a/src/libostree/ostree-sysroot-deploy.c +++ b/src/libostree/ostree-sysroot-deploy.c @@ -59,6 +59,12 @@ SD_ID128_MAKE (e8, 64, 6c, d6, 3d, ff, 46, 25, b7, 79, 09, a8, e7, a4, 09, 94) #endif +/* How much additional space we require available on top of what we accounted + * during the early prune fallocate space check. This accounts for anything not + * captured directly by `get_kernel_layout_size()` like writing new BLS entries. + */ +#define EARLY_PRUNE_SAFETY_MARGIN_SIZE (1 << 20) /* 1 MB */ + /* * Like symlinkat() but overwrites (atomically) an existing * symlink. @@ -2541,6 +2547,9 @@ dfd_fallocate_check (int dfd, off_t len, gboolean *out_passed, GError **error) if (!glnx_open_tmpfile_linkable_at (dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error)) return FALSE; + /* add the safety margin */ + len += EARLY_PRUNE_SAFETY_MARGIN_SIZE; + *out_passed = TRUE; /* There's glnx_try_fallocate, but not with the same error semantics. */ if (TEMP_FAILURE_RETRY (fallocate (tmpf.fd, 0, 0, len)) < 0)