diff --git a/Makefile-tests.am b/Makefile-tests.am index 48363b9af6..2352b0b8a0 100644 --- a/Makefile-tests.am +++ b/Makefile-tests.am @@ -119,6 +119,7 @@ _installed_or_uninstalled_test_scripts = \ tests/test-summary-update.sh \ tests/test-summary-view.sh \ tests/test-no-initramfs.sh \ + tests/test-payload-link.sh \ $(NULL) experimental_test_scripts = \ diff --git a/man/ostree.repo-config.xml b/man/ostree.repo-config.xml index aa3abd54f1..cbc605f775 100644 --- a/man/ostree.repo-config.xml +++ b/man/ostree.repo-config.xml @@ -145,6 +145,13 @@ Boston, MA 02111-1307, USA. + + payload-link-threshold + An integer value that specifies a minimum file size for creating + a payload link. By default it is disabled. + + + diff --git a/src/libostree/ostree-repo-commit.c b/src/libostree/ostree-repo-commit.c index 70068138c9..875743b4d3 100644 --- a/src/libostree/ostree-repo-commit.c +++ b/src/libostree/ostree-repo-commit.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "otutil.h" #include "ostree.h" @@ -40,6 +42,12 @@ #include "ostree-checksum-input-stream.h" #include "ostree-varint.h" +/* The standardized version of BTRFS_IOC_CLONE */ +#ifndef FICLONE +#define FICLONE _IOW(0x94, 9, int) +#endif + + /* If fsync is enabled and we're in a txn, we write into a staging dir for * commit, but we also allow direct writes into objects/ for e.g. hardlink * imports. 
@@ -589,6 +597,236 @@ create_regular_tmpfile_linkable_with_content (OstreeRepo *self,
   return TRUE;
 }
 
+/* Report in @supported whether reflinks (FICLONE) may be used for @self.
+ *
+ * The first call probes by cloning one freshly created temporary file onto
+ * another in commit_tmp_dfd (self) and caches the outcome in
+ * self->fs_support_reflink: 0 = not probed yet, 1 = works, -1 = EOPNOTSUPP.
+ * Any other errno leaves the cache at 0, so @supported is TRUE and the probe
+ * is repeated on the next call.
+ *
+ * Returns: FALSE with @error set only if a temporary file cannot be created.
+ */
+static gboolean
+_check_support_reflink (OstreeRepo *self, gboolean *supported, GError **error)
+{
+  /* We have not checked yet if the file system supports reflinks, do it here */
+  if (g_atomic_int_get (&self->fs_support_reflink) == 0)
+    {
+      g_auto(GLnxTmpfile) src_tmpf = { 0, };
+      g_auto(GLnxTmpfile) dest_tmpf = { 0, };
+
+      if (!glnx_open_tmpfile_linkable_at (commit_tmp_dfd (self), ".", O_RDWR|O_CLOEXEC,
+                                          &src_tmpf, error))
+        return FALSE;
+      if (!glnx_open_tmpfile_linkable_at (commit_tmp_dfd (self), ".", O_WRONLY|O_CLOEXEC,
+                                          &dest_tmpf, error))
+        return FALSE;
+
+      if (ioctl (dest_tmpf.fd, FICLONE, src_tmpf.fd) == 0)
+        g_atomic_int_set (&self->fs_support_reflink, 1);
+      else if (errno == EOPNOTSUPP) /* Ignore other kind of errors as they might be temporary failures */
+        g_atomic_int_set (&self->fs_support_reflink, -1);
+    }
+  *supported = g_atomic_int_get (&self->fs_support_reflink) >= 0;
+  return TRUE;
+}
+
+/* Record that file object @checksum has content checksum @payload_checksum by
+ * creating an OSTREE_OBJECT_TYPE_PAYLOAD_LINK symlink whose target is
+ * _OSTREE_PAYLOAD_LINK_PREFIX followed by the loose path of @checksum.
+ *
+ * Does nothing (and returns TRUE) when reflinks are unsupported, the repo is
+ * not in one of the bare modes, @file_info is not a regular file,
+ * @payload_checksum is NULL, or the file size is below
+ * self->payload_link_threshold.  EEXIST from symlinkat() is not an error.
+ */
+static gboolean
+_create_payload_link (OstreeRepo   *self,
+                      const char   *checksum,
+                      const char   *payload_checksum,
+                      GFileInfo    *file_info,
+                      GCancellable *cancellable,
+                      GError      **error)
+{
+  gboolean reflinks_supported = FALSE;
+  if (!_check_support_reflink (self, &reflinks_supported, error))
+    return FALSE;
+
+  if (!reflinks_supported)
+    return TRUE;
+
+  if (g_file_info_get_file_type (file_info) != G_FILE_TYPE_REGULAR
+      || !G_IN_SET(self->mode, OSTREE_REPO_MODE_BARE, OSTREE_REPO_MODE_BARE_USER, OSTREE_REPO_MODE_BARE_USER_ONLY))
+    return TRUE;
+
+  if (payload_checksum == NULL || g_file_info_get_size (file_info) < self->payload_link_threshold)
+    return TRUE;
+
+  char target_buf[_OSTREE_LOOSE_PATH_MAX + _OSTREE_PAYLOAD_LINK_PREFIX_LEN];
+  strcpy (target_buf, _OSTREE_PAYLOAD_LINK_PREFIX);
+  _ostree_loose_path (target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN, checksum, OSTREE_OBJECT_TYPE_FILE, self->mode);
+
+  if (symlinkat (target_buf, commit_tmp_dfd (self), payload_checksum) < 0)
+    {
+      if (errno != EEXIST)
+        return glnx_throw_errno_prefix (error, "symlinkat");
+    }
+  else
+    {
+      g_auto(OtCleanupUnlinkat) tmp_unlinker = { commit_tmp_dfd (self), g_strdup (payload_checksum) };
+      if (!commit_path_final (self, payload_checksum, OSTREE_OBJECT_TYPE_PAYLOAD_LINK, &tmp_unlinker, cancellable, error))
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Compute the SHA256 of the content of file object @checksum, as loaded from
+ * @source, and create the corresponding payload link in @self.
+ *
+ * Does nothing (and returns TRUE) when reflinks are unsupported, @self is not
+ * in one of the bare modes, the object is not a regular file, or it is smaller
+ * than self->payload_link_threshold.
+ */
+static gboolean
+_import_payload_link (OstreeRepo   *self,
+                      OstreeRepo   *source,
+                      const char   *checksum,
+                      GCancellable *cancellable,
+                      GError      **error)
+{
+  gboolean reflinks_supported = FALSE;
+  g_autofree char *payload_checksum = NULL;
+  g_autoptr(GInputStream) is = NULL;
+  glnx_unref_object OtChecksumInstream *checksum_payload = NULL;
+  g_autoptr(GFileInfo) file_info = NULL;
+
+  if (!_check_support_reflink (self, &reflinks_supported, error))
+    return FALSE;
+
+  if (!reflinks_supported)
+    return TRUE;
+
+  if (!G_IN_SET(self->mode, OSTREE_REPO_MODE_BARE, OSTREE_REPO_MODE_BARE_USER, OSTREE_REPO_MODE_BARE_USER_ONLY))
+    return TRUE;
+
+  if (!ostree_repo_load_file (source, checksum, &is, &file_info, NULL, cancellable, error))
+    return FALSE;
+
+  if (g_file_info_get_file_type (file_info) != G_FILE_TYPE_REGULAR
+      || g_file_info_get_size (file_info) < self->payload_link_threshold)
+    return TRUE;
+
+  checksum_payload = ot_checksum_instream_new (is, G_CHECKSUM_SHA256);
+
+  guint64 remaining = g_file_info_get_size (file_info);
+  while (remaining)
+    {
+      char buf[8192];
+      gssize ret = g_input_stream_read ((GInputStream *) checksum_payload, buf,
+                                        MIN (sizeof (buf), remaining), cancellable, error);
+      if (ret < 0)
+        return FALSE;
+      /* 0 means EOF: the stream ended before the size in @file_info was
+       * reached.  Fail rather than loop forever with remaining > 0. */
+      if (ret == 0)
+        return glnx_throw (error, "Unexpected EOF reading content of object %s", checksum);
+      remaining -= ret;
+    }
+  payload_checksum = ot_checksum_instream_get_string (checksum_payload);
+
+  return _create_payload_link (self, checksum, payload_checksum, file_info, cancellable, error);
+}
+
+/* Try to fill @tmpf from an existing object with the same content.
+ *
+ * The payload link for @payload_checksum is looked up in the commit staging
+ * directory (if initialized) and then in self->objects_dir_fd.  If its target
+ * file object can be opened, @tmpf is truncated and refilled with
+ * glnx_regfile_copy_bytes() from that object; a link whose target is missing
+ * is unlinked.  If nothing was cloned, the lookup continues in
+ * self->parent_repo, if any.
+ *
+ * Returns: TRUE also when no usable link exists, in which case @tmpf is left
+ * untouched.  @file_info and @cancellable are currently unused.
+ */
+static gboolean
+_try_clone_from_payload_link (OstreeRepo   *self,
+                              const char   *payload_checksum,
+                              GFileInfo    *file_info,
+                              GLnxTmpfile  *tmpf,
+                              GCancellable *cancellable,
+                              GError      **error)
+{
+  gboolean reflinks_supported = FALSE;
+  int dfd_searches[] = { -1, self->objects_dir_fd };
+  if (self->commit_stagedir.initialized)
+    dfd_searches[0] = self->commit_stagedir.fd;
+
+  if (!_check_support_reflink (self, &reflinks_supported, error))
+    return FALSE;
+
+  if (!reflinks_supported)
+    return TRUE;
+
+  for (guint i = 0; i < G_N_ELEMENTS (dfd_searches); i++)
+    {
+      glnx_autofd int fdf = -1;
+      char loose_path_buf[_OSTREE_LOOSE_PATH_MAX];
+      char loose_path_target_buf[_OSTREE_LOOSE_PATH_MAX];
+      char target_buf[_OSTREE_LOOSE_PATH_MAX + _OSTREE_PAYLOAD_LINK_PREFIX_LEN];
+      char target_checksum[OSTREE_SHA256_STRING_LEN+1];
+      int dfd = dfd_searches[i];
+      ssize_t size;
+      if (dfd == -1)
+        continue;
+
+      _ostree_loose_path (loose_path_buf, payload_checksum, OSTREE_OBJECT_TYPE_PAYLOAD_LINK, self->mode);
+
+      size = TEMP_FAILURE_RETRY (readlinkat (dfd, loose_path_buf, target_buf, sizeof (target_buf)));
+      if (size < 0)
+        {
+          if (errno == ENOENT)
+            continue;
+          return glnx_throw_errno_prefix (error, "readlinkat");
+        }
+
+      if (size < OSTREE_SHA256_STRING_LEN + _OSTREE_PAYLOAD_LINK_PREFIX_LEN)
+        return glnx_throw (error, "invalid data size for %s", loose_path_buf);
+
+      /* Rebuild the 64-character checksum from the link target: two characters,
+       * one skipped separator character, then the following 62. */
+      sprintf (target_checksum, "%.2s%.62s", target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN, target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN + 3);
+
+      _ostree_loose_path (loose_path_target_buf, target_checksum, OSTREE_OBJECT_TYPE_FILE, self->mode);
+      if (!ot_openat_ignore_enoent (dfd, loose_path_target_buf, &fdf, error))
+        return FALSE;
+
+      if (fdf < 0)
+        {
+          /* If the link is referring to an object that doesn't exist anymore in the repository, just unlink it. */
+          if (!glnx_unlinkat (dfd, loose_path_buf, 0, error))
+            return FALSE;
+        }
+      else
+        {
+          /* This undoes all of the previous writes; we want to generate reflinked data. */
+          if (ftruncate (tmpf->fd, 0) < 0)
+            return glnx_throw_errno_prefix (error, "ftruncate");
+
+          if (glnx_regfile_copy_bytes (fdf, tmpf->fd, -1) < 0)
+            return glnx_throw_errno_prefix (error, "regfile copy");
+
+          return TRUE;
+        }
+    }
+  if (self->parent_repo)
+    return _try_clone_from_payload_link (self->parent_repo, payload_checksum, file_info, tmpf, cancellable, error);
+
+  return TRUE;
+}
+
 /* The main driver for writing a content (regfile or symlink) object.
  * There are a variety of tricky cases here; for example, bare-user
  * repos store symlinks as regular files.  Computing checksums
@@ -616,6 +854,8 @@ write_content_object (OstreeRepo *self,
   GInputStream *file_input; /* Unowned alias */
   g_autoptr(GInputStream) file_input_owned = NULL; /* We need a temporary for bare-user symlinks */
   glnx_unref_object OtChecksumInstream *checksum_input = NULL;
+  glnx_unref_object OtChecksumInstream *checksum_payload_input = NULL;
+  const GFileType object_file_type = g_file_info_get_file_type (file_info);
   if (out_csum)
     {
       /* Previously we checksummed the input verbatim; now
@@ -624,6 +864,7 @@ write_content_object (OstreeRepo *self,
        * it's not that's not a serious problem because we're still computing a
        * checksum over the data we actually use.
*/ + gboolean reflinks_supported = FALSE; g_autoptr(GBytes) header = _ostree_file_header_new (file_info, xattrs); size_t len; const guint8 *buf = g_bytes_get_data (header, &len); @@ -633,13 +830,26 @@ write_content_object (OstreeRepo *self, null_input = input = g_memory_input_stream_new_from_data ("", 0, NULL); checksum_input = ot_checksum_instream_new_with_start (input, G_CHECKSUM_SHA256, buf, len); - file_input = (GInputStream*)checksum_input; + + if (!_check_support_reflink (self, &reflinks_supported, error)) + return FALSE; + + if (xattrs == NULL || !G_IN_SET(self->mode, OSTREE_REPO_MODE_BARE, OSTREE_REPO_MODE_BARE_USER, OSTREE_REPO_MODE_BARE_USER_ONLY) || object_file_type != G_FILE_TYPE_REGULAR || + !reflinks_supported) + file_input = (GInputStream*)checksum_input; + else + { + /* The payload checksum-input reads from the full object checksum-input; this + * means it skips the header. + */ + checksum_payload_input = ot_checksum_instream_new ((GInputStream*)checksum_input, G_CHECKSUM_SHA256); + file_input = (GInputStream*)checksum_payload_input; + } } else file_input = input; gboolean phys_object_is_symlink = FALSE; - const GFileType object_file_type = g_file_info_get_file_type (file_info); switch (object_file_type) { case G_FILE_TYPE_REGULAR: @@ -765,6 +975,7 @@ write_content_object (OstreeRepo *self, } const char *actual_checksum = NULL; + g_autofree char *actual_payload_checksum = NULL; g_autofree char *actual_checksum_owned = NULL; if (!checksum_input) actual_checksum = expected_checksum; @@ -777,6 +988,9 @@ write_content_object (OstreeRepo *self, error)) return FALSE; } + + if (checksum_payload_input) + actual_payload_checksum = ot_checksum_instream_get_string (checksum_payload_input); } g_assert (actual_checksum != NULL); /* Pacify static analysis */ @@ -794,6 +1008,10 @@ write_content_object (OstreeRepo *self, g_mutex_lock (&self->txn_lock); self->txn.stats.content_objects_total++; g_mutex_unlock (&self->txn_lock); + + if (!_create_payload_link (self, 
actual_checksum, actual_payload_checksum, file_info, cancellable, error)) + return FALSE; + if (out_csum) *out_csum = ostree_checksum_to_bytes (actual_checksum); /* Note early return */ @@ -853,12 +1071,20 @@ write_content_object (OstreeRepo *self, repo_store_size_entry (self, actual_checksum, unpacked_size, stbuf.st_size); } + /* Check if a file with the same payload is present in the repository, + and in case try to reflink it */ + if (actual_payload_checksum && !_try_clone_from_payload_link (self, actual_payload_checksum, file_info, &tmpf, cancellable, error)) + return FALSE; + /* This path is for regular files */ if (!commit_loose_regfile_object (self, actual_checksum, &tmpf, uid, gid, mode, xattrs, cancellable, error)) return FALSE; + + if (!_create_payload_link (self, actual_checksum, actual_payload_checksum, file_info, cancellable, error)) + return FALSE; } /* Update statistics */ @@ -3999,7 +4225,11 @@ import_one_object_direct (OstreeRepo *dest_repo, if (!copy_detached_metadata (dest_repo, src_repo, checksum, cancellable, error)) return FALSE; } - + else if (objtype == OSTREE_OBJECT_TYPE_FILE) + { + if (!_import_payload_link (dest_repo, src_repo, checksum, cancellable, error)) + return FALSE; + } *out_was_supported = TRUE; return TRUE; } @@ -4092,7 +4322,14 @@ _ostree_repo_import_object (OstreeRepo *self, return FALSE; /* If we have it, we're done */ if (has_object) - return TRUE; + { + if (objtype == OSTREE_OBJECT_TYPE_FILE) + { + if (!_import_payload_link (self, source, checksum, cancellable, error)) + return FALSE; + } + return TRUE; + } if (OSTREE_OBJECT_TYPE_IS_META (objtype)) { diff --git a/src/libostree/ostree-repo-private.h b/src/libostree/ostree-repo-private.h index d61db28fd3..3078a9e293 100644 --- a/src/libostree/ostree-repo-private.h +++ b/src/libostree/ostree-repo-private.h @@ -53,6 +53,9 @@ G_BEGIN_DECLS #define OSTREE_SUMMARY_COLLECTION_ID "ostree.summary.collection-id" #define OSTREE_SUMMARY_COLLECTION_MAP "ostree.summary.collection-map" 
+#define _OSTREE_PAYLOAD_LINK_PREFIX "../" +#define _OSTREE_PAYLOAD_LINK_PREFIX_LEN (sizeof (_OSTREE_PAYLOAD_LINK_PREFIX) - 1) + /* Well-known keys for the additional metadata field in a commit in a ref entry * in a summary file. */ #define OSTREE_COMMIT_TIMESTAMP "ostree.commit.timestamp" @@ -161,6 +164,8 @@ struct OstreeRepo { gchar *collection_id; gboolean add_remotes_config_dir; /* Add new remotes in remotes.d dir */ gint lock_timeout_seconds; + guint64 payload_link_threshold; + gint fs_support_reflink; /* The underlying filesystem has support for ioctl (FICLONE..) */ OstreeRepo *parent_repo; }; diff --git a/src/libostree/ostree-repo-prune.c b/src/libostree/ostree-repo-prune.c index fc3cfa548e..2970228f2e 100644 --- a/src/libostree/ostree-repo-prune.c +++ b/src/libostree/ostree-repo-prune.c @@ -46,11 +46,14 @@ maybe_prune_loose_object (OtPruneData *data, GCancellable *cancellable, GError **error) { + gboolean reachable = FALSE; g_autoptr(GVariant) key = NULL; key = ostree_object_name_serialize (checksum, objtype); - if (!g_hash_table_lookup_extended (data->reachable, key, NULL, NULL)) + if (g_hash_table_lookup_extended (data->reachable, key, NULL, NULL)) + reachable = TRUE; + else { g_debug ("Pruning unneeded object %s.%s", checksum, ostree_object_type_to_string (objtype)); @@ -58,7 +61,37 @@ maybe_prune_loose_object (OtPruneData *data, { guint64 storage_size = 0; - if (objtype == OSTREE_OBJECT_TYPE_COMMIT) + if (objtype == OSTREE_OBJECT_TYPE_PAYLOAD_LINK) + { + ssize_t size; + char loose_path_buf[_OSTREE_LOOSE_PATH_MAX]; + char target_checksum[OSTREE_SHA256_STRING_LEN+1]; + char target_buf[_OSTREE_LOOSE_PATH_MAX + _OSTREE_PAYLOAD_LINK_PREFIX_LEN]; + + _ostree_loose_path (loose_path_buf, checksum, OSTREE_OBJECT_TYPE_PAYLOAD_LINK, data->repo->mode); + size = readlinkat (data->repo->objects_dir_fd, loose_path_buf, target_buf, sizeof (target_buf)); + if (size < 0) + return glnx_throw_errno_prefix (error, "readlinkat"); + + if (size < OSTREE_SHA256_STRING_LEN + 
_OSTREE_PAYLOAD_LINK_PREFIX_LEN) + return glnx_throw (error, "invalid data size for %s", loose_path_buf); + + sprintf (target_checksum, "%.2s%.62s", target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN, target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN + 3); + + g_autoptr(GVariant) target_key = ostree_object_name_serialize (target_checksum, OSTREE_OBJECT_TYPE_FILE); + + if (g_hash_table_lookup_extended (data->reachable, target_key, NULL, NULL)) + { + if (!ostree_repo_query_object_storage_size (data->repo, OSTREE_OBJECT_TYPE_FILE, target_checksum, + &storage_size, cancellable, error)) + return FALSE; + + reachable = storage_size >= data->repo->payload_link_threshold; + if (reachable) + goto exit; + } + } + else if (objtype == OSTREE_OBJECT_TYPE_COMMIT) { if (!ostree_repo_mark_commit_partial (data->repo, checksum, FALSE, error)) return FALSE; @@ -79,7 +112,9 @@ maybe_prune_loose_object (OtPruneData *data, else data->n_unreachable_content++; } - else + + exit: + if (reachable) { g_debug ("Keeping needed object %s.%s", checksum, ostree_object_type_to_string (objtype)); @@ -283,7 +318,7 @@ repo_prune_internal (OstreeRepo *self, * of traversing all commits, only refs will be used. Particularly * when combined with @depth, this is a convenient way to delete * history from the repository. - * + * * Use the %OSTREE_REPO_PRUNE_FLAGS_NO_PRUNE to just determine * statistics on objects that would be deleted, without actually * deleting them. 
diff --git a/src/libostree/ostree-repo.c b/src/libostree/ostree-repo.c
index 29352751f5..894b3f6975 100644
--- a/src/libostree/ostree-repo.c
+++ b/src/libostree/ostree-repo.c
@@ -2827,6 +2827,15 @@ reload_core_config (OstreeRepo *self,
       return FALSE;
   }
 
+  { g_autofree char *payload_threshold = NULL;
+
+    if (!ot_keyfile_get_value_with_default (self->config, "core", "payload-link-threshold", "-1",
+                                            &payload_threshold, error))
+      return FALSE;
+
+    self->payload_link_threshold = g_ascii_strtoull (payload_threshold, NULL, 10);
+  }
+
   return TRUE;
 }
 
diff --git a/tests/test-payload-link.sh b/tests/test-payload-link.sh
new file mode 100755
index 0000000000..8cf4aef506
--- /dev/null
+++ b/tests/test-payload-link.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# SPDX-License-Identifier: LGPL-2.0+
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Check that pulling into a repo with core.payload-link-threshold=0, located
+# on an XFS loop device formatted with reflink=1, creates .payload-link
+# objects, and that each link is named after the SHA256 of the file it targets.
+
+set -xeuo pipefail
+
+. $(dirname $0)/libtest.sh
+
+echo "1..1"
+
+setup_fake_remote_repo1 "archive"
+
+oldpwd=`pwd`
+gnomerepo=$(cat httpd-address)/ostree/gnomerepo
+
+SUDO="sudo --non-interactive"
+
+cleanup() {
+  cd ${oldpwd}
+  ${SUDO} umount mnt || true
+  # blkdev is still unset if we exit before losetup ran; the ":-" default
+  # keeps "set -u" from aborting the trap before the rm below.
+  test -n "${blkdev:-}" && ${SUDO} losetup -d ${blkdev} || true
+  rm -rf mnt testblk.img
+}
+trap cleanup EXIT
+
+mkdir mnt
+truncate -s 100MB testblk.img
+if ! blkdev=$(${SUDO} losetup --find --show $(pwd)/testblk.img); then
+    echo "ok # SKIP not run when cannot setup loop device"
+    exit 0
+fi
+
+${SUDO} mkfs.xfs -m reflink=1 ${blkdev}
+
+${SUDO} mount ${blkdev} mnt
+
+test_tmpdir=$(pwd)/mnt
+cd ${test_tmpdir}
+
+touch a
+if cp --reflink a b; then
+    ostree_repo_init repo
+    ${CMD_PREFIX} ostree config --repo=repo set core.payload-link-threshold 0
+    ${CMD_PREFIX} ostree --repo=repo remote add origin ${gnomerepo} --no-gpg-verify
+    ${CMD_PREFIX} ostree --repo=repo pull origin main 2>err.txt
+    if test `find repo -name '*.payload-link' | wc -l` = 0; then
+        fatal ".payload-link files not found"
+    fi
+
+    find repo -name '*.payload-link' | while read i;
+    do
+        payload_checksum=$(basename $(dirname $i))$(basename $i .payload-link)
+        payload_checksum_calculated=$(sha256sum $(readlink -f $i) | cut -d ' ' -f 1)
+        if test $payload_checksum != $payload_checksum_calculated; then
+            fatal ".payload-link has the wrong checksum"
+        fi
+    done
+    echo "ok pull creates .payload-link"
+else
+    echo "ok # SKIP no reflink support in the file system"
+fi