From 7a41a77d356fa518b0f3edb8124f8c7a20315bf2 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 2 Feb 2018 14:01:08 +0100 Subject: [PATCH] commit: add logic for .payload-link When a new object is added to the repository, create a $PAYLOAD-SHA256.payload-link symlink file as well. The target of the symlink is the checksum of the object that was added to the repository. Whenever we add a new object file, in addition to looking up whether the file is already present with the same checksum, we also check whether an object with the same payload is in the repository. If a file with the same payload is already present in the repository, we copy it with `glnx_regfile_copy_bytes`, which internally attempts to create a reflink (ioctl (..., FICLONE, ..)) to the target file if the file system supports it. This makes it possible to have objects that share the payload but have a different inode and xattrs. By default the payload-link-threshold value is G_MAXUINT64, which disables the feature. Signed-off-by: Giuseppe Scrivano --- man/ostree.repo-config.xml | 7 + src/libostree/ostree-repo-commit.c | 245 +++++++++++++++++++++++++- src/libostree/ostree-repo-private.h | 5 + src/libostree/ostree-repo-prune.c | 44 ++++- src/libostree/ostree-repo.c | 9 + tests/installed/itest-payload-link.sh | 86 +++++++++ 6 files changed, 388 insertions(+), 8 deletions(-) create mode 100755 tests/installed/itest-payload-link.sh diff --git a/man/ostree.repo-config.xml b/man/ostree.repo-config.xml index aa3abd54f1..cbc605f775 100644 --- a/man/ostree.repo-config.xml +++ b/man/ostree.repo-config.xml @@ -145,6 +145,13 @@ Boston, MA 02111-1307, USA. + + payload-link-threshold + An integer value that specifies a minimum file size for creating + a payload link. By default it is disabled. 
+ + + diff --git a/src/libostree/ostree-repo-commit.c b/src/libostree/ostree-repo-commit.c index 70068138c9..875743b4d3 100644 --- a/src/libostree/ostree-repo-commit.c +++ b/src/libostree/ostree-repo-commit.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "otutil.h" #include "ostree.h" @@ -40,6 +42,12 @@ #include "ostree-checksum-input-stream.h" #include "ostree-varint.h" +/* The standardized version of BTRFS_IOC_CLONE */ +#ifndef FICLONE +#define FICLONE _IOW(0x94, 9, int) +#endif + + /* If fsync is enabled and we're in a txn, we write into a staging dir for * commit, but we also allow direct writes into objects/ for e.g. hardlink * imports. @@ -589,6 +597,192 @@ create_regular_tmpfile_linkable_with_content (OstreeRepo *self, return TRUE; } +static gboolean +_check_support_reflink (OstreeRepo *self, gboolean *supported, GError **error) +{ + /* We have not checked yet if the file system supports reflinks, do it here */ + if (g_atomic_int_get (&self->fs_support_reflink) == 0) + { + g_auto(GLnxTmpfile) src_tmpf = { 0, }; + g_auto(GLnxTmpfile) dest_tmpf = { 0, }; + + if (!glnx_open_tmpfile_linkable_at (commit_tmp_dfd (self), ".", O_RDWR|O_CLOEXEC, + &src_tmpf, error)) + return FALSE; + if (!glnx_open_tmpfile_linkable_at (commit_tmp_dfd (self), ".", O_WRONLY|O_CLOEXEC, + &dest_tmpf, error)) + return FALSE; + + if (ioctl (dest_tmpf.fd, FICLONE, src_tmpf.fd) == 0) + g_atomic_int_set (&self->fs_support_reflink, 1); + else if (errno == EOPNOTSUPP) /* Ignore other kind of errors as they might be temporary failures */ + g_atomic_int_set (&self->fs_support_reflink, -1); + } + *supported = g_atomic_int_get (&self->fs_support_reflink) >= 0; + return TRUE; +} + +static gboolean +_create_payload_link (OstreeRepo *self, + const char *checksum, + const char *payload_checksum, + GFileInfo *file_info, + GCancellable *cancellable, + GError **error) +{ + gboolean reflinks_supported = FALSE; + if (!_check_support_reflink (self, &reflinks_supported, error)) + 
return FALSE; + + if (!reflinks_supported) + return TRUE; + + if (g_file_info_get_file_type (file_info) != G_FILE_TYPE_REGULAR + || !G_IN_SET(self->mode, OSTREE_REPO_MODE_BARE, OSTREE_REPO_MODE_BARE_USER, OSTREE_REPO_MODE_BARE_USER_ONLY)) + return TRUE; + + if (payload_checksum == NULL || g_file_info_get_size (file_info) < self->payload_link_threshold) + return TRUE; + + char target_buf[_OSTREE_LOOSE_PATH_MAX + _OSTREE_PAYLOAD_LINK_PREFIX_LEN]; + strcpy (target_buf, _OSTREE_PAYLOAD_LINK_PREFIX); + _ostree_loose_path (target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN, checksum, OSTREE_OBJECT_TYPE_FILE, self->mode); + + if (symlinkat (target_buf, commit_tmp_dfd (self), payload_checksum) < 0) + { + if (errno != EEXIST) + return glnx_throw_errno_prefix (error, "symlinkat"); + } + else + { + g_auto(OtCleanupUnlinkat) tmp_unlinker = { commit_tmp_dfd (self), g_strdup (payload_checksum) }; + if (!commit_path_final (self, payload_checksum, OSTREE_OBJECT_TYPE_PAYLOAD_LINK, &tmp_unlinker, cancellable, error)) + return FALSE; + } + + return TRUE; +} + +static gboolean +_import_payload_link (OstreeRepo *self, + OstreeRepo *source, + const char *checksum, + GCancellable *cancellable, + GError **error) +{ + gboolean reflinks_supported = FALSE; + g_autofree char *payload_checksum = NULL; + g_autoptr(GInputStream) is = NULL; + glnx_unref_object OtChecksumInstream *checksum_payload = NULL; + g_autoptr(GFileInfo) file_info = NULL; + + if (!_check_support_reflink (self, &reflinks_supported, error)) + return FALSE; + + if (!reflinks_supported) + return TRUE; + + if (!G_IN_SET(self->mode, OSTREE_REPO_MODE_BARE, OSTREE_REPO_MODE_BARE_USER, OSTREE_REPO_MODE_BARE_USER_ONLY)) + return TRUE; + + if (!ostree_repo_load_file (source, checksum, &is, &file_info, NULL, cancellable, error)) + return FALSE; + + if (g_file_info_get_file_type (file_info) != G_FILE_TYPE_REGULAR + || g_file_info_get_size (file_info) < self->payload_link_threshold) + return TRUE; + + checksum_payload = 
ot_checksum_instream_new (is, G_CHECKSUM_SHA256); + + guint64 remaining = g_file_info_get_size (file_info); + while (remaining) + { + char buf[8192]; + gssize ret = g_input_stream_read ((GInputStream *) checksum_payload, buf, + MIN (sizeof (buf), remaining), cancellable, error); + if (ret <= 0) /* 0 is a premature EOF: fail instead of looping forever with remaining > 0 */ + return ret < 0 ? FALSE : glnx_throw (error, "Unexpected end of stream reading payload of object %s", checksum); + remaining -= ret; + } + payload_checksum = ot_checksum_instream_get_string (checksum_payload); + + return _create_payload_link (self, checksum, payload_checksum, file_info, cancellable, error); +} + +static gboolean +_try_clone_from_payload_link (OstreeRepo *self, + const char *payload_checksum, + GFileInfo *file_info, + GLnxTmpfile *tmpf, + GCancellable *cancellable, + GError **error) +{ + gboolean reflinks_supported = FALSE; + int dfd_searches[] = { -1, self->objects_dir_fd }; /* slot 0 is the commit staging dir when one is initialized, then objects/ */ + if (self->commit_stagedir.initialized) + dfd_searches[0] = self->commit_stagedir.fd; + + if (!_check_support_reflink (self, &reflinks_supported, error)) + return FALSE; + + if (!reflinks_supported) + return TRUE; + + for (guint i = 0; i < G_N_ELEMENTS (dfd_searches); i++) + { + glnx_autofd int fdf = -1; + char loose_path_buf[_OSTREE_LOOSE_PATH_MAX]; + char loose_path_target_buf[_OSTREE_LOOSE_PATH_MAX]; + char target_buf[_OSTREE_LOOSE_PATH_MAX + _OSTREE_PAYLOAD_LINK_PREFIX_LEN]; + char target_checksum[OSTREE_SHA256_STRING_LEN+1]; + int dfd = dfd_searches[i]; + ssize_t size; + if (dfd == -1) + continue; + + _ostree_loose_path (loose_path_buf, payload_checksum, OSTREE_OBJECT_TYPE_PAYLOAD_LINK, self->mode); + + size = TEMP_FAILURE_RETRY (readlinkat (dfd, loose_path_buf, target_buf, sizeof (target_buf))); + if (size < 0) + { + if (errno == ENOENT) + continue; + return glnx_throw_errno_prefix (error, "readlinkat"); + } + + if (size < OSTREE_SHA256_STRING_LEN + _OSTREE_PAYLOAD_LINK_PREFIX_LEN + 1) /* +1: the parse below also skips the '/' after the 2-char directory part; readlinkat does not NUL-terminate */ + return glnx_throw (error, "invalid data size for %s", loose_path_buf); + + sprintf (target_checksum, "%.2s%.62s", target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN, target_buf + 
_OSTREE_PAYLOAD_LINK_PREFIX_LEN + 3); + + _ostree_loose_path (loose_path_target_buf, target_checksum, OSTREE_OBJECT_TYPE_FILE, self->mode); + if (!ot_openat_ignore_enoent (dfd, loose_path_target_buf, &fdf, error)) + return FALSE; + + if (fdf < 0) + { + /* If the link is referring to an object that doesn't exist anymore in the repository, just unlink it. */ + if (!glnx_unlinkat (dfd, loose_path_buf, 0, error)) + return FALSE; + } + else + { + /* This undoes all of the previous writes; we want to generate reflinked data. ftruncate() leaves the file offset at the old end, so rewind as well in case the copy below is not a whole-file clone. */ + if (ftruncate (tmpf->fd, 0) < 0 || lseek (tmpf->fd, 0, SEEK_SET) < 0) + return glnx_throw_errno_prefix (error, "ftruncate"); + + if (glnx_regfile_copy_bytes (fdf, tmpf->fd, -1) < 0) + return glnx_throw_errno_prefix (error, "regfile copy"); + + return TRUE; + } + } + if (self->parent_repo) + return _try_clone_from_payload_link (self->parent_repo, payload_checksum, file_info, tmpf, cancellable, error); + + return TRUE; +} + /* The main driver for writing a content (regfile or symlink) object. * There are a variety of tricky cases here; for example, bare-user * repos store symlinks as regular files. Computing checksums @@ -616,6 +810,8 @@ write_content_object (OstreeRepo *self, GInputStream *file_input; /* Unowned alias */ g_autoptr(GInputStream) file_input_owned = NULL; /* We need a temporary for bare-user symlinks */ glnx_unref_object OtChecksumInstream *checksum_input = NULL; + glnx_unref_object OtChecksumInstream *checksum_payload_input = NULL; + const GFileType object_file_type = g_file_info_get_file_type (file_info); if (out_csum) { /* Previously we checksummed the input verbatim; now @@ -624,6 +820,7 @@ write_content_object (OstreeRepo *self, * it's not that's not a serious problem because we're still computing a * checksum over the data we actually use. 
*/ + gboolean reflinks_supported = FALSE; g_autoptr(GBytes) header = _ostree_file_header_new (file_info, xattrs); size_t len; const guint8 *buf = g_bytes_get_data (header, &len); @@ -633,13 +830,26 @@ write_content_object (OstreeRepo *self, null_input = input = g_memory_input_stream_new_from_data ("", 0, NULL); checksum_input = ot_checksum_instream_new_with_start (input, G_CHECKSUM_SHA256, buf, len); - file_input = (GInputStream*)checksum_input; + + if (!_check_support_reflink (self, &reflinks_supported, error)) + return FALSE; + + if (xattrs == NULL || !G_IN_SET(self->mode, OSTREE_REPO_MODE_BARE, OSTREE_REPO_MODE_BARE_USER, OSTREE_REPO_MODE_BARE_USER_ONLY) || object_file_type != G_FILE_TYPE_REGULAR || + !reflinks_supported) + file_input = (GInputStream*)checksum_input; + else + { + /* The payload checksum-input reads from the full object checksum-input; this + * means it skips the header. + */ + checksum_payload_input = ot_checksum_instream_new ((GInputStream*)checksum_input, G_CHECKSUM_SHA256); + file_input = (GInputStream*)checksum_payload_input; + } } else file_input = input; gboolean phys_object_is_symlink = FALSE; - const GFileType object_file_type = g_file_info_get_file_type (file_info); switch (object_file_type) { case G_FILE_TYPE_REGULAR: @@ -765,6 +975,7 @@ write_content_object (OstreeRepo *self, } const char *actual_checksum = NULL; + g_autofree char *actual_payload_checksum = NULL; g_autofree char *actual_checksum_owned = NULL; if (!checksum_input) actual_checksum = expected_checksum; @@ -777,6 +988,9 @@ write_content_object (OstreeRepo *self, error)) return FALSE; } + + if (checksum_payload_input) + actual_payload_checksum = ot_checksum_instream_get_string (checksum_payload_input); } g_assert (actual_checksum != NULL); /* Pacify static analysis */ @@ -794,6 +1008,10 @@ write_content_object (OstreeRepo *self, g_mutex_lock (&self->txn_lock); self->txn.stats.content_objects_total++; g_mutex_unlock (&self->txn_lock); + + if (!_create_payload_link (self, 
actual_checksum, actual_payload_checksum, file_info, cancellable, error)) + return FALSE; + if (out_csum) *out_csum = ostree_checksum_to_bytes (actual_checksum); /* Note early return */ @@ -853,12 +1071,20 @@ write_content_object (OstreeRepo *self, repo_store_size_entry (self, actual_checksum, unpacked_size, stbuf.st_size); } + /* Check if a file with the same payload is present in the repository, + and in case try to reflink it */ + if (actual_payload_checksum && !_try_clone_from_payload_link (self, actual_payload_checksum, file_info, &tmpf, cancellable, error)) + return FALSE; + /* This path is for regular files */ if (!commit_loose_regfile_object (self, actual_checksum, &tmpf, uid, gid, mode, xattrs, cancellable, error)) return FALSE; + + if (!_create_payload_link (self, actual_checksum, actual_payload_checksum, file_info, cancellable, error)) + return FALSE; } /* Update statistics */ @@ -3999,7 +4225,11 @@ import_one_object_direct (OstreeRepo *dest_repo, if (!copy_detached_metadata (dest_repo, src_repo, checksum, cancellable, error)) return FALSE; } - + else if (objtype == OSTREE_OBJECT_TYPE_FILE) + { + if (!_import_payload_link (dest_repo, src_repo, checksum, cancellable, error)) + return FALSE; + } *out_was_supported = TRUE; return TRUE; } @@ -4092,7 +4322,14 @@ _ostree_repo_import_object (OstreeRepo *self, return FALSE; /* If we have it, we're done */ if (has_object) - return TRUE; + { + if (objtype == OSTREE_OBJECT_TYPE_FILE) + { + if (!_import_payload_link (self, source, checksum, cancellable, error)) + return FALSE; + } + return TRUE; + } if (OSTREE_OBJECT_TYPE_IS_META (objtype)) { diff --git a/src/libostree/ostree-repo-private.h b/src/libostree/ostree-repo-private.h index d61db28fd3..3078a9e293 100644 --- a/src/libostree/ostree-repo-private.h +++ b/src/libostree/ostree-repo-private.h @@ -53,6 +53,9 @@ G_BEGIN_DECLS #define OSTREE_SUMMARY_COLLECTION_ID "ostree.summary.collection-id" #define OSTREE_SUMMARY_COLLECTION_MAP "ostree.summary.collection-map" 
+#define _OSTREE_PAYLOAD_LINK_PREFIX "../" +#define _OSTREE_PAYLOAD_LINK_PREFIX_LEN (sizeof (_OSTREE_PAYLOAD_LINK_PREFIX) - 1) + /* Well-known keys for the additional metadata field in a commit in a ref entry * in a summary file. */ #define OSTREE_COMMIT_TIMESTAMP "ostree.commit.timestamp" @@ -161,6 +164,8 @@ struct OstreeRepo { gchar *collection_id; gboolean add_remotes_config_dir; /* Add new remotes in remotes.d dir */ gint lock_timeout_seconds; + guint64 payload_link_threshold; + gint fs_support_reflink; /* ioctl (FICLONE..) support on the underlying filesystem: 0 = not probed yet, 1 = supported, -1 = not supported */ OstreeRepo *parent_repo; }; diff --git a/src/libostree/ostree-repo-prune.c b/src/libostree/ostree-repo-prune.c index c8a178ec27..f0c0a97411 100644 --- a/src/libostree/ostree-repo-prune.c +++ b/src/libostree/ostree-repo-prune.c @@ -46,11 +46,14 @@ maybe_prune_loose_object (OtPruneData *data, GCancellable *cancellable, GError **error) { + gboolean reachable = FALSE; g_autoptr(GVariant) key = NULL; key = ostree_object_name_serialize (checksum, objtype); - if (!g_hash_table_lookup_extended (data->reachable, key, NULL, NULL)) + if (g_hash_table_lookup_extended (data->reachable, key, NULL, NULL)) + reachable = TRUE; + else { guint64 storage_size = 0; @@ -65,7 +68,38 @@ maybe_prune_loose_object (OtPruneData *data, if (!(flags & OSTREE_REPO_PRUNE_FLAGS_NO_PRUNE)) { - if (objtype == OSTREE_OBJECT_TYPE_COMMIT) + if (objtype == OSTREE_OBJECT_TYPE_PAYLOAD_LINK) + { + ssize_t size; + char loose_path_buf[_OSTREE_LOOSE_PATH_MAX]; + char target_checksum[OSTREE_SHA256_STRING_LEN+1]; + char target_buf[_OSTREE_LOOSE_PATH_MAX + _OSTREE_PAYLOAD_LINK_PREFIX_LEN]; + + _ostree_loose_path (loose_path_buf, checksum, OSTREE_OBJECT_TYPE_PAYLOAD_LINK, data->repo->mode); + size = readlinkat (data->repo->objects_dir_fd, loose_path_buf, target_buf, sizeof (target_buf)); + if (size < 0) + return glnx_throw_errno_prefix (error, "readlinkat"); + + if (size < OSTREE_SHA256_STRING_LEN + _OSTREE_PAYLOAD_LINK_PREFIX_LEN + 1) /* +1: the checksum parse also skips the '/' after the 2-char directory part; readlinkat does not NUL-terminate */ + return 
glnx_throw (error, "invalid data size for %s", loose_path_buf); + + sprintf (target_checksum, "%.2s%.62s", target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN, target_buf + _OSTREE_PAYLOAD_LINK_PREFIX_LEN + 3); + + g_autoptr(GVariant) target_key = ostree_object_name_serialize (target_checksum, OSTREE_OBJECT_TYPE_FILE); + + if (g_hash_table_lookup_extended (data->reachable, target_key, NULL, NULL)) + { + guint64 target_storage_size = 0; + if (!ostree_repo_query_object_storage_size (data->repo, OSTREE_OBJECT_TYPE_FILE, target_checksum, + &target_storage_size, cancellable, error)) + return FALSE; + + reachable = target_storage_size >= data->repo->payload_link_threshold; + if (reachable) + goto exit; + } + } + else if (objtype == OSTREE_OBJECT_TYPE_COMMIT) { if (!ostree_repo_mark_commit_partial (data->repo, checksum, FALSE, error)) return FALSE; @@ -82,7 +116,9 @@ maybe_prune_loose_object (OtPruneData *data, else data->n_unreachable_content++; } - else + + exit: + if (reachable) { g_debug ("Keeping needed object %s.%s", checksum, ostree_object_type_to_string (objtype)); @@ -286,7 +322,7 @@ repo_prune_internal (OstreeRepo *self, * of traversing all commits, only refs will be used. Particularly * when combined with @depth, this is a convenient way to delete * history from the repository. - * + * * Use the %OSTREE_REPO_PRUNE_FLAGS_NO_PRUNE to just determine * statistics on objects that would be deleted, without actually * deleting them. 
diff --git a/src/libostree/ostree-repo.c b/src/libostree/ostree-repo.c index 8d94f71a55..5798ae2e78 100644 --- a/src/libostree/ostree-repo.c +++ b/src/libostree/ostree-repo.c @@ -2827,6 +2827,15 @@ reload_core_config (OstreeRepo *self, return FALSE; } + { g_autofree char *payload_threshold = NULL; + + if (!ot_keyfile_get_value_with_default (self->config, "core", "payload-link-threshold", "-1", + &payload_threshold, error)) + return FALSE; + + self->payload_link_threshold = g_ascii_strtoull (payload_threshold, NULL, 10); + } + return TRUE; } diff --git a/tests/installed/itest-payload-link.sh b/tests/installed/itest-payload-link.sh new file mode 100755 index 0000000000..b28ca42a08 --- /dev/null +++ b/tests/installed/itest-payload-link.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# +# Copyright (C) 2018 Red Hat, Inc. +# +# SPDX-License-Identifier: LGPL-2.0+ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +set -xeuo pipefail + +dn=$(dirname $0) +. 
${dn}/libinsttest.sh + +echo "1..1" + +oldpwd=`pwd` + +cd /var/srv +rm repo bare-repo -rf +mkdir repo +ostree --repo=repo init --mode=archive +echo -e '[archive]\nzlib-level=1\n' >> repo/config +host_nonremoteref=$(echo ${host_refspec} | sed 's,[^:]*:,,') +ostree --repo=repo pull-local /ostree/repo ${host_commit} +ostree --repo=repo refs ${host_commit} --create=${host_nonremoteref} + +run_tmp_webserver $(pwd)/repo + +cleanup() { + cd ${oldpwd} + umount mnt || true + test -n "${blkdev:-}" && losetup -d ${blkdev} || true + rm -rf mnt testblk.img +} +trap cleanup EXIT + +mkdir mnt +truncate -s 100MB testblk.img +if ! blkdev=$(losetup --find --show $(pwd)/testblk.img); then + echo "ok # SKIP not run when cannot setup loop device" + exit 0 +fi + +mkfs.xfs -m reflink=1 ${blkdev} + +mount ${blkdev} mnt + +test_tmpdir=$(pwd)/mnt +cd ${test_tmpdir} + +touch a +if cp --reflink a b; then + mkdir repo + ostree --repo=repo init + ostree config --repo=repo set core.payload-link-threshold 0 + ostree --repo=repo remote add origin --set=gpg-verify=false $(cat ${test_tmpdir}/httpd-address) + ostree --repo=repo pull --disable-static-deltas origin ${host_nonremoteref} + if test `find repo -name '*.payload-link' | wc -l` = 0; then + fatal ".payload-link files not found" + fi + + find repo -name '*.payload-link' | while read i; + do + payload_checksum=$(basename $(dirname $i))$(basename $i .payload-link) + payload_checksum_calculated=$(sha256sum $(readlink -f $i) | cut -d ' ' -f 1) + if test $payload_checksum != $payload_checksum_calculated; then + fatal ".payload-link has the wrong checksum" + fi + done + echo "ok pull creates .payload-link" +else + echo "ok # SKIP no reflink support in the file system" +fi