Skip to content

Commit

Permalink
maintenance: add new vfs-cache-move maintenance task
Browse files Browse the repository at this point in the history
Introduce a new maintenance task, `vfs-cache-move`, that operates on
Scalar or VFS for Git repositories with a per-volume, shared object
cache (specified by `gvfs.sharedCache`) to migrate packfiles from the
repository object directory to the shared cache.

Older versions of `microsoft/git` incorrectly placed packfiles in the
repository object directory instead of the shared cache; this task will
help clean up existing clones impacted by that issue.

Signed-off-by: Matthew John Cheetham <[email protected]>
  • Loading branch information
mjcheetham committed Jan 21, 2025
1 parent 1d7817c commit 904f61a
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 0 deletions.
8 changes: 8 additions & 0 deletions Documentation/git-maintenance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ task:
* `prefetch`: hourly.
* `loose-objects`: daily.
* `incremental-repack`: daily.
* `vfs-cache-move`: weekly.
--
+
`git maintenance register` will also disable foreground maintenance by
Expand Down Expand Up @@ -158,6 +159,13 @@ pack-refs::
need to iterate across many references. See linkgit:git-pack-refs[1]
for more information.

vfs-cache-move::
The `vfs-cache-move` task only operates on Scalar or VFS for Git
repositories (cloned with either `scalar clone` or `gvfs clone`) that
have the `gvfs.sharedCache` configuration setting present. This task
migrates pack files from the repository's object directory into the
shared volume cache.

OPTIONS
-------
--auto::
Expand Down
135 changes: 135 additions & 0 deletions builtin/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Copyright (c) 2006 Shawn O. Pearce
*/
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "builtin.h"
#include "abspath.h"
#include "date.h"
Expand Down Expand Up @@ -41,6 +42,8 @@
#include "hook.h"
#include "setup.h"
#include "trace2.h"
#include "copy.h"
#include "dir.h"

#define FAILED_RUN "failed to run %s"

Expand Down Expand Up @@ -1345,6 +1348,131 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
return 0;
}

/*
 * Make `src` available at `dst`, preferring a hard link.
 *
 * If link() fails a warning (including errno) is emitted and a copy is
 * attempted instead; if the copy also fails the process dies.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		warning_errno(_("failed to link '%s' to '%s'... trying copy..."),
			      src, dst);

		/* Note the argument order: copy_file() is given dst first. */
		if (copy_file(dst, src, 0644))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}

/*
 * Migrate a single pack, and its companion files, from `srcdir` to `dstdir`.
 *
 * `pack_filename` is the pack's file name (no directory component) and must
 * end in ".pack"; the companion file names are derived by swapping that
 * suffix for ".idx", ".rev" and ".keep".
 *
 * Nothing happens when the pack has no ".idx" file in `srcdir`. Otherwise
 * the ".pack", ".keep" and ".rev" files are hard linked (or copied) into
 * `dstdir`, the ".idx" file is renamed across last, and only then are the
 * remaining source files deleted. Failing to link/copy or to rename is
 * fatal; failing to delete a source file only produces a warning.
 */
static void migrate_pack(const char *srcdir, const char *dstdir,
			 const char *pack_filename)
{
	struct stat st;
	int has_keep, has_rev, has_idx;
	char *basename, *pack_src, *keep_src, *rev_src, *idx_src,
	     *pack_dst, *keep_dst, *rev_dst, *idx_dst;

	trace2_region_enter("maintenance", "migrate_pack", the_repository);

	/* Strip the trailing ".pack" to get the name shared by all files. */
	basename = xstrndup(pack_filename,
			    strlen(pack_filename) - strlen(".pack"));
	pack_src = xstrfmt("%s/%s", srcdir, pack_filename);
	pack_dst = xstrfmt("%s/%s", dstdir, pack_filename);
	keep_src = xstrfmt("%s/%s.keep", srcdir, basename);
	keep_dst = xstrfmt("%s/%s.keep", dstdir, basename);
	rev_src = xstrfmt("%s/%s.rev", srcdir, basename);
	rev_dst = xstrfmt("%s/%s.rev", dstdir, basename);
	idx_src = xstrfmt("%s/%s.idx", srcdir, basename);
	idx_dst = xstrfmt("%s/%s.idx", dstdir, basename);

	has_keep = !stat(keep_src, &st);
	has_rev = !stat(rev_src, &st);
	has_idx = !stat(idx_src, &st);

	/* A pack without an index file is not yet ready to be migrated. */
	if (!has_idx)
		goto cleanup;

	/*
	 * Hard link (or copy if that fails) all but the index file so that
	 * other Git processes don't attempt to use the pack file from the new
	 * location yet.
	 */
	link_or_copy_or_die(pack_src, pack_dst);
	if (has_keep)
		link_or_copy_or_die(keep_src, keep_dst);
	if (has_rev)
		link_or_copy_or_die(rev_src, rev_dst);

	/*
	 * Move the index file atomically now that the other files can be found
	 * at the destination.
	 */
	if (rename(idx_src, idx_dst))
		die_errno(_("failed to move '%s' to '%s'"), idx_src, idx_dst);

	/*
	 * Now that the pack and all associated files exist at the destination
	 * we can clean up the files in the source directory.
	 *
	 * Use a short-circuiting `&&` for every optional file so that unlink()
	 * is only attempted on files that were actually found above.
	 */
	if (unlink(pack_src))
		warning_errno(_("failed to delete '%s'"), pack_src);
	if (has_keep && unlink(keep_src))
		warning_errno(_("failed to delete '%s'"), keep_src);
	if (has_rev && unlink(rev_src))
		warning_errno(_("failed to delete '%s'"), rev_src);

cleanup:
	free(idx_src);
	free(idx_dst);
	free(rev_src);
	free(rev_dst);
	free(keep_src);
	free(keep_dst);
	free(pack_src);
	free(pack_dst);
	free(basename);

	trace2_region_leave("maintenance", "migrate_pack", the_repository);
}

/*
 * Per-file callback passed to for_each_file_in_pack_dir(): migrates the
 * pack named by `file_name` into the "pack" directory beneath `object_dir`.
 *
 * `full_path` / `full_path_len` give the complete path of the file and
 * `file_name` its final component; the source directory is derived by
 * cutting "/<file_name>" off the end of `full_path`. `data` is unused.
 *
 * NOTE(review): `object_dir` is defined elsewhere in this file; presumably
 * it holds the shared cache's object directory -- confirm.
 */
static void move_pack_to_vfs_cache(const char *full_path, size_t full_path_len,
				   const char *file_name, UNUSED void *data)
{
	char *srcdir;
	struct strbuf dstdir = STRBUF_INIT;

	/*
	 * We only care about the actual pack files here.
	 * The associated .idx, .keep, .rev files will be copied in tandem
	 * with the pack file, with the index file being moved last.
	 * The original locations of the non-index files will only be deleted
	 * once all other files have been copied/moved.
	 */
	if (!ends_with(file_name, ".pack"))
		return;

	/* No cache means there's no work to do (and nothing allocated yet). */
	if (!object_dir)
		return;

	srcdir = xstrndup(full_path, full_path_len - strlen(file_name) - 1);
	strbuf_addf(&dstdir, "%s/pack", object_dir);

	/*
	 * Same source and destination means there's no work to do. Compare
	 * the two pack directories: `srcdir` already includes the trailing
	 * pack directory component, so it must be checked against `dstdir`
	 * rather than against the bare `object_dir`.
	 */
	if (fspathcmp(srcdir, dstdir.buf))
		migrate_pack(srcdir, dstdir.buf, file_name);

	/* Single exit past this point so `srcdir` is always released. */
	free(srcdir);
	strbuf_release(&dstdir);
}

/*
 * Entry point of the `vfs-cache-move` maintenance task.
 *
 * Walks the files in the pack directory of the repository's object
 * directory, handing each one to move_pack_to_vfs_cache(). Neither
 * parameter is used, and the task always reports 0.
 */
static int maintenance_task_vfs_cache_move(UNUSED struct maintenance_run_opts *opts,
					   UNUSED struct gc_config *cfg)
{
	const char *odb_path = the_repository->objects->odb->path;

	for_each_file_in_pack_dir(odb_path, move_pack_to_vfs_cache, NULL);
	return 0;
}

/*
 * Function type for a maintenance task implementation (for example
 * maintenance_task_vfs_cache_move()). A task receives the run options
 * and the gc configuration and returns an int status.
 * NOTE(review): presumably 0 means success -- confirm against the caller.
 */
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
struct gc_config *cfg);

Expand Down Expand Up @@ -1374,6 +1502,7 @@ enum maintenance_task_label {
TASK_GC,
TASK_COMMIT_GRAPH,
TASK_PACK_REFS,
TASK_VFS_CACHE_MOVE,

/* Leave as final value */
TASK__COUNT
Expand Down Expand Up @@ -1410,6 +1539,10 @@ static struct maintenance_task tasks[] = {
maintenance_task_pack_refs,
pack_refs_condition,
},
[TASK_VFS_CACHE_MOVE] = {
"vfs-cache-move",
maintenance_task_vfs_cache_move,
},
};

static int compare_tasks_by_selection(const void *a_, const void *b_)
Expand Down Expand Up @@ -1504,6 +1637,8 @@ static void initialize_maintenance_strategy(void)
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
tasks[TASK_PACK_REFS].enabled = 1;
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
tasks[TASK_VFS_CACHE_MOVE].enabled = 1;
tasks[TASK_VFS_CACHE_MOVE].schedule = SCHEDULE_WEEKLY;
}
}

Expand Down
35 changes: 35 additions & 0 deletions t/t7900-maintenance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1011,4 +1011,39 @@ test_expect_success 'repacking loose objects is quiet' '
)
'

test_expect_success 'vfs-cache-move task' '
	# Remove both the repository and the shared cache created beside it
	# so nothing is left behind in the trash directory.
	test_when_finished "rm -rf repo cache" &&
	git init repo &&
	(
		cd repo &&
		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.vfs-cache-move.enabled true &&
		git config set maintenance.vfs-cache-move.auto 1 &&

		# Create a pack (rather than loose objects) in the local
		# object directory for the task to migrate.
		printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
			git -c fastimport.unpackLimit=0 fast-import &&

		# Record the pack-related files before the run, and where
		# each of them is expected to end up in the shared cache.
		find .git/objects/pack \
			-type f \
			\( -name "*.pack" \
			-o -name "*.idx" \
			-o -name "*.keep" \
			-o -name "*.rev" \) >src.txt &&
		sed "s|.*/|../cache/pack/|" src.txt >dst.txt &&
		mkdir -p ../cache/pack &&

		git maintenance run &&

		while IFS= read -r f; do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&
		while IFS= read -r f; do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'

test_done

0 comments on commit 904f61a

Please sign in to comment.