From 856ac9d0e6a38e0fbf9b4cc307aa84a36a9f127d Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Tue, 31 Dec 2024 10:59:21 -0500 Subject: [PATCH] More reproducibility fixes Fixes More reproducibility bug fixes --- reproducibility_tests/ref_counter.jl | 5 +- .../reproducibility_utils.jl | 246 ++++++++-- test/unit_reproducibility_infra.jl | 446 ++++++++++++------ 3 files changed, 505 insertions(+), 192 deletions(-) diff --git a/reproducibility_tests/ref_counter.jl b/reproducibility_tests/ref_counter.jl index 736015e973..6b26c33572 100644 --- a/reproducibility_tests/ref_counter.jl +++ b/reproducibility_tests/ref_counter.jl @@ -1,4 +1,4 @@ -193 +194 # **README** # @@ -21,6 +21,9 @@ #= +194 +- Reproducibility infrastructure fixes. + 193 - More reproducibility infrastructure fixes. diff --git a/reproducibility_tests/reproducibility_utils.jl b/reproducibility_tests/reproducibility_utils.jl index 8db107c62a..3e08beb0bc 100644 --- a/reproducibility_tests/reproducibility_utils.jl +++ b/reproducibility_tests/reproducibility_utils.jl @@ -86,9 +86,11 @@ end function all_files_in_dir(dir) all_files = String[] - for (root, dirs, files) in walkdir(dir; follow_symlinks = true) + for (root, dirs, files) in walkdir(dir) for file in files - push!(all_files, joinpath(root, file)) + f = joinpath(root, file) + isfile(f) || continue # avoid symlinks + push!(all_files, f) end end return all_files @@ -365,7 +367,7 @@ Return a hash from the contents of all Julia files found recursively in `dir` """ function source_checksum(dir = pwd()) jl_files = String[] - for (root, dirs, files) in walkdir(dir; follow_symlinks = true) + for (root, dirs, files) in walkdir(dir) for file in files endswith(file, ".jl") && push!(jl_files, joinpath(root, file)) end @@ -377,6 +379,20 @@ function source_checksum(dir = pwd()) return hash(joined_contents) end + +""" + source_has_changed( + n = 5, + root_dir = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + ref_counter_PR = read_ref_counter(joinpath(@__DIR__, "ref_counter.jl")), + skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + src_dir = dirname(@__DIR__), + ) + +Returns a Boolean indicating if the `.jl` files in `src_dir` have changed base +on `latest_comparable_dirs` (please see the argument list in the +`latest_comparable_dirs` documentation). +""" function source_has_changed(; n = 5, root_dir = "/central/scratch/esm/slurm-buildkite/climaatmos-main", @@ -400,24 +416,74 @@ function source_has_changed(; end end -rm_folder(path; strip_folder) = - joinpath(filter(x -> !occursin(strip_folder, x), splitpath(path))...) +""" + strip_output_active_folder(folder) + +Returns "" if `folder` is `"output_active"` or in the form `output_active_XXXX` +where `X` are integers between 0 and 9 +""" +function strip_output_active_folder(folder) + if folder == "output_active" + return "" + elseif occursin("output_", folder) && + length(folder) == length("output_XXXX") + is_active_output_folder = true + rfolder = reverse(folder) + for i in 1:4 + try + parse(Int, rfolder[i]) + catch + is_active_output_folder = false + end + end + if is_active_output_folder + return "" + else + return folder + end + else + return folder + end +end + +""" + strip_output_active_path(path) + +Applies `strip_output_active_folder` to all folders in the given path. +""" +strip_output_active_path(path) = + joinpath(map(x -> strip_output_active_folder(x), splitpath(path))...) 
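# Aside (not part of this patch): the digit check in `strip_output_active_folder`
# above can be expressed more compactly with a regex. The sketch below is only
# illustrative; `is_output_dir` is a name introduced here, not an identifier
# defined by this PR. It accepts exactly the folder names that
# `strip_output_active_folder` strips: "output_active" and "output_" followed
# by four digits.
is_output_dir(folder) =
    folder == "output_active" || occursin(r"^output_\d{4}$", folder)

# Usage sketch:
#   is_output_dir("output_active")  # true
#   is_output_dir("output_1234")    # true
#   is_output_dir("output_1A34")    # false -> such a folder is kept in the path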
+ +print_dir_tree(dir) = print_dir_tree(stdout, dir) +print_dir_tree(io::IO, dir) = println(io, string_dir_tree(dir)) + +function string_dir_tree(dir) + s = "Files in `$dir`\n:" + for (root, _, files) in walkdir(dir) + for file in files + f = joinpath(root, file) + isfile(f) || continue # rm symlink folders (included but not files) + s *= " $f\n" + end + end + return s +end """ move_data_to_save_dir(; - dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci", - commit = get(ENV, "BUILDKITE_COMMIT", nothing), branch = get(ENV, "BUILDKITE_BRANCH", nothing), in_merge_queue = startswith(branch, "gh-readonly-queue/main/"), dirs_src, - strip_folder = Pair("output_active", ""), ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), ref_counter_PR = read_ref_counter(ref_counter_file_PR), skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), n_hash_characters = 7, repro_folder = "reproducibility_bundle", + strip_folder = strip_output_active_path, ) Moves data in the following way: @@ -444,19 +510,19 @@ Data movement will occur when this function is called: code in the latest comparable reference """ function move_data_to_save_dir(; - dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci", - commit = get(ENV, "BUILDKITE_COMMIT", nothing), branch = get(ENV, "BUILDKITE_BRANCH", nothing), in_merge_queue = startswith(branch, "gh-readonly-queue/main/"), dirs_src, - strip_folder = "output_active", ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl"), ref_counter_PR = read_ref_counter(ref_counter_file_PR), skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci", + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), n_hash_characters = 7, repro_folder = "reproducibility_bundle", + strip_folder = strip_output_active_path, ) buildkite_ci || return nothing @@ -471,46 +537,44 @@ function move_data_to_save_dir(; branch == "main" && source_has_changed(; n = 1, root_dir = dest_root, ref_counter_PR, skip) ) - commit_sha = commit[1:min(n_hash_characters, length(commit))] - mkpath(dest_root) - dest_dir = joinpath(dest_root, commit_sha) - mkpath(dest_dir) - dest_repro = joinpath(dest_dir, repro_folder) - mkpath(dest_repro) - # Always move reproducibility data, so that we - # can compare against multiple references - for src in dirs_src - dst = joinpath(dest_repro, basename(src)) - debug_reproducibility() && @info "Repro: moving $src to $dst" - mv(src, dst; force = true) + (; files_src, files_dest) = save_dir_in_out_list(; + dirs_src, + dest_root, + commit, + n_hash_characters, + repro_folder, + strip_folder, + ) + if debug_reproducibility() + @show repro_folder + @show dirs_src + @show dest_root + @show files_dest + @show files_src + @show isfile.(files_src) + println("******") + foreach(print_dir_tree, dirs_src) + println("******") + print_dir_tree(dest_root) + println("******") end - for dst in all_files_in_dir(dest_repro) - dst_new = rm_folder(dst; strip_folder) - if debug_reproducibility() - @show isfile(dst) - @show dst - @show dst_new - end - if dst ≠ dst_new - mkpath(dirname(dst_new)) - debug_reproducibility() && - @info "Repro: re-moving $dst to $dst_new" - mv(dst, dst_new; force = true) - end + for (src, 
dest) in zip(files_src, files_dest) + @show src + @show dest + @assert isfile(src) + mkpath(dirname(dest)) + mv(src, dest; force = true) end + dest_repro = destination_directory(; + dest_root, + commit, + n_hash_characters, + repro_folder, + ) ref_counter_file_main = joinpath(dest_repro, "ref_counter.jl") debug_reproducibility() && @info "Repro: moving $ref_counter_file_PR to $ref_counter_file_main" mv(ref_counter_file_PR, ref_counter_file_main; force = true) - if debug_reproducibility() - println("####################### SRC") - for src in dirs_src - @info(string_all_files_in_dir(src)) - end - println("####################### DST") - @info(string_all_files_in_dir(dest_repro)) - println("#######################") - end else if debug_reproducibility() @warn "Repro: skipping data movement" @@ -526,6 +590,98 @@ function move_data_to_save_dir(; end end + +""" + save_dir_transform( + src; + job_id, + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", + strip_folder = strip_output_active_path, + ) + +Returns the output file, to be saved, given: + - `src` the source file + - `job_id` the job ID + - `dest_root` the destination root directory + - `commit` the commit hash + - `n_hash_characters` truncates the commit hash to given number of characters + - `repro_folder` reproducibility folder + - `strip_folder` function to strip folders in output path +""" +function save_dir_transform( + src; + job_id, + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", + strip_folder = strip_output_active_path, +) + dest_repro = destination_directory(; + dest_root, + commit, + n_hash_characters, + repro_folder, + ) + src_filename = basename(src) + dst = joinpath(dest_repro, job_id, src_filename) + return strip_output_active_path(dst) +end + +""" + destination_directory(; + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", + ) + +Return the reproducibility destination directory: +`root/commit_sha/repro_folder`, given: + - `dest_root` the destination root directory + - `commit` the commit hash + - `n_hash_characters` truncates the commit hash to given number of characters + - `repro_folder` reproducibility folder +""" +function destination_directory(; + dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main", + commit = get(ENV, "BUILDKITE_COMMIT", nothing), + n_hash_characters = 7, + repro_folder = "reproducibility_bundle", +) + commit_sha = commit[1:min(n_hash_characters, length(commit))] + return joinpath(dest_root, commit_sha, repro_folder) +end + +""" + save_dir_in_out_list + +Returns two vectors of strings, containing input and output files, for moving +data from the computed to saved directories. + +```julia +(; files_src, files_dest) = save_dir_in_out_list(; dirs_src) +for (src, dest) in zip(files_src, files_dest) + mv(src, dest; force = true) +end +``` +""" +function save_dir_in_out_list(; dirs_src, kwargs...) 
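    # Each entry of `dirs_src` is treated as one job: its basename becomes the
    # `job_id`, every file collected by `all_files_in_dir` under it becomes a
    # source entry, and `save_dir_transform` (which receives the remaining
    # keyword arguments) maps it to
    # `dest_root/<truncated commit sha>/<repro_folder>/<job_id>/<file name>`,
    # so any nesting below the job directory (e.g. `output_active/`) is
    # flattened away in the saved layout.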
+ files_dest = String[] + files_src = String[] + for src_dir in dirs_src + job_id = basename(src_dir) + for src in all_files_in_dir(src_dir) + push!(files_src, src) + push!(files_dest, save_dir_transform(src; job_id, kwargs...)) + end + end + return (; files_src, files_dest) +end + parse_file(file) = eval(Meta.parse(join(readlines(file)))) # parse_file(file) = parse_file_json(file) # doesn't work for some reason parse_file_json(file) = diff --git a/test/unit_reproducibility_infra.jl b/test/unit_reproducibility_infra.jl index 145f98fac8..5afe803d3d 100644 --- a/test/unit_reproducibility_infra.jl +++ b/test/unit_reproducibility_infra.jl @@ -683,6 +683,306 @@ end end end +@testset "Reproducibility infrastructure: save_dir_transform" begin + make_and_cd() do dir + job_id = "job_id" + commit = "commit_sha" + n_hash_characters = 10 + output = "output_active" + strip_folder = output + repro_folder = "rbundle" + src = joinpath("$job_id", "$output", "$repro_folder", "prog_state.hdf5") + dst = joinpath("$commit", "$repro_folder", "$job_id", "prog_state.hdf5") + @test save_dir_transform( + src; + dest_root = dir, + job_id, + commit, + n_hash_characters, + repro_folder, + strip_folder, + ) == joinpath(dir, dst) + + job_id = "job_id" + commit = "commit_sha" + n_hash_characters = 10 + output = "output_active" + strip_folder = output + repro_folder = "rbundle" + src = joinpath("$job_id", "$output", "prog_state.hdf5") + dst = joinpath("$commit", "$repro_folder", "$job_id", "prog_state.hdf5") + @test save_dir_transform( + src; + dest_root = dir, + job_id, + commit, + n_hash_characters, + repro_folder, + strip_folder, + ) == joinpath(dir, dst) + end +end + +@testset "Reproducibility infrastructure: strip_output_active_path" begin + @test strip_output_active_path(joinpath("a", "b", "c")) == + joinpath("a", "b", "c") + @test strip_output_active_path(joinpath("a", "output_active", "c")) == + joinpath("a", "c") + @test strip_output_active_path(joinpath("a", "output_1234", "c")) == + joinpath("a", "c") + @test strip_output_active_path(joinpath("a", "output_1A34", "c")) == + joinpath("a", "output_1A34", "c") +end + +@testset "Reproducibility infrastructure: save_dir_in_out_list" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1, "repro_bundle") + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2, "repro_bundle") + + make_file_with_contents(computed_dir, "file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = + make_ref_file_counter(3, computed_dir, "repro_bundle") + job_id_1 = joinpath(computed_dir, "repro_bundle", "job_id_1") + job_id_2 = joinpath(computed_dir, "repro_bundle", "job_id_2") + + mkpath(joinpath(job_id_1, "output_active")) + file = joinpath(job_id_1, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + mkpath(joinpath(job_id_2, "output_active")) + file = joinpath(job_id_2, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == 
source_checksum(computed_dir) + + repro_folder = "repro_bundle" + (; files_src, files_dest) = save_dir_in_out_list(; + dirs_src = [job_id_1, job_id_2], + dest_root = save_dir, + commit = "commit_sha", + n_hash_characters = 10, + repro_folder, + strip_folder = "output_active", + ) + + @test files_src[1] == joinpath( + computed_dir, + "repro_bundle", + "job_id_1", + "output_active", + "ref_prog_state.dat", + ) + @test files_src[2] == joinpath( + computed_dir, + "repro_bundle", + "job_id_2", + "output_active", + "ref_prog_state.dat", + ) + @test files_dest[1] == joinpath( + save_dir, + "commit_sha", + "repro_bundle", + "job_id_1", + "ref_prog_state.dat", + ) + @test files_dest[1] == joinpath( + save_dir, + "commit_sha", + "repro_bundle", + "job_id_1", + "ref_prog_state.dat", + ) + + end +end + +@testset "Reproducibility infrastructure: move_data_to_save_dir legacy folder structure" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1) + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2) + + make_file_with_contents(computed_dir, "file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = make_ref_file_counter(3, computed_dir) + job_id_1 = joinpath(computed_dir, "job_id_1") + job_id_2 = joinpath(computed_dir, "job_id_2") + + mkpath(joinpath(job_id_1, "output_active")) + file = joinpath(job_id_1, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + mkpath(joinpath(job_id_2, "output_active")) + file = joinpath(job_id_2, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == source_checksum(computed_dir) + + repro_folder = "repro_bundle" + repro_dir = joinpath(save_dir, "hash_new", repro_folder) + move_data_to_save_dir(; + dest_root = save_dir, + buildkite_ci = true, + commit = "hash_new", + n_hash_characters = length("hash_new"), + branch = "unit_test_move_data_to_save_dir", + in_merge_queue = true, + dirs_src = [job_id_1, job_id_2], + ref_counter_file_PR = joinpath( + ref_counter_file_dir, + "ref_counter.jl", + ), + ref_counter_PR = 3, + repro_folder, + skip = false, + ) + @test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.dat")) + @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.dat")) + @test isfile(joinpath(repro_dir, "ref_counter.jl")) + end +end + +@testset "Reproducibility infrastructure: move_data_to_save_dir" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1, "repro_bundle") + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2, "repro_bundle") + + make_file_with_contents(computed_dir, 
"file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = + make_ref_file_counter(3, computed_dir, "repro_bundle") + job_id_1 = joinpath(computed_dir, "repro_bundle", "job_id_1") + job_id_2 = joinpath(computed_dir, "repro_bundle", "job_id_2") + + + mkpath(joinpath(job_id_1, "output_active")) + file = joinpath(job_id_1, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + mkpath(joinpath(job_id_2, "output_active")) + file = joinpath(job_id_2, "output_active", "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == source_checksum(computed_dir) + + repro_folder = "repro_bundle" + move_data_to_save_dir(; + strip_folder = "output_active", + dest_root = save_dir, + buildkite_ci = true, + commit = "hash_new", + n_hash_characters = length("hash_new"), + branch = "unit_test_move_data_to_save_dir", + in_merge_queue = true, + dirs_src = [job_id_1, job_id_2], + ref_counter_file_PR = joinpath( + ref_counter_file_dir, + "ref_counter.jl", + ), + repro_folder, + ref_counter_PR = 3, + skip = false, + ) + repro_dir = joinpath(save_dir, "hash_new", "repro_bundle") + @test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.dat")) + @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.dat")) + @test isfile(joinpath(repro_dir, "ref_counter.jl")) + end +end + +@testset "Reproducibility infrastructure: move_data_to_save_dir with symlinks" begin + mktempdir2_cd_computed() do (save_dir, computed_dir) + hash1 = joinpath(save_dir, "hash1") + hash2 = joinpath(save_dir, "hash2") + make_file_with_contents(hash1, "file_x.jl", "abc") + make_file_with_contents(hash1, "file_y.jl", "abc") + make_file_with_contents(hash1, "file_z.jl", "abc") + make_ref_file_counter(3, hash1, "repro_bundle") + + make_file_with_contents(hash2, "file_x.jl", "abc") + make_file_with_contents(hash2, "file_y.jl", "abc") + make_file_with_contents(hash2, "file_z.jl", "abc") + make_ref_file_counter(3, hash2, "repro_bundle") + + make_file_with_contents(computed_dir, "file_x.jl", "abc") + make_file_with_contents(computed_dir, "file_y.jl", "abc") + make_file_with_contents(computed_dir, "file_z.jl", "abc") + ref_counter_file_dir = + make_ref_file_counter(3, computed_dir, "repro_bundle") + job_id_1 = joinpath(computed_dir, "repro_bundle", "job_id_1") + job_id_2 = joinpath(computed_dir, "repro_bundle", "job_id_2") + + + job_id_1_sym_dir = OutputPathGenerator.generate_output_path(job_id_1) + file = joinpath(job_id_1_sym_dir, "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + job_id_2_sym_dir = OutputPathGenerator.generate_output_path(job_id_2) + file = joinpath(job_id_2_sym_dir, "ref_prog_state.dat") + open(io -> println(io, 1), file, "w") + + @test source_checksum(hash1) == source_checksum(computed_dir) + @test source_checksum(hash2) == source_checksum(computed_dir) + + repro_folder = "repro_bundle" + move_data_to_save_dir(; + strip_folder = "output_active", + dest_root = save_dir, + buildkite_ci = true, + commit = "hash_new", + n_hash_characters = length("hash_new"), + branch = "unit_test_move_data_to_save_dir", + in_merge_queue = true, + dirs_src = [job_id_1, job_id_2], + ref_counter_file_PR = joinpath( + ref_counter_file_dir, + "ref_counter.jl", + ), + repro_folder, + ref_counter_PR = 3, + skip = false, + ) + repro_dir = joinpath(save_dir, "hash_new", "repro_bundle") + @test 
isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.dat")) + @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.dat")) + @test isfile(joinpath(repro_dir, "ref_counter.jl")) + end +end + using ClimaComms using ClimaCore: Spaces, Fields, Grids, InputOutput using ClimaCore @@ -1130,150 +1430,4 @@ if pkgversion(ClimaCore) ≥ v"0.14.18" @test isfile(joinpath(repro_dir, "computed_mse_CH03.json")) end end - - @testset "Reproducibility infrastructure: move_data_to_save_dir legacy folder structure" begin - mktempdir2_cd_computed() do (save_dir, computed_dir) - grid = ExtrudedCubedSphereGrid(; - z_elem = 5, - z_min = 0, - z_max = 1, - radius = 10, - h_elem = 5, - n_quad_points = 2, - ) - space = - Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) - comms_ctx = ClimaComms.context(space) - fv = Fields.FieldVector(; x = ones(space), y = ones(space)) - - hash1 = joinpath(save_dir, "hash1") - hash2 = joinpath(save_dir, "hash2") - make_file_with_contents(hash1, "file_x.jl", "abc") - make_file_with_contents(hash1, "file_y.jl", "abc") - make_file_with_contents(hash1, "file_z.jl", "abc") - make_ref_file_counter(3, hash1) - - make_file_with_contents(hash2, "file_x.jl", "abc") - make_file_with_contents(hash2, "file_y.jl", "abc") - make_file_with_contents(hash2, "file_z.jl", "abc") - make_ref_file_counter(3, hash2) - - make_file_with_contents(computed_dir, "file_x.jl", "abc") - make_file_with_contents(computed_dir, "file_y.jl", "abc") - make_file_with_contents(computed_dir, "file_z.jl", "abc") - ref_counter_file_dir = make_ref_file_counter(3, computed_dir) - job_id_1 = joinpath(computed_dir, "job_id_1") - job_id_2 = joinpath(computed_dir, "job_id_2") - put_data_file( - job_id_1, - fv, - comms_ctx; - filename = "ref_prog_state.hdf5", - ) - put_data_file( - job_id_2, - fv, - comms_ctx; - filename = "ref_prog_state.hdf5", - ) - @test source_checksum(hash1) == source_checksum(computed_dir) - @test source_checksum(hash2) == source_checksum(computed_dir) - - repro_folder = "repro_bundle" - repro_dir = joinpath(save_dir, "hash_new", repro_folder) - move_data_to_save_dir(; - dest_root = save_dir, - buildkite_ci = true, - commit = "hash_new", - n_hash_characters = length("hash_new"), - branch = "unit_test_move_data_to_save_dir", - in_merge_queue = true, - dirs_src = [job_id_1, job_id_2], - ref_counter_file_PR = joinpath( - ref_counter_file_dir, - "ref_counter.jl", - ), - ref_counter_PR = 3, - repro_folder, - skip = false, - ) - @test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.hdf5")) - @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.hdf5")) - @test isfile(joinpath(repro_dir, "ref_counter.jl")) - end - end - - @testset "Reproducibility infrastructure: move_data_to_save_dir" begin - mktempdir2_cd_computed() do (save_dir, computed_dir) - grid = ExtrudedCubedSphereGrid(; - z_elem = 5, - z_min = 0, - z_max = 1, - radius = 10, - h_elem = 5, - n_quad_points = 2, - ) - space = - Spaces.ExtrudedFiniteDifferenceSpace(grid, Grids.CellCenter()) - comms_ctx = ClimaComms.context(space) - fv = Fields.FieldVector(; x = ones(space), y = ones(space)) - - hash1 = joinpath(save_dir, "hash1") - hash2 = joinpath(save_dir, "hash2") - make_file_with_contents(hash1, "file_x.jl", "abc") - make_file_with_contents(hash1, "file_y.jl", "abc") - make_file_with_contents(hash1, "file_z.jl", "abc") - make_ref_file_counter(3, hash1, "repro_bundle") - - make_file_with_contents(hash2, "file_x.jl", "abc") - make_file_with_contents(hash2, "file_y.jl", "abc") - make_file_with_contents(hash2, "file_z.jl", 
"abc") - make_ref_file_counter(3, hash2, "repro_bundle") - - make_file_with_contents(computed_dir, "file_x.jl", "abc") - make_file_with_contents(computed_dir, "file_y.jl", "abc") - make_file_with_contents(computed_dir, "file_z.jl", "abc") - ref_counter_file_dir = - make_ref_file_counter(3, computed_dir, "repro_bundle") - job_id_1 = joinpath(computed_dir, "repro_bundle", "job_id_1") - job_id_2 = joinpath(computed_dir, "repro_bundle", "job_id_2") - put_data_file( - joinpath(job_id_1, "output_active"), - fv, - comms_ctx; - filename = "ref_prog_state.hdf5", - ) - put_data_file( - joinpath(job_id_2, "output_active"), - fv, - comms_ctx; - filename = "ref_prog_state.hdf5", - ) - @test source_checksum(hash1) == source_checksum(computed_dir) - @test source_checksum(hash2) == source_checksum(computed_dir) - - repro_folder = "repro_bundle" - move_data_to_save_dir(; - strip_folder = "output_active", - dest_root = save_dir, - buildkite_ci = true, - commit = "hash_new", - n_hash_characters = length("hash_new"), - branch = "unit_test_move_data_to_save_dir", - in_merge_queue = true, - dirs_src = [job_id_1, job_id_2], - ref_counter_file_PR = joinpath( - ref_counter_file_dir, - "ref_counter.jl", - ), - repro_folder, - ref_counter_PR = 3, - skip = false, - ) - repro_dir = joinpath(save_dir, "hash_new", "repro_bundle") - @test isfile(joinpath(repro_dir, "job_id_1", "ref_prog_state.hdf5")) - @test isfile(joinpath(repro_dir, "job_id_2", "ref_prog_state.hdf5")) - @test isfile(joinpath(repro_dir, "ref_counter.jl")) - end - end end