diff --git a/regression_tests/move_output.jl b/regression_tests/move_output.jl
index c629effc906..bfff349c23b 100644
--- a/regression_tests/move_output.jl
+++ b/regression_tests/move_output.jl
@@ -9,11 +9,11 @@ job_ids = getindex.(split.(lines, "\""), 2)
 @assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
 @assert length(job_ids) ≠ 0 # safety net
 
+# Note: cluster_data_prefix is also defined in compute_mse.jl
+cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
 if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
     commit = ENV["BUILDKITE_COMMIT"]
     branch = ENV["BUILDKITE_BRANCH"]
-    # Note: cluster_data_prefix is also defined in compute_mse.jl
-    cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
 
     @info "pwd() = $(pwd())"
     @info "branch = $(branch)"
@@ -46,3 +46,105 @@ if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
 else
     @info "ENV keys: $(keys(ENV))"
 end
+
+# `Dates` provides `Dates.unix2datetime`, used by `cleanup_central` below.
+# Importing it again is harmless if this script already loaded it.
+import Dates
+
+"""
+    get_ref_counter(p)
+
+Return the integer stored on the first line of `joinpath(p, "ref_counter.jl")`.
+
+If that file does not exist, emit a warning listing every file under `p`
+(searched recursively) whose name ends with `ref_counter.jl`, and return
+`nothing`.
+"""
+function get_ref_counter(p)
+    ref_counter_file = joinpath(p, "ref_counter.jl")
+    if !isfile(ref_counter_file)
+        found = String[]
+        for (root, _, files) in walkdir(p)
+            for file in files
+                endswith(file, "ref_counter.jl") || continue
+                push!(found, joinpath(root, file))
+            end
+        end
+        header =
+            "Expected ref counter file did not exist in path $(p).\n" *
+            "Found (recursive) `ref_counter.jl` files:"
+        # One candidate per line keeps the warning readable.
+        @warn join(vcat(header, "    " .* found), "\n")
+        return nothing
+    end
+    # Only the first line is needed, so do not read the whole file.
+    return parse(Int, readline(ref_counter_file))
+end
+
+"""
+    cleanup_central(cluster_data_prefix; keep_latest_n = 0, keep_latest_ref_counters = 5)
+
+Report which dataset folders under `cluster_data_prefix` are old enough to be
+removed. Deletion itself is still disabled, so this only logs the candidates.
+
+Folders come from `sorted_dataset_folder(; dir = cluster_data_prefix)`; the last
+entry is treated as the most recently merged folder. A folder is a candidate
+when its reference counter (see `get_ref_counter`) plus
+`keep_latest_ref_counters` is still smaller than the counter of the last folder.
+
+# Keywords
+- `keep_latest_n`: number of entries at the end of the sorted list that are
+  never inspected.
+- `keep_latest_ref_counters`: safety margin, in reference-counter increments.
+
+# Throws
+An `ErrorException` if an inspected folder has no `ref_counter.jl`.
+"""
+function cleanup_central(
+    cluster_data_prefix;
+    keep_latest_n = 0,
+    keep_latest_ref_counters = 5,
+)
+    @warn "Cleaning up old files on central"
+    # Sorted array of paths; `sorted_paths[end]` is the most recently merged folder.
+    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
+    isempty(sorted_paths) && return nothing
+
+    n_inspected = max(length(sorted_paths) - keep_latest_n, 0)
+    # The newest folder's counter does not change inside the loop: read it once.
+    ref_counter_last = get_ref_counter(sorted_paths[end])
+    missing_ref_counter = false
+    paths_to_delete = eltype(sorted_paths)[]
+    for path in Iterators.take(sorted_paths, n_inspected)
+        ref_counter = get_ref_counter(path)
+        if isnothing(ref_counter) || isnothing(ref_counter_last)
+            # Remember the problem and skip the comparison, which would
+            # otherwise fail on `nothing` before the error below is reported.
+            missing_ref_counter = true
+            continue
+        end
+        # Just to be safe, never delete paths whose ref counter is within
+        # `keep_latest_ref_counters` increments of the most recent one.
+        if ref_counter + keep_latest_ref_counters < ref_counter_last
+            push!(paths_to_delete, path)
+        end
+    end
+    if missing_ref_counter
+        @show sorted_paths
+        error("Reference counters did not exist")
+    end
+    @info "Deleting files:"
+    # Iterate over the candidates themselves: there may be fewer of them than
+    # inspected folders.
+    for path in paths_to_delete
+        @info " (File, date): ($(path), $(Dates.unix2datetime(stat(path).mtime)))"
+    end
+    # Deletion is not enabled yet; when it is, it must also loop over
+    # `paths_to_delete`:
+    # for path in paths_to_delete
+    #     rm(path)
+    # end
+    return nothing
+end
+
+cleanup_central(cluster_data_prefix)