From b290ecc4e5513f650eda56a0d7c10e29278f5615 Mon Sep 17 00:00:00 2001
From: Charles Kawczynski
Date: Wed, 8 May 2024 11:39:31 -0400
Subject: [PATCH] Cleanup old folders on central

---
 regression_tests/move_output.jl | 80 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 2 deletions(-)

diff --git a/regression_tests/move_output.jl b/regression_tests/move_output.jl
index c629effc906..62a70181f98 100644
--- a/regression_tests/move_output.jl
+++ b/regression_tests/move_output.jl
@@ -9,11 +9,11 @@ job_ids = getindex.(split.(lines, "\""), 2)
 @assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
 @assert length(job_ids) ≠ 0 # safety net
 
+# Note: cluster_data_prefix is also defined in compute_mse.jl
+cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
 if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
     commit = ENV["BUILDKITE_COMMIT"]
     branch = ENV["BUILDKITE_BRANCH"]
-    # Note: cluster_data_prefix is also defined in compute_mse.jl
-    cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
 
     @info "pwd() = $(pwd())"
     @info "branch = $(branch)"
@@ -46,3 +46,79 @@ if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
 else
     @info "ENV keys: $(keys(ENV))"
 end
+
+"""
+    get_ref_counter(p)
+
+Return the integer stored on the first line of `ref_counter.jl` inside folder
+`p`, or `nothing` (after logging a warning) when that file does not exist.
+
+Throws if the first line cannot be parsed as an `Int`.
+"""
+function get_ref_counter(p)
+    f = joinpath(p, "ref_counter.jl")
+    if !isfile(f)
+        # `readdir` throws when `p` is not a directory; guard it so that building
+        # the diagnostic cannot crash and the caller still receives `nothing`.
+        contents = isdir(p) ? readdir(p) : String[]
+        @warn "Expected ref counter file did not exist in path $p\n path contents:\n$(contents)"
+        return nothing
+    end
+    # Only the first line is needed, so do not read the whole file.
+    return parse(Int, readline(f))
+end
+
+"""
+    cleanup_central(cluster_data_prefix)
+
+Log the dataset folders under `cluster_data_prefix` that are candidates for
+deletion. Nothing is removed yet: the `rm` step is still commented out.
+
+A folder is a candidate when it is not among the last `keep_latest_n` entries
+returned by `sorted_dataset_folder` and its reference counter is more than
+`keep_latest_ref_counters` behind the counter of the last entry. Throws if a
+required `ref_counter.jl` file is missing. Returns `nothing`.
+"""
+function cleanup_central(cluster_data_prefix)
+    @warn "Cleaning up old files on central"
+    # Get (sorted) array of paths, `pop!(sorted_paths)`
+    # is the most recent merged folder.
+    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
+    keep_latest_n = 10
+    keep_latest_ref_counters = 5
+    isempty(sorted_paths) && return nothing
+
+    # Only the first `N` folders are candidates; nothing qualifies when N <= 0.
+    N = length(sorted_paths) - keep_latest_n
+    paths_to_delete = eltype(sorted_paths)[]
+    if N > 0
+        # Loop-invariant, so read the last folder's counter only once.
+        ref_counter_last = get_ref_counter(sorted_paths[end])
+        for i in 1:N
+            ref_counter = get_ref_counter(sorted_paths[i])
+            if isnothing(ref_counter_last) || isnothing(ref_counter)
+                @show sorted_paths
+                error("Reference counters did not exist")
+            end
+            # Just to be safe, let's also make sure that we don't delete
+            # any paths with recent (let's say 5) ref counter increments ago.
+            if ref_counter + keep_latest_ref_counters < ref_counter_last
+                push!(paths_to_delete, sorted_paths[i])
+            end
+        end
+    end
+    @info "Deleting files:"
+    # Iterate over the selected paths themselves: `paths_to_delete` can hold
+    # fewer than `N` entries, so indexing it with `1:N` could go out of bounds.
+    # NOTE(review): assumes `Dates` is already in scope in this file; confirm.
+    for f in paths_to_delete
+        @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime)))"
+    end
+    # The actual deletion is still commented out:
+    # for f in paths_to_delete
+    #     rm(f)
+    # end
+    return nothing
+end
+
+cleanup_central(cluster_data_prefix)