Skip to content

Commit

Permalink
Cleanup old folders on central
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed May 14, 2024
1 parent 0d27dbd commit 9f2ed60
Showing 1 changed file with 57 additions and 2 deletions.
59 changes: 57 additions & 2 deletions regression_tests/move_output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ job_ids = getindex.(split.(lines, "\""), 2)
# Sanity checks on the parsed job list: the number of lines mentioning
# `OrderedDict` must be exactly one more than the number of job ids found.
@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
# The comparison operator was missing here, which left the statement unparseable.
@assert length(job_ids) > 0 # safety net

# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
commit = ENV["BUILDKITE_COMMIT"]
branch = ENV["BUILDKITE_BRANCH"]
# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"

@info "pwd() = $(pwd())"
@info "branch = $(branch)"
Expand Down Expand Up @@ -46,3 +46,58 @@ if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
else
@info "ENV keys: $(keys(ENV))"
end

"""
    get_ref_counter(p)

Return the integer stored on the first line of `joinpath(p, "ref_counter.jl")`.

If that file does not exist, log a warning that lists every file found
(recursively) under `p` whose name ends in `ref_counter.jl`, one per line, and
return `nothing`. An error is thrown if the first line is not a valid `Int`.
"""
function get_ref_counter(p)
    counter_file = joinpath(p, "ref_counter.jl")
    if !isfile(counter_file)
        # Collect candidates first and join them once, so that each path ends
        # up on its own line instead of being run together.
        found = String[]
        for (root, _, files) in walkdir(p)
            for name in files
                endswith(name, "ref_counter.jl") || continue
                push!(found, joinpath(root, name))
            end
        end
        listing = isempty(found) ? "  (none)" : join(("  $path" for path in found), "\n")
        msg = string(
            "Expected ref counter file did not exist in path $p.\n",
            "Found (recursive) `ref_counter.jl` files:\n",
            listing,
        )
        @warn msg
        return nothing
    end
    # Only the first line is needed, so do not read the whole file.
    return parse(Int, readline(counter_file))
end
"""
    cleanup_central(cluster_data_prefix; keep_latest_n = 0, keep_latest_ref_counters = 5)

Report which dataset folders under `cluster_data_prefix` are old enough to be
removed.

The folders come from `sorted_dataset_folder(; dir = cluster_data_prefix)`; the
last entry is treated as the most recent merged folder. A folder is selected
when its reference counter (see `get_ref_counter`) is more than
`keep_latest_ref_counters` behind that of the most recent folder. The last
`keep_latest_n` folders are never considered.

Throws an error if any inspected folder has no reference counter file.
Deletion itself is currently disabled: the selected folders are only logged.
Always returns `nothing`.
"""
function cleanup_central(
    cluster_data_prefix;
    keep_latest_n = 0,
    keep_latest_ref_counters = 5,
)
    @warn "Cleaning up old files on central"
    # Get (sorted) array of paths; the last entry is the most recent merged folder.
    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
    isempty(sorted_paths) && return nothing

    n_candidates = max(length(sorted_paths) - keep_latest_n, 0)
    paths_to_delete = eltype(sorted_paths)[]
    missing_counter = false
    if n_candidates > 0
        # The newest folder's counter does not change inside the loop: read it once.
        ref_counter_last = get_ref_counter(last(sorted_paths))
        missing_counter = isnothing(ref_counter_last)
        for path in Iterators.take(sorted_paths, n_candidates)
            ref_counter = get_ref_counter(path)
            if isnothing(ref_counter)
                missing_counter = true
            end
            # Without both counters there is nothing to compare; keep scanning so
            # every missing file is warned about, then fail below.
            missing_counter && continue
            # Just to be safe, do not delete any path whose ref counter is
            # within the most recent `keep_latest_ref_counters` increments.
            if ref_counter + keep_latest_ref_counters < ref_counter_last
                push!(paths_to_delete, path)
            end
        end
    end
    if missing_counter
        @show sorted_paths
        error("Reference counters did not exist")
    end

    @info "Deleting files:"
    # Iterate over the selected paths themselves; there are usually fewer of
    # them than candidates, so indexing by candidate count would go out of bounds.
    for f in paths_to_delete
        @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime)))"
    end
    # Deletion is intentionally left disabled (dry run). To enable it:
    # for f in paths_to_delete
    #     rm(f) # NOTE(review): folders likely need `recursive = true` - confirm
    # end
    return nothing
end

# Run the cleanup pass on every execution of this script (it sits outside the
# pipeline-slug check above). The `rm` loop inside `cleanup_central` is
# commented out, so this currently only logs the folders that would be removed.
cleanup_central(cluster_data_prefix)

0 comments on commit 9f2ed60

Please sign in to comment.