Skip to content

Commit

Permalink
Cleanup old folders on central
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed May 9, 2024
1 parent e90f40e commit b290ecc
Showing 1 changed file with 46 additions and 2 deletions.
48 changes: 46 additions & 2 deletions regression_tests/move_output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ job_ids = getindex.(split.(lines, "\""), 2)
# Consistency check on the parsed input: the number of lines mentioning
# "OrderedDict" must equal the number of extracted job ids plus one.
@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
# Require at least one job id. The comparison operator was missing here, which
# made `0` the assertion message and the Int `length(job_ids)` the condition
# (a non-boolean condition throws a TypeError).
@assert length(job_ids) > 0 # safety net

# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
commit = ENV["BUILDKITE_COMMIT"]
branch = ENV["BUILDKITE_BRANCH"]
# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"

@info "pwd() = $(pwd())"
@info "branch = $(branch)"
Expand Down Expand Up @@ -46,3 +46,47 @@ if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
else
@info "ENV keys: $(keys(ENV))"
end

"""
    get_ref_counter(p)

Read the integer stored on the first line of `ref_counter.jl` inside directory `p`.

Returns the parsed `Int`. When the file does not exist, a warning listing the
contents of `p` is logged and `nothing` is returned instead.
"""
function get_ref_counter(p)
    counter_file = joinpath(p, "ref_counter.jl")
    if isfile(counter_file)
        # Only the first line is parsed; anything after it is ignored.
        first_line = first(readlines(counter_file))
        return parse(Int, first_line)
    end
    @warn "Expected ref counter file did not exist in path $p\n path contents:\n$(readdir(p))"
    return nothing
end
"""
    cleanup_central(cluster_data_prefix; keep_latest_n = 10, keep_latest_ref_counters = 5)

Select and log the dataset folders under `cluster_data_prefix` that are old
enough to be removed.

The folders come from `sorted_dataset_folder(; dir = cluster_data_prefix)`,
which is expected to return them sorted so that the last entry is the most
recently merged folder. A folder is selected when both conditions hold:

- it is not among the last `keep_latest_n` entries, and
- its reference counter (see `get_ref_counter`) is more than
  `keep_latest_ref_counters` increments behind the counter of the last entry.

Deletion itself is still disabled; the selected folders are only logged.

# Keywords
- `keep_latest_n`: number of most recent folders that are never selected.
- `keep_latest_ref_counters`: number of most recent reference-counter
  increments whose folders are never selected.

# Throws
- `ErrorException` if the reference counter is missing for the last folder or
  for any candidate folder.
"""
function cleanup_central(
    cluster_data_prefix;
    keep_latest_n = 10,
    keep_latest_ref_counters = 5,
)
    @warn "Cleaning up old files on central"
    # Get (sorted) array of paths, `sorted_paths[end]`
    # is the most recent merged folder.
    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
    isempty(sorted_paths) && return nothing

    n_candidates = length(sorted_paths) - keep_latest_n
    # Same element type as `sorted_paths` instead of an untyped `[]`.
    paths_to_delete = empty(sorted_paths)
    if n_candidates > 0
        # The newest folder's counter does not change between candidates, so
        # read it once rather than on every iteration.
        ref_counter_last = get_ref_counter(sorted_paths[end])
        for path in Iterators.take(sorted_paths, n_candidates)
            ref_counter = get_ref_counter(path)
            if isnothing(ref_counter_last) || isnothing(ref_counter)
                @show sorted_paths
                error("Reference counters did not exist")
            end
            # Just to be safe, also keep any path whose ref counter is within
            # the last `keep_latest_ref_counters` increments.
            if ref_counter + keep_latest_ref_counters < ref_counter_last
                push!(paths_to_delete, path)
            end
        end
    end

    @info "Deleting files:"
    # Iterate over the selected paths themselves: `paths_to_delete` can hold
    # fewer than `n_candidates` entries, so indexing it with `1:n_candidates`
    # would go out of bounds.
    for f in paths_to_delete
        @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime)))"
    end
    # Deletion is intentionally left disabled for now. When enabling it, note
    # that removing a non-empty folder needs `recursive = true`:
    # for f in paths_to_delete
    #     rm(f; recursive = true)
    # end
    return nothing
end

# Invoke the cleanup pass at top level (outside the pipeline-slug check above),
# using the top-level `cluster_data_prefix`.
cleanup_central(cluster_data_prefix)

0 comments on commit b290ecc

Please sign in to comment.