Skip to content

Commit

Permalink
Merge #891
Browse files Browse the repository at this point in the history
891: Revert "make GC deterministic in distributed" r=szy21 a=szy21

Revert #821, which may be causing failures in MPI jobs.

Co-authored-by: jiahe23 <[email protected]>
  • Loading branch information
bors[bot] and jiahe23 authored Oct 5, 2022
2 parents 9066111 + 50497db commit 9c34456
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 28 deletions.
2 changes: 1 addition & 1 deletion .buildkite/scaling/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ FT="Float32"
resolutions=("low" "mid" "high")
max_procs_per_node=16 # limit this artificially for profiling
profiling=enable
exclusive=true
exclusive=false
mpi_impl="openmpi"

# set up environment and agents
Expand Down
24 changes: 0 additions & 24 deletions examples/hybrid/callbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,10 @@ function get_callbacks(parsed_args, simulation, model_spec, params)
else
call_every_dt(save_restart_func, dt_save_restart)
end

gc_callback = if simulation.is_distributed
call_every_n_steps(gc_func, 1000)
else
nothing
end

return ODE.CallbackSet(
dss_cb,
save_to_disk_callback,
save_restart_callback,
gc_callback,
additional_callbacks...,
)
end
Expand Down Expand Up @@ -403,19 +395,3 @@ function save_restart_func(integrator)
Base.close(hdfwriter)
return nothing
end

# Callback body that triggers a garbage collection when the smallest
# free-memory fraction reduced over `comms_ctx.mpicomm` falls below 20%.
#
# `integrator` is accepted but never read in this body.
# NOTE(review): `comms_ctx` is not defined in this function; presumably it is a
# global set up by the driver script -- confirm.
# Always returns `nothing`.
function gc_func(integrator)
# `Sys.free_memory` / `Sys.total_memory` report sizes in bytes.
free_mem = Sys.free_memory()
total_mem = Sys.total_memory()
# Fraction of total memory currently reported as free.
p_free_mem = free_mem / total_mem
# Reduce with `min` across the communicator; Allreduce hands the same result
# to every participating rank, so `do_gc` below should agree on all ranks
# (presumably the point of the reduction -- confirm).
min_p_free_mem =
ClimaCommsMPI.MPI.Allreduce(p_free_mem, min, comms_ctx.mpicomm)
# Collect only when less than 20% is free under the reduced value.
do_gc = min_p_free_mem < 0.2
# Debug-level record of the decision; byte counts are divided by 2^20 for the
# "(MB)" fields. The trailing `=` on each line continues the macro call onto
# the next line.
@debug "GC check" "free mem (MB)" = free_mem / 2^20 "total mem (MB)" =
total_mem / 2^20 "Minimum free memory (%)" = min_p_free_mem * 100 "Calling GC" =
do_gc
if do_gc
# Explicit collection request.
GC.gc()
end
return nothing
end
3 changes: 0 additions & 3 deletions examples/hybrid/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,6 @@ end
@info "Running job:`$(simulation.job_id)`"
if simulation.is_distributed
OrdinaryDiffEq.step!(integrator)
GC.enable(false)
GC.gc()
ClimaComms.barrier(comms_ctx)
if ClimaComms.iamroot(comms_ctx)
@timev begin
Expand All @@ -256,7 +254,6 @@ if simulation.is_distributed
walltime = @elapsed sol = OrdinaryDiffEq.solve!(integrator)
end
ClimaComms.barrier(comms_ctx)
GC.enable(true)
else
sol = @timev OrdinaryDiffEq.solve!(integrator)
end
Expand Down

0 comments on commit 9c34456

Please sign in to comment.