Skip to content

Commit

Permalink
init flamegraphs
Browse files Browse the repository at this point in the history
transfer + modify from ClimaCore/ClimaAtmos

add alloc benchmarks

add buildkite driver

rev allocs

docs

activate docs

clean

fix

doc fix

doc fix

check in perf Manifest

bkite flag order fix

scope fix

FT redef error

fix

fix

stash

fix

clean

rebase fixes

rebase fix bkite

rev1
  • Loading branch information
LenkaNovak committed Jan 30, 2023
1 parent 8993266 commit 3346bf9
Show file tree
Hide file tree
Showing 11 changed files with 2,353 additions and 27 deletions.
31 changes: 27 additions & 4 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ steps:
- "julia --project=experiments/AMIP/moist_mpi_earth/ -e 'using Pkg; Pkg.precompile()'"
- "julia --project=experiments/AMIP/moist_mpi_earth/ -e 'using Pkg; Pkg.status()'"

- echo "--- Instantiate perf env"
- "julia --project=perf/ -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
- "julia --project=perf/ -e 'using Pkg; Pkg.precompile()'"
- "julia --project=perf/ -e 'using Pkg; Pkg.status()'"

- echo "--- Instantiate test env"
- "julia --project=test/ -e 'using Pkg; Pkg.develop(path=\".\")'"
- "julia --project=test/ -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
Expand Down Expand Up @@ -109,7 +114,7 @@ steps:
artifact_paths: "experiments/AMIP/moist_mpi_earth/output/slabplanet/default_notmono_artifacts/total_energy*.png"

- label: "Moist earth with slab surface - notmono + modular: bulk gray no_sponge idealinsol freq_dt_cpl"
command: "julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver_modular.jl --enable_threading true --coupled true --surface_scheme monin_obukhov --moist equil --vert_diff true --rad gray --energy_check true --mode_name slabplanet --t_end 10days --dt_save_to_sol 3600secs --dt_cpl 200 --dt 200secs --mono_surface false --h_elem 4 --precip_model 0M --run_name default_modular"
command: "julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver_modular.jl --run_name default_modular --enable_threading true --coupled true --surface_scheme bulk --moist equil --vert_diff true --rad gray --energy_check true --mode_name slabplanet --t_end 10days --dt_save_to_sol 3600secs --dt_cpl 200 --dt 200secs --mono_surface false --h_elem 4 --precip_model 0M"
artifact_paths: "experiments/AMIP/moist_mpi_earth/output/slabplanet/default_modular_artifacts/total_energy*.png"

# Note: this test fails when run with the more realistic albedo from file
Expand Down Expand Up @@ -139,14 +144,13 @@ steps:
artifact_paths: "experiments/AMIP/moist_mpi_earth/output/amip/coarse_single_artifacts/*"

- label: "AMIP - modular"
command: "julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver_modular.jl --enable_threading true --coupled true --surface_scheme monin_obukhov --moist equil --vert_diff true --rad gray --energy_check false --mode_name amip --anim true --t_end 32days --dt_save_to_sol 1days --dt_cpl 400 --dt 400secs --mono_surface false --h_elem 6 --dt_save_restart 10days --precip_model 0M --run_name coarse_single_modular"
command: "julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver_modular.jl --run_name coarse_single_modular --enable_threading true --coupled true --surface_scheme bulk --moist equil --vert_diff true --rad gray --energy_check false --mode_name amip --anim true --t_end 32days --dt_save_to_sol 1days --dt_cpl 400 --dt 400secs --mono_surface false --h_elem 6 --dt_save_restart 10days --precip_model 0M"
artifact_paths: "experiments/AMIP/moist_mpi_earth/output/amip/coarse_single_artifacts/*"

- label: "sea_breeze"
command: "julia --color=yes --project=experiments/ClimaCore/sea_breeze experiments/ClimaCore/sea_breeze/run.jl"
artifact_paths: "sea_breeze/"


- label: "MPI AMIP"
key: "mpi_amip"
command: "mpiexec julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver.jl --coupled true --surface_scheme monin_obukhov --moist equil --vert_diff true --rad gray --energy_check false --mode_name amip --anim true --t_end 32days --dt_save_to_sol 1days --dt_cpl 400 --dt 400secs --mono_surface false --h_elem 6 --dt_save_restart 5days --precip_model 0M --run_name coarse_mpi_n2"
Expand All @@ -158,10 +162,29 @@ steps:

- label: "MPI AMIP FINE"
key: "mpi_amip_fine"
command: "mpiexec julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver.jl --coupled true --surface_scheme monin_obukhov --moist equil --vert_diff true --rad allskywithclear --z_elem 50 --dz_top 3000 --dz_bottom 30 --h_elem 16 --kappa_4 1e16 --rayleigh_sponge true --alpha_rayleigh_uh 0 --alpha_rayleigh_w 10 --dt_cpl 150 --dt 150secs --dt_rad 1hours --idealized_insolation false --FLOAT_TYPE Float64 --energy_check false --mode_name amip --t_end 5days --dt_save_to_sol 10days --mono_surface false --precip_model 0M --run_name target_amip_n32_shortrun"
command: "mpiexec julia --color=yes --project=experiments/AMIP/moist_mpi_earth/ experiments/AMIP/moist_mpi_earth/coupler_driver.jl --run_name target_amip_n32_shortrun --coupled true --surface_scheme monin_obukhov --moist equil --vert_diff true --rad allskywithclear --z_elem 50 --dz_top 3000 --dz_bottom 30 --h_elem 16 --kappa_4 1e16 --rayleigh_sponge true --alpha_rayleigh_uh 0 --alpha_rayleigh_w 10 --dt_cpl 150 --dt 150secs --dt_rad 1hours --idealized_insolation false --FLOAT_TYPE Float64 --energy_check false --mode_name amip --t_end 5days --dt_save_to_sol 10days --mono_surface false --precip_model 0M"
artifact_paths: "experiments/AMIP/moist_mpi_earth/output/amip/target_amip_n32_shortrun_artifacts/*"
env:
CLIMACORE_DISTRIBUTED: "MPI"
agents:
slurm_ntasks: 32
slurm_mem_per_cpu: 16G

# performance tests
- label: ":rocket: flame graph and allocation tests: default_modular"
command: "julia --color=yes --project=perf perf/flame.jl --run_name 1"
artifact_paths: "perf/output/default_modular/*"
agents:
slurm_mem: 20GB

- label: ":rocket: flame graph and allocation tests: perf_coarse_single_modular"
command: "julia --color=yes --project=perf perf/flame.jl --run_name 2"
artifact_paths: "perf/output/perf_coarse_single_modular/*"
agents:

slurm_mem: 20GB
- label: ":rocket: flame graph and allocation tests: target_amip_n32_shortrun"
command: "julia --color=yes --project=perf perf/flame.jl --run_name 3"
artifact_paths: "perf/output/target_amip_n32_shortrun/*"
agents:
slurm_mem: 20GB
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ docs/src/generated/
# Experiments
!experiments/AMIP/**/Manifest.toml
!experiments/ClimaCore/sea_breeze/Manifest.toml
!perf/Manifest.toml

# Data
*.vtk
Expand Down
8 changes: 7 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,14 @@ interface_pages = [
"testhelper.md",
"timemanager.md",
]
performance_pages = ["performance.md"]

pages = Any["Home" => "index.md", "Examples" => experiment_pages, "Coupler Interface" => interface_pages]
pages = Any[
"Home" => "index.md",
"Examples" => experiment_pages,
"Coupler Interface" => interface_pages,
"Performance" => performance_pages,
]


makedocs(sitename = "ClimaCoupler.jl", format = Documenter.HTML(), modules = [ClimaCoupler], pages = pages)
Expand Down
Binary file added docs/src/images/canvas_coupler.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 22 additions & 0 deletions docs/src/performance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Performance Analysis Tools

`ClimaCoupler.jl` provides basic tools for analyzing performance:
1. **Flame graphs**: the `perf/flame.jl` script is run by Buildkite to produce flame graphs using [ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) in the `perf/output/` directory
2. **Job walltime and allocation record**: `log_history` script (not yet implemented)

## Flame Graph Interpretation
- Use flame graphs for CPU performance profiling of single-process (threaded or unthreaded) runs. A flame graph provides a tree representation of a set of backtraces, showing the dependence and CPU cost of each function.
- here is an example of a flame graph of ClimaCoupler's AMIP run:

![canvas](images/canvas_coupler.png)

- Each row along the y-axis represents one level of the backtraces. In this case the lowermost level of the call stack is drawn at the top of the graph, and the topmost stack level represents what is directly being run on the CPU. The stacks in each level are sorted alphabetically (not chronologically, as in flame _charts_). The width of each column is proportional to its presence in the samples (related to allocations). The colors are grouped into runtime-dispatch, gc, compilation and default. The color intensity is random.

## References
- [Description of flame graphs and their interpretation](https://github.com/CliMA/slurm-buildkite/wiki/Flame-Graphs)
- [ClimaCore tips](https://clima.github.io/ClimaCore.jl/dev/performance_tips/) for writing performant code
- [General Julia-specific performance tips](https://docs.julialang.org/en/v1/manual/performance-tips/)
- [Logging performance history using Buildkite and SLURM](https://github.com/CliMA/slurm-buildkite/wiki/Memory)
- [NSight Systems](https://github.com/CliMA/slurm-buildkite/wiki/Nsight-Systems) for MPI profiling using Buildkite and SLURM


85 changes: 69 additions & 16 deletions experiments/AMIP/moist_mpi_earth/cli_options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -441,14 +441,31 @@ function print_repl_script(str)
println(ib)
end

function parsed_args_from_command_line_flags(str, parsed_args = Dict())
s = str
s = last(split(s, ".jl"))
s = strip(s)
parsed_args_list = split(s, " ")
"""
    time_to_seconds(s::String)

Convert a time string such as `"10secs"`, `"20mins"`, `"30hours"` or `"40days"`
into a number of seconds, returned as a `Float64`. The string `"Inf"` maps to `Inf`.

The unit is detected by substring search, and `s` must contain exactly one of the
unit names (`secs`, `mins`, `hours`, `days`); otherwise an error is raised. The text
preceding the unit is parsed as a `Float64`.
"""
function time_to_seconds(s::String)
    seconds_per_unit = Dict("secs" => 1, "mins" => 60, "hours" => 60 * 60, "days" => 60 * 60 * 24)
    if s == "Inf"
        return Inf
    end
    # Collect every unit name that occurs somewhere in `s`.
    units_found = [unit for unit in keys(seconds_per_unit) if occursin(unit, s)]
    if length(units_found) != 1
        error("Bad format for flag $s. Examples: [`10secs`, `20mins`, `30hours`, `40days`]")
    end
    # Defensive guard only: exactly one unit is present once the check above passed.
    isempty(units_found) && error("Uncaught case in computing time from given string.")
    unit = first(units_found)
    magnitude = parse(Float64, first(split(s, unit)))
    return magnitude * seconds_per_unit[unit]
end

"""
    parsed_args_from_ARGS(ARGS, parsed_args = Dict())

Join the entries of `ARGS` with single spaces, strip surrounding whitespace, and
hand the resulting flag string to `parsed_args_from_ARGS_string` together with
`parsed_args`.
"""
function parsed_args_from_ARGS(ARGS, parsed_args = Dict())
    flag_string = strip(join(ARGS, " "))
    return parsed_args_from_ARGS_string(flag_string, parsed_args)
end

"""
    parsed_args_from_command_line_flags(str, parsed_args = Dict())

Take everything in `str` that follows the last occurrence of `.jl`, strip
surrounding whitespace, and hand that flag string to
`parsed_args_from_ARGS_string` together with `parsed_args`.
"""
function parsed_args_from_command_line_flags(str, parsed_args = Dict())
    after_script = last(split(str, ".jl"))
    flag_string = strip(after_script)
    return parsed_args_from_ARGS_string(flag_string, parsed_args)
end

function parsed_args_from_ARGS_string(str, parsed_args = Dict())
str = replace(str, " " => " ", " " => " ", " " => " ")
parsed_args_list = split(str, " ")
parsed_args_list == [""] && return parsed_args
@assert iseven(length(parsed_args_list))
parsed_arg_pairs = map(1:2:(length(parsed_args_list) - 1)) do i
Pair(parsed_args_list[i], parsed_args_list[i + 1])
Pair(parsed_args_list[i], strip(parsed_args_list[i + 1], '\"'))
end
function parse_arg(val)
for T in (Bool, Int, Float32, Float64)
Expand All @@ -457,23 +474,59 @@ function parsed_args_from_command_line_flags(str, parsed_args = Dict())
catch
end
end
return val # string
return String(val) # string
end
for (flag, val) in parsed_arg_pairs
parsed_args[replace(flag, "--" => "")] = parse_arg(val)
end
return parsed_args
end

function time_to_seconds(s::String)
factor = Dict("secs" => 1, "mins" => 60, "hours" => 60 * 60, "days" => 60 * 60 * 24)
s == "Inf" && return Inf
if count(occursin.(keys(factor), Ref(s))) != 1
error("Bad format for flag $s. Examples: [`10secs`, `20mins`, `30hours`, `40days`]")
"""
parsed_args_per_job_id()
parsed_args_per_job_id(buildkite_yaml)
A dict of `parsed_args` to run the ClimaAtmos driver
whose keys are the `job_id`s from buildkite yaml.
# Example
To run the `sphere_aquaplanet_rhoe_equilmoist_allsky`
buildkite job from the standard buildkite pipeline, use:
```
using Revise; include("examples/hybrid/cli_options.jl");
dict = parsed_args_per_job_id();
parsed_args = dict["sphere_aquaplanet_rhoe_equilmoist_allsky"];
include("examples/hybrid/driver.jl")
```
"""
function parsed_args_per_job_id(; trigger = "driver.jl")
cc_dir = joinpath(@__DIR__, "..", "..", "..")
buildkite_yaml = joinpath(cc_dir, ".buildkite", "pipeline.yml")
parsed_args_per_job_id(buildkite_yaml; trigger)
end

"""
    parsed_args_per_job_id(buildkite_yaml; trigger = "driver.jl")

Read the file `buildkite_yaml`, keep the lines that contain `trigger`, and build
a dict with one entry per kept line. The key is the job id, i.e. the token that
follows `--run_name ` on that line (with surrounding `"` removed). The value is
the result of `parsed_args_from_command_line_flags` applied to the line, starting
from the defaults returned by `parse_commandline()`.

Raises an error when no line of the file contains `trigger`.
"""
function parsed_args_per_job_id(buildkite_yaml; trigger = "driver.jl")
    buildkite_commands = filter(line -> occursin(trigger, line), readlines(buildkite_yaml))

    # Sanity check: an empty selection almost certainly means a wrong `trigger` or file.
    isempty(buildkite_commands) &&
        error("No line of $buildkite_yaml contains the trigger \"$trigger\".")

    result = Dict()
    for bkcs in buildkite_commands
        # Fetch fresh defaults for every job so that entries do not share one dict.
        (_, default_parsed_args) = parse_commandline()
        job_id = first(split(last(split(bkcs, "--run_name ")), " "))
        job_id = strip(job_id, '\"')
        result[job_id] = parsed_args_from_command_line_flags(bkcs, default_parsed_args)
    end
    return result
end

"""
    non_default_command_line_flags_parsed_args(parsed_args)

Return a string of command-line flags (`--key value`, separated by single spaces)
for every entry of `parsed_args` whose value differs from the default returned by
`parse_commandline()`. Keys that have no default are always included. The result
is an empty string when nothing differs.
"""
function non_default_command_line_flags_parsed_args(parsed_args)
    # Only the defaults are needed; the first returned value is unused here.
    (_, default_parsed_args) = parse_commandline()
    flags = String[]
    for (k, v) in parsed_args
        # Skip entries that match their default; a key without a default counts as non-default.
        haskey(default_parsed_args, k) && default_parsed_args[k] == v && continue
        push!(flags, "--$k $v")
    end
    return join(flags, " ")
end
17 changes: 13 additions & 4 deletions experiments/AMIP/moist_mpi_earth/coupler_driver_modular.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ using ClimaCore.Utilities: half, PlusHalf
using ClimaCore: InputOutput, Fields


include("cli_options.jl")
(s, parsed_args) = parse_commandline()
if !(@isdefined parsed_args)
include("cli_options.jl")
(s, parsed_args) = parse_commandline()
end

## modify parsed args for fast testing from REPL #hide
if isinteractive()
Expand All @@ -89,7 +91,9 @@ end
mode_name = parsed_args["mode_name"]
run_name = parsed_args["run_name"]
energy_check = parsed_args["energy_check"]
const FT = parsed_args["FLOAT_TYPE"] == "Float64" ? Float64 : Float32
if !(@isdefined FT)
const FT = parsed_args["FLOAT_TYPE"] == "Float64" ? Float64 : Float32
end
land_sim_name = "bucket"
t_end = Int(time_to_seconds(parsed_args["t_end"]))
tspan = (Int(0), t_end)
Expand Down Expand Up @@ -320,7 +324,7 @@ cs = CoupledSimulation{FT}(
coupler_fields,
parsed_args,
conservation_checks,
tspan,
[tspan[1], tspan[2]],
integrator.t,
Δt_cpl,
(; land = land_mask, ocean = zeros(boundary_space), ice = zeros(boundary_space)),
Expand Down Expand Up @@ -426,6 +430,11 @@ function solve_coupler!(cs)
return cs
end

## exit if running performance analysis #hide
if haskey(ENV, "CI_PERF_SKIP_COUPLED_RUN") #hide
throw(:exit_profile_init) #hide
end #hide

## run the coupled simulation
solve_coupler!(cs);

Expand Down
Loading

0 comments on commit 3346bf9

Please sign in to comment.