diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 5e332ebc7..cc14d741f 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -193,13 +193,31 @@ steps: command: "julia --color=yes --project=perf perf/flame.jl --run_name 2" artifact_paths: "perf/output/perf_coarse_single_modular/*" agents: - slurm_mem: 20GB + - label: ":rocket: flame graph and allocation tests: perf_target_amip_n32_shortrun" command: "julia --color=yes --project=perf perf/flame.jl --run_name 3" artifact_paths: "perf/output/perf_target_amip_n32_shortrun/*" agents: slurm_mem: 20GB + + - label: ":rocket: performance: flame graph diff: perf_default_modular" + command: "julia --color=yes --project=perf perf/flame_diff.jl --run_name 1" + artifact_paths: "perf/output/perf_diff_default_modular/*" + agents: + slurm_mem: 20GB + + - label: ":rocket: performance: flame graph diff: perf_coarse_single_modular" + command: "julia --color=yes --project=perf perf/flame_diff.jl --run_name 2" + artifact_paths: "perf/output/perf_diff_coarse_single_modular/*" + agents: + slurm_mem: 20GB + + - label: ":rocket: performance: flame graph diff: perf_target_amip_n32_shortrun" + command: "julia --color=yes --project=perf perf/flame_diff.jl --run_name 3" + artifact_paths: "perf/output/perf_diff_target_amip_n32_shortrun/*" + agents: + slurm_mem: 20GB - wait @@ -208,4 +226,5 @@ steps: command: - build_history staging # name of branch to plot artifact_paths: - - "build_history.html" \ No newline at end of file + - "build_history.html" + diff --git a/docs/src/performance.md b/docs/src/performance.md index 3e6c74084..d71879048 100644 --- a/docs/src/performance.md +++ b/docs/src/performance.md @@ -6,11 +6,12 @@ ## Flame Graph Interpretation - use for single-process (un)threaded performance CPU profiling of individual stack traces. It provides a tree representation of a set of backtraces, showing the dependence and CPU cost of each function. 
-- here is an example of a flame graph of ClimaCoupler's AMIP run: +- here is an example of a flame graph of ClimaCoupler's AMIP run, produced by Buildkite running the `perf/flame.jl` script: ![canvas](images/canvas_coupler.png) - each row along the y-axis represents a level of backtraces. In this case the lowermost level is at the top, and the top level represents what is directly being run on the CPU. The stacks in each level are sorted alphabetically (not chronologically, like flame _charts_). The column width is proportional to the presence in samples (related to allocations). The colors are grouped into runtime-dispatch, gc, compilation and default. The intensity is random. +- we also have a local beta version of flame graphs (in `perf/ProfileCanvasDiff.jl` and `perf/ProfileViewerDiff.js`), triggered by the `perf/flame_diff.jl` script. It plots the same flame graphs as above, but colors each stack frame according to whether its allocation has been reduced (blue) or increased (red) compared to the last staged run. The color intensity is proportional to the fractional change, and black signifies untracked traces. 
## References - [Description of flame graphs and their interpretation](https://github.com/CliMA/slurm-buildkite/wiki/Flame-Graphs) diff --git a/perf/ProfileCanvasDiff.jl b/perf/ProfileCanvasDiff.jl new file mode 100644 index 000000000..a0c4a9664 --- /dev/null +++ b/perf/ProfileCanvasDiff.jl @@ -0,0 +1,390 @@ +# temporarily copied and modified from https://github.com/pfitzseb/ProfileCanvas.jl + +module ProfileCanvasDiff + +using Profile, JSON, REPL, Pkg.Artifacts, Base64 + +export @profview, @profview_allocs + +struct ProfileData + data::Any + typ::Any +end + +mutable struct ProfileFrame + func::String + file::String # human readable file name + path::String # absolute path + line::Int # 1-based line number + count::Int # number of samples in this frame + countLabel::Union{Missing, String} # defaults to `$count samples` + flags::UInt8 # any or all of ProfileFrameFlag + taskId::Union{Missing, UInt} + children::Vector{ProfileFrame} + count_change::Float64 # fractional change in count: (new - old) / old +end + +struct ProfileDisplay <: Base.Multimedia.AbstractDisplay end + +function __init__() + pushdisplay(ProfileDisplay()) + + atreplinit(i -> begin + while ProfileDisplay() in Base.Multimedia.displays + popdisplay(ProfileDisplay()) + end + pushdisplay(ProfileDisplay()) + end) +end + +function jlprofile_data_uri(build_path) + path = joinpath(build_path, "ProfileViewerDiff.js") + @info "the module path is $path" + str = read(path, String) + + return string("data:text/javascript;base64,", base64encode(str)) +end + +function Base.show(io::IO, ::MIME"text/html", canvas::ProfileData, build_path = "") + id = "profiler-container-$(round(Int, rand()*100000))" + + data_uri = (jlprofile_data_uri(build_path)) + println( + io, + """ +
+ + """, + ) +end + +function Base.display(_::ProfileDisplay, canvas::ProfileData) + + file = html_file(string(tempname(), ".html"), canvas) + url = "file://$file" + + if Sys.iswindows() + run(`cmd /c "start $url"`) + elseif Sys.isapple() + run(`open $url`) + elseif Sys.islinux() || Sys.isbsd() + run(`xdg-open $url`) + end +end + +html_file(filename, data = Profile.fetch(); build_path = "", kwargs...) = + html_file(filename, build_path = build_path, view(data; kwargs...)) + +function html_file(file::AbstractString, canvas::ProfileData; build_path = "") + @assert endswith(file, ".html") + + data_uri = jlprofile_data_uri(build_path) + open(file, "w") do io + id = "profiler-container-$(round(Int, rand()*100000))" + + println( + io, + """ + + + + + +
+ + + + """, + ) + end + return file +end + +using Profile + +# https://github.com/timholy/FlameGraphs.jl/blob/master/src/graph.jl +const ProfileFrameFlag = ( + RuntimeDispatch = UInt8(2^0), + GCEvent = UInt8(2^1), + REPL = UInt8(2^2), + Compilation = UInt8(2^3), + TaskEvent = UInt8(2^4), +) + +function view(data = Profile.fetch(); C = false, tracked_list = Dict{String, Int}(;), build_path = "", kwargs...) + d = Dict{String, ProfileFrame}() + + if VERSION >= v"1.8.0-DEV.460" + threads = ["all", 1:Threads.nthreads()...] + else + threads = ["all"] + end + + if isempty(data) + Profile.warning_empty() + return + end + + lidict = Profile.getdict(unique(data)) + data_u64 = convert(Vector{UInt64}, data) + for thread in threads + graph = stackframetree(data_u64, lidict; thread = thread, kwargs...) + d[string(thread)] = make_tree( + ProfileFrame("root", "", "", 0, graph.count, missing, 0x0, missing, ProfileFrame[], 999), #root process + graph; + C = C, + tracked_list = tracked_list, + kwargs..., + ) + end + + return ProfileData(d, "Thread") +end + +function stackframetree(data_u64, lidict; thread = nothing, combine = true, recur = :off) + root = combine ? Profile.StackFrameTree{StackTraces.StackFrame}() : Profile.StackFrameTree{UInt64}() + if VERSION >= v"1.8.0-DEV.460" + thread = thread == "all" ? 
(1:Threads.nthreads()) : thread + root, _ = Profile.tree!(root, data_u64, lidict, true, recur, thread) + else + root = Profile.tree!(root, data_u64, lidict, true, recur) + end + if !isempty(root.down) + root.count = sum(pr -> pr.second.count, root.down) + end + + return root +end + +function status(sf::StackTraces.StackFrame) + st = UInt8(0) + if sf.from_c && (sf.func === :jl_invoke || sf.func === :jl_apply_generic || sf.func === :ijl_apply_generic) + st |= ProfileFrameFlag.RuntimeDispatch + end + if sf.from_c && startswith(String(sf.func), "jl_gc_") + st |= ProfileFrameFlag.GCEvent + end + if !sf.from_c && sf.func === :eval_user_input && endswith(String(sf.file), "REPL.jl") + st |= ProfileFrameFlag.REPL + end + if !sf.from_c && occursin("./compiler/", String(sf.file)) + st |= ProfileFrameFlag.Compilation + end + if !sf.from_c && occursin("task.jl", String(sf.file)) + st |= ProfileFrameFlag.TaskEvent + end + return st +end + +function status(node::Profile.StackFrameTree, C::Bool) + st = status(node.frame) + C && return st + # If we're suppressing C frames, check all C-frame children + for child in values(node.down) + child.frame.from_c || continue + st |= status(child, C) + end + return st +end + +function add_child(graph::ProfileFrame, node, C::Bool; tracked_list = Dict{String, Int}(;)) + name = string(node.frame.file) + func = String(node.frame.func) + line = node.frame.line + file = node.frame.file + + if func == "" + func = "unknown" + end + + count_old = func in keys(tracked_list) ? 
tracked_list["$func.$file.$line"] : 999 + frame = ProfileFrame( + func, + basename(name), + name, + node.frame.line, + node.count, + missing, + status(node, C), + missing, + ProfileFrame[], + (Float64(node.count) - Float64(count_old)) / Float64(count_old), + ) + + push!(graph.children, frame) + + return frame +end + +function make_tree(graph, node::Profile.StackFrameTree; C = false, tracked_list = Dict{String, Int}(;)) + for child_node in sort!(collect(values(node.down)); rev = true, by = node -> node.count) + # child not a hidden frame + if C || !child_node.frame.from_c + child = add_child(graph, child_node, C, tracked_list = tracked_list) + make_tree(child, child_node; C = C, tracked_list = tracked_list) + else + make_tree(graph, child_node, tracked_list = tracked_list) + end + end + + return graph +end + +""" + @profview f(args...) [C = false] + +Clear the Profile buffer, profile `f(args...)`, and view the result graphically. + +The default of `C = false` will only show Julia frames in the profile graph. +""" +macro profview(ex, args...) + return quote + Profile.clear() + Profile.@profile $(esc(ex)) + view(; $(esc.(args)...)) + end +end + +## Allocs + +""" + @profview_allocs f(args...) [sample_rate=0.0001] [C=false] + +Clear the Profile buffer, profile `f(args...)`, and view the result graphically. +""" +macro profview_allocs(ex, args...) 
+ sample_rate_expr = :(sample_rate = 0.0001) + for arg in args + if Meta.isexpr(arg, :(=)) && length(arg.args) > 0 && arg.args[1] === :sample_rate + sample_rate_expr = arg + end + end + if isdefined(Profile, :Allocs) + return quote + Profile.Allocs.clear() + Profile.Allocs.@profile $(esc(sample_rate_expr)) $(esc(ex)) + view_allocs() + end + else + return :(@error "This version of Julia does not support the allocation profiler.") + end +end + +function view_allocs(_results = Profile.Allocs.fetch(); C = false) + results = _results::Profile.Allocs.AllocResults + allocs = results.allocs + + allocs_root = ProfileFrame("root", "", "", 0, 0, missing, 0x0, missing, ProfileFrame[], 0) + counts_root = ProfileFrame("root", "", "", 0, 0, missing, 0x0, missing, ProfileFrame[], 0) + for alloc in allocs + this_allocs = allocs_root + this_counts = counts_root + + for sf in Iterators.reverse(alloc.stacktrace) + if !C && sf.from_c + continue + end + file = string(sf.file) + this_counts′ = ProfileFrame( + string(sf.func), + basename(file), + file, + sf.line, + 0, + missing, + 0x0, + missing, + ProfileFrame[], + 0, + ) + ind = findfirst( + c -> (c.func == this_counts′.func && c.path == this_counts′.path && c.line == this_counts′.line), + this_allocs.children, + ) + + this_counts, this_allocs = if ind === nothing + push!(this_counts.children, this_counts′) + this_allocs′ = deepcopy(this_counts′) + push!(this_allocs.children, this_allocs′) + + (this_counts′, this_allocs′) + else + (this_counts.children[ind], this_allocs.children[ind]) + end + this_allocs.count += alloc.size + this_allocs.countLabel = memory_size(this_allocs.count) + this_counts.count += 1 + this_allocs.count_change = 0.6 + end + + alloc_type = replace(string(alloc.type), "Profile.Allocs." 
=> "") + ind = findfirst(c -> (c.func == alloc_type), this_allocs.children) + if ind === nothing + push!( + this_allocs.children, + ProfileFrame( + alloc_type, + "", + "", + 0, + this_allocs.count, + memory_size(this_allocs.count), + ProfileFrameFlag.GCEvent, + missing, + ProfileFrame[], + 0.6, + ), + ) + push!( + this_counts.children, + ProfileFrame(alloc_type, "", "", 0, 1, missing, ProfileFrameFlag.GCEvent, missing, ProfileFrame[], 1), + ) + else + this_counts.children[ind].count += 1 + this_allocs.children[ind].count += alloc.size + this_allocs.children[ind].countLabel = memory_size(this_allocs.children[ind].count) + this_allocs.children[ind].count_change = 0.6 + end + + counts_root.count += 1 + allocs_root.count += alloc.size + allocs_root.countLabel = memory_size(allocs_root.count) + allocs_root.count_change = 0.6 + end + + d = Dict{String, ProfileFrame}("size" => allocs_root, "count" => counts_root) + + return ProfileData(d, "Allocation") +end + +const prefixes = ["bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] +function memory_size(size) + i = 1 + while size > 1000 && i + 1 < length(prefixes) + size /= 1000 + i += 1 + end + return string(round(Int, size), " ", prefixes[i]) +end + + +end diff --git a/perf/ProfileViewerDiff.js b/perf/ProfileViewerDiff.js new file mode 100644 index 000000000..d8827cc27 --- /dev/null +++ b/perf/ProfileViewerDiff.js @@ -0,0 +1,694 @@ +/* temporarily copied and modified from https://github.com/pfitzseb/jl-profile.js/blob/a13f2ef7852bc1782ec180e3efd4ccef2da7ba6d/dist/profile-viewer.js */ +export class ProfileViewer { + constructor(element, data, selectorLabel) { + this.selections = []; + this.offsetX = 0; + this.offsetY = 0; + this.isWheeling = false; + this.canWheelDown = true; + this.scrollPosition = 0; + this.isResizing = false; + this.isDocumentScrolling = false; + this.isMouseMove = false; + this.scale = window.devicePixelRatio; + this.borderWidth = 2; + this.padding = 2; + this.fontConfig = '10px sans-serif'; + 
this.borderColor = '#fff'; + this.selectorLabel = 'Thread'; + this.boxHeight = 24; + this.destroyed = false; + if (typeof element === 'string') { + element = document.querySelector(element); + } + if (!element) { + throw new Error('Invalid parent element specified.'); + } + this.container = element; + if (selectorLabel) { + this.selectorLabel = selectorLabel; + } + this.insertDOM(); + this.getStyles(); + this.registerResizeObserver(); + this.registerScrollListener(); + if (data) { + this.setData(data); + } + this.getOffset(); + } + /** + * Remove event listeners and added child elements. The global stylesheet + * is only removed if this is the last reference to it (i.e. there are no + * other not-destroyed ProfileViewer instances in the DOM). + */ + destroy() { + this.destroyed = true; + this.resizeObserver.disconnect(); + if (this.scrollHandler) { + document.removeEventListener('scroll', this.scrollHandler); + } + if (this.stylesheet && parseInt(this.stylesheet.dataset.references) === 0) { + document.head.removeChild(this.stylesheet); + } + while (this.container.firstChild) { + this.container.removeChild(this.container.lastChild); + } + } + setData(data) { + if (this.destroyed) { + console.error('This profile viewer is destroyed.'); + return; + } + if (!data) { + this.data = data; + this.clear(); + return; + } + const selections = Object.keys(data); + selections.sort((a, b) => { + if (a === 'all') { + return -1; + } + if (b === 'all') { + return 1; + } + if (a < b) { + return -1; + } + if (a > b) { + return 1; + } + return 0; + }); + this.data = data; + this.selections = selections; + this.currentSelection = this.selections[0]; + this.activeNode = this.data[this.currentSelection]; + this.updateFilter(); + this.redraw(); + } + setSelectorLabel(label) { + this.selectorLabel = label; + this.selectorLabelElement.innerText = `${label}: `; + } + registerCtrlClickHandler(f) { + this.ctrlClickHandler = f; + } + /** + * @deprecated Use `registerSelectionHandler` instead. 
+ */ + registerThreadSelectorHandler(f) { + this.selectionHandler = f; + } + registerSelectionHandler(f) { + this.selectionHandler = f; + } + registerScrollListener() { + document.addEventListener('scroll', this.scrollHandler); + } + clear() { + this.selections = []; + this.currentSelection = ''; + this.activeNode = undefined; + this.canvasCtx.clearRect(0, 0, this.canvasWidth, this.canvasHeight); + this.hoverCanvasCtx.clearRect(0, 0, this.canvasWidth, this.canvasHeight); + } + isDestroyed() { + return this.destroyed; + } + getStyles() { + var _a, _b, _c; + const style = window.getComputedStyle(this.container, null); + const fontFamily = style.fontFamily; + const fontSize = style.fontSize; + this.fontConfig = + parseInt(fontSize !== null && fontSize !== void 0 ? fontSize : '12px', 10) * this.scale + + 'px ' + + (fontFamily !== null && fontFamily !== void 0 ? fontFamily : 'sans-serif'); + this.borderColor = (_a = style.color) !== null && _a !== void 0 ? _a : '#000'; + this.canvasCtx.font = this.fontConfig; + this.canvasCtx.textBaseline = 'middle'; + const textMetrics = this.canvasCtx.measureText('ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]*\'"^_`abcdefghijklmnopqrstuvwxyz'); + this.boxHeight = Math.ceil((((_b = textMetrics.fontBoundingBoxDescent) !== null && _b !== void 0 ? _b : textMetrics.actualBoundingBoxDescent) + + ((_c = textMetrics.fontBoundingBoxAscent) !== null && _c !== void 0 ? 
_c : textMetrics.actualBoundingBoxAscent) + + 2 * this.borderWidth + + 2 * this.padding) * + this.scale); + if (this.activeNode) { + this.redraw(); + } + } + redraw() { + this.canWheelDown = false; + this.canvasCtx.clearRect(0, 0, this.canvasWidth, this.canvasHeight); + this.clearHover(); + this.drawGraph(this.activeNode, this.canvasWidth, this.canvasHeight, 0, this.scrollPosition); + } + insertDOM() { + this.insertStylesheet(); + this.canvas = document.createElement('canvas'); + this.canvas.classList.add('__profiler-canvas'); + this.canvasCtx = this.canvas.getContext('2d'); + this.hoverCanvas = document.createElement('canvas'); + this.hoverCanvas.classList.add('__profiler-hover-canvas'); + this.hoverCanvasCtx = this.hoverCanvas.getContext('2d'); + const canvasContainer = document.createElement('div'); + canvasContainer.classList.add('__profiler-canvas-container'); + canvasContainer.appendChild(this.canvas); + canvasContainer.appendChild(this.hoverCanvas); + canvasContainer.appendChild(this.createTooltip()); + this.container.appendChild(this.createFilterContainer()); + this.container.appendChild(canvasContainer); + this.canvas.addEventListener('wheel', (ev) => { + if (!this.activeNode) { + return; + } + if (ev.deltaY > 0 && !this.canWheelDown) { + return; + } + if (ev.deltaY < 0 && this.scrollPosition === 0) { + if (-ev.deltaY > this.boxHeight) { + const parent = this.findParentNode(this.activeNode); + if (parent) { + ev.preventDefault(); + ev.stopPropagation(); + this.clearHover(); + this.activeNode = parent; + this.redraw(); + } + return; + } + } + ev.preventDefault(); + ev.stopPropagation(); + if (!this.isWheeling) { + window.requestAnimationFrame(() => { + this.scrollPosition = Math.min(0, this.scrollPosition - ev.deltaY); + this.redraw(); + this.isWheeling = false; + }); + this.isWheeling = true; + } + }); + this.canvas.addEventListener('mousemove', (ev) => { + if (!this.isMouseMove && this.activeNode) { + window.requestAnimationFrame(() => { + // XXX: this is 
bad + this.getOffset(); + const mouseX = ev.clientX - this.offsetX; + const mouseY = ev.clientY - this.offsetY; + this.hoverCanvasCtx.clearRect(0, 0, this.canvasWidth, this.canvasHeight); + const didDraw = this.drawHover(this.activeNode, this.scale * mouseX, this.scale * mouseY); + if (didDraw) { + if (mouseX > this.canvasWidthCSS / 2) { + this.tooltipElement.style.right = + this.canvasWidthCSS - mouseX + 10 + 'px'; + this.tooltipElement.style.left = 'unset'; + } + else { + this.tooltipElement.style.right = 'unset'; + this.tooltipElement.style.left = mouseX + 10 + 'px'; + } + if (mouseY > this.canvasHeightCSS / 2) { + this.tooltipElement.style.bottom = + this.canvasHeightCSS - mouseY + 10 + 'px'; + this.tooltipElement.style.top = 'unset'; + } + else { + this.tooltipElement.style.bottom = 'unset'; + this.tooltipElement.style.top = mouseY + 10 + 'px'; + } + this.tooltipElement.style.display = 'block'; + } + else { + this.tooltipElement.style.display = 'none'; + } + this.isMouseMove = false; + }); + this.isMouseMove = true; + } + }); + this.canvas.addEventListener('click', (ev) => { + if (!this.activeNode) { + return; + } + ev.preventDefault(); + ev.stopPropagation(); + this.getOffset(); + const mouseX = this.scale * (ev.clientX - this.offsetX); + const mouseY = this.scale * (ev.clientY - this.offsetY); + if (ev.ctrlKey || ev.metaKey) { + this.runOnNodeAtMousePosition(this.activeNode, mouseX, mouseY, (node) => { + if (this.ctrlClickHandler) { + this.ctrlClickHandler(node); + } + }); + } + else { + if (this.zoomInOnNode(this.activeNode, mouseX, mouseY)) { + this.scrollPosition = 0; + this.redraw(); + } + else if (ev.detail === 2) { + // reset on double-click + this.resetView(); + } + } + }); + } + resetView() { + this.activeNode = this.data[this.currentSelection]; + this.scrollPosition = 0; + this.redraw(); + } + insertStylesheet() { + const stylesheet = document.querySelector('#__profiler_stylesheet'); + if (stylesheet) { + stylesheet.dataset.references = 
(parseInt(stylesheet.dataset.references) + 1).toString(); + this.stylesheet = stylesheet; + } + else { + this.stylesheet = document.createElement('style'); + this.stylesheet.setAttribute('id', '__profiler-stylesheet'); + this.stylesheet.dataset.references = '0'; + this.stylesheet.innerText = ` + .__profiler-canvas { + z-index: 0; + position: absolute; + width: 100%; + } + .__profiler-canvas-container { + width: 100%; + height: 100%; + position: relative; + } + .__profiler-hover-canvas { + z-index: 1; + position: absolute; + pointer-events: none; + width: 100%; + } + .__profiler-tooltip { + z-index: 2; + display: none; + position: absolute; + background-color: #ddd; + border: 1px solid black; + padding: 5px 10px; + pointer-events: none; + max-width: 45%; + overflow: hidden; + } + .__profiler-tooltip > div { + line-break: anywhere; + } + .__profiler-filter { + height: 30px; + padding: 2px 16px; + margin: 0; + box-sizing: border-box; + border-bottom: 1px solid #444; + user-select: none; + } + .__profiler-reset { + float: right; + } + `; + document.head.appendChild(this.stylesheet); + } + } + createTooltip() { + this.tooltipElement = document.createElement('div'); + this.tooltipElement.classList.add('__profiler-tooltip'); + this.tooltip = { + count: document.createElement('span'), + percentage: document.createElement('span'), + function: document.createElement('code'), + file: document.createElement('a'), + flags: document.createElement('span'), + }; + this.tooltip.function.classList.add('fname'); + const rows = [ + [this.tooltip.function], + [ + this.tooltip.count, + document.createTextNode(' ('), + this.tooltip.percentage, + document.createTextNode(') '), + ], + [this.tooltip.file], + [this.tooltip.flags], + ]; + for (const row of rows) { + const rowContainer = document.createElement('div'); + for (const col of row) { + rowContainer.appendChild(col); + } + this.tooltipElement.appendChild(rowContainer); + } + this.tooltip['ctrlClickHint'] = 
document.createElement('small'); + this.tooltipElement.appendChild(this.tooltip['ctrlClickHint']); + this.container.appendChild(this.tooltipElement); + return this.tooltipElement; + } + createFilterContainer() { + this.filterContainer = document.createElement('div'); + this.filterContainer.classList.add('__profiler-filter'); + this.selectorLabelElement = document.createElement('label'); + this.selectorLabelElement.innerText = `${this.selectorLabel}: `; + this.filterContainer.appendChild(this.selectorLabelElement); + this.filterInput = document.createElement('select'); + this.filterInput.addEventListener('change', () => { + this.currentSelection = this.filterInput.value; + if (this.selectionHandler) { + this.selectionHandler(this.currentSelection); + } + this.resetView(); + }); + this.filterContainer.appendChild(this.filterInput); + const resetter = document.createElement('button'); + resetter.classList.add('__profiler-reset'); + resetter.innerText = 'reset view'; + resetter.addEventListener('click', () => { + this.resetView(); + }); + this.filterContainer.appendChild(resetter); + return this.filterContainer; + } + updateFilter() { + while (this.filterInput.firstChild) { + this.filterInput.removeChild(this.filterInput.lastChild); + } + for (const selection of this.selections) { + const entry = document.createElement('option'); + entry.innerText = selection; + entry.setAttribute('value', selection); + this.filterInput.appendChild(entry); + } + } + registerResizeObserver() { + this.resizeObserver = new ResizeObserver((entries) => { + if (!this.isResizing) { + for (const entry of entries) { + if (entry.target === this.container) { + window.requestAnimationFrame(() => { + if (window.devicePixelRatio !== this.scale) { + this.scale = window.devicePixelRatio; + this.getStyles(); + } + this.canvasWidth = Math.round(entry.contentRect.width * this.scale); + this.canvasHeight = Math.round((entry.contentRect.height - 30) * this.scale); + this.canvasWidthCSS = 
entry.contentRect.width; + this.canvasHeightCSS = entry.contentRect.height; + this.canvas.width = this.canvasWidth; + this.canvas.height = this.canvasHeight; + this.hoverCanvas.width = this.canvasWidth; + this.hoverCanvas.height = this.canvasHeight; + this.redraw(); + this.isResizing = false; + }); + } + } + this.isResizing = true; + } + }); + this.resizeObserver.observe(this.container); + } + scrollHandler(e) { + if (!this.isDocumentScrolling) { + window.requestAnimationFrame(() => { + this.getOffset(); + this.isDocumentScrolling = false; + }); + this.isDocumentScrolling = true; + } + } + getOffset() { + const box = this.canvas.getBoundingClientRect(); + this.offsetX = box.left; + this.offsetY = box.top; + } + // hash of function named, used to seed PRNG + nodeHash(node) { + const hashString = node.file + node.line; + let hash = 0; + for (let i = 0; i < hashString.length; i++) { + const char = hashString.charCodeAt(i); + hash = (hash << 5) - hash + char; + hash = hash & hash; + } + return hash; + } + // Simple PRNG from https://stackoverflow.com/a/47593316/12113178 + mulberry32(a) { + return function () { + let t = (a += 0x6d2b79f5); + t = Math.imul(t ^ (t >>> 15), t | 1); + t ^= t + Math.imul(t ^ (t >>> 7), t | 61); + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }; + } + // modifies the normal color by three stable random values drawn from a + // PRNG seeded by the node hash + modifyNodeColorByHash(r, g, b, hash, range = 70) { + const rng = this.mulberry32(hash); + if (r === g && g === b) { + r = g = b = Math.min(255, Math.max(0, r + (rng() - 0.5) * range)); + } + else { + r = Math.min(255, Math.max(0, r + (rng() - 0.5) * range)); + g = Math.min(255, Math.max(0, g + (rng() - 0.5) * range)); + b = Math.min(255, Math.max(0, b + (rng() - 0.5) * range)); + } + return { + r, + g, + b, + }; + } + modifyNodeColorByCount(r, g, b, count, range = 255) { + r = Math.min(255, g * (1-count) ); + g = Math.min(255, g * (1-count) ); + b = Math.min(255, b * (1-count) ); + 
return { + r, + g, + b, + }; + } + nodeColors(node, hash) { + let r, g, b; + let a = 1; + /** + if (node.flags & 0x01) { + // runtime-dispatch + ; + ({ r, g, b } = this.modifyNodeColorByHash(204, 103, 103, hash, 20)); + } + else if (node.flags & 0x02) { + // gc + ; + ({ r, g, b } = this.modifyNodeColorByHash(204, 153, 68, hash, 20)); + } + else if (node.flags & 0x08) { + // compilation? + ; + ({ r, g, b } = this.modifyNodeColorByHash(100, 100, 100, hash, 60)); + } + else { + // default + ; + ({ r, g, b } = this.modifyNodeColorByHash(64, 99, 221, hash)); + } + if (node.flags & 0x10) { + // C frame + a = 0.5; + } + */ + + // if (node.count > 0) { + // better performance + ; + if (node.count_change > 100) { + ({ r, g, b } = this.modifyNodeColorByCount(0, 0, 0, node.count_change )); + } + else if (node.count_change < 0) { + ({ r, g, b } = this.modifyNodeColorByCount(255, 255, 255, - node.count_change )); + r = 255; + } + else { + ({ r, g, b } = this.modifyNodeColorByCount(255, 255, 255, node.count_change )); + b = 255; + } + + return { + fill: 'rgba(' + r + ',' + g + ',' + b + ',' + a + ')', + stroke: 'rgba(' + 0.8 * r + ',' + 0.8 * g + ',' + 0.8 * b + ',' + a + ')', + text: 'rgba(255, 255, 255, ' + Math.max(0.6, a) + ')', + }; + } + drawGraph(node, width, height, x, y) { + if (!node) { + return; + } + this.canvasCtx.font = this.fontConfig; + this.canvasCtx.textBaseline = 'middle'; + if (y + this.boxHeight >= 0) { + const hash = this.nodeHash(node); + const { fill, stroke, text } = this.nodeColors(node, hash); + this.drawNode(node.func, fill, stroke, text, width, x, y); + } + node.pos = { + x, + y, + width, + height: this.boxHeight, + }; + if (y + this.boxHeight <= this.canvasHeight) { + for (const child of node.children) { + const w = width * (child.fraction || child.count / node.count); + this.drawGraph(child, w, height, x, y + this.boxHeight); + x += w; + } + } + else { + this.canWheelDown = true; + } + } + drawNode(text, color, bColor, textColor, width, x, y) { + 
if (width < 1) { + width = 1; + } + const drawBorder = false; //width > 20*this.borderWidth; + this.canvasCtx.fillStyle = color; + this.canvasCtx.beginPath(); + this.canvasCtx.rect(x, y + this.borderWidth, width, this.boxHeight - this.borderWidth); + this.canvasCtx.closePath(); + this.canvasCtx.fill(); + if (drawBorder) { + this.canvasCtx.fillStyle = bColor; + this.canvasCtx.beginPath(); + this.canvasCtx.rect(x, y + this.borderWidth, this.borderWidth, this.boxHeight - this.borderWidth); + this.canvasCtx.closePath(); + this.canvasCtx.fill(); + } + const textWidth = width - 2 * this.padding - 2 * this.borderWidth; + if (textWidth > 10) { + this.canvasCtx.save(); + this.canvasCtx.beginPath(); + this.canvasCtx.rect(x + this.borderWidth + this.padding, y + this.borderWidth + this.padding, textWidth, this.boxHeight - this.borderWidth - 2 * this.padding); + this.canvasCtx.closePath(); + this.canvasCtx.clip(); + this.canvasCtx.fillStyle = textColor; + this.canvasCtx.fillText(text, x + this.borderWidth + this.padding, y + this.boxHeight / 2 + this.borderWidth); + this.canvasCtx.restore(); + } + } + updateTooltip(node) { + this.tooltip.function.innerText = node.func; + if (node.file || node.line > 0) { + this.tooltip.file.innerText = node.file + ':' + node.line; + } + else { + this.tooltip.file.innerText = ''; + } + this.tooltip.count.innerText = (node.countLabel || (node.count + ' samples')).toString(); + let percentageText = ((100 * node.count) / + this.data[this.currentSelection].count).toFixed() + '% of root'; + if (this.activeNode.count != this.data[this.currentSelection].count) { + percentageText = percentageText + ', ' + ((100 * node.count) / + this.activeNode.count).toFixed() + '% of selection'; + } + this.tooltip.percentage.innerText = percentageText; + const flags = []; + if (node.flags & 0x01) { + flags.push('runtime-dispatch'); + } + if (node.flags & 0x02) { + flags.push('GC'); + } + if (node.flags & 0x08) { + flags.push('compilation'); + } + if (node.flags & 
0x10) { + flags.push('task'); + } + let flagString = ''; + if (flags.length > 0) { + flagString = 'Flags: ' + flags.join(', '); + } + this.tooltip.flags.innerText = flagString; + if (this.ctrlClickHandler) { + this.tooltip['ctrlClickHint'].innerText = + 'Ctrl/Cmd+Click to open this file'; + } + } + drawHoverNode(node) { + this.hoverCanvasCtx.fillStyle = this.borderColor; + this.hoverCanvasCtx.fillRect(node.pos.x, node.pos.y + this.borderWidth, Math.max(1, node.pos.width), node.pos.height - this.borderWidth); + const innerWidth = node.pos.width - this.borderWidth * 2 * this.scale; + if (innerWidth > 1) { + this.hoverCanvasCtx.clearRect(node.pos.x + this.borderWidth * this.scale, node.pos.y + 2 * this.borderWidth * this.scale, innerWidth, node.pos.height - this.borderWidth * 3 * this.scale); + } + this.updateTooltip(node); + } + clearHover() { + this.hoverCanvasCtx.clearRect(0, 0, this.canvasWidth, this.canvasHeight); + this.tooltipElement.style.display = 'none'; + } + drawHover(node, mouseX, mouseY) { + let found = false; + this.runOnNodeAtMousePosition(node, mouseX, mouseY, (node) => { + this.drawHoverNode(node); + found = true; + }); + return found; + } + runOnNodeAtMousePosition(root, x, y, f) { + if (x >= Math.floor(root.pos.x) && + x <= Math.ceil(root.pos.x + root.pos.width) && + y >= root.pos.y) { + if (y <= root.pos.y + root.pos.height) { + f(root); + return true; + } + else { + for (const child of root.children) { + if (this.runOnNodeAtMousePosition(child, x, y, f)) { + return true; + } + } + } + } + return false; + } + zoomInOnNode(node, mouseX, mouseY) { + let found = false; + this.runOnNodeAtMousePosition(node, mouseX, mouseY, (node) => { + this.clearHover(); + this.activeNode = node; + found = true; + }); + return found; + } + // ideally this wouldn't require tree traversal at all + findParentNode(target, current = null) { + if (current === null) { + current = this.data[this.currentSelection]; + } + for (const child of current.children) { + if (child === 
target) { + return current; + } + else { + const found = this.findParentNode(target, child); + if (found) { + return found; + } + } + } + return null; + } +} +//# sourceMappingURL=profile-viewer.js.map \ No newline at end of file diff --git a/perf/flame.jl b/perf/flame.jl index 4e67e8c8c..14cf45b72 100644 --- a/perf/flame.jl +++ b/perf/flame.jl @@ -1,6 +1,6 @@ -# flame.jl: provides allocation brakdown for individual backtraces for single-process unthredded runs +# flame.jl: provides allocation breakdown for individual backtraces for single-process unthreaded runs # and check for overall allocation limits based on previous runs -# copied and modified from ClimaAtmos/perf +# copied and modified from `ClimaAtmos/perf` import Profile import ProfileCanvas diff --git a/perf/flame_diff.jl b/perf/flame_diff.jl new file mode 100644 index 000000000..dcdb806b0 --- /dev/null +++ b/perf/flame_diff.jl @@ -0,0 +1,121 @@ +# flame_diff.jl: provides allocation breakdown for individual backtraces for single-process unthreaded runs +# and check for fractional change in allocation compared to the last staged run + +buildkite_branch = ENV["BUILDKITE_BRANCH"] +buildkite_commit = ENV["BUILDKITE_COMMIT"] +buildkite_bnumber = ENV["BUILDKITE_BUILD_NUMBER"] +buildkite_cc_dir = "/groups/esm/slurm-buildkite/climacoupler-ci/" + +build_path = "/central/scratch/esm/slurm-buildkite/climacoupler-ci/$buildkite_bnumber/climacoupler-ci/perf/" +cwd = pwd() +@info "build_path is: $build_path; cwd is $cwd" + +import Profile +using Test +import Base: view +include("ProfileCanvasDiff.jl") +import .ProfileCanvasDiff + +Profile.clear_malloc_data() +Profile.clear() + +cc_dir = joinpath(dirname(@__DIR__)); +include(joinpath(cc_dir, "experiments", "AMIP", "moist_mpi_earth", "cli_options.jl")); + +# assuming a common driver for all tested runs +filename = joinpath(cc_dir, "experiments", "AMIP", "moist_mpi_earth", "coupler_driver_modular.jl") + +# selected runs for performance analysis and their expected 
# allocations (based on previous runs)
run_name_list = ["default_modular", "coarse_single_modular", "target_amip_n32_shortrun"]

# The pipeline invokes this script as `flame_diff.jl --run_name <index>`, so the 1-based index
# into `run_name_list` is read from the second command-line argument.
length(ARGS) >= 2 ||
    throw(ArgumentError("expected `--run_name <index>` with index in 1:$(length(run_name_list)), got ARGS = $ARGS"))
run_name = run_name_list[parse(Int, ARGS[2])]

# number of time steps used for profiling
const n_samples = 2

# flag to split coupler init from its solve; it is set once here and stays in effect for the
# `include(filename)` below
ENV["CI_PERF_SKIP_COUPLED_RUN"] = true

# pass in the correct arguments, overriding defaults with those specific to each run_name (in `pipeline.yaml`)
dict = parsed_args_per_job_id(; trigger = "--run_name $run_name")
parsed_args_prescribed = parsed_args_from_ARGS(ARGS)
parsed_args_target = dict[run_name]
parsed_args = merge(parsed_args_target, parsed_args_prescribed) # global scope needed to recognize this definition in the coupler driver
run_name = "perf_diff_" * run_name
parsed_args["job_id"] = run_name
parsed_args["run_name"] = run_name
parsed_args["enable_threading"] = false

@info run_name

"""
    step_coupler!(cs, n_samples)

Reset `cs.tspan` so that it starts at the atmosphere integrator's current time and ends
`n_samples * cs.Δt_cpl` later, then call `solve_coupler!(cs)` and return its result.
"""
function step_coupler!(cs, n_samples)
    cs.tspan[1] = cs.model_sims.atmos_sim.integrator.t
    cs.tspan[2] = cs.tspan[1] + n_samples * cs.Δt_cpl
    return solve_coupler!(cs)
end

# initialize the coupler; the only exception tolerated here is the `:exit_profile_init` signal
# (presumably thrown by the driver when `CI_PERF_SKIP_COUPLED_RUN` is set — confirm in the driver)
try
    include(filename)
catch err
    # errors raised inside `include` arrive wrapped in a `LoadError`; unwrap defensively so that
    # any other exception type is re-raised as itself instead of failing on a missing `.error` field
    inner = err isa LoadError ? err.error : err
    inner === :exit_profile_init || rethrow(inner)
end

#####
##### Profiling
#####
"""
    iterate_children(flame_tree, ct = 0, dict = Dict{String, Float64}())

Walk a stack tree depth-first and store, for every node, its name ("\$func.\$file.\$line") and
its `count` value in `dict`. Returns `dict`.

`ct` is the depth of the current node; it is passed down the recursion but not otherwise used.
Nodes that share the same name overwrite each other, so the last one visited wins.
"""
function iterate_children(flame_tree, ct = 0, dict = Dict{String, Float64}())
    key = "$(flame_tree.func).$(flame_tree.file).$(flame_tree.line)"
    dict[key] = flame_tree.count

    # a leaf has no children, so the loop body simply never runs
    for child in flame_tree.children
        iterate_children(child, ct + 1, dict)
    end
    return dict
end

# obtain the stacktree from the last saved file in `buildkite_cc_dir`
# NOTE(review): `load` and `save` are not imported in this script; presumably they are brought
# into scope by the included driver — confirm
ref_file = joinpath(buildkite_cc_dir, "$run_name.jld2")
# load the very file whose existence is checked, not a file of the same name in the working directory
tracked_list = isfile(ref_file) ? load(ref_file) : Dict{String, Float64}()

# compile coupling loop first
step_coupler!(cs, n_samples)
Profile.clear_malloc_data()
Profile.clear()

# profile the coupling loop
prof = Profile.@profile begin
    step_coupler!(cs, n_samples)
end

# produce flamegraph with colors highlighting the allocation differences relative to the last saved run
# NOTE(review): `tracked_list` is not passed to `html_file` here — confirm that the reference
# counts reach the rendered graph some other way
if haskey(ENV, "BUILDKITE_COMMIT") || haskey(ENV, "BUILDKITE_BRANCH")
    output_dir = "perf/output/$run_name"
    mkpath(output_dir)
    ProfileCanvasDiff.html_file(joinpath(output_dir, "flame_diff.html"), build_path = build_path)
end

# save (and reset) the stack tree if this is running on the `staging` branch
profile_data = ProfileCanvasDiff.view(Profile.fetch(), tracked_list = tracked_list);
flame_tree = profile_data.data["all"]
my_dict = iterate_children(flame_tree)
@info "This branch is: $buildkite_branch, commit $buildkite_commit"
if buildkite_branch == "staging"
    if isfile(ref_file)
        # archive the previous reference next to the original, tagged with the commit;
        # `ref_file` is an absolute path, so only its basename may appear in the new file name.
        # `force = true` lets a re-run of the same commit overwrite its own archive.
        archived_ref_file = joinpath(dirname(ref_file), "ref_file.$(basename(ref_file)).$buildkite_commit")
        mv(ref_file, archived_ref_file; force = true)
    end
    save(ref_file, my_dict) # reset ref_file upon staging
end