diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index 1fbab0ea534d8..ab670866e086c 100644 --- a/stdlib/Profile/docs/src/index.md +++ b/stdlib/Profile/docs/src/index.md @@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as ` ```@docs Profile.Allocs.clear +Profile.Allocs.print Profile.Allocs.fetch Profile.Allocs.start Profile.Allocs.stop diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index e45f4dca9607f..59399b1f21bb3 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -1,5 +1,12 @@ module Allocs +global print # Allocs.print is separate from both Base.print and Profile.print +public @profile, + clear, + print, + fetch + +using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree using Base.StackTraces: StackTrace, StackFrame, lookup using Base: InterpreterIP @@ -138,7 +145,7 @@ end # Without this, the Alloc's stacktrace prints for lines and lines and lines... function Base.show(io::IO, a::Alloc) stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : "" - print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") + Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") end const BacktraceCache = Dict{BTElement,Vector{StackFrame}} @@ -216,4 +223,201 @@ function stacktrace_memoized( return stack end +function warning_empty() + @warn """ + There were no samples collected. + Run your program longer (perhaps by running it multiple times), + or adjust the frequency of samples to record every event with + the `sample_rate=1.0` kwarg.""" +end + + +""" + Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...) + +Prints profiling results to `io` (by default, `stdout`). If you do not +supply a `data` vector, the internal buffer of accumulated backtraces +will be used. + +See `Profile.print` for an explanation of the valid keyword arguments. +""" +print(; kwargs...) = + Profile.print(stdout, fetch(); kwargs...) +print(io::IO; kwargs...) = + Profile.print(io, fetch(); kwargs...) +print(io::IO, data::AllocResults; kwargs...) = + Profile.print(io, data; kwargs...) +Profile.print(data::AllocResults; kwargs...) = + Profile.print(stdout, data; kwargs...) + +function Profile.print(io::IO, + data::AllocResults, + ; + format = :tree, + C = false, + #combine = true, + maxdepth::Int = typemax(Int), + mincount::Int = 0, + noisefloor = 0, + sortedby::Symbol = :filefuncline, + groupby::Union{Symbol,AbstractVector{Symbol}} = :none, + recur::Symbol = :off, + ) + pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur) + Profile.print(io, data, pf, format) + return +end + +function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol) + cols::Int = Base.displaysize(io)[2] + fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized")) + data = data.allocs + if format === :tree + tree(io, data, cols, fmt) + elseif format === :flat + fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off")) + flat(io, data, cols, fmt) + else + throw(ArgumentError("output format $(repr(format)) not recognized")) + end + nothing +end + + +function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T + lilist = StackFrame[] + n = Int[] + m = Int[] + lilist_idx = Dict{T, Int}() + recursive = Set{T}() + totalbytes = 0 + for r in data + first = true + empty!(recursive) + nb = r.size # or 1 for counting + totalbytes += nb + for frame in r.stacktrace + !C && frame.from_c && continue + key = (T === UInt64 ? ip : frame) + idx = get!(lilist_idx, key, length(lilist) + 1) + if idx > length(lilist) + push!(recursive, key) + push!(lilist, frame) + push!(n, nb) + push!(m, 0) + elseif !(key in recursive) + push!(recursive, key) + n[idx] += nb + end + if first + m[idx] += nb + first = false + end + end + end + @assert length(lilist) == length(n) == length(m) == length(lilist_idx) + return (lilist, n, m, totalbytes) +end + +function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat) + fmt.combine || error(ArgumentError("combine=false")) + lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C) + filenamemap = Dict{Symbol,String}() + if isempty(lilist) + warning_empty() + return true + end + print_flat(io, lilist, n, m, cols, filenamemap, fmt) + Base.println(io, "Total snapshots: ", length(data)) + Base.println(io, "Total bytes: ", totalbytes) + return false +end + +function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T} + tops = Vector{StackFrameTree{T}}() + build = Dict{T, StackFrameTree{T}}() + for r in all + first = true + nb = r.size # or 1 for counting + root.recur = 0 + root.count += nb + parent = root + for i in reverse(eachindex(r.stacktrace)) + frame = r.stacktrace[i] + key = (T === UInt64 ? ip : frame) + if (recur === :flat && !frame.from_c) || recur === :flatc + # see if this frame already has a parent + this = get!(build, frame, parent) + if this !== parent + # Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree + push!(tops, parent) + parent = this + end + end + !C && frame.from_c && continue + this = get!(StackFrameTree{T}, parent.down, key) + if recur === :off || this.recur == 0 + this.frame = frame + this.up = parent + this.count += nb + this.recur = 1 + else + this.count_recur += 1 + end + parent = this + end + parent.overhead += nb + if recur !== :off + # We mark all visited nodes to so we'll only count those branches + # once for each backtrace. Reset that now for the next backtrace. + empty!(build) + push!(tops, parent) + for top in tops + while top.recur != 0 + top.max_recur < top.recur && (top.max_recur = top.recur) + top.recur = 0 + top = top.up + end + end + empty!(tops) + end + let this = parent + while this !== root + this.flat_count += nb + this = this.up + end + end + end + function cleanup!(node::StackFrameTree) + stack = [node] + while !isempty(stack) + node = pop!(stack) + node.recur = 0 + empty!(node.builder_key) + empty!(node.builder_value) + append!(stack, values(node.down)) + end + nothing + end + cleanup!(root) + return root +end + +function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat) + fmt.combine || error(ArgumentError("combine=false")) + if fmt.combine + root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur) + else + root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur) + end + print_tree(io, root, cols, fmt, false) + if isempty(root.down) + warning_empty() + return true + end + Base.println(io, "Total snapshots: ", length(data)) + Base.println(io, "Total bytes: ", root.count) + return false +end + end diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index c37cdd0af0368..206d2957a91e5 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -5,6 +5,20 @@ Profiling support, main entry point is the [`@profile`](@ref) macro. """ module Profile +global print +public @profile, + clear, + print, + fetch, + retrieve, + add_fake_meta, + flatten, + callers, + init, + take_heap_snapshot, + clear_malloc_data, + Allocs + import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping) @@ -220,7 +234,7 @@ function print(io::IO, pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur) if groupby === :none - print(io, data, lidict, pf, format, threads, tasks, false) + print_group(io, data, lidict, pf, format, threads, tasks, false) else if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]]) error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]")) @@ -244,7 +258,7 @@ function print(io::IO, printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color()) for threadid in threadids printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color()) - nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) end @@ -262,7 +276,7 @@ function print(io::IO, printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color()) for taskid in taskids printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) - nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) end @@ -274,7 +288,7 @@ function print(io::IO, isempty(taskids) && (any_nosamples = true) for taskid in taskids printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) - nosamples = print(io, data, lidict, pf, format, threads, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true) nosamples && (any_nosamples = true) println(io) end @@ -284,7 +298,7 @@ function print(io::IO, isempty(threadids) && (any_nosamples = true) for threadid in threadids printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color()) - nosamples = print(io, data, lidict, pf, format, threadid, tasks, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true) nosamples && (any_nosamples = true) println(io) end @@ -306,7 +320,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) = print(stdout, data, lidict; kwargs...) -function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, +function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool = false) cols::Int = Base.displaysize(io)[2] diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl index e77e5bc826612..d4930a2b7f5ed 100644 --- a/stdlib/Profile/test/allocs.jl +++ b/stdlib/Profile/test/allocs.jl @@ -1,6 +1,13 @@ using Test using Profile: Allocs +Allocs.clear() +let iobuf = IOBuffer() + for format in (:tree, :flat) + Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true) + end +end + @testset "alloc profiler doesn't segfault" begin res = Allocs.@profile sample_rate=1.0 begin # test the allocations during compilation @@ -13,6 +20,20 @@ using Profile: Allocs @test first_alloc.size > 0 @test length(first_alloc.stacktrace) > 0 @test length(string(first_alloc.type)) > 0 + + # test printing options + for options in ((format=:tree, C=true), + (format=:tree, maxdepth=2), + (format=:flat, C=true), + (), + (format=:flat, sortedby=:count), + (format=:tree, recur=:flat), + ) + iobuf = IOBuffer() + Allocs.print(iobuf; options...) + str = String(take!(iobuf)) + @test !isempty(str) + end end @testset "alloc profiler works when there are multiple tasks on multiple threads" begin diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 95ec7f857dad7..eccfeea846a23 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -38,28 +38,18 @@ let r = Profile.retrieve() end end -let iobuf = IOBuffer() - Profile.print(iobuf, format=:tree, C=true) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf, format=:tree, maxdepth=2) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf, format=:flat, C=true) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf) - @test !isempty(String(take!(iobuf))) - truncate(iobuf, 0) - Profile.print(iobuf, format=:flat, sortedby=:count) - @test !isempty(String(take!(iobuf))) - Profile.print(iobuf, format=:tree, recur=:flat) +# test printing options +for options in ((format=:tree, C=true), + (format=:tree, maxdepth=2), + (format=:flat, C=true), + (), + (format=:flat, sortedby=:count), + (format=:tree, recur=:flat), + ) + iobuf = IOBuffer() + Profile.print(iobuf; options...) str = String(take!(iobuf)) @test !isempty(str) - truncate(iobuf, 0) end @testset "Profile.print() groupby options" begin