diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 2f79d3de97a2d..3c608f358b694 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -144,8 +144,8 @@ The keyword arguments can be any combination of: line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples incurred by each function by itself. - - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:threads`, `:tasks`, - `[:threads, :tasks]`, or `[:tasks, :threads]` where the last two provide nested grouping. + - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`, + `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping. - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`). A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`, @@ -296,7 +296,13 @@ function is_block_end(data, i) # and we could have (though very unlikely): # 1::end # and we want to ignore the triple NULL (which is an ip). - return data[i] == 0 && data[i - 1] == 0 && data[i - 2] != 0 + data[i] == 0 || return false # first block end null + data[i - 1] == 0 || return false # second block end null + data[i - 2] in 1:2 || return false # sleep state + data[i - 3] != 0 || return false # cpu_cycle_clock + data[i - 4] != 0 || return false # taskid + data[i - 5] != 0 || return false # threadid + return true end """ @@ -519,29 +525,51 @@ function fetch(;include_meta = false) GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len) if include_meta || isempty(data) return data - else - nblocks = 0 - for i = 2:length(data) - if is_block_end(data, i) # detect block ends and count them - nblocks += 1 - end - end - data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1))) - j = length(data_stripped) - i = length(data) - while i > 0 && j > 0 - data_stripped[j] = data[i] - if is_block_end(data, i) - i -= (nmeta + 1) # metadata fields and the extra NULL IP - end - i -= 1 - j -= 1 + end + return strip_meta(data) +end + +function strip_meta(data) + nblocks = count(Base.Fix1(is_block_end, data), eachindex(data)) + data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1))) + j = length(data_stripped) + i = length(data) + while i > 0 && j > 0 + data_stripped[j] = data[i] + if is_block_end(data, i) + i -= (nmeta + 1) # metadata fields and the extra NULL IP end - @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])" - return data_stripped + i -= 1 + j -= 1 end + @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])" + return data_stripped end +""" + Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta + +The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used +for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal +details of the metadata format. +""" +function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) + threadid == 0 && error("Fake threadid cannot be 0") + taskid == 0 && error("Fake taskid cannot be 0") + any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") + cpu_clock_cycle = UInt64(99) + data_with_meta = similar(data, 0) + for i = 1:length(data) + val = data[i] + if iszero(val) + # (threadid, taskid, cpu_cycle_clock, thread_sleeping) + push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0) + else + push!(data_with_meta, val) + end + end + return data_with_meta +end ## Print as a flat list # Counts the number of times each line appears, at any nesting level and at the topmost level @@ -807,7 +835,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI skip = false nsleeping = 0 for i in startframe:-1:1 - (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP + (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP ip = all[i] if is_block_end(all, i) # read metadata diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 940f1c4478ae3..092372358e07f 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license using Test, Profile, Serialization, Logging +using Base.StackTraces: StackFrame Profile.clear() Profile.init() @@ -78,7 +79,17 @@ end data_with = Profile.fetch(include_meta = true) @test data_without[1] == data_with[1] @test data_without[end] == data_with[end] - @test length(data_without) < length(data_with) + nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with)) + @test length(data_without) == length(data_with) - nblocks * (Profile.nmeta + 1) + + data_with_fake = Profile.add_fake_meta(data_without) + @test_throws "input already has metadata" Profile.add_fake_meta(data_with) + data_stripped = Profile.strip_meta(data_with_fake) + @test data_stripped == data_without + # ideally the test below would be a test for equality, but real sample ips can be nulls, and thus + # adding metadata back in can convert those ips to new block ends, and the length is then longer + @test length(data_with_fake) >= length(data_with) + end Profile.clear() @@ -175,3 +186,36 @@ let cmd = Base.julia_cmd() @test success(p) @test parse(Int, s) > 100 end + +@testset "FlameGraphs" begin + # FlameGraphs makes use of some Profile's internals. Detect possible breakage by mimicking some of its tests. + # Breakage is acceptable since these internals are not part of the stable API, but it's better to know, and ideally + # should be paired with an issue or PR in FlameGraphs. + # + # This also improves the thoroughness of our overall Profile tests. + stackframe(func, file, line; C=false) = StackFrame(Symbol(func), Symbol(file), line, nothing, C, false, 0) + + backtraces = UInt64[ 4, 3, 2, 1, # order: callees then caller + 0, 6, 5, 1, + 0, 8, 7, + 0, 4, 3, 2, 1, + 0] + backtraces = Profile.add_fake_meta(backtraces) + lidict = Dict{UInt64,StackFrame}(1=>stackframe(:f1, :file1, 1), + 2=>stackframe(:f2, :file1, 5), + 3=>stackframe(:f3, :file2, 1), + 4=>stackframe(:f2, :file1, 15), + 5=>stackframe(:f4, :file1, 20), + 6=>stackframe(:f5, :file3, 1), + 7=>stackframe(:f1, :file1, 2), + 8=>stackframe(:f6, :file3, 10)) + root = Profile.StackFrameTree{StackFrame}() + Profile.tree!(root, backtraces, lidict, #= C =# true, :off) + @test length(root.down) == 2 + for k in keys(root.down) + @test k.file == :file1 + @test k.line ∈ (1, 2) + end + node = root.down[stackframe(:f1, :file1, 2)] + @test only(node.down).first == lidict[8] +end