Skip to content

Commit

Permalink
Add some Profile compatibility routines (JuliaLang#42482)
Browse files Browse the repository at this point in the history
  • Loading branch information
timholy authored and LilithHafner committed Feb 22, 2022
1 parent 81cb0ef commit 0baf153
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 24 deletions.
74 changes: 51 additions & 23 deletions stdlib/Profile/src/Profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ The keyword arguments can be any combination of:
line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
incurred by each function by itself.
- `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:threads`, `:tasks`,
`[:threads, :tasks]`, or `[:tasks, :threads]` where the last two provide nested grouping.
- `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
`[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
- `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
Expand Down Expand Up @@ -296,7 +296,13 @@ function is_block_end(data, i)
# and we could have (though very unlikely):
# 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
# and we want to ignore the triple NULL (which is an ip).
return data[i] == 0 && data[i - 1] == 0 && data[i - 2] != 0
data[i] == 0 || return false # first block end null
data[i - 1] == 0 || return false # second block end null
data[i - 2] in 1:2 || return false # sleep state
data[i - 3] != 0 || return false # cpu_cycle_clock
data[i - 4] != 0 || return false # taskid
data[i - 5] != 0 || return false # threadid
return true
end

"""
Expand Down Expand Up @@ -519,29 +525,51 @@ function fetch(;include_meta = false)
GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len)
if include_meta || isempty(data)
return data
else
nblocks = 0
for i = 2:length(data)
if is_block_end(data, i) # detect block ends and count them
nblocks += 1
end
end
data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
j = length(data_stripped)
i = length(data)
while i > 0 && j > 0
data_stripped[j] = data[i]
if is_block_end(data, i)
i -= (nmeta + 1) # metadata fields and the extra NULL IP
end
i -= 1
j -= 1
end
return strip_meta(data)
end

# Return a copy of the profile buffer `data` with the per-block metadata removed.
#
# Every index for which `is_block_end(data, i)` holds terminates one block. For each such
# block, the `nmeta` metadata fields and the extra NULL IP that precede the block-end NULL
# are dropped, so the result holds `length(data) - nblocks * (nmeta + 1)` entries.
function strip_meta(data)
    nblocks = count(Base.Fix1(is_block_end, data), eachindex(data))
    data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
    j = length(data_stripped)
    i = length(data)
    # Copy back to front: a block end is visited before the metadata that precedes it,
    # which lets the read cursor jump over that metadata in one step.
    while i > 0 && j > 0
        data_stripped[j] = data[i]
        if is_block_end(data, i)
            i -= (nmeta + 1) # metadata fields and the extra NULL IP
        end
        i -= 1
        j -= 1
    end
    # Internal invariant, checked only once the walk is finished: both cursors must run
    # out together, otherwise the block count and the copy loop disagree.
    @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])"
    return data_stripped
end

"""
    Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta

The converse of `Profile.fetch(;include_meta = false)`: this adds fake metadata to `data`, and can be used
for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal
details of the metadata format.
"""
function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
    # Reject placeholder ids of zero up front.
    threadid == 0 && error("Fake threadid cannot be 0")
    taskid == 0 && error("Fake taskid cannot be 0")
    # Refuse input in which any position already looks like a block end.
    if any(idx -> is_block_end(data, idx), eachindex(data))
        error("input already has metadata")
    end

    tick = UInt64(99)        # fake cpu_cycle_clock; bumped before each use, so the first is 100
    sleep_state = false + 1  # thread_sleeping flag, stored offset by one
    out = similar(data, 0)
    for ip in data
        if !iszero(ip)
            push!(out, ip)
            continue
        end
        # A zero entry expands to:
        # (threadid, taskid, cpu_cycle_clock, thread_sleeping) followed by two zeros
        tick += 1
        push!(out, threadid, taskid, tick, sleep_state, 0, 0)
    end
    return out
end

## Print as a flat list
# Counts the number of times each line appears, at any nesting level and at the topmost level
Expand Down Expand Up @@ -807,7 +835,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
skip = false
nsleeping = 0
for i in startframe:-1:1
(startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP
(startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
ip = all[i]
if is_block_end(all, i)
# read metadata
Expand Down
46 changes: 45 additions & 1 deletion stdlib/Profile/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

using Test, Profile, Serialization, Logging
using Base.StackTraces: StackFrame

Profile.clear()
Profile.init()
Expand Down Expand Up @@ -78,7 +79,17 @@ end
data_with = Profile.fetch(include_meta = true)
@test data_without[1] == data_with[1]
@test data_without[end] == data_with[end]
@test length(data_without) < length(data_with)
nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with))
@test length(data_without) == length(data_with) - nblocks * (Profile.nmeta + 1)

data_with_fake = Profile.add_fake_meta(data_without)
@test_throws "input already has metadata" Profile.add_fake_meta(data_with)
data_stripped = Profile.strip_meta(data_with_fake)
@test data_stripped == data_without
# ideally the test below would be a test for equality, but real sample ips can be nulls, and thus
# adding metadata back in can convert those ips to new block ends, and the length is then longer
@test length(data_with_fake) >= length(data_with)

end

Profile.clear()
Expand Down Expand Up @@ -175,3 +186,36 @@ let cmd = Base.julia_cmd()
@test success(p)
@test parse(Int, s) > 100
end

@testset "FlameGraphs" begin
    # FlameGraphs makes use of some of Profile's internals. Detect possible breakage by mimicking some of its tests.
    # Breakage is acceptable since these internals are not part of the stable API, but it's better to know, and ideally
    # should be paired with an issue or PR in FlameGraphs.
    #
    # This also improves the thoroughness of our overall Profile tests.
    stackframe(func, file, line; C=false) = StackFrame(Symbol(func), Symbol(file), line, nothing, C, false, 0)

    # Four metadata-free backtraces, each terminated by a 0 entry.
    backtraces = UInt64[   4, 3, 2, 1,   # order: callees then caller
                        0, 6, 5, 1,
                        0, 8, 7,
                        0, 4, 3, 2, 1,
                        0]
    backtraces = Profile.add_fake_meta(backtraces)
    lidict = Dict{UInt64,StackFrame}(1=>stackframe(:f1, :file1, 1),
                                     2=>stackframe(:f2, :file1, 5),
                                     3=>stackframe(:f3, :file2, 1),
                                     4=>stackframe(:f2, :file1, 15),
                                     5=>stackframe(:f4, :file1, 20),
                                     6=>stackframe(:f5, :file3, 1),
                                     7=>stackframe(:f1, :file1, 2),
                                     8=>stackframe(:f6, :file3, 10))
    root = Profile.StackFrameTree{StackFrame}()
    Profile.tree!(root, backtraces, lidict, #= C =# true, :off)
    # The outermost callers are ips 1 and 7: both in :file1, at lines 1 and 2.
    @test length(root.down) == 2
    for k in keys(root.down)
        @test k.file == :file1
        @test k.line ∈ (1, 2)
    end
    node = root.down[stackframe(:f1, :file1, 2)]
    @test only(node.down).first == lidict[8]
end

0 comments on commit 0baf153

Please sign in to comment.