From 157fef71f314d30ad3e53871f0fde9eb307e42f7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 3 Oct 2021 03:43:41 -0500 Subject: [PATCH 1/7] Add some Profile compatibility routines This makes it easier for external packages like FlameGraphs to continue to support Profile by hiding some of the details of the internal metadata format. --- stdlib/Profile/src/Profile.jl | 67 ++++++++++++++++++++++----------- stdlib/Profile/test/runtests.jl | 43 ++++++++++++++++++++- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 2f79d3de97a2d..0300a23b3dd9a 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -144,8 +144,8 @@ The keyword arguments can be any combination of: line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples incurred by each function by itself. - - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:threads`, `:tasks`, - `[:threads, :tasks]`, or `[:tasks, :threads]` where the last two provide nested grouping. + - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`, + `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping. - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`). A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`, @@ -519,29 +519,54 @@ function fetch(;include_meta = false) GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len) if include_meta || isempty(data) return data - else - nblocks = 0 - for i = 2:length(data) - if is_block_end(data, i) # detect block ends and count them - nblocks += 1 - end + end + return strip_meta(data) +end + +function strip_meta(data) + nblocks = 0 + for i = 2:length(data) + if is_block_end(data, i) # detect block ends and count them + nblocks += 1 end - data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1))) - j = length(data_stripped) - i = length(data) - while i > 0 && j > 0 - data_stripped[j] = data[i] - if is_block_end(data, i) - i -= (nmeta + 1) # metadata fields and the extra NULL IP - end - i -= 1 - j -= 1 + end + data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1))) + j = length(data_stripped) + i = length(data) + while i > 0 && j > 0 + data_stripped[j] = data[i] + if is_block_end(data, i) + i -= (nmeta + 1) # metadata fields and the extra NULL IP end - @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])" - return data_stripped + i -= 1 + j -= 1 end + @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])" + return data_stripped end +""" + Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0f0f0f0f0) -> data_with_meta + +The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used +for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal +details of the metadata format. +""" +function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0f0f0f0f0) + any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") + cpu_clock_cycle = UInt64(99) + data_with_meta = similar(data, 0) + for i = 1:length(data) + val = data[i] + if iszero(val) + # (threadid, taskid, cpu_cycle_clock, thread_sleeping) + push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0) + else + push!(data_with_meta, val) + end + end + return data_with_meta +end ## Print as a flat list # Counts the number of times each line appears, at any nesting level and at the topmost level @@ -807,7 +832,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI skip = false nsleeping = 0 for i in startframe:-1:1 - (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP + (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP ip = all[i] if is_block_end(all, i) # read metadata diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 940f1c4478ae3..23c731667b484 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license using Test, Profile, Serialization, Logging +using Base.StackTraces: StackFrame Profile.clear() Profile.init() @@ -78,7 +79,14 @@ end data_with = Profile.fetch(include_meta = true) @test data_without[1] == data_with[1] @test data_without[end] == data_with[end] - @test length(data_without) < length(data_with) + nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with)) + @test length(data_without) == length(data_with) - nblocks * (Profile.nmeta + 1) + + data_with_fake = Profile.add_fake_meta(data_without) + @test_throws "input already has metadata" Profile.add_fake_meta(data_with) + data_stripped = Profile.strip_meta(data_with_fake) + @test data_stripped == data_without + @test length(data_with_fake) == length(data_with) end Profile.clear() @@ -175,3 +183,36 @@ let cmd = Base.julia_cmd() @test success(p) @test parse(Int, s) > 100 end + +@testset "FlameGraphs" begin + # FlameGraphs makes use of some Profile's internals. Detect possible breakage by mimicking some of its tests. + # Breakage is acceptable since these internals are not part of the stable API, but it's better to know, and ideally + # should be paired with an issue or PR in FlameGraphs. + # + # This also improves the thoroughness of our overall Profile tests. + stackframe(func, file, line; C=false) = StackFrame(Symbol(func), Symbol(file), line, nothing, C, false, 0) + + backtraces = UInt64[ 4, 3, 2, 1, # order: callees then caller + 0, 6, 5, 1, + 0, 8, 7, + 0, 4, 3, 2, 1, + 0] + backtraces = Profile.add_fake_meta(backtraces) + lidict = Dict{UInt64,StackFrame}(1=>stackframe(:f1, :file1, 1), + 2=>stackframe(:f2, :file1, 5), + 3=>stackframe(:f3, :file2, 1), + 4=>stackframe(:f2, :file1, 15), + 5=>stackframe(:f4, :file1, 20), + 6=>stackframe(:f5, :file3, 1), + 7=>stackframe(:f1, :file1, 2), + 8=>stackframe(:f6, :file3, 10)) + root = Profile.StackFrameTree{StackFrame}() + Profile.tree!(root, backtraces, lidict, #= C =# true, :off) + @test length(root.down) == 2 + for k in keys(root.down) + @test k.file == :file1 + @test k.line ∈ (1, 2) + end + node = root.down[stackframe(:f1, :file1, 2)] + @test only(node.down).first == lidict[8] +end From 230038293898c7bdd766c9b86c801dd6d5dfbe32 Mon Sep 17 00:00:00 2001 From: Ian Date: Fri, 8 Oct 2021 21:37:39 -0400 Subject: [PATCH 2/7] use 32-bit friendly default task id --- stdlib/Profile/src/Profile.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 0300a23b3dd9a..1b8feeca4fd4d 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -546,13 +546,13 @@ function strip_meta(data) end """ - Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0f0f0f0f0) -> data_with_meta + Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal details of the metadata format. """ -function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0f0f0f0f0) +function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") cpu_clock_cycle = UInt64(99) data_with_meta = similar(data, 0) From be83afb629f45d66b08cee8d1581890d8bbccfbd Mon Sep 17 00:00:00 2001 From: Ian Date: Sat, 9 Oct 2021 20:16:20 -0400 Subject: [PATCH 3/7] debugging --- stdlib/Profile/src/Profile.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 1b8feeca4fd4d..c04601658322f 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -553,7 +553,13 @@ for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not d details of the metadata format. """ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) - any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") + if any(Base.Fix1(is_block_end, data), eachindex(data)) + n_meta_blockends = count(Base.Fix1(is_block_end, data), eachindex(data)) + n_zeros = count(iszero, data) + @show n_meta_blockends n_zeros + println.(data) + error("input already has metadata") + end cpu_clock_cycle = UInt64(99) data_with_meta = similar(data, 0) for i = 1:length(data) From 26b5a236f6bca60fd6999aeaa110aabc68e61b0d Mon Sep 17 00:00:00 2001 From: Ian Date: Sat, 9 Oct 2021 21:25:00 -0400 Subject: [PATCH 4/7] Revert "debugging" This reverts commit cc2ebd47e782d8dcf4f18639e2dfa36cc5e9c84e. --- stdlib/Profile/src/Profile.jl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index c04601658322f..1b8feeca4fd4d 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -553,13 +553,7 @@ for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not d details of the metadata format. """ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) - if any(Base.Fix1(is_block_end, data), eachindex(data)) - n_meta_blockends = count(Base.Fix1(is_block_end, data), eachindex(data)) - n_zeros = count(iszero, data) - @show n_meta_blockends n_zeros - println.(data) - error("input already has metadata") - end + any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") cpu_clock_cycle = UInt64(99) data_with_meta = similar(data, 0) for i = 1:length(data) From 5229691a30394335d18e83fbe0e806fbdc676c2d Mon Sep 17 00:00:00 2001 From: Ian Date: Sat, 9 Oct 2021 21:24:55 -0400 Subject: [PATCH 5/7] narrow the is_block_end heuristic --- stdlib/Profile/src/Profile.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 1b8feeca4fd4d..cf11dd38c44a0 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -296,7 +296,13 @@ function is_block_end(data, i) # and we could have (though very unlikely): # 1::end # and we want to ignore the triple NULL (which is an ip). - return data[i] == 0 && data[i - 1] == 0 && data[i - 2] != 0 + data[i] == 0 || return false # first block end null + data[i - 1] == 0 || return false # second block end null + data[i - 2] in 1:2 || return false # sleep state + data[i - 3] != 0 || return false # cpu_cycle_clock + data[i - 4] != 0 || return false # taskid + data[i - 5] != 0 || return false # threadid + return true end """ From 0f4f73b07cfb7261fb9e5934b9d33d1c8cf5a580 Mon Sep 17 00:00:00 2001 From: Ian Date: Sat, 9 Oct 2021 21:27:28 -0400 Subject: [PATCH 6/7] tidier count --- stdlib/Profile/src/Profile.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index cf11dd38c44a0..f201158e40a37 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -530,12 +530,7 @@ function fetch(;include_meta = false) end function strip_meta(data) - nblocks = 0 - for i = 2:length(data) - if is_block_end(data, i) # detect block ends and count them - nblocks += 1 - end - end + nblocks = count(Base.Fix1(is_block_end, data), eachindex(data)) data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1))) j = length(data_stripped) i = length(data) From 0e4e1a173b5bc10e54a634243e86e9730c5fa546 Mon Sep 17 00:00:00 2001 From: Ian Date: Sat, 9 Oct 2021 23:08:09 -0400 Subject: [PATCH 7/7] fix test by relaxing it, with explanation --- stdlib/Profile/src/Profile.jl | 2 ++ stdlib/Profile/test/runtests.jl | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index f201158e40a37..3c608f358b694 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -554,6 +554,8 @@ for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not d details of the metadata format. """ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) + threadid == 0 && error("Fake threadid cannot be 0") + taskid == 0 && error("Fake taskid cannot be 0") any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") cpu_clock_cycle = UInt64(99) data_with_meta = similar(data, 0) diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 23c731667b484..092372358e07f 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -86,7 +86,10 @@ end @test_throws "input already has metadata" Profile.add_fake_meta(data_with) data_stripped = Profile.strip_meta(data_with_fake) @test data_stripped == data_without - @test length(data_with_fake) == length(data_with) + # ideally the test below would be a test for equality, but real sample ips can be nulls, and thus + # adding metadata back in can convert those ips to new block ends, and the length is then longer + @test length(data_with_fake) >= length(data_with) + end Profile.clear()