From 5ffee1291dd14fe856807c4fa802ef338e815dd0 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 1 Nov 2021 23:57:14 -0400 Subject: [PATCH] Profile: metadata handling tweaks (#42868) As was discussed in #42482 after merge, there's some things that could be done better: - Reverts the is_block_end logic, given it should only be run on data that includes metadata, so can afford to be more efficient - Adds has_meta for identifying when profile data has metadata - Adds metadata checks to functions that expect metadata to be present - Changes Profile.fetch to by default include metadata. The reason for this is that it was previously made to strip metadata to avoid breaking existing downstream Profile data consumers, but it turns out that they use Profile internals like tree! that require the metadata to be present, so this ended up causing an unintended breakage anyway - Adds consts for the metadata field offsets, for consumers to use --- stdlib/Profile/src/Profile.jl | 63 ++++++++++++++++++++------------- stdlib/Profile/test/runtests.jl | 6 ++-- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 3c608f358b694..3409e79bdb128 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -122,6 +122,12 @@ struct ProfileFormat end end +# offsets of the metadata in the data stream +const META_OFFSET_SLEEPSTATE = 2 +const META_OFFSET_CPUCYCLECLOCK = 3 +const META_OFFSET_TASKID = 4 +const META_OFFSET_THREADID = 5 + """ print([io::IO = stdout,] [data::Vector]; kwargs...) @@ -267,8 +273,8 @@ function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing) taskids = UInt[] for i in length(data):-1:1 if is_block_end(data, i) - if isnothing(threadid) || data[i - 5] == threadid - taskid = data[i - 4] + if isnothing(threadid) || data[i - META_OFFSET_THREADID] == threadid + taskid = data[i - META_OFFSET_TASKID] !in(taskid, taskids) && push!(taskids, taskid) end end @@ -280,8 +286,8 @@ function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing) threadids = Int[] for i in length(data):-1:1 if is_block_end(data, i) - if isnothing(taskid) || data[i - 4] == taskid - threadid = data[i - 5] + if isnothing(taskid) || data[i - META_OFFSET_TASKID] == taskid + threadid = data[i - META_OFFSET_THREADID] !in(threadid, threadids) && push!(threadids, threadid) end end @@ -296,13 +302,20 @@ function is_block_end(data, i) # and we could have (though very unlikely): # 1::end # and we want to ignore the triple NULL (which is an ip). - data[i] == 0 || return false # first block end null - data[i - 1] == 0 || return false # second block end null - data[i - 2] in 1:2 || return false # sleep state - data[i - 3] != 0 || return false # cpu_cycle_clock - data[i - 4] != 0 || return false # taskid - data[i - 5] != 0 || return false # threadid - return true + return data[i] == 0 && data[i - 1] == 0 && data[i - META_OFFSET_SLEEPSTATE] != 0 +end + +function has_meta(data) + for i in 6:length(data) + data[i] == 0 || continue # first block end null + data[i - 1] == 0 || continue # second block end null + data[i - META_OFFSET_SLEEPSTATE] in 1:2 || continue + data[i - META_OFFSET_CPUCYCLECLOCK] != 0 || continue + data[i - META_OFFSET_TASKID] != 0 || continue + data[i - META_OFFSET_THREADID] != 0 || continue + return true + end + return false end """ @@ -505,15 +518,15 @@ error_codes = Dict( """ - fetch(;include_meta = false) -> data + fetch(;include_meta = true) -> data Returns a copy of the buffer of profile backtraces. Note that the values in `data` have meaning only on this machine in the current session, because it depends on the exact memory addresses used in JIT-compiling. This function is primarily for internal use; [`retrieve`](@ref) may be a better choice for most users. -By default metadata such as threadid and taskid will be stripped. Set `include_meta` to `true` to include metadata. +By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata. """ -function fetch(;include_meta = false) +function fetch(;include_meta = true) maxlen = maxlen_data() len = len_data() if is_buffer_full() @@ -542,7 +555,7 @@ function strip_meta(data) i -= 1 j -= 1 end - @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])" + @assert i == j == 0 "metadata stripping failed" return data_stripped end @@ -556,7 +569,7 @@ details of the metadata format. function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) threadid == 0 && error("Fake threadid cannot be 0") taskid == 0 && error("Fake taskid cannot be 0") - any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata") + !isempty(data) && has_meta(data) && error("input already has metadata") cpu_clock_cycle = UInt64(99) data_with_meta = similar(data, 0) for i = 1:length(data) @@ -576,6 +589,7 @@ end # Merging multiple equivalent entries and recursive calls function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T} + !isempty(data) && !has_meta(data) && error("Profile data is missing required metadata") lilist = StackFrame[] n = Int[] m = Int[] @@ -591,10 +605,10 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, ip = data[i] if is_block_end(data, i) # read metadata - thread_sleeping = data[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0 - # cpu_cycle_clock = data[i - 3] - taskid = data[i - 4] - threadid = data[i - 5] + thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK] + taskid = data[i - META_OFFSET_TASKID] + threadid = data[i - META_OFFSET_THREADID] if !in(threadid, threads) || !in(taskid, tasks) skip = true continue @@ -828,6 +842,7 @@ end # turn a list of backtraces into a tree (implicitly separated by NULL markers) function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol, threads::Union{Int,AbstractVector{Int},Nothing}=nothing, tasks::Union{UInt,AbstractVector{UInt},Nothing}=nothing) where {T} + !isempty(all) && !has_meta(all) && error("Profile data is missing required metadata") parent = root tops = Vector{StackFrameTree{T}}() build = Vector{StackFrameTree{T}}() @@ -839,10 +854,10 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI ip = all[i] if is_block_end(all, i) # read metadata - thread_sleeping = all[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0 - # cpu_cycle_clock = all[i - 3] - taskid = all[i - 4] - threadid = all[i - 5] + thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK] + taskid = all[i - META_OFFSET_TASKID] + threadid = all[i - META_OFFSET_THREADID] if (threads !== nothing && !in(threadid, threads)) || (tasks !== nothing && !in(taskid, tasks)) skip = true diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 092372358e07f..ac7c8baefe09e 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -9,7 +9,7 @@ Profile.init() let iobuf = IOBuffer() for fmt in (:tree, :flat) Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, format=fmt, C=true) - Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, [0x0000000000000001], Dict(0x0000000000000001 => [Base.StackTraces.UNKNOWN]), format=fmt, C=false) + Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, Profile.add_fake_meta([0x0000000000000001, 0x0000000000000000]), Dict(0x0000000000000001 => [Base.StackTraces.UNKNOWN]), format=fmt, C=false) end end @@ -75,8 +75,8 @@ end end @testset "Profile.fetch() with and without meta" begin - data_without = Profile.fetch() - data_with = Profile.fetch(include_meta = true) + data_without = Profile.fetch(include_meta = false) + data_with = Profile.fetch() @test data_without[1] == data_with[1] @test data_without[end] == data_with[end] nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with))