Skip to content

Commit

Permalink
Make InferenceTimingNode bidirectional (#257)
Browse files Browse the repository at this point in the history
It's often convenient to be able to easily find the parent.
  • Loading branch information
timholy authored Aug 17, 2021
1 parent a75d418 commit eeb693d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
43 changes: 40 additions & 3 deletions SnoopCompileCore/src/snoopi_deep.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,53 @@ struct InferenceTimingNode
start_time::Float64
children::Vector{InferenceTimingNode}
bt
parent::InferenceTimingNode

# Root constructor: only the first four fields are passed to `new`, so `parent`
# is left unassigned on the resulting node.
function InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt))
    return new(mi_timing, start_time, InferenceTimingNode[], bt)
end
# Child constructor: creates a node pointing at `parent` and registers the new
# node in `parent.children`, so the link can be followed in both directions.
function InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt), parent::InferenceTimingNode)
    node = new(mi_timing, start_time, InferenceTimingNode[], bt, parent)
    push!(parent.children, node)
    return node
end
end
# Accessors that forward to the `InferenceTiming` stored in the node's `mi_timing` field.
function inclusive(node::InferenceTimingNode)
    return inclusive(node.mi_timing)
end

function exclusive(node::InferenceTimingNode)
    return exclusive(node.mi_timing)
end

# Extract the stored `InferenceTiming` from a node.
function InferenceTiming(node::InferenceTimingNode)
    return node.mi_timing
end

# Build a bidirectional `InferenceTimingNode` tree from a raw `Core.Compiler.Timings.Timing`.
#
# The work is done in two passes:
#   1. `timingtree(t)` produces a temporary tree of `(InferenceTiming, start_time, children)`
#      tuples.
#   2. The root node is created first, then `addchildren!` walks `t` and the temporary tree
#      in lockstep, creating each child with a reference to its already-existing parent.
#
# Returns the root node (its `parent` field is left unassigned).
function InferenceTimingNode(t::Core.Compiler.Timings.Timing)
    ttree = timingtree(t)
    # The temporary tree stores its children in an untyped vector, hence the explicit assertion.
    it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}}
    root = InferenceTimingNode(it, start_time, t.bt)
    addchildren!(root, t, ttree_children)
    return root
end

# Compute inclusive times and store as a temporary tree.
# To allow InferenceTimingNode to be both bidirectional and immutable, we need to create the parent node before the child nodes.
# However, each node stores its inclusive time, which can only be computed efficiently from the leaves up (children before parents).
# This performs the inclusive-time computation, storing the result as a "temporary tree" that can be used during
# InferenceTimingNode creation (see `addchildren!`).
function timingtree(t::Core.Compiler.Timings.Timing)
    # Returns a tuple `(InferenceTiming, start_time, tchildren)`, where `tchildren` holds the
    # same kind of tuple for each entry of `t.children`, in the same order.
    # Raw values are scaled by 10^9 (presumably nanoseconds -> seconds; confirm against the
    # producer of `Timing`).
    time, start_time = t.time/10^9, t.start_time/10^9
    # Inclusive time = this node's own time plus the inclusive time of every child subtree.
    incl_time = time
    # Must stay a `Vector{Any}`: consumers type-assert the returned tuple with that element type.
    tchildren = []
    for child in t.children
        tchild = timingtree(child)
        push!(tchildren, tchild)
        # `tchild[1]` is the child's `InferenceTiming`, which already carries its inclusive time.
        incl_time += inclusive(tchild[1])
    end
    return (InferenceTiming(t.mi_info, incl_time, time), start_time, tchildren)
end

# Recursively attach to `parent` one node per entry of `t.children`, pairing each raw child
# with the matching entry of `ttrees` (the temporary `(timing, start_time, children)` tuples).
# Each new node registers itself in `parent.children` via the child constructor.
function addchildren!(parent::InferenceTimingNode, t::Core.Compiler.Timings.Timing, ttrees)
    for (rawchild, entry) in zip(t.children, ttrees)
        subtree = entry::Tuple{InferenceTiming, Float64, Vector{Any}}
        timing, t0, grandtrees = subtree
        childnode = InferenceTimingNode(timing, t0, rawchild.bt, parent)
        addchildren!(childnode, rawchild, grandtrees)
    end
    return nothing
end

function start_deep_timing()
Expand Down
2 changes: 2 additions & 0 deletions src/parcel_snoopi_deep.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ isROOT(m::Method) = m === Core.Compiler.Timings.ROOTmi.def
# Forwarding methods: reduce the argument to something another `isROOT` method handles.
function isROOT(mi_info::InferenceNode)
    return isROOT(MethodInstance(mi_info))
end

function isROOT(node::InferenceTimingNode)
    return isROOT(node.mi_timing)
end

# Follow `parent` links upward from `node` and return the highest ancestor whose own parent
# has no `parent` assigned, i.e. the node sitting directly below the parentless top of the tree.
# A node whose `parent` field is itself unassigned is returned unchanged instead of raising
# an `UndefRefError` on the `node.parent` access.
function getroot(node::InferenceTimingNode)
    isdefined(node, :parent) || return node
    while isdefined(node.parent, :parent)
        node = node.parent
    end
    return node
end

# Record instruction pointers we've already looked up (performance optimization).
# Keys are either a `UInt` or a `Core.Compiler.InterpreterIP`; each value is the vector of
# `StackTraces.StackFrame`s stored for that key.
const lookups = Dict{Union{UInt, Core.Compiler.InterpreterIP}, Vector{StackTraces.StackFrame}}()
# Generic fallback: the value is returned unchanged for use as a key into `lookups`.
lookups_key(ip) = ip
Expand Down
3 changes: 3 additions & 0 deletions test/snoopi_deep.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ hasconstpropnumber(mi_info::Core.Compiler.Timings.InferenceFrameInfo) = any(t ->
end
@test SnoopCompile.isROOT(Core.MethodInstance(tinf))
@test SnoopCompile.isROOT(Method(tinf))
child = tinf.children[1]
@test SnoopCompile.getroot(child.children[1]) == child
@test SnoopCompile.getroot(child.children[1].children[1].children[1]) == child
@test isempty(staleinstances(tinf))
frames = filter(!hasconstpropnumber, flatten(tinf))
@test length(frames) == 7 # ROOT, g(::Int), g(::Bool), h(...), i(::Integer), i(::Int), i(::Bool)
Expand Down

0 comments on commit eeb693d

Please sign in to comment.