Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements #253

Draft
wants to merge 24 commits into
base: ale/3.0
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2e1e46d
Output parent lists and check parents after rebuilding
gkronber Oct 10, 2024
1301422
Path splitting procedure to shorten path length with find call.
gkronber Oct 10, 2024
0704eb1
Merge branch 'fix_broken_cas_tests' into performance_improvements
gkronber Oct 10, 2024
500e25e
Store original e-nodes in egraph and keep only e-node ids in parent l…
gkronber Oct 10, 2024
ac4829c
Revert changes to pretty_dict output.
gkronber Oct 10, 2024
6341b3b
Change lookup in classes_by_op dictionary to prevent allocation of a …
gkronber Oct 10, 2024
9fb097c
Fixed implementation of iterate for optbuffer (currently only affecte…
gkronber Oct 10, 2024
80a4696
Fix compile error.
gkronber Oct 10, 2024
68d40d1
Find of eclass_id for enode_id is not necessary here
gkronber Oct 10, 2024
013253e
Add some test assertions for internal datastructures used for egraph …
gkronber Oct 12, 2024
4e6bc9f
Merge branch 'ale/3.0' into performance_improvements
gkronber Oct 12, 2024
9db374d
Set root to allow debugging (requires extraction)
gkronber Oct 13, 2024
3ac0718
isless for VecExpr to allow sorting.
gkronber Oct 13, 2024
90719d0
Check most specific constants first.
gkronber Oct 13, 2024
6ddffa4
Comment and removed unnecessary parentheses.
gkronber Oct 13, 2024
3c07d51
Fixes for constant matching from different PR
gkronber Oct 13, 2024
97c272b
Allow to set SaturationParams for simplify for testing, and mark two …
gkronber Oct 13, 2024
917f3fe
Small fixes.
gkronber Oct 13, 2024
e6f582c
Correct test cases for rebuilding
gkronber Oct 13, 2024
50d6dfd
Complete overhaul of rebuilding mechanism.
gkronber Oct 13, 2024
ee1862e
Fixes, moving forward...
gkronber Oct 14, 2024
348dd62
Bugfix in analysis rebuilding.
gkronber Oct 14, 2024
4d26e3c
Minor changes.
gkronber Oct 14, 2024
0d203a8
Remove nodes vector from egraph and clean-up code.
gkronber Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/prove.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ function test_equality(t, exprs...; params = SaturationParams(), g = EGraph())
params = deepcopy(params)
params.goal = (g::EGraph) -> in_same_class(g, ids...)

g.root = first(ids) # to allow extraction (for debugging)
report = saturate!(g, t, params)
goal_reached = params.goal(g)

if !(report.reason === :saturated) && !goal_reached
return false # failed to prove
end
Expand Down
186 changes: 118 additions & 68 deletions src/EGraphs/egraph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ The children and parent nodes are stored as [`VecExpr`](@ref)s for performance,
means that without a reference to the [`EGraph`](@ref) object we cannot re-build human-readable terms
they represent. The [`EGraph`](@ref) itself comes with pretty printing for human-readable terms.
"""
struct EClass{D}
id::Id
nodes::Vector{VecExpr}
parents::Vector{Id} # The original Ids of parent enodes.
mutable struct EClass{D}
const id::Id
const nodes::Vector{VecExpr}
parents::Vector{Pair{VecExpr,Id}} # the parent nodes and eclasses for upward merging
data::Union{D,Nothing}
end

Expand Down Expand Up @@ -115,15 +115,14 @@ mutable struct EGraph{ExpressionType,Analysis}
uf::UnionFind
"map from eclass id to eclasses"
classes::Dict{IdKey,EClass{Analysis}}
"vector of the original e-nodes"
nodes::Vector{VecExpr}
"hashcons mapping e-nodes to their e-class id"
memo::Dict{VecExpr,Id}
"Hashcons the constants in the e-graph"
constants::Dict{UInt64,Any}
"Nodes which need to be processed for rebuilding. The id is the id of the e-node, not the canonical id of the e-class."
"E-classes whose parent nodes have to be reprocessed."
pending::Vector{Id}
analysis_pending::UniqueQueue{Id}
"E-classes whose parent nodes have to be reprocessed for analysis values."
analysis_pending::Vector{Id}
root::Id
"a cache mapping signatures (function symbols and their arity) to e-classes that contain e-nodes with that function symbol."
classes_by_op::Dict{IdKey,Vector{Id}}
Expand All @@ -142,11 +141,10 @@ function EGraph{ExpressionType,Analysis}(; needslock::Bool = false) where {Expre
EGraph{ExpressionType,Analysis}(
UnionFind(),
Dict{IdKey,EClass{Analysis}}(),
Vector{VecExpr}(),
Dict{VecExpr,Id}(),
Dict{UInt64,Any}(),
Id[],
UniqueQueue{Id}(),
Vector{Id}(),
Vector{Id}(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why no more UniqueQueue?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed anymore.

Reasoning:

  • We need the nodes and eclass ids in the pending / analysis_pending vectors for rebuilding.
  • Nodes may be canonicalized (while they are contained in the pending collections)
  • UniqueQueue detects duplicates only when adding new entries, and it would not remove duplicates of nodes after canonicalization.
  • the rewrite of the rebuilding code removes duplicates in the pending and analysis_pending vectors after each iteration instead.

0,
Dict{IdKey,Vector{Id}}(),
false,
Expand Down Expand Up @@ -197,9 +195,10 @@ function to_expr(g::EGraph, n::VecExpr)
end

function pretty_dict(g::EGraph)
d = Dict{Int,Vector{Any}}()
d = Dict{Int,Tuple{Vector{Any},Vector{Any}}}()
for (class_id, eclass) in g.classes
d[class_id.val] = (map(n -> to_expr(g, n), eclass.nodes))
# TODO do not show parent lists anymore (but useful for debugging)
d[class_id.val] = (map(n -> to_expr(g, n), eclass.nodes), map(pair -> to_expr(g, pair[1]) => Int(pair[2]), eclass.parents))
end
d
end
Expand All @@ -208,8 +207,8 @@ export pretty_dict
function Base.show(io::IO, g::EGraph)
d = pretty_dict(g)
t = "$(typeof(g)) with $(length(d)) e-classes:"
cs = map(sort!(collect(d); by = first)) do (k, nodes)
" $k => [$(Base.join(nodes, ", "))]"
cs = map(sort!(collect(d); by = first)) do (k, (nodes, parents))
" $k => [$(Base.join(nodes, ", "))] parents: [$(Base.join(parents, ", "))]"
end
print(io, Base.join([t; cs], "\n"))
end
Expand Down Expand Up @@ -238,17 +237,13 @@ function lookup(g::EGraph, n::VecExpr)::Id
canonicalize!(g, n)

id = get(g.memo, n, zero(Id))
iszero(id) ? id : find(g, id)
iszero(id) ? id : find(g, id) # find necessary because g.memo values are not necessarily canonical
end


function add_class_by_op(g::EGraph, n, eclass_id)
key = IdKey(v_signature(n))
vec = get(g.classes_by_op, key, nothing)
if isnothing(vec)
vec = Id[eclass_id]
g.classes_by_op[key] = vec
end
vec = get!(Vector{Id}, g.classes_by_op, key)
push!(vec, eclass_id)
end

Expand All @@ -266,21 +261,24 @@ function add!(g::EGraph{ExpressionType,Analysis}, n::VecExpr, should_copy::Bool)
end

id = push!(g.uf) # create new singleton eclass
push!(g.nodes, n)

# eclass.nodes, eclass.parents, and g.memo all have a reference to the same VecExpr for the new enode.
# The node must never be manipulated while it is contained in memo.

if v_isexpr(n)
for c_id in v_children(n)
push!(g.classes[IdKey(c_id)].parents, id)
push!(g.classes[IdKey(c_id)].parents, n => id)
end
end

g.memo[n] = id

add_class_by_op(g, n, id)
eclass = EClass{Analysis}(id, VecExpr[n], Id[], make(g, n)) # TODO: check do we need to copy n for the nodes vector here?
eclass = EClass{Analysis}(id, VecExpr[n], Id[], make(g, n))
g.classes[IdKey(id)] = eclass
modify!(g, eclass)
push!(g.pending, id)

# push!(g.pending, id) # We just created a new eclass for a new node. No need to reprocess parents

return id
end
Expand All @@ -297,14 +295,15 @@ function preprocess(e::Expr)
end
preprocess(x) = x

addexpr!(::EGraph, se::EClass) = se.id # TODO: why do we need this?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's for dynamic rules f(a,b) => a. This will also work when a is an EClass


"""
Recursively traverse an type satisfying the `TermInterface` and insert terms into an
[`EGraph`](@ref). If `e` has no children (has an arity of 0) then directly
insert the literal into the [`EGraph`](@ref).
"""
function addexpr!(g::EGraph, se)::Id
se isa EClass && return se.id
e = preprocess(se)
e = preprocess(se) # TODO: type stability issue?

isexpr(e) || return add!(g, VecExpr(Id[Id(0), Id(0), Id(0), add_constant!(g, e)]), false)

Expand Down Expand Up @@ -345,16 +344,17 @@ function Base.union!(
id_1, id_2 = id_2, id_1
end

union!(g.uf, id_1.val, id_2.val)
merged_id = union!(g.uf, id_1.val, id_2.val)

eclass_2 = pop!(g.classes, id_2)::EClass
eclass_1 = g.classes[id_1]::EClass

append!(g.pending, eclass_2.parents)
# push!(g.pending, merged_id)
push!(g.pending, id_2.val) # TODO: it seems sufficient to queue parents of id_2.val?

(merged_1, merged_2, new_data) = merge_analysis_data!(eclass_1, eclass_2)
merged_1 && append!(g.analysis_pending, eclass_1.parents)
merged_2 && append!(g.analysis_pending, eclass_2.parents)
merged_1 && push!(g.analysis_pending, id_1.val)
merged_2 && push!(g.analysis_pending, id_2.val)


# update eclass_1
Expand Down Expand Up @@ -384,13 +384,12 @@ function rebuild_classes!(g::EGraph)
empty!(v)
end

trimmed_nodes = 0
for (eclass_id, eclass) in g.classes
# old_len = length(eclass.nodes)
for n in eclass.nodes
canonicalize!(g, n)
end
# Sort to go in order?
trimmed_nodes += length(eclass.nodes)
# TODO Sort to go in order?
unique!(eclass.nodes)
trimmed_nodes -= length(eclass.nodes)

for n in eclass.nodes
add_class_by_op(g, n, eclass_id.val)
Expand All @@ -399,53 +398,103 @@ function rebuild_classes!(g::EGraph)

for v in values(g.classes_by_op)
sort!(v)
unique!(v)
unique!(v) # TODO: _groupedunique!(itr), and implement isless(a::VecExpr, b::VecExpr) if it has an performance advantage
end
trimmed_nodes
end

function process_unions!(g::EGraph{ExpressionType,AnalysisType})::Int where {ExpressionType,AnalysisType}
n_unions = 0

# This is close to the pseudo-code in the egg paper.
# We separate the worklist into two lists for repair and update of semantic analysis values.
# The upwards update of semantic analysis values may require visiting fewer eclasses.
while !isempty(g.pending) || !isempty(g.analysis_pending)
while !isempty(g.pending)
enode_id = pop!(g.pending)
node = copy(g.nodes[enode_id])
canonicalize!(g, node)
memo_class = get!(g.memo, node, enode_id)
if memo_class != enode_id
did_something = union!(g, memo_class, enode_id)
# TODO unique! can node dedup be moved here? compare performance
# did_something && unique!(g[eclass_id].nodes)
n_unions += did_something
todo = collect(unique(id -> find(g, id), g.pending))
@debug "Worklist reduced from $(length(g.pending)) to $(length(todo)) entries."
empty!(g.pending)

for id in todo
n_unions += repair_parents!(g, id)
end

todo = collect(unique(id -> find(g, id), g.analysis_pending))
@debug "Analysis worklist reduced from $(length(g.analysis_pending)) to $(length(todo)) entries."
empty!(g.analysis_pending)
for id in todo
update_analysis_upwards!(g, id)
end
end
n_unions
end

function repair_parents!(g::EGraph, id::Id)
n_unions = 0
eclass = g[id] # id does not have to be an eclass id anymore if we merged classes below
for (p_node, _) in eclass.parents
memo_class = pop!(g.memo, p_node, 0)
# memo_class = get(g.memo, p_node, 0) # TODO: could we be messy instead and just canonicalize the node and add again (without pop!)?

# only canonicalize node and update in memo if the node still exists
if memo_class > 0
canonicalize!(g, p_node)
g.memo[p_node] = find(g, memo_class)
end
end

# sort and collect unique nodes in new parents list (merging eclasses when finding duplicate nodes)
if !isempty(eclass.parents)
new_parents = Vector{Pair{VecExpr,Id}}()
sort!(eclass.parents, by=pair->pair[1])
(prev_node, prev_id) = first(eclass.parents)

# TODO double check whether we need canonical eclass ids in parents list (find is called in rebuild above anyway)
push!(new_parents, prev_node => find(g, prev_id))

for i in Iterators.drop(eachindex(eclass.parents), 1)
(cur_node, cur_id) = eclass.parents[i]

if hash(cur_node) == hash(prev_node) && cur_node == prev_node
if union!(g, cur_id, prev_id)
n_unions += 1
end
else
push!(new_parents, cur_node => find(g, cur_id)) # find not necessary?
prev_node, prev_id = cur_node, cur_id
end
end

# TODO: remove assertions
# @assert length(unique(pair -> pair[1], new_parents)) == length(new_parents) "not unique: $new_parents"
# @assert all(pair -> pair[2] == find(g, pair[2]), new_parents) "not refering to eclasses: $(new_parents)\n $g"

eclass.parents = new_parents
end
n_unions
end

function update_analysis_upwards!(g::EGraph, id::Id)
for (p_node, p_id) in g[id].parents
p_id = find(g, p_id)
eclass = g.classes[IdKey(p_id)]

while !isempty(g.analysis_pending)
enode_id = pop!(g.analysis_pending)
node = g.nodes[enode_id]
eclass_id = find(g, enode_id)
eclass_id_key = IdKey(eclass_id)
eclass = g.classes[eclass_id_key]

node_data = make(g, node)
if !isnothing(node_data)
if !isnothing(eclass.data)
joined_data = join(eclass.data, node_data)

if joined_data != eclass.data
eclass.data = joined_data
modify!(g, eclass)
append!(g.analysis_pending, eclass.parents)
end
else
eclass.data = node_data
node_data = make(g, p_node)
if !isnothing(node_data)
if !isnothing(eclass.data)
joined_data = join(eclass.data, node_data)

if joined_data != eclass.data
eclass.data = joined_data
modify!(g, eclass)
append!(g.analysis_pending, eclass.parents)
append!(g.analysis_pending, p_id)
end
else
eclass.data = node_data
modify!(g, eclass)
append!(g.analysis_pending, p_id)
end
end
end
n_unions
end

function check_parents(g::EGraph)::Bool
Expand Down Expand Up @@ -512,6 +561,7 @@ for more details.
function rebuild!(g::EGraph; should_check_parents=false, should_check_memo=false, should_check_analysis=false)
n_unions = process_unions!(g)
trimmed_nodes = rebuild_classes!(g)

@assert !should_check_parents || check_parents(g)
@assert !should_check_memo || check_memo(g)
@assert !should_check_analysis || check_analysis(g)
Expand Down
6 changes: 3 additions & 3 deletions src/EGraphs/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ function find_costs!(extractor::Extractor{CF,CT}) where {CF,CT}
end

for (id, _) in extractor.g.classes
if !haskey(extractor.costs, id)
error("failed to compute extraction costs for eclass ", id.val)
end
# if !haskey(extractor.costs, id)
# error("failed to compute extraction costs for eclass ", id.val)
# end
end
end

Expand Down
Loading
Loading