From beb29b3e095fd6c56e7a7afc2e2f155b33af1b9e Mon Sep 17 00:00:00 2001 From: Adrian Lehmann Date: Tue, 2 Jul 2024 17:47:17 -0500 Subject: [PATCH 1/2] Add normal form, leader rewriting and fix equiv proof not have congruence --- src/EGraphs/EGraphs.jl | 3 ++ src/EGraphs/exprproof.jl | 61 ++++++++++++++++++++++++++++++++++++++++ src/EGraphs/proof.jl | 36 ++++++++++++++++++++++-- test/egraphs/proof.jl | 34 ++++++++++++++++++++-- 4 files changed, 128 insertions(+), 6 deletions(-) create mode 100644 src/EGraphs/exprproof.jl diff --git a/src/EGraphs/EGraphs.jl b/src/EGraphs/EGraphs.jl index b9e2a1bb..3a68100c 100644 --- a/src/EGraphs/EGraphs.jl +++ b/src/EGraphs/EGraphs.jl @@ -32,4 +32,7 @@ using .Schedulers include("saturation.jl") export SaturationParams, saturate! +include("exprproof.jl") +export PositionedProof, find_node_proof + end diff --git a/src/EGraphs/exprproof.jl b/src/EGraphs/exprproof.jl new file mode 100644 index 00000000..183f0f72 --- /dev/null +++ b/src/EGraphs/exprproof.jl @@ -0,0 +1,61 @@ +export PositionedProof, find_node_proof + + +mutable struct PositionedProof + """ + Positioned proof is a structure that keeps track of where we apply proofs to in larger expressions. + """ + proof::Vector{ProofNode} + children::Vector{PositionedProof} + # TODO: Track what is matched +end + +function find_node_proof(g::EGraph, node1::Id, node2::Id)::PositionedProof + # Proof search that can deal with expressions, too. + + # Idea: + + # Walk expr trees + + # For each node: + # If has flat proof, proof to leader + # Else, recursively unfold + + # If no proof found for subexpr, return nothing + + # Issues: how to relate expressions? + # Especially if different Size + # e.g. a*(b+c) = ab+bc (which is different size AST) + # bigger problem comes when a=z then z*(b+c) = ab+bc + + # So I guess the way we should go about it is go to base terms, rewrite to leader + + flat_proof = find_flat_proof(g.proof, node1, node2) + # If there is a basic proof, no need to construct something more complicated + # TODO: Profile if this kills performance + if length(flat_proof) != 0 + return flat_proof + end + + # Idea: rewrite both sides to "normal forms" and concat + # TODO: This is definetely suboptimal and should be optimized + + + +end + +# +function rewrite_to_normal_form(g::EGraph, node::Id)::PositionedProof + # Start off by rewriting node to leader + lp = rewrite_to_leader(g.proof, node1) + leader = lp.leader + leader_proof = lp.proof + + expr = g.nodes[leader] + proof = PositionedProof(leader_proof, []) + + for (idx, child) in enumerate(v_children(expr)) + proof.children[idx] = rewrite_to_normal_form(g, child) + end + return PositionedProof +end \ No newline at end of file diff --git a/src/EGraphs/proof.jl b/src/EGraphs/proof.jl index 956d46fb..3e6d493a 100644 --- a/src/EGraphs/proof.jl +++ b/src/EGraphs/proof.jl @@ -1,4 +1,4 @@ -export ProofConnection, ProofNode, EGraphProof, find_flat_proof +export ProofConnection, ProofNode, EGraphProof, find_flat_proof, rewrite_to_leader mutable struct ProofConnection """ @@ -24,6 +24,7 @@ end mutable struct ProofNode + # TODO: Explain existence_node::Id # TODO is this the parent in the unionfind? parent_connection::ProofConnection @@ -79,6 +80,7 @@ function make_leader(proof::EGraphProof, node::Id)::Bool true end + function Base.union!(proof::EGraphProof, node1::Id, node2::Id, rule_idx::Int) # TODO maybe should have extra argument called `rhs_new` in egg that is true when called from # application of rules where the instantiation of the rhs creates new e-classes @@ -106,7 +108,11 @@ end @inline isroot(pn::ProofNode) = isroot(pn.parent_connection) @inline isroot(pc::ProofConnection) = pc.current === pc.next -function find_flat_proof(proof::EGraphProof, node1::Id, node2::Id) + + + + +function find_flat_proof(proof::EGraphProof, node1::Id, node2::Id)::Vector{ProofNode} # We're doing a lowest common ancestor search. # We cache the IDs we have seen seen_set = Set{Id}() @@ -117,6 +123,9 @@ function find_flat_proof(proof::EGraphProof, node1::Id, node2::Id) # No existence_node would ever have id 0 lca = UInt(0) curr = proof.explain_find[node1] + if (node1 == node2) + return [curr] + end # Walk up to the root while true @@ -155,4 +164,25 @@ function find_flat_proof(proof::EGraphProof, node1::Id, node2::Id) # TODO maybe reverse append!(ret, walk_from2) ret -end \ No newline at end of file +end + +struct LeaderProof + leader::Id + proof::Vector{ProofNode} +end + +function rewrite_to_leader(proof::EGraphProof, node::Id)::LeaderProof + # Returns the leader of e-class and a proof to transform node into said leader + curr_proof = proof.explain_find[node] + proofs = [] + final_id = node + if curr_proof.parent_connection.current == curr_proof.parent_connection.next + return LeaderProof(node, [curr_proof]) # Special case to report congruence + end + while curr_proof.parent_connection.current != curr_proof.parent_connection.next + push!(proofs, curr_proof) + final_id = curr_proof.parent_connection.next + curr_proof = proof.explain_find[curr_proof.parent_connection.next] + end + return LeaderProof(final_id, proofs) +end diff --git a/test/egraphs/proof.jl b/test/egraphs/proof.jl index 69c2144a..36dfc362 100644 --- a/test/egraphs/proof.jl +++ b/test/egraphs/proof.jl @@ -1,8 +1,12 @@ using Metatheory, Test +using Metatheory.Library + g = EGraph(; proof = true) id_a = addexpr!(g, :a) +println(find_flat_proof(g.proof, id_a, id_a)) +@test length(find_flat_proof(g.proof, id_a, id_a)) == 1 # print_proof(g) @@ -31,10 +35,34 @@ id_d = addexpr!(g, :d) union!(g, id_a, id_d, 3) print_proof(g) - +println(find_flat_proof(g.proof, id_c, id_d)) # Takes 4 steps -@test length(find_flat_proof(g.proof, id_a, id_d)) == 4 +@test length(find_flat_proof(g.proof, id_c, id_d)) == 3 + +# TODO: Why doesn't d have a its leader +for id in [id_a, id_b, id_c, id_d] + leader = rewrite_to_leader(g.proof, id) + @test leader.leader == id_d + @test length(leader.proof) == length(find_flat_proof(g.proof, id, id_a)) +end + id_e = addexpr!(g, :e) -@test isempty(find_flat_proof(g.proof, id_a, id_e)) \ No newline at end of file +@test isempty(find_flat_proof(g.proof, id_a, id_e)) + +comm_monoid = @commutative_monoid (*) 1 + +fold_mul = @theory begin + ~a::Number * ~b::Number => ~a * ~b +end + +ex = :(a * 4) +id_ex = addexpr!(g, ex) +ex_to = :(e * 4) +id_ex_to = addexpr!(g, ex_to) +print_proof(g) + +println(find_node_proof(g, id_ex, id_ex_to)) # Current challenge + + From 0a33fbbd981611de1e4612b523e5020e0c165000 Mon Sep 17 00:00:00 2001 From: Adrian Lehmann Date: Wed, 3 Jul 2024 14:59:55 -0500 Subject: [PATCH 2/2] Provide node proofs --- src/EGraphs/exprproof.jl | 43 ++++++++++++++++++++++++---------------- src/EGraphs/proof.jl | 8 ++++---- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/EGraphs/exprproof.jl b/src/EGraphs/exprproof.jl index 183f0f72..a9efad4f 100644 --- a/src/EGraphs/exprproof.jl +++ b/src/EGraphs/exprproof.jl @@ -10,7 +10,7 @@ mutable struct PositionedProof # TODO: Track what is matched end -function find_node_proof(g::EGraph, node1::Id, node2::Id)::PositionedProof +function find_node_proof(g::EGraph, node1::Id, node2::Id)::Union{Tuple{PositionedProof, PositionedProof}, Nothing} # Proof search that can deal with expressions, too. # Idea: @@ -20,7 +20,7 @@ function find_node_proof(g::EGraph, node1::Id, node2::Id)::PositionedProof # For each node: # If has flat proof, proof to leader # Else, recursively unfold - + # If no proof found for subexpr, return nothing # Issues: how to relate expressions? @@ -39,23 +39,32 @@ function find_node_proof(g::EGraph, node1::Id, node2::Id)::PositionedProof # Idea: rewrite both sides to "normal forms" and concat # TODO: This is definetely suboptimal and should be optimized - + leader1, nfproof1 = rewrite_to_normal_form(g, node1) + leader2, nfproof2 = rewrite_to_normal_form(g, node2) + println(g.nodes[leader1]) + println(g.nodes[leader2]) + println(nfproof1) + println(nfproof2) + leader1 != leader2 && return nothing + return (nfproof1, nfproof2) + - end # -function rewrite_to_normal_form(g::EGraph, node::Id)::PositionedProof - # Start off by rewriting node to leader - lp = rewrite_to_leader(g.proof, node1) - leader = lp.leader - leader_proof = lp.proof - - expr = g.nodes[leader] - proof = PositionedProof(leader_proof, []) - - for (idx, child) in enumerate(v_children(expr)) - proof.children[idx] = rewrite_to_normal_form(g, child) - end - return PositionedProof +function rewrite_to_normal_form(g::EGraph, node::Id)::Tuple{Id,PositionedProof} + # Start off by rewriting node to leader + lp = rewrite_to_leader(g.proof, node) + leader = lp.leader + leader_proof = lp.proof + + expr = g.nodes[leader] + proof = PositionedProof(leader_proof, []) + sizehint!(proof.children, v_arity(expr)) + # Do we want to do this before or after tthe leader proof? + for child in v_children(expr) + _, child_proof = rewrite_to_normal_form(g, child) + push!(proof.children, child_proof) + end + return (leader, proof) end \ No newline at end of file diff --git a/src/EGraphs/proof.jl b/src/EGraphs/proof.jl index 3e6d493a..68381456 100644 --- a/src/EGraphs/proof.jl +++ b/src/EGraphs/proof.jl @@ -123,7 +123,7 @@ function find_flat_proof(proof::EGraphProof, node1::Id, node2::Id)::Vector{Proof # No existence_node would ever have id 0 lca = UInt(0) curr = proof.explain_find[node1] - if (node1 == node2) + if (node1 == node2) return [curr] end @@ -169,18 +169,18 @@ end struct LeaderProof leader::Id proof::Vector{ProofNode} -end +end function rewrite_to_leader(proof::EGraphProof, node::Id)::LeaderProof # Returns the leader of e-class and a proof to transform node into said leader - curr_proof = proof.explain_find[node] + curr_proof = proof.explain_find[node] proofs = [] final_id = node if curr_proof.parent_connection.current == curr_proof.parent_connection.next return LeaderProof(node, [curr_proof]) # Special case to report congruence end while curr_proof.parent_connection.current != curr_proof.parent_connection.next - push!(proofs, curr_proof) + push!(proofs, curr_proof) final_id = curr_proof.parent_connection.next curr_proof = proof.explain_find[curr_proof.parent_connection.next] end