From b59cb348382d4893ada8ef871a57bd969331a17f Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 11:46:15 +0000
Subject: [PATCH 01/13] Update the current_witness_index after optimisation

---
 .../acvm/src/compiler/transformers/mod.rs     | 269 +++++++++++++++++-
 1 file changed, 264 insertions(+), 5 deletions(-)
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index c9ce4ac7895..c92b0516431 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -1,5 +1,12 @@
+use std::collections::BTreeSet;
+
 use acir::{
-    circuit::{brillig::BrilligOutputs, Circuit, ExpressionWidth, Opcode},
+    circuit::{
+        self,
+        brillig::{BrilligInputs, BrilligOutputs},
+        opcodes::{BlackBoxFuncCall, FunctionInput, MemOp},
+        Circuit, ExpressionWidth, Opcode,
+    },
     native_types::{Expression, Witness},
     AcirField,
 };
@@ -79,8 +86,6 @@ pub(super) fn transform_internal<F: AcirField>(
                     &mut next_witness_index,
                 );
 
-                // Update next_witness counter
-                next_witness_index += (intermediate_variables.len() - len) as u32;
                 let mut new_opcodes = Vec::new();
                 for (g, (norm, w)) in intermediate_variables.iter().skip(len) {
                     // de-normalize
@@ -160,13 +165,267 @@ pub(super) fn transform_internal<F: AcirField>(
     let mut merge_optimizer = MergeExpressionsOptimizer::new();
     let (opcodes, new_acir_opcode_positions) =
         merge_optimizer.eliminate_intermediate_variable(&acir, new_acir_opcode_positions);
-    // n.b. we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
-    let acir = Circuit {
+
+    // n.b. if we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
+    let mut acir = Circuit {
         current_witness_index,
         expression_width,
         opcodes,
         // The optimizer does not add new public inputs
         ..acir
     };
+
+    // After the elimination of intermediate variables the `current_witness_index` is potentially higher than it needs to be,
+    // which would cause gaps if we ran the optimization a second time, making it look like new variables were added.
+    // Here we figure out what is the final state of witnesses by visiting each opcode.
+    let witnesses = WitnessCollector::collect_from_circuit(&acir);
+    if let Some(max_witness) = witnesses.last() {
+        acir.current_witness_index = max_witness.0;
+    }
+
     (acir, new_acir_opcode_positions)
 }
+
+/// Collect all witnesses in a circuit.
+#[derive(Default, Clone, Debug)]
+struct WitnessCollector {
+    witnesses: BTreeSet<Witness>,
+}
+
+impl WitnessCollector {
+    /// Collect all witnesses in a circuit.
+    fn collect_from_circuit<F: AcirField>(circuit: &Circuit<F>) -> BTreeSet<Witness> {
+        let mut collector = Self::default();
+        collector.extend_from_circuit(circuit);
+        collector.witnesses
+    }
+
+    fn add(&mut self, witness: Witness) {
+        self.witnesses.insert(witness);
+    }
+
+    fn add_many(&mut self, witnesses: &[Witness]) {
+        self.witnesses.extend(witnesses);
+    }
+
+    /// Add all witnesses from the circuit.
+    fn extend_from_circuit<F: AcirField>(&mut self, circuit: &Circuit<F>) {
+        self.witnesses.extend(&circuit.private_parameters);
+        self.witnesses.extend(&circuit.public_parameters.0);
+        self.witnesses.extend(&circuit.return_values.0);
+        for opcode in &circuit.opcodes {
+            self.extend_from_opcode(opcode);
+        }
+    }
+
+    /// Add witnesses from the opcode.
+    fn extend_from_opcode<F: AcirField>(&mut self, opcode: &Opcode<F>) {
+        match opcode {
+            Opcode::AssertZero(expr) => {
+                self.extend_from_expr(expr);
+            }
+            Opcode::BlackBoxFuncCall(call) => self.extend_from_blackbox(call),
+            Opcode::MemoryOp { block_id: _, op, predicate } => {
+                let MemOp { operation, index, value } = op;
+                self.extend_from_expr(operation);
+                self.extend_from_expr(index);
+                self.extend_from_expr(value);
+                if let Some(pred) = predicate {
+                    self.extend_from_expr(pred);
+                }
+            }
+            Opcode::MemoryInit { block_id: _, init, block_type: _ } => {
+                for w in init {
+                    self.add(*w);
+                }
+            }
+            // We keep the display for a BrilligCall and circuit Call separate as they
+            // are distinct in their functionality and we should maintain this separation for debugging.
+            Opcode::BrilligCall { id: _, inputs, outputs, predicate } => {
+                if let Some(pred) = predicate {
+                    self.extend_from_expr(pred);
+                }
+                self.extend_from_brillig_inputs(inputs);
+                self.extend_from_brillig_outputs(outputs);
+            }
+            Opcode::Call { id: _, inputs, outputs, predicate } => {
+                if let Some(pred) = predicate {
+                    self.extend_from_expr(pred);
+                }
+                self.add_many(inputs);
+                self.add_many(outputs);
+            }
+        }
+    }
+
+    fn extend_from_expr<F: AcirField>(&mut self, expr: &Expression<F>) {
+        for i in &expr.mul_terms {
+            self.add(i.1);
+            self.add(i.2);
+        }
+        for i in &expr.linear_combinations {
+            self.add(i.1);
+        }
+    }
+
+    fn extend_from_brillig_inputs<F: AcirField>(&mut self, inputs: &[BrilligInputs<F>]) {
+        for input in inputs {
+            match input {
+                BrilligInputs::Single(expr) => {
+                    self.extend_from_expr(expr);
+                }
+                BrilligInputs::Array(exprs) => {
+                    for expr in exprs {
+                        self.extend_from_expr(expr);
+                    }
+                }
+                BrilligInputs::MemoryArray(_) => {}
+            }
+        }
+    }
+
+    fn extend_from_brillig_outputs(&mut self, outputs: &[BrilligOutputs]) {
+        for output in outputs {
+            match output {
+                BrilligOutputs::Simple(w) => {
+                    self.add(*w);
+                }
+                BrilligOutputs::Array(ws) => self.add_many(ws),
+            }
+        }
+    }
+
+    fn extend_from_blackbox<F: AcirField>(&mut self, call: &BlackBoxFuncCall<F>) {
+        match call {
+            BlackBoxFuncCall::AES128Encrypt { inputs, iv, key, outputs } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+                self.extend_from_function_inputs(iv.as_slice());
+                self.extend_from_function_inputs(key.as_slice());
+                self.add_many(outputs);
+            }
+            BlackBoxFuncCall::AND { lhs, rhs, output } => {
+                self.extend_from_function_input(lhs);
+                self.extend_from_function_input(rhs);
+                self.add(*output);
+            }
+            BlackBoxFuncCall::XOR { lhs, rhs, output } => {
+                self.extend_from_function_input(lhs);
+                self.extend_from_function_input(rhs);
+                self.add(*output);
+            }
+            BlackBoxFuncCall::RANGE { input } => {
+                self.extend_from_function_input(input);
+            }
+            BlackBoxFuncCall::Blake2s { inputs, outputs } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+                self.add_many(outputs.as_slice());
+            }
+            BlackBoxFuncCall::Blake3 { inputs, outputs } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+                self.add_many(outputs.as_slice());
+            }
+            BlackBoxFuncCall::SchnorrVerify {
+                public_key_x,
+                public_key_y,
+                signature,
+                message,
+                output,
+            } => {
+                self.extend_from_function_input(public_key_x);
+                self.extend_from_function_input(public_key_y);
+                self.extend_from_function_inputs(signature.as_slice());
+                self.extend_from_function_inputs(message.as_slice());
+                self.add(*output);
+            }
+            BlackBoxFuncCall::EcdsaSecp256k1 {
+                public_key_x,
+                public_key_y,
+                signature,
+                hashed_message,
+                output,
+            } => {
+                self.extend_from_function_inputs(public_key_x.as_slice());
+                self.extend_from_function_inputs(public_key_y.as_slice());
+                self.extend_from_function_inputs(signature.as_slice());
+                self.extend_from_function_inputs(hashed_message.as_slice());
+                self.add(*output);
+            }
+            BlackBoxFuncCall::EcdsaSecp256r1 {
+                public_key_x,
+                public_key_y,
+                signature,
+                hashed_message,
+                output,
+            } => {
+                self.extend_from_function_inputs(public_key_x.as_slice());
+                self.extend_from_function_inputs(public_key_y.as_slice());
+                self.extend_from_function_inputs(signature.as_slice());
+                self.extend_from_function_inputs(hashed_message.as_slice());
+                self.add(*output);
+            }
+            BlackBoxFuncCall::MultiScalarMul { points, scalars, outputs } => {
+                self.extend_from_function_inputs(points.as_slice());
+                self.extend_from_function_inputs(scalars.as_slice());
+                let (x, y, i) = outputs;
+                self.add(*x);
+                self.add(*y);
+                self.add(*i);
+            }
+            BlackBoxFuncCall::EmbeddedCurveAdd { input1, input2, outputs } => {
+                self.extend_from_function_inputs(input1.as_slice());
+                self.extend_from_function_inputs(input2.as_slice());
+                let (x, y, i) = outputs;
+                self.add(*x);
+                self.add(*y);
+                self.add(*i);
+            }
+            BlackBoxFuncCall::Keccakf1600 { inputs, outputs } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+                self.add_many(outputs.as_slice());
+            }
+            BlackBoxFuncCall::RecursiveAggregation {
+                verification_key,
+                proof,
+                public_inputs,
+                key_hash,
+                proof_type: _,
+            } => {
+                self.extend_from_function_inputs(verification_key.as_slice());
+                self.extend_from_function_inputs(proof.as_slice());
+                self.extend_from_function_inputs(public_inputs.as_slice());
+                self.extend_from_function_input(key_hash);
+            }
+            BlackBoxFuncCall::BigIntAdd { .. }
+            | BlackBoxFuncCall::BigIntSub { .. }
+            | BlackBoxFuncCall::BigIntMul { .. }
+            | BlackBoxFuncCall::BigIntDiv { .. } => {}
+            BlackBoxFuncCall::BigIntFromLeBytes { inputs, modulus: _, output: _ } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+            }
+            BlackBoxFuncCall::BigIntToLeBytes { input: _, outputs } => {
+                self.add_many(outputs.as_slice());
+            }
+            BlackBoxFuncCall::Poseidon2Permutation { inputs, outputs, len: _ } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+                self.add_many(outputs.as_slice());
+            }
+            BlackBoxFuncCall::Sha256Compression { inputs, hash_values, outputs } => {
+                self.extend_from_function_inputs(inputs.as_slice());
+                self.extend_from_function_inputs(hash_values.as_slice());
+                self.add_many(outputs.as_slice());
+            }
+        }
+    }
+
+    fn extend_from_function_input<F: AcirField>(&mut self, input: &FunctionInput<F>) {
+        if let circuit::opcodes::ConstantOrWitnessEnum::Witness(witness) = input.input() {
+            self.add(witness);
+        }
+    }
+
+    fn extend_from_function_inputs<F: AcirField>(&mut self, inputs: &[FunctionInput<F>]) {
+        for input in inputs {
+            self.extend_from_function_input(input);
+        }
+    }
+}

From 02c53bde86f943a8b60fe54ee95186e0df8e2836 Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 12:14:54 +0000
Subject: [PATCH 02/13] Diff the debug output

---
 tooling/nargo_cli/src/cli/compile_cmd.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tooling/nargo_cli/src/cli/compile_cmd.rs b/tooling/nargo_cli/src/cli/compile_cmd.rs
index 3317cd34e85..f134374f89e 100644
--- a/tooling/nargo_cli/src/cli/compile_cmd.rs
+++ b/tooling/nargo_cli/src/cli/compile_cmd.rs
@@ -418,6 +418,12 @@ mod tests {
 
                 if verbose {
                     // Compare where the most likely difference is.
+                    similar_asserts::assert_eq!(
+                        format!("{}", program_1.program),
+                        format!("{}", program_2.program),
+                        "optimization not idempotent for test program '{}'",
+                        package.name
+                    );
                     assert_eq!(
                         program_1.program, program_2.program,
                         "optimization not idempotent for test program '{}'",

From 09fb7479ae7c0c6dfa79a13a7c294340e62f5b8c Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 15:22:24 +0000
Subject: [PATCH 03/13] Fold over the witnesses

---
 .../acvm/src/compiler/transformers/mod.rs     | 234 +++++++++---------
 1 file changed, 123 insertions(+), 111 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index c92b0516431..8f9ef63be1f 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -1,5 +1,3 @@
-use std::collections::BTreeSet;
-
 use acir::{
     circuit::{
         self,
@@ -163,13 +161,12 @@ pub(super) fn transform_internal<F: AcirField>(
         ..acir
     };
     let mut merge_optimizer = MergeExpressionsOptimizer::new();
+
     let (opcodes, new_acir_opcode_positions) =
         merge_optimizer.eliminate_intermediate_variable(&acir, new_acir_opcode_positions);
 
     // n.b. if we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
     let mut acir = Circuit {
-        current_witness_index,
-        expression_width,
         opcodes,
         // The optimizer does not add new public inputs
         ..acir
@@ -177,106 +174,121 @@ pub(super) fn transform_internal<F: AcirField>(
 
     // After the elimination of intermediate variables the `current_witness_index` is potentially higher than it needs to be,
     // which would cause gaps if we ran the optimization a second time, making it look like new variables were added.
-    // Here we figure out what is the final state of witnesses by visiting each opcode.
-    let witnesses = WitnessCollector::collect_from_circuit(&acir);
-    if let Some(max_witness) = witnesses.last() {
-        acir.current_witness_index = max_witness.0;
-    }
+    acir.current_witness_index = max_witness(&acir).witness_index();
 
     (acir, new_acir_opcode_positions)
 }
 
-/// Collect all witnesses in a circuit.
-#[derive(Default, Clone, Debug)]
-struct WitnessCollector {
-    witnesses: BTreeSet<Witness>,
+/// Find the witness with the highest ID in the circuit.
+fn max_witness<F: AcirField>(circuit: &Circuit<F>) -> Witness {
+    let mut witnesses = WitnessFolder::new(Witness::default(), |state, witness| {
+        *state = witness.max(*state);
+    });
+    witnesses.fold_circuit(circuit);
+    witnesses.into_state()
 }
 
-impl WitnessCollector {
-    /// Collect all witnesses in a circuit.
-    fn collect_from_circuit<F: AcirField>(circuit: &Circuit<F>) -> BTreeSet<Witness> {
-        let mut collector = Self::default();
-        collector.extend_from_circuit(circuit);
-        collector.witnesses
-    }
+/// Fold all witnesses in a circuit.
+struct WitnessFolder<S, A> {
+    state: S,
+    accumulate: A,
+}
 
-    fn add(&mut self, witness: Witness) {
-        self.witnesses.insert(witness);
+impl<S, A> WitnessFolder<S, A>
+where
+    A: Fn(&mut S, Witness),
+{
+    /// Create the folder with some initial state and an accumulator function.
+    fn new(init: S, accumulate: A) -> Self {
+        Self { state: init, accumulate }
     }
 
-    fn add_many(&mut self, witnesses: &[Witness]) {
-        self.witnesses.extend(witnesses);
+    /// Take the accumulated state.
+    fn into_state(self) -> S {
+        self.state
     }
 
     /// Add all witnesses from the circuit.
-    fn extend_from_circuit<F: AcirField>(&mut self, circuit: &Circuit<F>) {
-        self.witnesses.extend(&circuit.private_parameters);
-        self.witnesses.extend(&circuit.public_parameters.0);
-        self.witnesses.extend(&circuit.return_values.0);
+    fn fold_circuit<F: AcirField>(&mut self, circuit: &Circuit<F>) {
+        self.fold_many(circuit.private_parameters.iter());
+        self.fold_many(circuit.public_parameters.0.iter());
+        self.fold_many(circuit.return_values.0.iter());
         for opcode in &circuit.opcodes {
-            self.extend_from_opcode(opcode);
+            self.fold_opcode(opcode);
+        }
+    }
+
+    /// Fold a witness into the state.
+    fn fold(&mut self, witness: Witness) {
+        (self.accumulate)(&mut self.state, witness);
+    }
+
+    /// Fold many witnesses into the state.
+    fn fold_many<'w, I: Iterator<Item = &'w Witness>>(&mut self, witnesses: I) {
+        for w in witnesses {
+            self.fold(*w);
         }
     }
 
     /// Add witnesses from the opcode.
-    fn extend_from_opcode<F: AcirField>(&mut self, opcode: &Opcode<F>) {
+    fn fold_opcode<F: AcirField>(&mut self, opcode: &Opcode<F>) {
         match opcode {
             Opcode::AssertZero(expr) => {
-                self.extend_from_expr(expr);
+                self.fold_expr(expr);
             }
-            Opcode::BlackBoxFuncCall(call) => self.extend_from_blackbox(call),
+            Opcode::BlackBoxFuncCall(call) => self.fold_blackbox(call),
             Opcode::MemoryOp { block_id: _, op, predicate } => {
                 let MemOp { operation, index, value } = op;
-                self.extend_from_expr(operation);
-                self.extend_from_expr(index);
-                self.extend_from_expr(value);
+                self.fold_expr(operation);
+                self.fold_expr(index);
+                self.fold_expr(value);
                 if let Some(pred) = predicate {
-                    self.extend_from_expr(pred);
+                    self.fold_expr(pred);
                 }
             }
             Opcode::MemoryInit { block_id: _, init, block_type: _ } => {
                 for w in init {
-                    self.add(*w);
+                    self.fold(*w);
                 }
             }
             // We keep the display for a BrilligCall and circuit Call separate as they
             // are distinct in their functionality and we should maintain this separation for debugging.
             Opcode::BrilligCall { id: _, inputs, outputs, predicate } => {
                 if let Some(pred) = predicate {
-                    self.extend_from_expr(pred);
+                    self.fold_expr(pred);
                 }
-                self.extend_from_brillig_inputs(inputs);
-                self.extend_from_brillig_outputs(outputs);
+                self.fold_brillig_inputs(inputs);
+                self.fold_brillig_outputs(outputs);
             }
             Opcode::Call { id: _, inputs, outputs, predicate } => {
                 if let Some(pred) = predicate {
-                    self.extend_from_expr(pred);
+                    self.fold_expr(pred);
                 }
-                self.add_many(inputs);
-                self.add_many(outputs);
+                self.fold_many(inputs.iter());
+                self.fold_many(outputs.iter());
             }
         }
     }
 
-    fn extend_from_expr<F: AcirField>(&mut self, expr: &Expression<F>) {
+    fn fold_expr<F: AcirField>(&mut self, expr: &Expression<F>) {
         for i in &expr.mul_terms {
-            self.add(i.1);
-            self.add(i.2);
+            self.fold(i.1);
+            self.fold(i.2);
         }
         for i in &expr.linear_combinations {
-            self.add(i.1);
+            self.fold(i.1);
         }
     }
 
-    fn extend_from_brillig_inputs<F: AcirField>(&mut self, inputs: &[BrilligInputs<F>]) {
+    fn fold_brillig_inputs<F: AcirField>(&mut self, inputs: &[BrilligInputs<F>]) {
         for input in inputs {
             match input {
                 BrilligInputs::Single(expr) => {
-                    self.extend_from_expr(expr);
+                    self.fold_expr(expr);
                 }
                 BrilligInputs::Array(exprs) => {
                     for expr in exprs {
-                        self.extend_from_expr(expr);
+                        self.fold_expr(expr);
                     }
                 }
                 BrilligInputs::MemoryArray(_) => {}
@@ -284,45 +296,45 @@ impl WitnessCollector {
         }
     }
 
-    fn extend_from_brillig_outputs(&mut self, outputs: &[BrilligOutputs]) {
+    fn fold_brillig_outputs(&mut self, outputs: &[BrilligOutputs]) {
         for output in outputs {
             match output {
                 BrilligOutputs::Simple(w) => {
-                    self.add(*w);
+                    self.fold(*w);
                 }
-                BrilligOutputs::Array(ws) => self.add_many(ws),
+                BrilligOutputs::Array(ws) => self.fold_many(ws.iter()),
             }
         }
     }
 
-    fn extend_from_blackbox<F: AcirField>(&mut self, call: &BlackBoxFuncCall<F>) {
+    fn fold_blackbox<F: AcirField>(&mut self, call: &BlackBoxFuncCall<F>) {
         match call {
             BlackBoxFuncCall::AES128Encrypt { inputs, iv, key, outputs } => {
-                self.extend_from_function_inputs(inputs.as_slice());
-                self.extend_from_function_inputs(iv.as_slice());
-                self.extend_from_function_inputs(key.as_slice());
-                self.add_many(outputs);
+                self.fold_function_inputs(inputs.as_slice());
+                self.fold_function_inputs(iv.as_slice());
+                self.fold_function_inputs(key.as_slice());
+                self.fold_many(outputs.iter());
             }
             BlackBoxFuncCall::AND { lhs, rhs, output } => {
-                self.extend_from_function_input(lhs);
-                self.extend_from_function_input(rhs);
-                self.add(*output);
+                self.fold_function_input(lhs);
+                self.fold_function_input(rhs);
+                self.fold(*output);
             }
             BlackBoxFuncCall::XOR { lhs, rhs, output } => {
-                self.extend_from_function_input(lhs);
-                self.extend_from_function_input(rhs);
-                self.add(*output);
+                self.fold_function_input(lhs);
+                self.fold_function_input(rhs);
+                self.fold(*output);
             }
             BlackBoxFuncCall::RANGE { input } => {
-                self.extend_from_function_input(input);
+                self.fold_function_input(input);
             }
             BlackBoxFuncCall::Blake2s { inputs, outputs } => {
-                self.extend_from_function_inputs(inputs.as_slice());
-                self.add_many(outputs.as_slice());
+                self.fold_function_inputs(inputs.as_slice());
+                self.fold_many(outputs.iter());
             }
             BlackBoxFuncCall::Blake3 { inputs, outputs } => {
-                self.extend_from_function_inputs(inputs.as_slice());
-                self.add_many(outputs.as_slice());
+                self.fold_function_inputs(inputs.as_slice());
+                self.fold_many(outputs.iter());
             }
             BlackBoxFuncCall::SchnorrVerify {
                 public_key_x,
@@ -331,11 +343,11 @@ impl WitnessCollector {
                 message,
                 output,
             } => {
-                self.extend_from_function_input(public_key_x);
-                self.extend_from_function_input(public_key_y);
-                self.extend_from_function_inputs(signature.as_slice());
-                self.extend_from_function_inputs(message.as_slice());
-                self.add(*output);
+                self.fold_function_input(public_key_x);
+                self.fold_function_input(public_key_y);
+                self.fold_function_inputs(signature.as_slice());
+                self.fold_function_inputs(message.as_slice());
+                self.fold(*output);
             }
             BlackBoxFuncCall::EcdsaSecp256k1 {
                 public_key_x,
@@ -344,11 +356,11 @@ impl WitnessCollector {
                 hashed_message,
                 output,
             } => {
-                self.extend_from_function_inputs(public_key_x.as_slice());
-                self.extend_from_function_inputs(public_key_y.as_slice());
-                self.extend_from_function_inputs(signature.as_slice());
-                self.extend_from_function_inputs(hashed_message.as_slice());
-                self.add(*output);
+                self.fold_function_inputs(public_key_x.as_slice());
+                self.fold_function_inputs(public_key_y.as_slice());
+                self.fold_function_inputs(signature.as_slice());
+                self.fold_function_inputs(hashed_message.as_slice());
+                self.fold(*output);
             }
             BlackBoxFuncCall::EcdsaSecp256r1 {
                 public_key_x,
@@ -357,31 +369,31 @@ impl WitnessCollector {
                 hashed_message,
                 output,
             } => {
-                self.extend_from_function_inputs(public_key_x.as_slice());
-                self.extend_from_function_inputs(public_key_y.as_slice());
-                self.extend_from_function_inputs(signature.as_slice());
-                self.extend_from_function_inputs(hashed_message.as_slice());
-                self.add(*output);
+                self.fold_function_inputs(public_key_x.as_slice());
+                self.fold_function_inputs(public_key_y.as_slice());
+                self.fold_function_inputs(signature.as_slice());
+                self.fold_function_inputs(hashed_message.as_slice());
+                self.fold(*output);
             }
             BlackBoxFuncCall::MultiScalarMul { points, scalars, outputs } => {
-                self.extend_from_function_inputs(points.as_slice());
-                self.extend_from_function_inputs(scalars.as_slice());
+                self.fold_function_inputs(points.as_slice());
+                self.fold_function_inputs(scalars.as_slice());
                 let (x, y, i) = outputs;
-                self.add(*x);
-                self.add(*y);
-                self.add(*i);
+                self.fold(*x);
+                self.fold(*y);
+                self.fold(*i);
             }
             BlackBoxFuncCall::EmbeddedCurveAdd { input1, input2, outputs } => {
-                self.extend_from_function_inputs(input1.as_slice());
-                self.extend_from_function_inputs(input2.as_slice());
+                self.fold_function_inputs(input1.as_slice());
+                self.fold_function_inputs(input2.as_slice());
                 let (x, y, i) = outputs;
-                self.add(*x);
-                self.add(*y);
-                self.add(*i);
+                self.fold(*x);
+                self.fold(*y);
+                self.fold(*i);
             }
             BlackBoxFuncCall::Keccakf1600 { inputs, outputs } => {
-                self.extend_from_function_inputs(inputs.as_slice());
-                self.add_many(outputs.as_slice());
+                self.fold_function_inputs(inputs.as_slice());
+                self.fold_many(outputs.iter());
             }
             BlackBoxFuncCall::RecursiveAggregation {
                 verification_key,
@@ -390,42 +402,42 @@ impl WitnessCollector {
                 key_hash,
                 proof_type: _,
             } => {
-                self.extend_from_function_inputs(verification_key.as_slice());
-                self.extend_from_function_inputs(proof.as_slice());
-                self.extend_from_function_inputs(public_inputs.as_slice());
-                self.extend_from_function_input(key_hash);
+                self.fold_function_inputs(verification_key.as_slice());
+                self.fold_function_inputs(proof.as_slice());
+                self.fold_function_inputs(public_inputs.as_slice());
+                self.fold_function_input(key_hash);
             }
             BlackBoxFuncCall::BigIntAdd { .. }
             | BlackBoxFuncCall::BigIntSub { .. }
             | BlackBoxFuncCall::BigIntMul { .. }
             | BlackBoxFuncCall::BigIntDiv { .. } => {}
             BlackBoxFuncCall::BigIntFromLeBytes { inputs, modulus: _, output: _ } => {
-                self.extend_from_function_inputs(inputs.as_slice());
+                self.fold_function_inputs(inputs.as_slice());
             }
             BlackBoxFuncCall::BigIntToLeBytes { input: _, outputs } => {
-                self.add_many(outputs.as_slice());
+                self.fold_many(outputs.iter());
             }
             BlackBoxFuncCall::Poseidon2Permutation { inputs, outputs, len: _ } => {
-                self.extend_from_function_inputs(inputs.as_slice());
-                self.add_many(outputs.as_slice());
+                self.fold_function_inputs(inputs.as_slice());
+                self.fold_many(outputs.iter());
             }
             BlackBoxFuncCall::Sha256Compression { inputs, hash_values, outputs } => {
-                self.extend_from_function_inputs(inputs.as_slice());
-                self.extend_from_function_inputs(hash_values.as_slice());
-                self.add_many(outputs.as_slice());
+                self.fold_function_inputs(inputs.as_slice());
+                self.fold_function_inputs(hash_values.as_slice());
+                self.fold_many(outputs.iter());
             }
         }
     }
 
-    fn extend_from_function_input<F: AcirField>(&mut self, input: &FunctionInput<F>) {
+    fn fold_function_input<F: AcirField>(&mut self, input: &FunctionInput<F>) {
         if let circuit::opcodes::ConstantOrWitnessEnum::Witness(witness) = input.input() {
-            self.add(witness);
+            self.fold(witness);
         }
     }
 
-    fn extend_from_function_inputs<F: AcirField>(&mut self, inputs: &[FunctionInput<F>]) {
+    fn fold_function_inputs<F: AcirField>(&mut self, inputs: &[FunctionInput<F>]) {
         for input in inputs {
-            self.extend_from_function_input(input);
+            self.fold_function_input(input);
         }
     }
 }

From 97f75ba4ac56cd9945cebab01c6f0bbea6d70cbb Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 15:56:30 +0000
Subject: [PATCH 04/13] Try to allow multiple passes with the
 MergeExpressionsOptimizer

---
 Cargo.lock                                    |  1 +
 acvm-repo/acvm/Cargo.toml                     |  2 +-
 .../acvm/src/compiler/transformers/mod.rs     | 37 ++++++++++++++-----
 tooling/nargo_cli/src/cli/compile_cmd.rs      |  5 +++
 4 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2f19ed704b2..f414a126495 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -48,6 +48,7 @@ dependencies = [
  "ark-bn254",
  "bn254_blackbox_solver",
  "brillig_vm",
+ "fxhash",
  "indexmap 1.9.3",
  "num-bigint",
  "proptest",
diff --git a/acvm-repo/acvm/Cargo.toml b/acvm-repo/acvm/Cargo.toml
index e513ae4e727..ba01ac8ec16 100644
--- a/acvm-repo/acvm/Cargo.toml
+++ b/acvm-repo/acvm/Cargo.toml
@@ -17,7 +17,7 @@ workspace = true
 thiserror.workspace = true
 tracing.workspace = true
 serde.workspace = true
-
+fxhash.workspace = true
 acir.workspace = true
 brillig_vm.workspace = true
 acvm_blackbox_solver.workspace = true
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 8f9ef63be1f..85a5ac8bb96 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -19,6 +19,10 @@ use super::{
     optimizers::MergeExpressionsOptimizer, transform_assert_messages, AcirTransformationMap,
 };
 
+/// The `MergeExpressionOptimizer` needs multiple passes to stabilize the output.
+/// Testing showed two passes to be enough.
+const MAX_OPTIMIZER_PASSES: usize = 10;
+
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
 pub fn transform<F: AcirField>(
     acir: Circuit<F>,
@@ -153,24 +157,37 @@ pub(super) fn transform_internal<F: AcirField>(
 
     let current_witness_index = next_witness_index - 1;
 
-    let acir = Circuit {
+    let mut acir = Circuit {
         current_witness_index,
         expression_width,
         opcodes: transformed_opcodes,
         // The transformer does not add new public inputs
         ..acir
     };
-    let mut merge_optimizer = MergeExpressionsOptimizer::new();
 
-    let (opcodes, new_acir_opcode_positions) =
-        merge_optimizer.eliminate_intermediate_variable(&acir, new_acir_opcode_positions);
+    // Allow multiple passes until we have stable output.
+    let mut prev_circuit_hash = fxhash::hash64(&acir);
+    for _ in 0..MAX_OPTIMIZER_PASSES {
+        let mut merge_optimizer = MergeExpressionsOptimizer::new();
 
-    // n.b. if we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
-    let mut acir = Circuit {
-        opcodes,
-        // The optimizer does not add new public inputs
-        ..acir
-    };
+        let (next_opcodes, next_acir_opcode_positions) =
+            merge_optimizer.eliminate_intermediate_variable(&acir, new_acir_opcode_positions);
+
+        // n.b. if we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
+        acir = Circuit {
+            opcodes: next_opcodes,
+            // The optimizer does not add new public inputs
+            ..acir
+        };
+
+        let next_circuit_hash = fxhash::hash64(&acir);
+        new_acir_opcode_positions = next_acir_opcode_positions;
+
+        if next_circuit_hash == prev_circuit_hash {
+            break;
+        }
+        prev_circuit_hash = next_circuit_hash;
+    }
 
     // After the elimination of intermediate variables the `current_witness_index` is potentially higher than it needs to be,
     // which would cause gaps if we ran the optimization a second time, making it look like new variables were added.
diff --git a/tooling/nargo_cli/src/cli/compile_cmd.rs b/tooling/nargo_cli/src/cli/compile_cmd.rs
index f134374f89e..4c7f55fa0cc 100644
--- a/tooling/nargo_cli/src/cli/compile_cmd.rs
+++ b/tooling/nargo_cli/src/cli/compile_cmd.rs
@@ -413,6 +413,11 @@ mod tests {
 
                 let width = get_target_width(package.expression_width, None);
 
+                // Figure out if there is an upper bound to repeating the entire optimization step after which the results are stable:
+                // for _ in 0..2 {
+                //     program_0 = nargo::ops::transform_program(program_0, width);
+                // }
+
                 let program_1 = nargo::ops::transform_program(program_0, width);
                 let program_2 = nargo::ops::transform_program(program_1.clone(), width);
 

From 8cc215aab9fea523738a9210f9bd016984c4214f Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 16:57:14 +0000
Subject: [PATCH 05/13] Trying to figure out where to put the loop

---
 acvm-repo/acvm/src/compiler/mod.rs            | 27 ++++--
 acvm-repo/acvm/src/compiler/optimizers/mod.rs |  2 +-
 .../acvm/src/compiler/transformers/mod.rs     | 87 ++++++++++++-------
 tooling/nargo_cli/src/cli/compile_cmd.rs      |  2 +-
 4 files changed, 74 insertions(+), 44 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index 8829f77e50b..ad0f0d2df4c 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -14,7 +14,7 @@ pub use optimizers::optimize;
 use optimizers::optimize_internal;
 pub use simulator::CircuitSimulator;
 use transformers::transform_internal;
-pub use transformers::{transform, MIN_EXPRESSION_WIDTH};
+pub use transformers::MIN_EXPRESSION_WIDTH;
 
 /// This module moves and decomposes acir opcodes. The transformation map allows consumers of this module to map
 /// metadata they had about the opcodes to the new opcode structure generated after the transformation.
@@ -28,9 +28,9 @@ impl AcirTransformationMap {
     /// Builds a map from a vector of pointers to the old acir opcodes.
     /// The index of the vector is the new opcode index.
     /// The value of the vector is the old opcode index pointed.
-    fn new(acir_opcode_positions: Vec<usize>) -> Self {
+    fn new(acir_opcode_positions: &[usize]) -> Self {
         let mut old_indices_to_new_indices = HashMap::with_capacity(acir_opcode_positions.len());
-        for (new_index, old_index) in acir_opcode_positions.into_iter().enumerate() {
+        for (new_index, old_index) in acir_opcode_positions.iter().copied().enumerate() {
             old_indices_to_new_indices.entry(old_index).or_insert_with(Vec::new).push(new_index);
         }
         AcirTransformationMap { old_indices_to_new_indices }
@@ -76,14 +76,23 @@ pub fn compile<F: AcirField>(
     acir: Circuit<F>,
     expression_width: ExpressionWidth,
 ) -> (Circuit<F>, AcirTransformationMap) {
-    let (acir, acir_opcode_positions) = optimize_internal(acir);
+    let mut pass = 0;
+    let mut prev_acir = acir;
+    loop {
+        let (acir, acir_opcode_positions) = optimize_internal(prev_acir);
 
-    let (mut acir, acir_opcode_positions) =
-        transform_internal(acir, expression_width, acir_opcode_positions);
+        let (mut acir, acir_opcode_positions) =
+            transform_internal(acir, expression_width, acir_opcode_positions);
 
-    let transformation_map = AcirTransformationMap::new(acir_opcode_positions);
+        let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
 
-    acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
+        acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
 
-    (acir, transformation_map)
+        if pass == 2 {
+            return (acir, transformation_map);
+        }
+        println!("AFTER PASS {pass}: {acir}");
+        pass += 1;
+        prev_acir = acir;
+    }
 }
diff --git a/acvm-repo/acvm/src/compiler/optimizers/mod.rs b/acvm-repo/acvm/src/compiler/optimizers/mod.rs
index 1947a80dc35..f86bf500998 100644
--- a/acvm-repo/acvm/src/compiler/optimizers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/optimizers/mod.rs
@@ -23,7 +23,7 @@ use super::{transform_assert_messages, AcirTransformationMap};
 pub fn optimize<F: AcirField>(acir: Circuit<F>) -> (Circuit<F>, AcirTransformationMap) {
     let (mut acir, new_opcode_positions) = optimize_internal(acir);
 
-    let transformation_map = AcirTransformationMap::new(new_opcode_positions);
+    let transformation_map = AcirTransformationMap::new(&new_opcode_positions);
 
     acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
 
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 85a5ac8bb96..6d342731a8b 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -21,22 +21,25 @@ use super::{
 
 /// The `MergeExpressionOptimizer` needs multiple passes to stabilize the output.
 /// Testing showed two passes to be enough.
-const MAX_OPTIMIZER_PASSES: usize = 10;
+const MAX_OPTIMIZER_PASSES: usize = 2;
 
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
-pub fn transform<F: AcirField>(
-    acir: Circuit<F>,
+fn _transform<F: AcirField>(
+    mut acir: Circuit<F>,
     expression_width: ExpressionWidth,
 ) -> (Circuit<F>, AcirTransformationMap) {
     // Track original acir opcode positions throughout the transformation passes of the compilation
     // by applying the modifications done to the circuit opcodes and also to the opcode_positions (delete and insert)
-    let acir_opcode_positions = acir.opcodes.iter().enumerate().map(|(i, _)| i).collect();
+    let mut acir_opcode_positions =
+        acir.opcodes.iter().enumerate().map(|(i, _)| i).collect::<Vec<_>>();
 
-    let (mut acir, acir_opcode_positions) =
+    let (new_acir, new_acir_opcode_positions) =
         transform_internal(acir, expression_width, acir_opcode_positions);
 
-    let transformation_map = AcirTransformationMap::new(acir_opcode_positions);
+    acir = new_acir;
+    acir_opcode_positions = new_acir_opcode_positions;
 
+    let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
     acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
 
     (acir, transformation_map)
@@ -45,9 +48,43 @@ pub fn transform<F: AcirField>(
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
 ///
 /// Accepts an injected `acir_opcode_positions` to allow transformations to be applied directly after optimizations.
+///
+/// Does multiple passes until the output stabilises.
 #[tracing::instrument(level = "trace", name = "transform_acir", skip(acir, acir_opcode_positions))]
 pub(super) fn transform_internal<F: AcirField>(
-    acir: Circuit<F>,
+    mut acir: Circuit<F>,
+    expression_width: ExpressionWidth,
+    mut acir_opcode_positions: Vec<usize>,
+) -> (Circuit<F>, Vec<usize>) {
+    // Allow multiple passes until we have stable output.
+    let mut prev_opcodes_hash = fxhash::hash64(&acir.opcodes);
+
+    for _ in 0..1 {
+        let (new_acir, new_acir_opcode_positions) =
+            transform_internal_once(acir, expression_width, acir_opcode_positions);
+
+        acir = new_acir;
+        acir_opcode_positions = new_acir_opcode_positions;
+    }
+    // After the elimination of intermediate variables the `current_witness_index` is potentially higher than it needs to be,
+    // which would cause gaps if we ran the optimization a second time, making it look like new variables were added.
+    acir.current_witness_index = max_witness(&acir).witness_index();
+
+    (acir, acir_opcode_positions)
+}
+
+/// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
+///
+/// Accepts an injected `acir_opcode_positions` to allow transformations to be applied directly after optimizations.
+///
+/// Does a single optimisation pass.
+#[tracing::instrument(
+    level = "trace",
+    name = "transform_acir_once",
+    skip(acir, acir_opcode_positions)
+)]
+fn transform_internal_once<F: AcirField>(
+    mut acir: Circuit<F>,
     expression_width: ExpressionWidth,
     acir_opcode_positions: Vec<usize>,
 ) -> (Circuit<F>, Vec<usize>) {
@@ -157,7 +194,7 @@ pub(super) fn transform_internal<F: AcirField>(
 
     let current_witness_index = next_witness_index - 1;
 
-    let mut acir = Circuit {
+    acir = Circuit {
         current_witness_index,
         expression_width,
         opcodes: transformed_opcodes,
@@ -165,33 +202,17 @@ pub(super) fn transform_internal<F: AcirField>(
         ..acir
     };
 
-    // Allow multiple passes until we have stable output.
-    let mut prev_circuit_hash = fxhash::hash64(&acir);
-    for _ in 0..MAX_OPTIMIZER_PASSES {
-        let mut merge_optimizer = MergeExpressionsOptimizer::new();
-
-        let (next_opcodes, next_acir_opcode_positions) =
-            merge_optimizer.eliminate_intermediate_variable(&acir, new_acir_opcode_positions);
-
-        // n.b. if we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
-        acir = Circuit {
-            opcodes: next_opcodes,
-            // The optimizer does not add new public inputs
-            ..acir
-        };
-
-        let next_circuit_hash = fxhash::hash64(&acir);
-        new_acir_opcode_positions = next_acir_opcode_positions;
+    let mut merge_optimizer = MergeExpressionsOptimizer::new();
 
-        if next_circuit_hash == prev_circuit_hash {
-            break;
-        }
-        prev_circuit_hash = next_circuit_hash;
-    }
+    let (opcodes, new_acir_opcode_positions) =
+        merge_optimizer.eliminate_intermediate_variable(&acir, new_acir_opcode_positions);
 
-    // After the elimination of intermediate variables the `current_witness_index` is potentially higher than it needs to be,
-    // which would cause gaps if we ran the optimization a second time, making it look like new variables were added.
-    acir.current_witness_index = max_witness(&acir).witness_index();
+    // n.b. if we do not update current_witness_index after the eliminate_intermediate_variable pass, the real index could be less.
+    acir = Circuit {
+        opcodes,
+        // The optimizer does not add new public inputs
+        ..acir
+    };
 
     (acir, new_acir_opcode_positions)
 }
diff --git a/tooling/nargo_cli/src/cli/compile_cmd.rs b/tooling/nargo_cli/src/cli/compile_cmd.rs
index 4c7f55fa0cc..9a5a27defca 100644
--- a/tooling/nargo_cli/src/cli/compile_cmd.rs
+++ b/tooling/nargo_cli/src/cli/compile_cmd.rs
@@ -414,7 +414,7 @@ mod tests {
                 let width = get_target_width(package.expression_width, None);
 
                 // Figure out if there is an upper bound to repeating the entire optimization step after which the results are stable:
-                // for _ in 0..2 {
+                // for i in 0..0 {
                 //     program_0 = nargo::ops::transform_program(program_0, width);
                 // }
 

From 7c72b29a2c6916e6f584af409364a69de6b7d4e9 Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 17:02:55 +0000
Subject: [PATCH 06/13] Only looping in transform

---
 acvm-repo/acvm/src/compiler/mod.rs            | 22 +++++--------------
 .../acvm/src/compiler/transformers/mod.rs     |  5 ++---
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index ad0f0d2df4c..89e0d1dbb4f 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -76,23 +76,13 @@ pub fn compile<F: AcirField>(
     acir: Circuit<F>,
     expression_width: ExpressionWidth,
 ) -> (Circuit<F>, AcirTransformationMap) {
-    let mut pass = 0;
-    let mut prev_acir = acir;
-    loop {
-        let (acir, acir_opcode_positions) = optimize_internal(prev_acir);
+    let (acir, acir_opcode_positions) = optimize_internal(acir);
 
-        let (mut acir, acir_opcode_positions) =
-            transform_internal(acir, expression_width, acir_opcode_positions);
+    let (mut acir, acir_opcode_positions) =
+        transform_internal(acir, expression_width, acir_opcode_positions);
 
-        let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
+    let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
+    acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
 
-        acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
-
-        if pass == 2 {
-            return (acir, transformation_map);
-        }
-        println!("AFTER PASS {pass}: {acir}");
-        pass += 1;
-        prev_acir = acir;
-    }
+    (acir, transformation_map)
 }
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 6d342731a8b..60cb90e487f 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -20,8 +20,7 @@ use super::{
 };
 
 /// The `MergeExpressionOptimizer` needs multiple passes to stabilize the output.
-/// Testing showed two passes to be enough.
-const MAX_OPTIMIZER_PASSES: usize = 2;
+const MAX_OPTIMIZER_PASSES: usize = 3;
 
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
 fn _transform<F: AcirField>(
@@ -59,7 +58,7 @@ pub(super) fn transform_internal<F: AcirField>(
     // Allow multiple passes until we have stable output.
     let mut prev_opcodes_hash = fxhash::hash64(&acir.opcodes);
 
-    for _ in 0..1 {
+    for _ in 0..MAX_OPTIMIZER_PASSES {
         let (new_acir, new_acir_opcode_positions) =
             transform_internal_once(acir, expression_width, acir_opcode_positions);
 

From 4a0652318ecde1dfa973dad4590f45c3cd62e47c Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 17:07:12 +0000
Subject: [PATCH 07/13] Allow 3 full transform passes

---
 acvm-repo/acvm/src/compiler/transformers/mod.rs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 60cb90e487f..a9bcad9e09e 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -48,7 +48,7 @@ fn _transform<F: AcirField>(
 ///
 /// Accepts an injected `acir_opcode_positions` to allow transformations to be applied directly after optimizations.
 ///
-/// Does multiple passes until the output stabilises.
+/// Does multiple passes until the output stabilizes.
 #[tracing::instrument(level = "trace", name = "transform_acir", skip(acir, acir_opcode_positions))]
 pub(super) fn transform_internal<F: AcirField>(
     mut acir: Circuit<F>,
@@ -64,6 +64,13 @@ pub(super) fn transform_internal<F: AcirField>(
 
         acir = new_acir;
         acir_opcode_positions = new_acir_opcode_positions;
+
+        let new_opcodes_hash = fxhash::hash64(&acir.opcodes);
+
+        if new_opcodes_hash == prev_opcodes_hash {
+            break;
+        }
+        prev_opcodes_hash = new_opcodes_hash;
     }
     // After the elimination of intermediate variables the `current_witness_index` is potentially higher than it needs to be,
     // which would cause gaps if we ran the optimization a second time, making it look like new variables were added.
@@ -76,7 +83,7 @@ pub(super) fn transform_internal<F: AcirField>(
 ///
 /// Accepts an injected `acir_opcode_positions` to allow transformations to be applied directly after optimizations.
 ///
-/// Does a single optimisation pass.
+/// Does a single optimization pass.
 #[tracing::instrument(
     level = "trace",
     name = "transform_acir_once",

From b163c208defef500e3e26e1f806dd073761f5df8 Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 17:27:38 +0000
Subject: [PATCH 08/13] Loop both inside and outside

---
 acvm-repo/acvm/src/compiler/mod.rs            | 27 ++++++++++++++++---
 .../acvm/src/compiler/transformers/mod.rs     |  8 +++---
 tooling/nargo_cli/src/cli/compile_cmd.rs      |  2 +-
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index 89e0d1dbb4f..012afd86ab9 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -16,6 +16,9 @@ pub use simulator::CircuitSimulator;
 use transformers::transform_internal;
 pub use transformers::MIN_EXPRESSION_WIDTH;
 
+/// We need multiple passes to stabilize the output.
+const MAX_OPTIMIZER_PASSES: usize = 3;
+
 /// This module moves and decomposes acir opcodes. The transformation map allows consumers of this module to map
 /// metadata they had about the opcodes to the new opcode structure generated after the transformation.
 #[derive(Debug)]
@@ -72,14 +75,32 @@ fn transform_assert_messages<F: Clone>(
 }
 
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
+///
+/// Runs multiple passes until the output stabilizes.
 pub fn compile<F: AcirField>(
     acir: Circuit<F>,
     expression_width: ExpressionWidth,
 ) -> (Circuit<F>, AcirTransformationMap) {
-    let (acir, acir_opcode_positions) = optimize_internal(acir);
+    let mut pass = 0;
+    let mut prev_opcodes_hash = fxhash::hash64(&acir.opcodes);
+    let mut prev_acir = acir;
+
+    // For most test programs it would be enough to only loop `transform_internal`,
+    // but some of them don't stabilize unless we also repeat the backend agnostic optimizations.
+    let (mut acir, acir_opcode_positions) = loop {
+        let (acir, acir_opcode_positions) = optimize_internal(prev_acir);
+        let (acir, acir_opcode_positions) =
+            transform_internal(acir, expression_width, acir_opcode_positions);
 
-    let (mut acir, acir_opcode_positions) =
-        transform_internal(acir, expression_width, acir_opcode_positions);
+        let opcodes_hash = fxhash::hash64(&acir.opcodes);
+
+        if pass == MAX_OPTIMIZER_PASSES || prev_opcodes_hash == opcodes_hash {
+            break (acir, acir_opcode_positions);
+        }
+        pass += 1;
+        prev_acir = acir;
+        prev_opcodes_hash = opcodes_hash
+    };
 
     let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
     acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index a9bcad9e09e..9f140c671d1 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -17,12 +17,12 @@ pub use csat::MIN_EXPRESSION_WIDTH;
 
 use super::{
     optimizers::MergeExpressionsOptimizer, transform_assert_messages, AcirTransformationMap,
+    MAX_OPTIMIZER_PASSES,
 };
 
-/// The `MergeExpressionOptimizer` needs multiple passes to stabilize the output.
-const MAX_OPTIMIZER_PASSES: usize = 3;
-
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
+///
+/// TODO: Can this be removed?
 fn _transform<F: AcirField>(
     mut acir: Circuit<F>,
     expression_width: ExpressionWidth,
@@ -58,6 +58,8 @@ pub(super) fn transform_internal<F: AcirField>(
     // Allow multiple passes until we have stable output.
     let mut prev_opcodes_hash = fxhash::hash64(&acir.opcodes);
 
+    // For most test programs it would be enough to loop here, but some of them
+    // don't stabilize unless we also repeat the backend agnostic optimizations.
     for _ in 0..MAX_OPTIMIZER_PASSES {
         let (new_acir, new_acir_opcode_positions) =
             transform_internal_once(acir, expression_width, acir_opcode_positions);
diff --git a/tooling/nargo_cli/src/cli/compile_cmd.rs b/tooling/nargo_cli/src/cli/compile_cmd.rs
index 9a5a27defca..d019e87eee6 100644
--- a/tooling/nargo_cli/src/cli/compile_cmd.rs
+++ b/tooling/nargo_cli/src/cli/compile_cmd.rs
@@ -414,7 +414,7 @@ mod tests {
                 let width = get_target_width(package.expression_width, None);
 
                 // Figure out if there is an upper bound to repeating the entire optimization step after which the results are stable:
-                // for i in 0..0 {
+                // for i in 0..2 {
                 //     program_0 = nargo::ops::transform_program(program_0, width);
                 // }
 

From ce9d1070ee077818346cde450ef5593e33aa06c7 Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 17:32:37 +0000
Subject: [PATCH 09/13] Exit after optimisation if it hasn't changed anything

---
 acvm-repo/acvm/src/compiler/mod.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index 012afd86ab9..af8692b9f87 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -17,6 +17,7 @@ use transformers::transform_internal;
 pub use transformers::MIN_EXPRESSION_WIDTH;
 
 /// We need multiple passes to stabilize the output.
+/// The value was determined by running tests.
 const MAX_OPTIMIZER_PASSES: usize = 3;
 
 /// This module moves and decomposes acir opcodes. The transformation map allows consumers of this module to map
@@ -89,17 +90,25 @@ pub fn compile<F: AcirField>(
     // but some of them don't stabilize unless we also repeat the backend agnostic optimizations.
     let (mut acir, acir_opcode_positions) = loop {
         let (acir, acir_opcode_positions) = optimize_internal(prev_acir);
+
+        let opcodes_hash = fxhash::hash64(&acir.opcodes);
+        // Stop if we have already done at least one transform and an extra optimization changed nothing.
+        if pass > 0 && prev_opcodes_hash == opcodes_hash {
+            break (acir, acir_opcode_positions);
+        }
+
         let (acir, acir_opcode_positions) =
             transform_internal(acir, expression_width, acir_opcode_positions);
 
         let opcodes_hash = fxhash::hash64(&acir.opcodes);
-
+        // Stop if the output hasn't change in this loop or we went too long.
         if pass == MAX_OPTIMIZER_PASSES || prev_opcodes_hash == opcodes_hash {
             break (acir, acir_opcode_positions);
         }
+
         pass += 1;
         prev_acir = acir;
-        prev_opcodes_hash = opcodes_hash
+        prev_opcodes_hash = opcodes_hash;
     };
 
     let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);

From 5229857d837e3c06c71863e75b9bf971c2b905a6 Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 19:42:56 +0000
Subject: [PATCH 10/13] Remove unused transform function

---
 .../acvm/src/compiler/transformers/mod.rs     | 29 +------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 9f140c671d1..2150840f604 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -15,34 +15,7 @@ mod csat;
 pub(crate) use csat::CSatTransformer;
 pub use csat::MIN_EXPRESSION_WIDTH;
 
-use super::{
-    optimizers::MergeExpressionsOptimizer, transform_assert_messages, AcirTransformationMap,
-    MAX_OPTIMIZER_PASSES,
-};
-
-/// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
-///
-/// TODO: Can this be removed?
-fn _transform<F: AcirField>(
-    mut acir: Circuit<F>,
-    expression_width: ExpressionWidth,
-) -> (Circuit<F>, AcirTransformationMap) {
-    // Track original acir opcode positions throughout the transformation passes of the compilation
-    // by applying the modifications done to the circuit opcodes and also to the opcode_positions (delete and insert)
-    let mut acir_opcode_positions =
-        acir.opcodes.iter().enumerate().map(|(i, _)| i).collect::<Vec<_>>();
-
-    let (new_acir, new_acir_opcode_positions) =
-        transform_internal(acir, expression_width, acir_opcode_positions);
-
-    acir = new_acir;
-    acir_opcode_positions = new_acir_opcode_positions;
-
-    let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
-    acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
-
-    (acir, transformation_map)
-}
+use super::{optimizers::MergeExpressionsOptimizer, MAX_OPTIMIZER_PASSES};
 
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
 ///

From 99d1bdfc3271ee7188b57233fa383a573af9ca33 Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 19:50:37 +0000
Subject: [PATCH 11/13] Only make max passes

---
 acvm-repo/acvm/src/compiler/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index af8692b9f87..5329debfae8 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -91,9 +91,8 @@ pub fn compile<F: AcirField>(
     let (mut acir, acir_opcode_positions) = loop {
         let (acir, acir_opcode_positions) = optimize_internal(prev_acir);
 
-        let opcodes_hash = fxhash::hash64(&acir.opcodes);
         // Stop if we have already done at least one transform and an extra optimization changed nothing.
-        if pass > 0 && prev_opcodes_hash == opcodes_hash {
+        if pass > 0 && prev_opcodes_hash == fxhash::hash64(&acir.opcodes) {
             break (acir, acir_opcode_positions);
         }
 
@@ -101,8 +100,9 @@ pub fn compile<F: AcirField>(
             transform_internal(acir, expression_width, acir_opcode_positions);
 
         let opcodes_hash = fxhash::hash64(&acir.opcodes);
+
         // Stop if the output hasn't change in this loop or we went too long.
-        if pass == MAX_OPTIMIZER_PASSES || prev_opcodes_hash == opcodes_hash {
+        if pass == MAX_OPTIMIZER_PASSES - 1 || prev_opcodes_hash == opcodes_hash {
             break (acir, acir_opcode_positions);
         }
 

From fc2198300381060430694b0c66cfc4d4e2ca48fc Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Wed, 4 Dec 2024 20:14:08 +0000
Subject: [PATCH 12/13] Add back transform, it was pub-use

---
 acvm-repo/acvm/src/compiler/mod.rs            |  2 +-
 .../acvm/src/compiler/transformers/mod.rs     | 24 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index 5329debfae8..e32c0665c0f 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -14,7 +14,7 @@ pub use optimizers::optimize;
 use optimizers::optimize_internal;
 pub use simulator::CircuitSimulator;
 use transformers::transform_internal;
-pub use transformers::MIN_EXPRESSION_WIDTH;
+pub use transformers::{transform, MIN_EXPRESSION_WIDTH};
 
 /// We need multiple passes to stabilize the output.
 /// The value was determined by running tests.
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 2150840f604..e7e8f885710 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -15,7 +15,29 @@ mod csat;
 pub(crate) use csat::CSatTransformer;
 pub use csat::MIN_EXPRESSION_WIDTH;
 
-use super::{optimizers::MergeExpressionsOptimizer, MAX_OPTIMIZER_PASSES};
+use super::{
+    optimizers::MergeExpressionsOptimizer, transform_assert_messages, AcirTransformationMap,
+    MAX_OPTIMIZER_PASSES,
+};
+
+/// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
+pub fn transform<F: AcirField>(
+    acir: Circuit<F>,
+    expression_width: ExpressionWidth,
+) -> (Circuit<F>, AcirTransformationMap) {
+    // Track original acir opcode positions throughout the transformation passes of the compilation
+    // by applying the modifications done to the circuit opcodes and also to the opcode_positions (delete and insert)
+    let acir_opcode_positions = acir.opcodes.iter().enumerate().map(|(i, _)| i).collect();
+
+    let (mut acir, acir_opcode_positions) =
+        transform_internal(acir, expression_width, acir_opcode_positions);
+
+    let transformation_map = AcirTransformationMap::new(&acir_opcode_positions);
+
+    acir.assert_messages = transform_assert_messages(acir.assert_messages, &transformation_map);
+
+    (acir, transformation_map)
+}
 
 /// Applies [`ProofSystemCompiler`][crate::ProofSystemCompiler] specific optimizations to a [`Circuit`].
 ///

From 61efb67b395a35d87723c4422fe9dcffd8a9586d Mon Sep 17 00:00:00 2001
From: Akosh Farkash <aakoshh@gmail.com>
Date: Thu, 5 Dec 2024 17:17:34 +0000
Subject: [PATCH 13/13] Remove commented exploratory looping

---
 tooling/nargo_cli/src/cli/compile_cmd.rs | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tooling/nargo_cli/src/cli/compile_cmd.rs b/tooling/nargo_cli/src/cli/compile_cmd.rs
index d019e87eee6..f134374f89e 100644
--- a/tooling/nargo_cli/src/cli/compile_cmd.rs
+++ b/tooling/nargo_cli/src/cli/compile_cmd.rs
@@ -413,11 +413,6 @@ mod tests {
 
                 let width = get_target_width(package.expression_width, None);
 
-                // Figure out if there is an upper bound to repeating the entire optimization step after which the results are stable:
-                // for i in 0..2 {
-                //     program_0 = nargo::ops::transform_program(program_0, width);
-                // }
-
                 let program_1 = nargo::ops::transform_program(program_0, width);
                 let program_2 = nargo::ops::transform_program(program_1.clone(), width);