From d36e23abcf1b7a4cf8c5f3f6a2afdbb2f620c1ab Mon Sep 17 00:00:00 2001 From: AztecBot Date: Mon, 2 Dec 2024 08:10:01 +0000 Subject: [PATCH] [1 changes] feat(ssa): Option to set the maximum acceptable Brillig bytecode increase in unrolling (https://github.com/noir-lang/noir/pull/6641) feat: Sync from aztec-packages (https://github.com/noir-lang/noir/pull/6656) chore: refactor poseidon2 (https://github.com/noir-lang/noir/pull/6655) fix: correct types returned by constant EC operations simplified within SSA (https://github.com/noir-lang/noir/pull/6652) feat: Sync from aztec-packages (https://github.com/noir-lang/noir/pull/6634) fix: used signed division for signed modulo (https://github.com/noir-lang/noir/pull/6635) fix(ssa): don't deduplicate constraints in blocks that are not dominated (https://github.com/noir-lang/noir/pull/6627) chore: pin foundry version in CI (https://github.com/noir-lang/noir/pull/6642) feat(ssa): Deduplicate intrinsics with predicates (https://github.com/noir-lang/noir/pull/6615) chore: improve error message of `&T` (https://github.com/noir-lang/noir/pull/6633) fix: LSP code action wasn't triggering on beginning or end of identifier (https://github.com/noir-lang/noir/pull/6616) chore!: remove `ec` module from stdlib (https://github.com/noir-lang/noir/pull/6612) fix(LSP): use generic self type to narrow down methods to complete (https://github.com/noir-lang/noir/pull/6617) fix!: Disallow `#[export]` on associated methods (https://github.com/noir-lang/noir/pull/6626) chore: redo typo PR by donatik27 (https://github.com/noir-lang/noir/pull/6575) chore: redo typo PR by Dimitrolito (https://github.com/noir-lang/noir/pull/6614) feat: simplify `jmpif`s by reversing branches if condition is negated (https://github.com/noir-lang/noir/pull/5891) fix: Do not warn on unused functions marked with #[export] (https://github.com/noir-lang/noir/pull/6625) chore: Add panic for compiler error described in #6620 (https://github.com/noir-lang/noir/pull/6621) feat(perf): Track last loads per block in mem2reg and remove them if possible (https://github.com/noir-lang/noir/pull/6088) fix(ssa): Track all local allocations during flattening (https://github.com/noir-lang/noir/pull/6619) feat(comptime): Implement blackbox functions in comptime interpreter (https://github.com/noir-lang/noir/pull/6551) chore: derive PartialEq and Hash for FieldElement (https://github.com/noir-lang/noir/pull/6610) chore: ignore almost-empty directories in nargo_cli tests (https://github.com/noir-lang/noir/pull/6611) chore: remove temporary allocations from `num_bits` (https://github.com/noir-lang/noir/pull/6600) chore: Release Noir(1.0.0-beta.0) (https://github.com/noir-lang/noir/pull/6562) feat: Add `array_refcount` and `slice_refcount` builtins for debugging (https://github.com/noir-lang/noir/pull/6584) chore!: Require types of globals to be specified (https://github.com/noir-lang/noir/pull/6592) fix: don't report visibility errors when elaborating comptime value (https://github.com/noir-lang/noir/pull/6498) fix: preserve newlines between comments when formatting statements (https://github.com/noir-lang/noir/pull/6601) fix: parse a bit more SSA stuff (https://github.com/noir-lang/noir/pull/6599) chore!: remove eddsa from stdlib (https://github.com/noir-lang/noir/pull/6591) chore: Typo in oracles how to (https://github.com/noir-lang/noir/pull/6598) feat(ssa): Loop invariant code motion (https://github.com/noir-lang/noir/pull/6563) fix: remove `compiler_version` from new `Nargo.toml` (https://github.com/noir-lang/noir/pull/6590) feat: Avoid incrementing reference counts in some cases (https://github.com/noir-lang/noir/pull/6568) chore: fix typo in test name (https://github.com/noir-lang/noir/pull/6589) fix: consider prereleases to be compatible with pre-1.0.0 releases (https://github.com/noir-lang/noir/pull/6580) feat: try to inline brillig calls with all constant arguments (https://github.com/noir-lang/noir/pull/6548) fix: correct type when simplifying `derive_pedersen_generators` (https://github.com/noir-lang/noir/pull/6579) feat: Sync from aztec-packages (https://github.com/noir-lang/noir/pull/6576) --- .noir-sync-commit | 2 +- noir/noir-repo/Cargo.lock | 1 + noir/noir-repo/acvm-repo/acvm_js/build.sh | 2 +- .../compiler/integration-tests/package.json | 2 +- .../compiler/noirc_driver/src/lib.rs | 11 +- .../compiler/noirc_evaluator/Cargo.toml | 1 + .../noirc_evaluator/src/brillig/mod.rs | 8 +- .../compiler/noirc_evaluator/src/ssa.rs | 19 +- .../noirc_evaluator/src/ssa/ir/function.rs | 6 + .../noirc_evaluator/src/ssa/ir/instruction.rs | 15 +- .../src/ssa/opt/constant_folding.rs | 445 ++++++++++---- .../src/ssa/opt/flatten_cfg.rs | 556 ++++++++---------- .../noirc_evaluator/src/ssa/opt/inlining.rs | 1 - .../noirc_evaluator/src/ssa/opt/mem2reg.rs | 332 ++++++++--- .../noirc_evaluator/src/ssa/opt/unrolling.rs | 137 ++++- .../src/ssa/ssa_gen/context.rs | 14 +- .../noirc_evaluator/src/ssa/ssa_gen/mod.rs | 5 +- .../noir_stdlib/src/hash/poseidon2.nr | 6 +- .../Nargo.toml | 7 + .../src/main.nr | 15 + noir/noir-repo/tooling/nargo_cli/build.rs | 5 + .../noir-repo/tooling/noirc_abi_wasm/build.sh | 2 +- noir/noir-repo/yarn.lock | 14 +- 23 files changed, 1041 insertions(+), 565 deletions(-) create mode 100644 noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/Nargo.toml create mode 100644 noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/src/main.nr diff --git a/.noir-sync-commit b/.noir-sync-commit index 9bbde85e56b..93c17670eeb 100644 --- a/.noir-sync-commit +++ b/.noir-sync-commit @@ -1 +1 @@ -68c32b4ffd9b069fe4b119327dbf4018c17ab9d4 +4ff308128755c95b4d461bbcb7e3a49f16145585 diff --git a/noir/noir-repo/Cargo.lock b/noir/noir-repo/Cargo.lock index 94a84b89d05..af91bafef52 100644 --- a/noir/noir-repo/Cargo.lock +++ b/noir/noir-repo/Cargo.lock @@ -3151,6 +3151,7 @@ dependencies = [ "serde_json", "serde_with", "similar-asserts", + "test-case", "thiserror", "tracing", ] diff --git a/noir/noir-repo/acvm-repo/acvm_js/build.sh b/noir/noir-repo/acvm-repo/acvm_js/build.sh index c07d2d8a4c1..16fb26e55db 100755 --- a/noir/noir-repo/acvm-repo/acvm_js/build.sh +++ b/noir/noir-repo/acvm-repo/acvm_js/build.sh @@ -25,7 +25,7 @@ function run_if_available { require_command jq require_command cargo require_command wasm-bindgen -#require_command wasm-opt +require_command wasm-opt self_path=$(dirname "$(readlink -f "$0")") pname=$(cargo read-manifest | jq -r '.name') diff --git a/noir/noir-repo/compiler/integration-tests/package.json b/noir/noir-repo/compiler/integration-tests/package.json index e33179f31e7..a9d437da792 100644 --- a/noir/noir-repo/compiler/integration-tests/package.json +++ b/noir/noir-repo/compiler/integration-tests/package.json @@ -13,7 +13,7 @@ "lint": "NODE_NO_WARNINGS=1 eslint . --ext .ts --ignore-path ./.eslintignore --max-warnings 0" }, "dependencies": { - "@aztec/bb.js": "portal:../../../../barretenberg/ts", + "@aztec/bb.js": "0.63.1", "@noir-lang/noir_js": "workspace:*", "@noir-lang/noir_wasm": "workspace:*", "@nomicfoundation/hardhat-chai-matchers": "^2.0.0", diff --git a/noir/noir-repo/compiler/noirc_driver/src/lib.rs b/noir/noir-repo/compiler/noirc_driver/src/lib.rs index 72ea464805f..a7cd9ff90ac 100644 --- a/noir/noir-repo/compiler/noirc_driver/src/lib.rs +++ b/noir/noir-repo/compiler/noirc_driver/src/lib.rs @@ -126,11 +126,19 @@ pub struct CompileOptions { #[arg(long)] pub skip_underconstrained_check: bool, - /// Setting to decide on an inlining strategy for brillig functions. + /// Setting to decide on an inlining strategy for Brillig functions. /// A more aggressive inliner should generate larger programs but more optimized /// A less aggressive inliner should generate smaller programs #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = i64::MAX)] pub inliner_aggressiveness: i64, + + /// Setting the maximum acceptable increase in Brillig bytecode size due to + /// unrolling small loops. When left empty, any change is accepted as long + /// as it required fewer SSA instructions. + /// A higher value results in fewer jumps but a larger program. + /// A lower value keeps the original program if it was smaller, even if it has more jumps. + #[arg(long, hide = true, allow_hyphen_values = true)] + pub max_bytecode_increase_percent: Option, } pub fn parse_expression_width(input: &str) -> Result { @@ -589,6 +597,7 @@ pub fn compile_no_check( emit_ssa: if options.emit_ssa { Some(context.package_build_path.clone()) } else { None }, skip_underconstrained_check: options.skip_underconstrained_check, inliner_aggressiveness: options.inliner_aggressiveness, + max_bytecode_increase_percent: options.max_bytecode_increase_percent, }; let SsaProgramArtifact { program, debug, warnings, names, brillig_names, error_types, .. } = diff --git a/noir/noir-repo/compiler/noirc_evaluator/Cargo.toml b/noir/noir-repo/compiler/noirc_evaluator/Cargo.toml index e25b5bf855a..bb8c62cfd95 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/Cargo.toml +++ b/noir/noir-repo/compiler/noirc_evaluator/Cargo.toml @@ -33,6 +33,7 @@ cfg-if.workspace = true proptest.workspace = true similar-asserts.workspace = true num-traits.workspace = true +test-case.workspace = true [features] bn254 = ["noirc_frontend/bn254"] diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/brillig/mod.rs b/noir/noir-repo/compiler/noirc_evaluator/src/brillig/mod.rs index 1b61ae1a864..cb8c35cd8e0 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/brillig/mod.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/brillig/mod.rs @@ -12,7 +12,7 @@ use self::{ }, }; use crate::ssa::{ - ir::function::{Function, FunctionId, RuntimeType}, + ir::function::{Function, FunctionId}, ssa_gen::Ssa, }; use fxhash::FxHashMap as HashMap; @@ -59,7 +59,7 @@ impl std::ops::Index for Brillig { } impl Ssa { - /// Compile to brillig brillig functions and ACIR functions reachable from them + /// Compile Brillig functions and ACIR functions reachable from them #[tracing::instrument(level = "trace", skip_all)] pub(crate) fn to_brillig(&self, enable_debug_trace: bool) -> Brillig { // Collect all the function ids that are reachable from brillig @@ -67,9 +67,7 @@ impl Ssa { let brillig_reachable_function_ids = self .functions .iter() - .filter_map(|(id, func)| { - matches!(func.runtime(), RuntimeType::Brillig(_)).then_some(*id) - }) + .filter_map(|(id, func)| func.runtime().is_brillig().then_some(*id)) .collect::>(); let mut brillig = Brillig::default(); diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs index 97c1760d87c..80514b2f2cf 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs @@ -67,6 +67,11 @@ pub struct SsaEvaluatorOptions { /// The higher the value, the more inlined brillig functions will be. pub inliner_aggressiveness: i64, + + /// Maximum accepted percentage increase in the Brillig bytecode size after unrolling loops. + /// When `None` the size increase check is skipped altogether and any decrease in the SSA + /// instruction count is accepted. + pub max_bytecode_increase_percent: Option, } pub(crate) struct ArtifactsAndWarnings(Artifacts, Vec); @@ -104,7 +109,10 @@ pub(crate) fn optimize_into_acir( "After `static_assert` and `assert_constant`:", )? .run_pass(Ssa::loop_invariant_code_motion, "After Loop Invariant Code Motion:") - .try_run_pass(Ssa::unroll_loops_iteratively, "After Unrolling:")? + .try_run_pass( + |ssa| ssa.unroll_loops_iteratively(options.max_bytecode_increase_percent), + "After Unrolling:", + )? .run_pass(Ssa::simplify_cfg, "After Simplifying (2nd):") .run_pass(Ssa::flatten_cfg, "After Flattening:") .run_pass(Ssa::remove_bit_shifts, "After Removing Bit Shifts:") @@ -450,11 +458,10 @@ impl SsaBuilder { } /// The same as `run_pass` but for passes that may fail - fn try_run_pass( - mut self, - pass: fn(Ssa) -> Result, - msg: &str, - ) -> Result { + fn try_run_pass(mut self, pass: F, msg: &str) -> Result + where + F: FnOnce(Ssa) -> Result, + { self.ssa = time(msg, self.print_codegen_timings, || pass(self.ssa))?; Ok(self.print(msg)) } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/function.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/function.rs index b1233e3063e..6413107c04a 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/function.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/function.rs @@ -197,6 +197,12 @@ impl Function { } } +impl Clone for Function { + fn clone(&self) -> Self { + Function::clone_with_id(self.id(), self) + } +} + impl std::fmt::Display for RuntimeType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs index f606fffbf91..6737b335b7d 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs @@ -11,7 +11,7 @@ use fxhash::FxHasher64; use iter_extended::vecmap; use noirc_frontend::hir_def::types::Type as HirType; -use crate::ssa::opt::flatten_cfg::value_merger::ValueMerger; +use crate::ssa::{ir::function::RuntimeType, opt::flatten_cfg::value_merger::ValueMerger}; use super::{ basic_block::BasicBlockId, @@ -478,8 +478,19 @@ impl Instruction { | ArraySet { .. } | MakeArray { .. } => true, + // Store instructions must be removed by DIE in acir code, any load + // instructions should already be unused by that point. + // + // Note that this check assumes that it is being performed after the flattening + // pass and after the last mem2reg pass. This is currently the case for the DIE + // pass where this check is done, but does mean that we cannot perform mem2reg + // after the DIE pass. + Store { .. } => { + matches!(function.runtime(), RuntimeType::Acir(_)) + && function.reachable_blocks().len() == 1 + } + Constrain(..) - | Store { .. } | EnableSideEffectsIf { .. } | IncrementRc { .. } | DecrementRc { .. } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs index ceda0c6272f..41c84c935b1 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs @@ -149,7 +149,8 @@ impl Function { use_constraint_info: bool, brillig_info: Option, ) { - let mut context = Context::new(self, use_constraint_info, brillig_info); + let mut context = Context::new(use_constraint_info, brillig_info); + let mut dom = DominatorTree::with_function(self); context.block_queue.push_back(self.entry_block()); while let Some(block) = context.block_queue.pop_front() { @@ -158,7 +159,7 @@ impl Function { } context.visited_blocks.insert(block); - context.fold_constants_in_block(self, block); + context.fold_constants_in_block(&mut self.dfg, &mut dom, block); } } } @@ -172,22 +173,15 @@ struct Context<'a> { /// Contains sets of values which are constrained to be equivalent to each other. /// - /// The mapping's structure is `side_effects_enabled_var => (constrained_value => [(block, simplified_value)])`. + /// The mapping's structure is `side_effects_enabled_var => (constrained_value => simplified_value)`. /// /// We partition the maps of constrained values according to the side-effects flag at the point /// at which the values are constrained. This prevents constraints which are only sometimes enforced /// being used to modify the rest of the program. - /// - /// We also keep track of how a value was simplified to other values per block. That is, - /// a same ValueId could have been simplified to one value in one block and to another value - /// in another block. - constraint_simplification_mappings: - HashMap>>, + constraint_simplification_mappings: ConstraintSimplificationCache, // Cache of instructions without any side-effects along with their outputs. cached_instruction_results: InstructionResultCache, - - dom: DominatorTree, } #[derive(Copy, Clone)] @@ -196,9 +190,56 @@ pub(crate) struct BrilligInfo<'a> { brillig_functions: &'a BTreeMap, } +/// Records a simplified equivalents of an [`Instruction`] in the blocks +/// where the constraint that advised the simplification has been encountered. +/// +/// For more information see [`ConstraintSimplificationCache`]. +#[derive(Default)] +struct SimplificationCache { + /// Simplified expressions where we found them. + /// + /// It will always have at least one value because `add` is called + /// after the default is constructed. + simplifications: HashMap, +} + +impl SimplificationCache { + /// Called with a newly encountered simplification. + fn add(&mut self, dfg: &DataFlowGraph, simple: ValueId, block: BasicBlockId) { + self.simplifications + .entry(block) + .and_modify(|existing| { + // `SimplificationCache` may already hold a simplification in this block + // so we check whether `simple` is a better simplification than the current one. + if let Some((_, simpler)) = simplify(dfg, *existing, simple) { + *existing = simpler; + }; + }) + .or_insert(simple); + } + + /// Try to find a simplification in a visible block. + fn get(&self, block: BasicBlockId, dom: &DominatorTree) -> Option { + // Deterministically walk up the dominator chain until we encounter a block that contains a simplification. + dom.find_map_dominator(block, |b| self.simplifications.get(&b).cloned()) + } +} + +/// HashMap from `(side_effects_enabled_var, Instruction)` to a simplified expression that it can +/// be replaced with based on constraints that testify to their equivalence, stored together +/// with the set of blocks at which this constraint has been observed. +/// +/// Only blocks dominated by one in the cache should have access to this information, otherwise +/// we create a sort of time paradox where we replace an instruction with a constant we believe +/// it _should_ equal to, without ever actually producing and asserting the value. +type ConstraintSimplificationCache = HashMap>; + /// HashMap from `(Instruction, side_effects_enabled_var)` to the results of the instruction. /// Stored as a two-level map to avoid cloning Instructions during the `.get` call. /// +/// The `side_effects_enabled_var` is optional because we only use them when `Instruction::requires_acir_gen_predicate` +/// is true _and_ the constraint information is also taken into account. +/// /// In addition to each result, the original BasicBlockId is stored as well. This allows us /// to deduplicate instructions across blocks as long as the new block dominates the original. type InstructionResultCache = HashMap, ResultCache>>; @@ -208,15 +249,11 @@ type InstructionResultCache = HashMap, Resu /// For more information see [`InstructionResultCache`]. #[derive(Default)] struct ResultCache { - results: Vec<(BasicBlockId, Vec)>, + result: Option<(BasicBlockId, Vec)>, } impl<'brillig> Context<'brillig> { - fn new( - function: &Function, - use_constraint_info: bool, - brillig_info: Option>, - ) -> Self { + fn new(use_constraint_info: bool, brillig_info: Option>) -> Self { Self { use_constraint_info, brillig_info, @@ -224,52 +261,65 @@ impl<'brillig> Context<'brillig> { block_queue: Default::default(), constraint_simplification_mappings: Default::default(), cached_instruction_results: Default::default(), - dom: DominatorTree::with_function(function), } } - fn fold_constants_in_block(&mut self, function: &mut Function, block: BasicBlockId) { - let instructions = function.dfg[block].take_instructions(); + fn fold_constants_in_block( + &mut self, + dfg: &mut DataFlowGraph, + dom: &mut DominatorTree, + block: BasicBlockId, + ) { + let instructions = dfg[block].take_instructions(); - let mut side_effects_enabled_var = - function.dfg.make_constant(FieldElement::one(), Type::bool()); + // Default side effect condition variable with an enabled state. + let mut side_effects_enabled_var = dfg.make_constant(FieldElement::one(), Type::bool()); for instruction_id in instructions { self.fold_constants_into_instruction( - &mut function.dfg, + dfg, + dom, block, instruction_id, &mut side_effects_enabled_var, ); } - self.block_queue.extend(function.dfg[block].successors()); + self.block_queue.extend(dfg[block].successors()); } fn fold_constants_into_instruction( &mut self, dfg: &mut DataFlowGraph, - block: BasicBlockId, + dom: &mut DominatorTree, + mut block: BasicBlockId, id: InstructionId, side_effects_enabled_var: &mut ValueId, ) { - let constraint_simplification_mapping = - self.constraint_simplification_mappings.get(side_effects_enabled_var); - let instruction = Self::resolve_instruction( - id, - block, - dfg, - &mut self.dom, - constraint_simplification_mapping, - ); + let constraint_simplification_mapping = self.get_constraint_map(*side_effects_enabled_var); + + let instruction = + Self::resolve_instruction(id, block, dfg, dom, constraint_simplification_mapping); + let old_results = dfg.instruction_results(id).to_vec(); // If a copy of this instruction exists earlier in the block, then reuse the previous results. - if let Some(cached_results) = - self.get_cached(dfg, &instruction, *side_effects_enabled_var, block) + if let Some(cache_result) = + self.get_cached(dfg, dom, &instruction, *side_effects_enabled_var, block) { - Self::replace_result_ids(dfg, &old_results, cached_results); - return; - } + match cache_result { + CacheResult::Cached(cached) => { + Self::replace_result_ids(dfg, &old_results, cached); + return; + } + CacheResult::NeedToHoistToCommonBlock(dominator) => { + // Just change the block to insert in the common dominator instead. + // This will only move the current instance of the instruction right now. + // When constant folding is run a second time later on, it'll catch + // that the previous instance can be deduplicated to this instance. + block = dominator; + } + } + }; let new_results = // First try to inline a call to a brillig function with all constant arguments. @@ -314,7 +364,7 @@ impl<'brillig> Context<'brillig> { block: BasicBlockId, dfg: &DataFlowGraph, dom: &mut DominatorTree, - constraint_simplification_mapping: Option<&HashMap>>, + constraint_simplification_mapping: &HashMap, ) -> Instruction { let instruction = dfg[instruction_id].clone(); @@ -324,30 +374,28 @@ impl<'brillig> Context<'brillig> { // This allows us to reach a stable final `ValueId` for each instruction input as we add more // constraints to the cache. fn resolve_cache( + block: BasicBlockId, dfg: &DataFlowGraph, dom: &mut DominatorTree, - cache: Option<&HashMap>>, + cache: &HashMap, value_id: ValueId, - block: BasicBlockId, ) -> ValueId { let resolved_id = dfg.resolve(value_id); - let Some(cached_values) = cache.and_then(|cache| cache.get(&resolved_id)) else { - return resolved_id; - }; - - for (cached_block, cached_value) in cached_values { - // We can only use the simplified value if it was simplified in a block that dominates the current one - if dom.dominates(*cached_block, block) { - return resolve_cache(dfg, dom, cache, *cached_value, block); + match cache.get(&resolved_id) { + Some(simplification_cache) => { + if let Some(simplified) = simplification_cache.get(block, dom) { + resolve_cache(block, dfg, dom, cache, simplified) + } else { + resolved_id + } } + None => resolved_id, } - - resolved_id } // Resolve any inputs to ensure that we're comparing like-for-like instructions. instruction.map_values(|value_id| { - resolve_cache(dfg, dom, constraint_simplification_mapping, value_id, block) + resolve_cache(block, dfg, dom, constraint_simplification_mapping, value_id) }) } @@ -398,7 +446,7 @@ impl<'brillig> Context<'brillig> { self.get_constraint_map(side_effects_enabled_var) .entry(complex) .or_default() - .push((block, simple)); + .add(dfg, simple, block); } } } @@ -420,10 +468,12 @@ impl<'brillig> Context<'brillig> { } } + /// Get the simplification mapping from complex to simpler instructions, + /// which all depend on the same side effect condition variable. fn get_constraint_map( &mut self, side_effects_enabled_var: ValueId, - ) -> &mut HashMap> { + ) -> &mut HashMap { self.constraint_simplification_mappings.entry(side_effects_enabled_var).or_default() } @@ -438,19 +488,20 @@ impl<'brillig> Context<'brillig> { } } - fn get_cached<'a>( - &'a mut self, + /// Get a cached result if it can be used in this context. + fn get_cached( + &self, dfg: &DataFlowGraph, + dom: &mut DominatorTree, instruction: &Instruction, side_effects_enabled_var: ValueId, block: BasicBlockId, - ) -> Option<&'a [ValueId]> { + ) -> Option { let results_for_instruction = self.cached_instruction_results.get(instruction)?; - let predicate = self.use_constraint_info && instruction.requires_acir_gen_predicate(dfg); let predicate = predicate.then_some(side_effects_enabled_var); - results_for_instruction.get(&predicate)?.get(block, &mut self.dom) + results_for_instruction.get(&predicate)?.get(block, dom, instruction.has_side_effects(dfg)) } /// Checks if the given instruction is a call to a brillig function with all constant arguments. @@ -617,7 +668,9 @@ impl<'brillig> Context<'brillig> { impl ResultCache { /// Records that an `Instruction` in block `block` produced the result values `results`. fn cache(&mut self, block: BasicBlockId, results: Vec) { - self.results.push((block, results)); + if self.result.is_none() { + self.result = Some((block, results)); + } } /// Returns a set of [`ValueId`]s produced from a copy of this [`Instruction`] which sits @@ -626,13 +679,23 @@ impl ResultCache { /// We require that the cached instruction's block dominates `block` in order to avoid /// cycles causing issues (e.g. two instructions being replaced with the results of each other /// such that neither instruction exists anymore.) - fn get(&self, block: BasicBlockId, dom: &mut DominatorTree) -> Option<&[ValueId]> { - for (origin_block, results) in &self.results { + fn get( + &self, + block: BasicBlockId, + dom: &mut DominatorTree, + has_side_effects: bool, + ) -> Option { + self.result.as_ref().and_then(|(origin_block, results)| { if dom.dominates(*origin_block, block) { - return Some(results); + Some(CacheResult::Cached(results)) + } else if !has_side_effects { + // Insert a copy of this instruction in the common dominator + let dominator = dom.common_dominator(*origin_block, block); + Some(CacheResult::NeedToHoistToCommonBlock(dominator)) + } else { + None } - } - None + }) } } @@ -940,32 +1003,22 @@ mod test { // Regression for #4600 #[test] fn array_get_regression() { - // fn main f0 { - // b0(v0: u1, v1: u64): - // enable_side_effects_if v0 - // v2 = make_array [Field 0, Field 1] - // v3 = array_get v2, index v1 - // v4 = not v0 - // enable_side_effects_if v4 - // v5 = array_get v2, index v1 - // } - // // We want to make sure after constant folding both array_gets remain since they are // under different enable_side_effects_if contexts and thus one may be disabled while // the other is not. If one is removed, it is possible e.g. v4 is replaced with v2 which // is disabled (only gets from index 0) and thus returns the wrong result. let src = " - acir(inline) fn main f0 { - b0(v0: u1, v1: u64): - enable_side_effects v0 - v4 = make_array [Field 0, Field 1] : [Field; 2] - v5 = array_get v4, index v1 -> Field - v6 = not v0 - enable_side_effects v6 - v7 = array_get v4, index v1 -> Field - return - } - "; + acir(inline) fn main f0 { + b0(v0: u1, v1: u64): + enable_side_effects v0 + v4 = make_array [Field 0, Field 1] : [Field; 2] + v5 = array_get v4, index v1 -> Field + v6 = not v0 + enable_side_effects v6 + v7 = array_get v4, index v1 -> Field + return + } + "; let ssa = Ssa::from_str(src).unwrap(); // Expected output is unchanged @@ -1032,7 +1085,6 @@ mod test { // v5 = call keccakf1600(v1) // v6 = call keccakf1600(v2) // } - // // Here we're checking a situation where two identical arrays are being initialized twice and being assigned separate `ValueId`s. // This would result in otherwise identical instructions not being deduplicated. let main_id = Id::test_new(0); @@ -1083,6 +1135,106 @@ mod test { assert_eq!(ending_instruction_count, 2); } + #[test] + fn deduplicate_across_blocks() { + // fn main f0 { + // b0(v0: u1): + // v1 = not v0 + // jmp b1() + // b1(): + // v2 = not v0 + // return v2 + // } + let main_id = Id::test_new(0); + + // Compiling main + let mut builder = FunctionBuilder::new("main".into(), main_id); + let b1 = builder.insert_block(); + + let v0 = builder.add_parameter(Type::bool()); + let _v1 = builder.insert_not(v0); + builder.terminate_with_jmp(b1, Vec::new()); + + builder.switch_to_block(b1); + let v2 = builder.insert_not(v0); + builder.terminate_with_return(vec![v2]); + + let ssa = builder.finish(); + let main = ssa.main(); + assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1); + assert_eq!(main.dfg[b1].instructions().len(), 1); + + // Expected output: + // + // fn main f0 { + // b0(v0: u1): + // v1 = not v0 + // jmp b1() + // b1(): + // return v1 + // } + let ssa = ssa.fold_constants_using_constraints(); + let main = ssa.main(); + assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1); + assert_eq!(main.dfg[b1].instructions().len(), 0); + } + + #[test] + fn deduplicate_across_non_dominated_blocks() { + let src = " + brillig(inline) fn main f0 { + b0(v0: u32): + v2 = lt u32 1000, v0 + jmpif v2 then: b1, else: b2 + b1(): + v4 = add v0, u32 1 + v5 = lt v0, v4 + constrain v5 == u1 1 + jmp b2() + b2(): + v7 = lt u32 1000, v0 + jmpif v7 then: b3, else: b4 + b3(): + v8 = add v0, u32 1 + v9 = lt v0, v8 + constrain v9 == u1 1 + jmp b4() + b4(): + return + } + "; + let ssa = Ssa::from_str(src).unwrap(); + + // v4 has been hoisted, although: + // - v5 has not yet been removed since it was encountered earlier in the program + // - v8 hasn't been recognized as a duplicate of v6 yet since they still reference v4 and + // v5 respectively + let expected = " + brillig(inline) fn main f0 { + b0(v0: u32): + v2 = lt u32 1000, v0 + v4 = add v0, u32 1 + jmpif v2 then: b1, else: b2 + b1(): + v5 = add v0, u32 1 + v6 = lt v0, v5 + constrain v6 == u1 1 + jmp b2() + b2(): + jmpif v2 then: b3, else: b4 + b3(): + v8 = lt v0, v4 + constrain v8 == u1 1 + jmp b4() + b4(): + return + } + "; + + let ssa = ssa.fold_constants_using_constraints(); + assert_normalized_ssa_equals(ssa, expected); + } + #[test] fn inlines_brillig_call_without_arguments() { let src = " @@ -1260,46 +1412,87 @@ mod test { } #[test] - fn deduplicate_across_blocks() { - // fn main f0 { - // b0(v0: u1): - // v1 = not v0 - // jmp b1() - // b1(): - // v2 = not v0 - // return v2 - // } - let main_id = Id::test_new(0); - - // Compiling main - let mut builder = FunctionBuilder::new("main".into(), main_id); - let b1 = builder.insert_block(); - - let v0 = builder.add_parameter(Type::bool()); - let _v1 = builder.insert_not(v0); - builder.terminate_with_jmp(b1, Vec::new()); - - builder.switch_to_block(b1); - let v2 = builder.insert_not(v0); - builder.terminate_with_return(vec![v2]); + fn does_not_use_cached_constrain_in_block_that_is_not_dominated() { + let src = " + brillig(inline) fn main f0 { + b0(v0: Field, v1: Field): + v3 = eq v0, Field 0 + jmpif v3 then: b1, else: b2 + b1(): + v5 = eq v1, Field 1 + constrain v1 == Field 1 + jmp b2() + b2(): + v6 = eq v1, Field 0 + constrain v1 == Field 0 + return + } + "; + let ssa = Ssa::from_str(src).unwrap(); + let ssa = ssa.fold_constants_using_constraints(); + assert_normalized_ssa_equals(ssa, src); + } - let ssa = builder.finish(); - let main = ssa.main(); - assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1); - assert_eq!(main.dfg[b1].instructions().len(), 1); + #[test] + fn does_not_hoist_constrain_to_common_ancestor() { + let src = " + brillig(inline) fn main f0 { + b0(v0: Field, v1: Field): + v3 = eq v0, Field 0 + jmpif v3 then: b1, else: b2 + b1(): + constrain v1 == Field 1 + jmp b2() + b2(): + jmpif v0 then: b3, else: b4 + b3(): + constrain v1 == Field 1 // This was incorrectly hoisted to b0 but this condition is not valid when going b0 -> b2 -> b4 + jmp b4() + b4(): + return + } + "; + let ssa = Ssa::from_str(src).unwrap(); + let ssa = ssa.fold_constants_using_constraints(); + assert_normalized_ssa_equals(ssa, src); + } - // Expected output: - // - // fn main f0 { - // b0(v0: u1): - // v1 = not v0 - // jmp b1() - // b1(): - // return v1 - // } + #[test] + fn deduplicates_side_effecting_intrinsics() { + let src = " + // After EnableSideEffectsIf removal: + acir(inline) fn main f0 { + b0(v0: Field, v1: Field, v2: u1): + v4 = call is_unconstrained() -> u1 + v7 = call to_be_radix(v0, u32 256) -> [u8; 1] // `a.to_be_radix(256)`; + inc_rc v7 + v8 = call to_be_radix(v0, u32 256) -> [u8; 1] // duplicate load of `a` + inc_rc v8 + v9 = cast v2 as Field // `if c { a.to_be_radix(256) }` + v10 = mul v0, v9 // attaching `c` to `a` + v11 = call to_be_radix(v10, u32 256) -> [u8; 1] // calling `to_radix(c * a)` + inc_rc v11 + enable_side_effects v2 // side effect var for `c` shifted down by removal + return + } + "; + let ssa = Ssa::from_str(src).unwrap(); + let expected = " + acir(inline) fn main f0 { + b0(v0: Field, v1: Field, v2: u1): + v4 = call is_unconstrained() -> u1 + v7 = call to_be_radix(v0, u32 256) -> [u8; 1] + inc_rc v7 + inc_rc v7 + v8 = cast v2 as Field + v9 = mul v0, v8 + v10 = call to_be_radix(v9, u32 256) -> [u8; 1] + inc_rc v10 + enable_side_effects v2 + return + } + "; let ssa = ssa.fold_constants_using_constraints(); - let main = ssa.main(); - assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1); - assert_eq!(main.dfg[b1].instructions().len(), 0); + assert_normalized_ssa_equals(ssa, expected); } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs index 5d114672a55..c8dd0e3c5a3 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs @@ -131,8 +131,7 @@ //! v11 = mul v4, Field 12 //! v12 = add v10, v11 //! store v12 at v5 (new store) -use fxhash::FxHashMap as HashMap; -use std::collections::{BTreeMap, HashSet}; +use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet}; use acvm::{acir::AcirField, acir::BlackBoxFunc, FieldElement}; use iter_extended::vecmap; @@ -186,18 +185,6 @@ struct Context<'f> { /// Maps start of branch -> end of branch branch_ends: HashMap, - /// Maps an address to the old and new value of the element at that address - /// These only hold stores for one block at a time and is cleared - /// between inlining of branches. - store_values: HashMap, - - /// Stores all allocations local to the current branch. - /// Since these branches are local to the current branch (ie. only defined within one branch of - /// an if expression), they should not be merged with their previous value or stored value in - /// the other branch since there is no such value. The ValueId here is that which is returned - /// by the allocate instruction. - local_allocations: HashSet, - /// A stack of each jmpif condition that was taken to reach a particular point in the program. /// When two branches are merged back into one, this constitutes a join point, and is analogous /// to the rest of the program after an if statement. When such a join point / end block is @@ -214,13 +201,15 @@ struct Context<'f> { /// When processing a block, we pop this stack to get its arguments /// and at the end we push the arguments for his successor arguments_stack: Vec>, -} -#[derive(Clone)] -pub(crate) struct Store { - old_value: ValueId, - new_value: ValueId, - call_stack: CallStack, + /// Stores all allocations local to the current branch. + /// + /// Since these branches are local to the current branch (i.e. only defined within one branch of + /// an if expression), they should not be merged with their previous value or stored value in + /// the other branch since there is no such value. + /// + /// The `ValueId` here is that which is returned by the allocate instruction. + local_allocations: HashSet, } #[derive(Clone)] @@ -231,8 +220,6 @@ struct ConditionalBranch { old_condition: ValueId, // The condition of the branch condition: ValueId, - // The store values accumulated when processing the branch - store_values: HashMap, // The allocations accumulated when processing the branch local_allocations: HashSet, } @@ -263,12 +250,11 @@ fn flatten_function_cfg(function: &mut Function, no_predicates: &HashMap Context<'f> { let old_condition = *condition; let then_condition = self.inserter.resolve(old_condition); - let old_stores = std::mem::take(&mut self.store_values); let old_allocations = std::mem::take(&mut self.local_allocations); let branch = ConditionalBranch { old_condition, condition: self.link_condition(then_condition), - store_values: old_stores, - local_allocations: old_allocations, last_block: *then_destination, + local_allocations: old_allocations, }; let cond_context = ConditionalContext { condition: then_condition, @@ -473,19 +457,12 @@ impl<'f> Context<'f> { ); let else_condition = self.link_condition(else_condition); - // Make sure the else branch sees the previous values of each store - // rather than any values created in the 'then' branch. - let old_stores = std::mem::take(&mut cond_context.then_branch.store_values); - cond_context.then_branch.store_values = std::mem::take(&mut self.store_values); - self.undo_stores_in_then_branch(&cond_context.then_branch.store_values); - let old_allocations = std::mem::take(&mut self.local_allocations); let else_branch = ConditionalBranch { old_condition: cond_context.then_branch.old_condition, condition: else_condition, - store_values: old_stores, - local_allocations: old_allocations, last_block: *block, + local_allocations: old_allocations, }; cond_context.then_branch.local_allocations.clear(); cond_context.else_branch = Some(else_branch); @@ -509,10 +486,8 @@ impl<'f> Context<'f> { } let mut else_branch = cond_context.else_branch.unwrap(); - let stores_in_branch = std::mem::replace(&mut self.store_values, else_branch.store_values); self.local_allocations = std::mem::take(&mut else_branch.local_allocations); else_branch.last_block = *block; - else_branch.store_values = stores_in_branch; cond_context.else_branch = Some(else_branch); // We must remember to reset whether side effects are enabled when both branches @@ -580,8 +555,6 @@ impl<'f> Context<'f> { .first() }); - let call_stack = cond_context.call_stack; - self.merge_stores(cond_context.then_branch, cond_context.else_branch, call_stack); self.arguments_stack.pop(); self.arguments_stack.pop(); self.arguments_stack.push(args); @@ -636,116 +609,29 @@ impl<'f> Context<'f> { self.insert_instruction_with_typevars(enable_side_effects, None, call_stack); } - /// Merge any store instructions found in each branch. - /// - /// This function relies on the 'then' branch being merged before the 'else' branch of a jmpif - /// instruction. If this ordering is changed, the ordering that store values are merged within - /// this function also needs to be changed to reflect that. - fn merge_stores( - &mut self, - then_branch: ConditionalBranch, - else_branch: Option, - call_stack: CallStack, - ) { - // Address -> (then_value, else_value, value_before_the_if) - let mut new_map = BTreeMap::new(); - - for (address, store) in then_branch.store_values { - new_map.insert(address, (store.new_value, store.old_value, store.old_value)); - } - - if else_branch.is_some() { - for (address, store) in else_branch.clone().unwrap().store_values { - if let Some(entry) = new_map.get_mut(&address) { - entry.1 = store.new_value; - } else { - new_map.insert(address, (store.old_value, store.new_value, store.old_value)); - } - } - } - - let then_condition = then_branch.condition; - let block = self.inserter.function.entry_block(); - - // Merging must occur in a separate loop as we cannot borrow `self` as mutable while `value_merger` does - let mut new_values = HashMap::default(); - for (address, (then_case, else_case, _)) in &new_map { - let instruction = Instruction::IfElse { - then_condition, - then_value: *then_case, - else_value: *else_case, - }; - let dfg = &mut self.inserter.function.dfg; - let value = dfg - .insert_instruction_and_results(instruction, block, None, call_stack.clone()) - .first(); - - new_values.insert(address, value); - } - - // Replace stores with new merged values - for (address, (_, _, old_value)) in &new_map { - let value = new_values[address]; - let address = *address; - self.insert_instruction_with_typevars( - Instruction::Store { address, value }, - None, - call_stack.clone(), - ); - - if let Some(store) = self.store_values.get_mut(&address) { - store.new_value = value; - } else { - self.store_values.insert( - address, - Store { - old_value: *old_value, - new_value: value, - call_stack: call_stack.clone(), - }, - ); - } - } - } - - fn remember_store(&mut self, address: ValueId, new_value: ValueId, call_stack: CallStack) { - if !self.local_allocations.contains(&address) { - if let Some(store_value) = self.store_values.get_mut(&address) { - store_value.new_value = new_value; - } else { - let load = Instruction::Load { address }; - - let load_type = Some(vec![self.inserter.function.dfg.type_of_value(new_value)]); - let old_value = self - .insert_instruction_with_typevars(load.clone(), load_type, call_stack.clone()) - .first(); - - self.store_values.insert(address, Store { old_value, new_value, call_stack }); - } - } - } - /// Push the given instruction to the end of the entry block of the current function. /// /// Note that each ValueId of the instruction will be mapped via self.inserter.resolve. /// As a result, the instruction that will be pushed will actually be a new instruction /// with a different InstructionId from the original. The results of the given instruction /// will also be mapped to the results of the new instruction. - fn push_instruction(&mut self, id: InstructionId) -> Vec { + /// + /// `previous_allocate_result` should only be set to the result of an allocate instruction + /// if that instruction was the instruction immediately previous to this one - if there are + /// any instructions in between it should be None. + fn push_instruction(&mut self, id: InstructionId) { let (instruction, call_stack) = self.inserter.map_instruction(id); let instruction = self.handle_instruction_side_effects(instruction, call_stack.clone()); - let is_allocate = matches!(instruction, Instruction::Allocate); + let instruction_is_allocate = matches!(&instruction, Instruction::Allocate); let entry = self.inserter.function.entry_block(); let results = self.inserter.push_instruction_value(instruction, id, entry, call_stack); // Remember an allocate was created local to this branch so that we do not try to merge store // values across branches for it later. - if is_allocate { + if instruction_is_allocate { self.local_allocations.insert(results.first()); } - - results.results().into_owned() } /// If we are currently in a branch, we need to modify constrain instructions @@ -782,8 +668,32 @@ impl<'f> Context<'f> { Instruction::Constrain(lhs, rhs, message) } Instruction::Store { address, value } => { - self.remember_store(address, value, call_stack); - Instruction::Store { address, value } + // If this instruction immediately follows an allocate, and stores to that + // address there is no previous value to load and we don't need a merge anyway. + if self.local_allocations.contains(&address) { + Instruction::Store { address, value } + } else { + // Instead of storing `value`, store `if condition { value } else { previous_value }` + let typ = self.inserter.function.dfg.type_of_value(value); + let load = Instruction::Load { address }; + let previous_value = self + .insert_instruction_with_typevars( + load, + Some(vec![typ]), + call_stack.clone(), + ) + .first(); + + let instruction = Instruction::IfElse { + then_condition: condition, + then_value: value, + + else_value: previous_value, + }; + + let updated_value = self.insert_instruction(instruction, call_stack); + Instruction::Store { address, value: updated_value } + } } Instruction::RangeCheck { value, max_bit_size, assert_message } => { // Replace value with `value * predicate` to zero out value when predicate is inactive. @@ -905,23 +815,11 @@ impl<'f> Context<'f> { call_stack, ) } - - fn undo_stores_in_then_branch(&mut self, store_values: &HashMap) { - for (address, store) in store_values { - let address = *address; - let value = store.old_value; - let instruction = Instruction::Store { address, value }; - // Considering the location of undoing a store to be the same as the original store. - self.insert_instruction_with_typevars(instruction, None, store.call_stack.clone()); - } - } } #[cfg(test)] mod test { - use std::sync::Arc; - - use acvm::{acir::AcirField, FieldElement}; + use acvm::acir::AcirField; use crate::ssa::{ function_builder::FunctionBuilder, @@ -1023,15 +921,13 @@ mod test { b0(v0: u1, v1: &mut Field): enable_side_effects v0 v2 = load v1 -> Field - store Field 5 at v1 - v4 = not v0 - store v2 at v1 + v3 = cast v0 as Field + v5 = sub Field 5, v2 + v6 = mul v3, v5 + v7 = add v2, v6 + store v7 at v1 + v8 = not v0 enable_side_effects u1 1 - v6 = cast v0 as Field - v7 = sub Field 5, v2 - v8 = mul v6, v7 - v9 = add v2, v8 - store v9 at v1 return } "; @@ -1062,17 +958,20 @@ mod test { b0(v0: u1, v1: &mut Field): enable_side_effects v0 v2 = load v1 -> Field - store Field 5 at v1 - v4 = not v0 - store v2 at v1 - enable_side_effects v4 - v5 = load v1 -> Field - store Field 6 at v1 + v3 = cast v0 as Field + v5 = sub Field 5, v2 + v6 = mul v3, v5 + v7 = add v2, v6 + store v7 at v1 + v8 = not v0 + enable_side_effects v8 + v9 = load v1 -> Field + v10 = cast v8 as Field + v12 = sub Field 6, v9 + v13 = mul v10, v12 + v14 = add v9, v13 + store v14 at v1 enable_side_effects u1 1 - v8 = cast v0 as Field - v10 = mul v8, Field -1 - v11 = add Field 6, v10 - store v11 at v1 return } "; @@ -1115,84 +1014,123 @@ mod test { // b7 b8 // ↘ ↙ // b9 - let src = " - acir(inline) fn main f0 { - b0(v0: u1, v1: u1): - v2 = allocate -> &mut Field - store Field 0 at v2 - v4 = load v2 -> Field - // call v1(Field 0, v4) - jmp b1() - b1(): - store Field 1 at v2 - v6 = load v2 -> Field - // call v1(Field 1, v6) - jmpif v0 then: b2, else: b3 - b2(): - store Field 2 at v2 - v8 = load v2 -> Field - // call v1(Field 2, v8) - jmp b4() - b4(): - v12 = load v2 -> Field - // call v1(Field 4, v12) - jmpif v1 then: b5, else: b6 - b5(): - store Field 5 at v2 - v14 = load v2 -> Field - // call v1(Field 5, v14) - jmp b7() - b7(): - v18 = load v2 -> Field - // call v1(Field 7, v18) - jmp b9() - b9(): - v22 = load v2 -> Field - // call v1(Field 9, v22) - v23 = load v2 -> Field - return v23 - b6(): - store Field 6 at v2 - v16 = load v2 -> Field - // call v1(Field 6, v16) - jmp b7() - b3(): - store Field 3 at v2 - v10 = load v2 -> Field - // call v1(Field 3, v10) - jmp b8() - b8(): - v20 = load v2 -> Field - // call v1(Field 8, v20) - jmp b9() - } - "; + let main_id = Id::test_new(0); + let mut builder = FunctionBuilder::new("main".into(), main_id); - let ssa = Ssa::from_str(src).unwrap(); - let ssa = ssa.flatten_cfg().mem2reg(); + let b1 = builder.insert_block(); + let b2 = builder.insert_block(); + let b3 = builder.insert_block(); + let b4 = builder.insert_block(); + let b5 = builder.insert_block(); + let b6 = builder.insert_block(); + let b7 = builder.insert_block(); + let b8 = builder.insert_block(); + let b9 = builder.insert_block(); + + let c1 = builder.add_parameter(Type::bool()); + let c4 = builder.add_parameter(Type::bool()); + + let r1 = builder.insert_allocate(Type::field()); + + let store_value = |builder: &mut FunctionBuilder, value: u128| { + let value = builder.field_constant(value); + builder.insert_store(r1, value); + }; + + let test_function = Id::test_new(1); + + let call_test_function = |builder: &mut FunctionBuilder, block: u128| { + let block = builder.field_constant(block); + let load = builder.insert_load(r1, Type::field()); + builder.insert_call(test_function, vec![block, load], Vec::new()); + }; + + let switch_store_and_test_function = + |builder: &mut FunctionBuilder, block, block_number: u128| { + builder.switch_to_block(block); + store_value(builder, block_number); + call_test_function(builder, block_number); + }; + + let switch_and_test_function = + |builder: &mut FunctionBuilder, block, block_number: u128| { + builder.switch_to_block(block); + call_test_function(builder, block_number); + }; + + store_value(&mut builder, 0); + call_test_function(&mut builder, 0); + builder.terminate_with_jmp(b1, vec![]); + + switch_store_and_test_function(&mut builder, b1, 1); + builder.terminate_with_jmpif(c1, b2, b3); + + switch_store_and_test_function(&mut builder, b2, 2); + builder.terminate_with_jmp(b4, vec![]); + + switch_store_and_test_function(&mut builder, b3, 3); + builder.terminate_with_jmp(b8, vec![]); + + switch_and_test_function(&mut builder, b4, 4); + builder.terminate_with_jmpif(c4, b5, b6); + + switch_store_and_test_function(&mut builder, b5, 5); + builder.terminate_with_jmp(b7, vec![]); + + switch_store_and_test_function(&mut builder, b6, 6); + builder.terminate_with_jmp(b7, vec![]); + + switch_and_test_function(&mut builder, b7, 7); + builder.terminate_with_jmp(b9, vec![]); + + switch_and_test_function(&mut builder, b8, 8); + builder.terminate_with_jmp(b9, vec![]); + + switch_and_test_function(&mut builder, b9, 9); + let load = builder.insert_load(r1, Type::field()); + builder.terminate_with_return(vec![load]); + + let ssa = builder.finish().flatten_cfg().mem2reg(); // Expected results after mem2reg removes the allocation and each load and store: - let expected = " - acir(inline) fn main f0 { - b0(v0: u1, v1: u1): - v2 = allocate -> &mut Field - enable_side_effects v0 - v3 = mul v0, v1 - enable_side_effects v3 - v4 = not v1 - v5 = mul v0, v4 - enable_side_effects v0 - v6 = cast v3 as Field - v8 = mul v6, Field -1 - v10 = add Field 6, v8 - v11 = not v0 - enable_side_effects u1 1 - v13 = cast v0 as Field - v15 = sub v10, Field 3 - v16 = mul v13, v15 - v17 = add Field 3, v16 - return v17 - }"; + // + // fn main f0 { + // b0(v0: u1, v1: u1): + // call test_function(Field 0, Field 0) + // call test_function(Field 1, Field 1) + // enable_side_effects v0 + // call test_function(Field 2, Field 2) + // call test_function(Field 4, Field 2) + // v29 = and v0, v1 + // enable_side_effects v29 + // call test_function(Field 5, Field 5) + // v32 = not v1 + // v33 = and v0, v32 + // enable_side_effects v33 + // call test_function(Field 6, Field 6) + // enable_side_effects v0 + // v36 = mul v1, Field 5 + // v37 = mul v32, Field 2 + // v38 = add v36, v37 + // v39 = mul v1, Field 5 + // v40 = mul v32, Field 6 + // v41 = add v39, v40 + // call test_function(Field 7, v42) + // v43 = not v0 + // enable_side_effects v43 + // store Field 3 at v2 + // call test_function(Field 3, Field 3) + // call test_function(Field 8, Field 3) + // enable_side_effects Field 1 + // v47 = mul v0, v41 + // v48 = mul v43, Field 1 + // v49 = add v47, v48 + // v50 = mul v0, v44 + // v51 = mul v43, Field 3 + // v52 = add v50, v51 + // call test_function(Field 9, v53) + // return v54 + // } let main = ssa.main(); let ret = match main.dfg[main.entry_block()].terminator() { @@ -1201,12 +1139,7 @@ mod test { }; let merged_values = get_all_constants_reachable_from_instruction(&main.dfg, ret); - assert_eq!( - merged_values, - vec![FieldElement::from(3u128), FieldElement::from(6u128), -FieldElement::from(1u128)] - ); - - assert_normalized_ssa_equals(ssa, expected); + assert_eq!(merged_values, vec![1, 3, 5, 6]); } #[test] @@ -1287,7 +1220,7 @@ mod test { fn get_all_constants_reachable_from_instruction( dfg: &DataFlowGraph, value: ValueId, - ) -> Vec { + ) -> Vec { match dfg[value] { Value::Instruction { instruction, .. } => { let mut values = vec![]; @@ -1305,7 +1238,7 @@ mod test { values.dedup(); values } - Value::NumericConstant { constant, .. } => vec![constant], + Value::NumericConstant { constant, .. } => vec![constant.to_u128()], _ => Vec::new(), } } @@ -1344,63 +1277,74 @@ mod test { fn should_not_merge_incorrectly_to_false() { // Regression test for #1792 // Tests that it does not simplify a true constraint an always-false constraint - // acir(inline) fn main f1 { - // b0(v0: [u8; 2]): - // v5 = array_get v0, index u8 0 - // v6 = cast v5 as u32 - // v8 = truncate v6 to 1 bits, max_bit_size: 32 - // v9 = cast v8 as u1 - // v10 = allocate - // store u8 0 at v10 - // jmpif v9 then: b2, else: b3 - // b2(): - // v12 = cast v5 as Field - // v13 = add v12, Field 1 - // store v13 at v10 - // jmp b4() - // b4(): - // constrain v9 == u1 1 - // return - // b3(): - // store u8 0 at v10 - // jmp b4() - // } - let main_id = Id::test_new(1); - let mut builder = FunctionBuilder::new("main".into(), main_id); - builder.insert_block(); // b0 - let b1 = builder.insert_block(); - let b2 = builder.insert_block(); - let b3 = builder.insert_block(); - let element_type = Arc::new(vec![Type::unsigned(8)]); - let array_type = Type::Array(element_type.clone(), 2); - let array = builder.add_parameter(array_type); - let zero = builder.numeric_constant(0_u128, Type::unsigned(8)); - let v5 = builder.insert_array_get(array, zero, Type::unsigned(8)); - let v6 = builder.insert_cast(v5, Type::unsigned(32)); - let i_two = builder.numeric_constant(2_u128, Type::unsigned(32)); - let v8 = builder.insert_binary(v6, BinaryOp::Mod, i_two); - let v9 = builder.insert_cast(v8, Type::bool()); - let v10 = builder.insert_allocate(Type::field()); - builder.insert_store(v10, zero); - builder.terminate_with_jmpif(v9, b1, b2); - builder.switch_to_block(b1); - let one = builder.field_constant(1_u128); - let v5b = builder.insert_cast(v5, Type::field()); - let v13: Id = builder.insert_binary(v5b, BinaryOp::Add, one); - let v14 = builder.insert_cast(v13, Type::unsigned(8)); - builder.insert_store(v10, v14); - builder.terminate_with_jmp(b3, vec![]); - builder.switch_to_block(b2); - builder.insert_store(v10, zero); - builder.terminate_with_jmp(b3, vec![]); - builder.switch_to_block(b3); - let v_true = builder.numeric_constant(true, Type::bool()); - let v12 = builder.insert_binary(v9, BinaryOp::Eq, v_true); - builder.insert_constrain(v12, v_true, None); - builder.terminate_with_return(vec![]); - let ssa = builder.finish(); + + let src = " + acir(inline) fn main f0 { + b0(v0: [u8; 2]): + v2 = array_get v0, index u8 0 -> u8 + v3 = cast v2 as u32 + v4 = truncate v3 to 1 bits, max_bit_size: 32 + v5 = cast v4 as u1 + v6 = allocate -> &mut Field + store u8 0 at v6 + jmpif v5 then: b2, else: b1 + b2(): + v7 = cast v2 as Field + v9 = add v7, Field 1 + v10 = cast v9 as u8 + store v10 at v6 + jmp b3() + b3(): + constrain v5 == u1 1 + return + b1(): + store u8 0 at v6 + jmp b3() + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + let expected = " + acir(inline) fn main f0 { + b0(v0: [u8; 2]): + v2 = array_get v0, index u8 0 -> u8 + v3 = cast v2 as u32 + v4 = truncate v3 to 1 bits, max_bit_size: 32 + v5 = cast v4 as u1 + v6 = allocate -> &mut Field + store u8 0 at v6 + enable_side_effects v5 + v7 = cast v2 as Field + v9 = add v7, Field 1 + v10 = cast v9 as u8 + v11 = load v6 -> u8 + v12 = cast v4 as Field + v13 = cast v11 as Field + v14 = sub v9, v13 + v15 = mul v12, v14 + v16 = add v13, v15 + v17 = cast v16 as u8 + store v17 at v6 + v18 = not v5 + enable_side_effects v18 + v19 = load v6 -> u8 + v20 = cast v18 as Field + v21 = cast v19 as Field + v23 = sub Field 0, v21 + v24 = mul v20, v23 + v25 = add v21, v24 + v26 = cast v25 as u8 + store v26 at v6 + enable_side_effects u1 1 + constrain v5 == u1 1 + return + } + "; + let flattened_ssa = ssa.flatten_cfg(); let main = flattened_ssa.main(); + // Now assert that there is not an always-false constraint after flattening: let mut constrain_count = 0; for instruction in main.dfg[main.entry_block()].instructions() { @@ -1414,6 +1358,8 @@ mod test { } } assert_eq!(constrain_count, 1); + + assert_normalized_ssa_equals(flattened_ssa, expected); } #[test] @@ -1549,7 +1495,7 @@ mod test { b2(): return b1(): - jmp b2() + jmp b2() } "; let merged_ssa = Ssa::from_str(src).unwrap(); diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/inlining.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/inlining.rs index 6cf7070e65e..f91487fd73e 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/inlining.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/inlining.rs @@ -1089,7 +1089,6 @@ mod test { } #[test] - #[ignore] #[should_panic( expected = "Attempted to recur more than 1000 times during inlining function 'main': acir(inline) fn main f0 {" )] diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs index 0690dbbf204..53a31ae57c1 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs @@ -18,6 +18,7 @@ //! - A reference with 0 aliases means we were unable to find which reference this reference //! refers to. If such a reference is stored to, we must conservatively invalidate every //! reference in the current block. +//! - We also track the last load instruction to each address per block. //! //! From there, to figure out the value of each reference at the end of block, iterate each instruction: //! - On `Instruction::Allocate`: @@ -28,6 +29,13 @@ //! - Furthermore, if the result of the load is a reference, mark the result as an alias //! of the reference it dereferences to (if known). //! - If which reference it dereferences to is not known, this load result has no aliases. +//! - We also track the last instance of a load instruction to each address in a block. +//! If we see that the last load instruction was from the same address as the current load instruction, +//! we move to replace the result of the current load with the result of the previous load. +//! This removal requires a couple conditions: +//! - No store occurs to that address before the next load, +//! - The address is not used as an argument to a call +//! This optimization helps us remove repeated loads for which there are not known values. //! - On `Instruction::Store { address, value }`: //! - If the address of the store is known: //! - If the address has exactly 1 alias: @@ -40,11 +48,13 @@ //! - Conservatively mark every alias in the block to `Unknown`. //! - Additionally, if there were no Loads to any alias of the address between this Store and //! the previous Store to the same address, the previous store can be removed. +//! - Remove the instance of the last load instruction to the address and its aliases //! - On `Instruction::Call { arguments }`: //! - If any argument of the call is a reference, set the value of each alias of that //! reference to `Unknown` //! - Any builtin functions that may return aliases if their input also contains a //! reference should be tracked. Examples: `slice_push_back`, `slice_insert`, `slice_remove`, etc. +//! - Remove the instance of the last load instruction for any reference arguments and their aliases //! //! On a terminator instruction: //! - If the terminator is a `Jmp`: @@ -274,6 +284,9 @@ impl<'f> PerFunctionContext<'f> { if let Some(first_predecessor) = predecessors.next() { let mut first = self.blocks.get(&first_predecessor).cloned().unwrap_or_default(); first.last_stores.clear(); + // Last loads are tracked per block. During unification we are creating a new block from the current one, + // so we must clear the last loads of the current block before we return the new block. + first.last_loads.clear(); // Note that we have to start folding with the first block as the accumulator. // If we started with an empty block, an empty block union'd with any other block @@ -410,6 +423,28 @@ impl<'f> PerFunctionContext<'f> { self.last_loads.insert(address, (instruction, block_id)); } + + // Check whether the block has a repeat load from the same address (w/ no calls or stores in between the loads). + // If we do have a repeat load, we can remove the current load and map its result to the previous load's result. + if let Some(last_load) = references.last_loads.get(&address) { + let Instruction::Load { address: previous_address } = + &self.inserter.function.dfg[*last_load] + else { + panic!("Expected a Load instruction here"); + }; + let result = self.inserter.function.dfg.instruction_results(instruction)[0]; + let previous_result = + self.inserter.function.dfg.instruction_results(*last_load)[0]; + if *previous_address == address { + self.inserter.map_value(result, previous_result); + self.instructions_to_remove.insert(instruction); + } + } + // We want to set the load for every load even if the address has a known value + // and the previous load instruction was removed. + // We are safe to still remove a repeat load in this case as we are mapping from the current load's + // result to the previous load, which if it was removed should already have a mapping to the known value. + references.set_last_load(address, instruction); } Instruction::Store { address, value } => { let address = self.inserter.function.dfg.resolve(*address); @@ -435,6 +470,8 @@ impl<'f> PerFunctionContext<'f> { } references.set_known_value(address, value); + // If we see a store to an address, the last load to that address needs to remain. + references.keep_last_load_for(address, self.inserter.function); references.last_stores.insert(address, instruction); } Instruction::Allocate => { @@ -542,6 +579,9 @@ impl<'f> PerFunctionContext<'f> { let value = self.inserter.function.dfg.resolve(*value); references.set_unknown(value); references.mark_value_used(value, self.inserter.function); + + // If a reference is an argument to a call, the last load to that address and its aliases needs to remain. + references.keep_last_load_for(value, self.inserter.function); } } } @@ -572,6 +612,12 @@ impl<'f> PerFunctionContext<'f> { let destination_parameters = self.inserter.function.dfg[*destination].parameters(); assert_eq!(destination_parameters.len(), arguments.len()); + // If we have multiple parameters that alias that same argument value, + // then those parameters also alias each other. + // We save parameters with repeat arguments to later mark those + // parameters as aliasing one another. + let mut arg_set: HashMap> = HashMap::default(); + // Add an alias for each reference parameter for (parameter, argument) in destination_parameters.iter().zip(arguments) { if self.inserter.function.dfg.value_is_reference(*parameter) { @@ -581,10 +627,27 @@ impl<'f> PerFunctionContext<'f> { if let Some(aliases) = references.aliases.get_mut(expression) { // The argument reference is possibly aliased by this block parameter aliases.insert(*parameter); + + // Check if we have seen the same argument + let seen_parameters = arg_set.entry(argument).or_default(); + // Add the current parameter to the parameters we have seen for this argument. + // The previous parameters and the current one alias one another. + seen_parameters.insert(*parameter); } } } } + + // Set the aliases of the parameters + for (_, aliased_params) in arg_set { + for param in aliased_params.iter() { + self.set_aliases( + references, + *param, + AliasSet::known_multiple(aliased_params.clone()), + ); + } + } } TerminatorInstruction::Return { return_values, .. } => { // Removing all `last_stores` for each returned reference is more important here @@ -612,6 +675,8 @@ mod tests { map::Id, types::Type, }, + opt::assert_normalized_ssa_equals, + Ssa, }; #[test] @@ -822,88 +887,53 @@ mod tests { // is later stored in a successor block #[test] fn load_aliases_in_predecessor_block() { - // fn main { - // b0(): - // v0 = allocate - // store Field 0 at v0 - // v2 = allocate - // store v0 at v2 - // v3 = load v2 - // v4 = load v2 - // jmp b1() - // b1(): - // store Field 1 at v3 - // store Field 2 at v4 - // v7 = load v3 - // v8 = eq v7, Field 2 - // return - // } - let main_id = Id::test_new(0); - let mut builder = FunctionBuilder::new("main".into(), main_id); - - let v0 = builder.insert_allocate(Type::field()); - - let zero = builder.field_constant(0u128); - builder.insert_store(v0, zero); - - let v2 = builder.insert_allocate(Type::Reference(Arc::new(Type::field()))); - builder.insert_store(v2, v0); - - let v3 = builder.insert_load(v2, Type::field()); - let v4 = builder.insert_load(v2, Type::field()); - let b1 = builder.insert_block(); - builder.terminate_with_jmp(b1, vec![]); - - builder.switch_to_block(b1); - - let one = builder.field_constant(1u128); - builder.insert_store(v3, one); - - let two = builder.field_constant(2u128); - builder.insert_store(v4, two); - - let v8 = builder.insert_load(v3, Type::field()); - let _ = builder.insert_binary(v8, BinaryOp::Eq, two); - - builder.terminate_with_return(vec![]); - - let ssa = builder.finish(); - assert_eq!(ssa.main().reachable_blocks().len(), 2); + let src = " + acir(inline) fn main f0 { + b0(): + v0 = allocate -> &mut Field + store Field 0 at v0 + v2 = allocate -> &mut &mut Field + store v0 at v2 + v3 = load v2 -> &mut Field + v4 = load v2 -> &mut Field + jmp b1() + b1(): + store Field 1 at v3 + store Field 2 at v4 + v7 = load v3 -> Field + v8 = eq v7, Field 2 + return + } + "; - // Expected result: - // acir fn main f0 { - // b0(): - // v9 = allocate - // store Field 0 at v9 - // v10 = allocate - // jmp b1() - // b1(): - // return - // } - let ssa = ssa.mem2reg(); - println!("{}", ssa); + let mut ssa = Ssa::from_str(src).unwrap(); + let main = ssa.main_mut(); - let main = ssa.main(); - assert_eq!(main.reachable_blocks().len(), 2); + let instructions = main.dfg[main.entry_block()].instructions(); + assert_eq!(instructions.len(), 6); // The final return is not counted // All loads should be removed - assert_eq!(count_loads(main.entry_block(), &main.dfg), 0); - assert_eq!(count_loads(b1, &main.dfg), 0); - // The first store is not removed as it is used as a nested reference in another store. - // We would need to track whether the store where `v9` is the store value gets removed to know whether + // We would need to track whether the store where `v0` is the store value gets removed to know whether // to remove it. - assert_eq!(count_stores(main.entry_block(), &main.dfg), 1); // The first store in b1 is removed since there is another store to the same reference // in the same block, and the store is not needed before the later store. // The rest of the stores are also removed as no loads are done within any blocks // to the stored values. - assert_eq!(count_stores(b1, &main.dfg), 0); - - let b1_instructions = main.dfg[b1].instructions(); + let expected = " + acir(inline) fn main f0 { + b0(): + v0 = allocate -> &mut Field + store Field 0 at v0 + v2 = allocate -> &mut &mut Field + jmp b1() + b1(): + return + } + "; - // We expect the last eq to be optimized out - assert_eq!(b1_instructions.len(), 0); + let ssa = ssa.mem2reg(); + assert_normalized_ssa_equals(ssa, expected); } #[test] @@ -933,7 +963,7 @@ mod tests { // v10 = eq v9, Field 2 // constrain v9 == Field 2 // v11 = load v2 - // v12 = load v10 + // v12 = load v11 // v13 = eq v12, Field 2 // constrain v11 == Field 2 // return @@ -992,7 +1022,7 @@ mod tests { let main = ssa.main(); assert_eq!(main.reachable_blocks().len(), 4); - // The store from the original SSA should remain + // The stores from the original SSA should remain assert_eq!(count_stores(main.entry_block(), &main.dfg), 2); assert_eq!(count_stores(b2, &main.dfg), 1); @@ -1039,4 +1069,160 @@ mod tests { let main = ssa.main(); assert_eq!(count_loads(main.entry_block(), &main.dfg), 1); } + + #[test] + fn remove_repeat_loads() { + // This tests starts with two loads from the same unknown load. + // Specifically you should look for `load v2` in `b3`. + // We should be able to remove the second repeated load. + let src = " + acir(inline) fn main f0 { + b0(): + v0 = allocate -> &mut Field + store Field 0 at v0 + v2 = allocate -> &mut &mut Field + store v0 at v2 + jmp b1(Field 0) + b1(v3: Field): + v4 = eq v3, Field 0 + jmpif v4 then: b2, else: b3 + b2(): + v5 = load v2 -> &mut Field + store Field 2 at v5 + v8 = add v3, Field 1 + jmp b1(v8) + b3(): + v9 = load v0 -> Field + v10 = eq v9, Field 2 + constrain v9 == Field 2 + v11 = load v2 -> &mut Field + v12 = load v2 -> &mut Field + v13 = load v12 -> Field + v14 = eq v13, Field 2 + constrain v13 == Field 2 + return + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + // The repeated load from v3 should be removed + // b3 should only have three loads now rather than four previously + // + // All stores are expected to remain. + let expected = " + acir(inline) fn main f0 { + b0(): + v1 = allocate -> &mut Field + store Field 0 at v1 + v3 = allocate -> &mut &mut Field + store v1 at v3 + jmp b1(Field 0) + b1(v0: Field): + v4 = eq v0, Field 0 + jmpif v4 then: b3, else: b2 + b3(): + v11 = load v3 -> &mut Field + store Field 2 at v11 + v13 = add v0, Field 1 + jmp b1(v13) + b2(): + v5 = load v1 -> Field + v7 = eq v5, Field 2 + constrain v5 == Field 2 + v8 = load v3 -> &mut Field + v9 = load v8 -> Field + v10 = eq v9, Field 2 + constrain v9 == Field 2 + return + } + "; + + let ssa = ssa.mem2reg(); + assert_normalized_ssa_equals(ssa, expected); + } + + #[test] + fn keep_repeat_loads_passed_to_a_call() { + // The test is the exact same as `remove_repeat_loads` above except with the call + // to `f1` between the repeated loads. + let src = " + acir(inline) fn main f0 { + b0(): + v1 = allocate -> &mut Field + store Field 0 at v1 + v3 = allocate -> &mut &mut Field + store v1 at v3 + jmp b1(Field 0) + b1(v0: Field): + v4 = eq v0, Field 0 + jmpif v4 then: b3, else: b2 + b3(): + v13 = load v3 -> &mut Field + store Field 2 at v13 + v15 = add v0, Field 1 + jmp b1(v15) + b2(): + v5 = load v1 -> Field + v7 = eq v5, Field 2 + constrain v5 == Field 2 + v8 = load v3 -> &mut Field + call f1(v3) + v10 = load v3 -> &mut Field + v11 = load v10 -> Field + v12 = eq v11, Field 2 + constrain v11 == Field 2 + return + } + acir(inline) fn foo f1 { + b0(v0: &mut Field): + return + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + let ssa = ssa.mem2reg(); + // We expect the program to be unchanged + assert_normalized_ssa_equals(ssa, src); + } + + #[test] + fn keep_repeat_loads_with_alias_store() { + // v7, v8, and v9 alias one another. We want to make sure that a repeat load to v7 with a store + // to its aliases in between the repeat loads does not remove those loads. + let src = " + acir(inline) fn main f0 { + b0(v0: u1): + jmpif v0 then: b2, else: b1 + b2(): + v6 = allocate -> &mut Field + store Field 0 at v6 + jmp b3(v6, v6, v6) + b3(v1: &mut Field, v2: &mut Field, v3: &mut Field): + v8 = load v1 -> Field + store Field 2 at v2 + v10 = load v1 -> Field + store Field 1 at v3 + v11 = load v1 -> Field + store Field 3 at v3 + v13 = load v1 -> Field + constrain v8 == Field 0 + constrain v10 == Field 2 + constrain v11 == Field 1 + constrain v13 == Field 3 + return + b1(): + v4 = allocate -> &mut Field + store Field 1 at v4 + jmp b3(v4, v4, v4) + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + let ssa = ssa.mem2reg(); + // We expect the program to be unchanged + assert_normalized_ssa_equals(ssa, src); + } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs index 777c16dacd1..5883ce25936 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs @@ -19,8 +19,10 @@ //! When unrolling ACIR code, we remove reference count instructions because they are //! only used by Brillig bytecode. use acvm::{acir::AcirField, FieldElement}; +use im::HashSet; use crate::{ + brillig::brillig_gen::convert_ssa_function, errors::RuntimeError, ssa::{ ir::{ @@ -37,38 +39,60 @@ use crate::{ ssa_gen::Ssa, }, }; -use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet}; +use fxhash::FxHashMap as HashMap; impl Ssa { /// Loop unrolling can return errors, since ACIR functions need to be fully unrolled. /// This meta-pass will keep trying to unroll loops and simplifying the SSA until no more errors are found. - #[tracing::instrument(level = "trace", skip(ssa))] - pub(crate) fn unroll_loops_iteratively(mut ssa: Ssa) -> Result { - for (_, function) in ssa.functions.iter_mut() { + /// + /// The `max_bytecode_incr_pct`, when given, is used to limit the growth of the Brillig bytecode size + /// after unrolling small loops to some percentage of the original loop. For example a value of 150 would + /// mean the new loop can be 150% (ie. 2.5 times) larger than the original loop. It will still contain + /// fewer SSA instructions, but that can still result in more Brillig opcodes. + #[tracing::instrument(level = "trace", skip(self))] + pub(crate) fn unroll_loops_iteratively( + mut self: Ssa, + max_bytecode_increase_percent: Option, + ) -> Result { + for (_, function) in self.functions.iter_mut() { + // Take a snapshot of the function to compare byte size increase, + // but only if the setting indicates we have to, otherwise skip it. + let orig_func_and_max_incr_pct = max_bytecode_increase_percent + .filter(|_| function.runtime().is_brillig()) + .map(|max_incr_pct| (function.clone(), max_incr_pct)); + // Try to unroll loops first: - let mut unroll_errors = function.try_unroll_loops(); + let (mut has_unrolled, mut unroll_errors) = function.try_unroll_loops(); // Keep unrolling until no more errors are found while !unroll_errors.is_empty() { let prev_unroll_err_count = unroll_errors.len(); // Simplify the SSA before retrying - - // Do a mem2reg after the last unroll to aid simplify_cfg - function.mem2reg(); - function.simplify_function(); - // Do another mem2reg after simplify_cfg to aid the next unroll - function.mem2reg(); + simplify_between_unrolls(function); // Unroll again - unroll_errors = function.try_unroll_loops(); + let (new_unrolled, new_errors) = function.try_unroll_loops(); + unroll_errors = new_errors; + has_unrolled |= new_unrolled; + // If we didn't manage to unroll any more loops, exit if unroll_errors.len() >= prev_unroll_err_count { return Err(unroll_errors.swap_remove(0)); } } + + if has_unrolled { + if let Some((orig_function, max_incr_pct)) = orig_func_and_max_incr_pct { + let new_size = brillig_bytecode_size(function); + let orig_size = brillig_bytecode_size(&orig_function); + if !is_new_size_ok(orig_size, new_size, max_incr_pct) { + *function = orig_function; + } + } + } } - Ok(ssa) + Ok(self) } } @@ -77,7 +101,7 @@ impl Function { // This can also be true for ACIR, but we have no alternative to unrolling in ACIR. // Brillig also generally prefers smaller code rather than faster code, // so we only attempt to unroll small loops, which we decide on a case-by-case basis. - fn try_unroll_loops(&mut self) -> Vec { + fn try_unroll_loops(&mut self) -> (bool, Vec) { Loops::find_all(self).unroll_each(self) } } @@ -170,8 +194,10 @@ impl Loops { /// Unroll all loops within a given function. /// Any loops which fail to be unrolled (due to using non-constant indices) will be unmodified. - fn unroll_each(mut self, function: &mut Function) -> Vec { + /// Returns whether any blocks have been modified + fn unroll_each(mut self, function: &mut Function) -> (bool, Vec) { let mut unroll_errors = vec![]; + let mut has_unrolled = false; while let Some(next_loop) = self.yet_to_unroll.pop() { if function.runtime().is_brillig() && !next_loop.is_small_loop(function, &self.cfg) { continue; @@ -181,13 +207,17 @@ impl Loops { if next_loop.blocks.iter().any(|block| self.modified_blocks.contains(block)) { let mut new_loops = Self::find_all(function); new_loops.failed_to_unroll = self.failed_to_unroll; - return unroll_errors.into_iter().chain(new_loops.unroll_each(function)).collect(); + let (new_unrolled, new_errors) = new_loops.unroll_each(function); + return (has_unrolled || new_unrolled, [unroll_errors, new_errors].concat()); } // Don't try to unroll the loop again if it is known to fail if !self.failed_to_unroll.contains(&next_loop.header) { match next_loop.unroll(function, &self.cfg) { - Ok(_) => self.modified_blocks.extend(next_loop.blocks), + Ok(_) => { + has_unrolled = true; + self.modified_blocks.extend(next_loop.blocks); + } Err(call_stack) => { self.failed_to_unroll.insert(next_loop.header); unroll_errors.push(RuntimeError::UnknownLoopBound { call_stack }); @@ -195,7 +225,7 @@ impl Loops { } } } - unroll_errors + (has_unrolled, unroll_errors) } } @@ -947,21 +977,59 @@ impl<'f> LoopIteration<'f> { } } +/// Unrolling leaves some duplicate instructions which can potentially be removed. +fn simplify_between_unrolls(function: &mut Function) { + // Do a mem2reg after the last unroll to aid simplify_cfg + function.mem2reg(); + function.simplify_function(); + // Do another mem2reg after simplify_cfg to aid the next unroll + function.mem2reg(); +} + +/// Convert the function to Brillig bytecode and return the resulting size. +fn brillig_bytecode_size(function: &Function) -> usize { + // We need to do some SSA passes in order for the conversion to be able to go ahead, + // otherwise we can hit `unreachable!()` instructions in `convert_ssa_instruction`. + // Creating a clone so as not to modify the originals. + let mut temp = function.clone(); + + // Might as well give it the best chance. + simplify_between_unrolls(&mut temp); + + // This is to try to prevent hitting ICE. + temp.dead_instruction_elimination(false); + + convert_ssa_function(&temp, false).byte_code.len() +} + +/// Decide if the new bytecode size is acceptable, compared to the original. +/// +/// The maximum increase can be expressed as a negative value if we demand a decrease. +/// (Values -100 and under mean the new size should be 0). +fn is_new_size_ok(orig_size: usize, new_size: usize, max_incr_pct: i32) -> bool { + let max_size_pct = 100i32.saturating_add(max_incr_pct).max(0) as usize; + let max_size = orig_size.saturating_mul(max_size_pct); + new_size.saturating_mul(100) <= max_size +} + #[cfg(test)] mod tests { use acvm::FieldElement; + use test_case::test_case; use crate::errors::RuntimeError; use crate::ssa::{ir::value::ValueId, opt::assert_normalized_ssa_equals, Ssa}; - use super::{BoilerplateStats, Loops}; + use super::{is_new_size_ok, BoilerplateStats, Loops}; - /// Tries to unroll all loops in each SSA function. + /// Tries to unroll all loops in each SSA function once, calling the `Function` directly, + /// bypassing the iterative loop done by the SSA which does further optimisations. + /// /// If any loop cannot be unrolled, it is left as-is or in a partially unrolled state. fn try_unroll_loops(mut ssa: Ssa) -> (Ssa, Vec) { let mut errors = vec![]; for function in ssa.functions.values_mut() { - errors.extend(function.try_unroll_loops()); + errors.extend(function.try_unroll_loops().1); } (ssa, errors) } @@ -1221,9 +1289,26 @@ mod tests { let (ssa, errors) = try_unroll_loops(ssa); assert_eq!(errors.len(), 0, "Unroll should have no errors"); + // Check that it's still the original assert_normalized_ssa_equals(ssa, parse_ssa().to_string().as_str()); } + #[test] + fn test_brillig_unroll_iteratively_respects_max_increase() { + let ssa = brillig_unroll_test_case(); + let ssa = ssa.unroll_loops_iteratively(Some(-90)).unwrap(); + // Check that it's still the original + assert_normalized_ssa_equals(ssa, brillig_unroll_test_case().to_string().as_str()); + } + + #[test] + fn test_brillig_unroll_iteratively_with_large_max_increase() { + let ssa = brillig_unroll_test_case(); + let ssa = ssa.unroll_loops_iteratively(Some(50)).unwrap(); + // Check that it did the unroll + assert_eq!(ssa.main().reachable_blocks().len(), 2, "The loop should be unrolled"); + } + /// Test that `break` and `continue` stop unrolling without any panic. #[test] fn test_brillig_unroll_break_and_continue() { @@ -1377,4 +1462,14 @@ mod tests { let loop0 = loops.yet_to_unroll.pop().expect("there should be a loop"); loop0.boilerplate_stats(function, &loops.cfg).expect("there should be stats") } + + #[test_case(1000, 700, 50, true; "size decreased")] + #[test_case(1000, 1500, 50, true; "size increased just by the max")] + #[test_case(1000, 1501, 50, false; "size increased over the max")] + #[test_case(1000, 700, -50, false; "size decreased but not enough")] + #[test_case(1000, 250, -50, true; "size decreased over expectations")] + #[test_case(1000, 250, -1250, false; "demanding more than minus 100 is handled")] + fn test_is_new_size_ok(old: usize, new: usize, max: i32, ok: bool) { + assert_eq!(is_new_size_ok(old, new, max), ok); + } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs index 0c6041029da..e39eed79021 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs @@ -172,6 +172,7 @@ impl<'a> FunctionContext<'a> { /// Always returns a Value::Mutable wrapping the allocate instruction. pub(super) fn new_mutable_variable(&mut self, value_to_store: ValueId) -> Value { let element_type = self.builder.current_function.dfg.type_of_value(value_to_store); + self.builder.increment_array_reference_count(value_to_store); let alloc = self.builder.insert_allocate(element_type); self.builder.insert_store(alloc, value_to_store); let typ = self.builder.type_of_value(value_to_store); @@ -732,10 +733,6 @@ impl<'a> FunctionContext<'a> { let element_types = Self::convert_type(element_type); values.map_both(element_types, |value, element_type| { let reference = value.eval_reference(); - // Reference counting in brillig relies on us incrementing reference - // counts when arrays/slices are constructed or indexed. - // Thus, if we dereference an lvalue which happens to be array/slice we should increment its reference counter. - self.builder.increment_array_reference_count(reference); self.builder.insert_load(reference, element_type).into() }) } @@ -916,7 +913,10 @@ impl<'a> FunctionContext<'a> { let parameters = self.builder.current_function.dfg.block_parameters(entry).to_vec(); for parameter in parameters { - self.builder.increment_array_reference_count(parameter); + // Avoid reference counts for immutable arrays that aren't behind references. + if self.builder.current_function.dfg.value_is_reference(parameter) { + self.builder.increment_array_reference_count(parameter); + } } entry @@ -933,7 +933,9 @@ impl<'a> FunctionContext<'a> { dropped_parameters.retain(|parameter| !terminator_args.contains(parameter)); for parameter in dropped_parameters { - self.builder.decrement_array_reference_count(parameter); + if self.builder.current_function.dfg.value_is_reference(parameter) { + self.builder.decrement_array_reference_count(parameter); + } } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs index c50f0a7f45c..d28236bd360 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs @@ -665,12 +665,11 @@ impl<'a> FunctionContext<'a> { values = values.map(|value| { let value = value.eval(self); - // Make sure to increment array reference counts on each let binding - self.builder.increment_array_reference_count(value); - Tree::Leaf(if let_expr.mutable { self.new_mutable_variable(value) } else { + // `new_mutable_variable` already increments rcs internally + self.builder.increment_array_reference_count(value); value::Value::Normal(value) }) }); diff --git a/noir/noir-repo/noir_stdlib/src/hash/poseidon2.nr b/noir/noir-repo/noir_stdlib/src/hash/poseidon2.nr index f2167c43c2c..419f07a2aca 100644 --- a/noir/noir-repo/noir_stdlib/src/hash/poseidon2.nr +++ b/noir/noir-repo/noir_stdlib/src/hash/poseidon2.nr @@ -13,11 +13,7 @@ pub struct Poseidon2 { impl Poseidon2 { #[no_predicates] pub fn hash(input: [Field; N], message_size: u32) -> Field { - if message_size == N { - Poseidon2::hash_internal(input, N, false) - } else { - Poseidon2::hash_internal(input, message_size, true) - } + Poseidon2::hash_internal(input, message_size, message_size != N) } pub(crate) fn new(iv: Field) -> Poseidon2 { diff --git a/noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/Nargo.toml b/noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/Nargo.toml new file mode 100644 index 00000000000..ecac2dfb197 --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/Nargo.toml @@ -0,0 +1,7 @@ +[package] +name = "inline_decompose_hint_brillig_call" +version = "0.1.0" +type = "bin" +authors = [""] + +[dependencies] diff --git a/noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/src/main.nr b/noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/src/main.nr new file mode 100644 index 00000000000..e500f0f976d --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/inline_decompose_hint_brillig_call/src/main.nr @@ -0,0 +1,15 @@ +use std::embedded_curve_ops::{EmbeddedCurvePoint, EmbeddedCurveScalar, fixed_base_scalar_mul}; + +fn main() -> pub Field { + let pre_address = 0x23d95e303879a5d0bbef78ecbc335e559da37431f6dcd11da54ed375c2846813; + let (a, b) = std::field::bn254::decompose(pre_address); + let curve = EmbeddedCurveScalar { lo: a, hi: b }; + let key = fixed_base_scalar_mul(curve); + let point = EmbeddedCurvePoint { + x: 0x111223493147f6785514b1c195bb37a2589f22a6596d30bb2bb145fdc9ca8f1e, + y: 0x273bbffd678edce8fe30e0deafc4f66d58357c06fd4a820285294b9746c3be95, + is_infinite: false, + }; + let address_point = key.add(point); + address_point.x +} diff --git a/noir/noir-repo/tooling/nargo_cli/build.rs b/noir/noir-repo/tooling/nargo_cli/build.rs index 740e5ed2052..f0334eaf713 100644 --- a/noir/noir-repo/tooling/nargo_cli/build.rs +++ b/noir/noir-repo/tooling/nargo_cli/build.rs @@ -213,8 +213,13 @@ fn test_{test_name}(force_brillig: ForceBrillig, inliner_aggressiveness: Inliner nargo.arg("--program-dir").arg(test_program_dir); nargo.arg("{test_command}").arg("--force"); nargo.arg("--inliner-aggressiveness").arg(inliner_aggressiveness.0.to_string()); + if force_brillig.0 {{ nargo.arg("--force-brillig"); + + // Set the maximum increase so that part of the optimization is exercised (it might fail). + nargo.arg("--max-bytecode-increase-percent"); + nargo.arg("50"); }} {test_content} diff --git a/noir/noir-repo/tooling/noirc_abi_wasm/build.sh b/noir/noir-repo/tooling/noirc_abi_wasm/build.sh index c07d2d8a4c1..16fb26e55db 100755 --- a/noir/noir-repo/tooling/noirc_abi_wasm/build.sh +++ b/noir/noir-repo/tooling/noirc_abi_wasm/build.sh @@ -25,7 +25,7 @@ function run_if_available { require_command jq require_command cargo require_command wasm-bindgen -#require_command wasm-opt +require_command wasm-opt self_path=$(dirname "$(readlink -f "$0")") pname=$(cargo read-manifest | jq -r '.name') diff --git a/noir/noir-repo/yarn.lock b/noir/noir-repo/yarn.lock index 3c8df2b1772..f7b7b3df372 100644 --- a/noir/noir-repo/yarn.lock +++ b/noir/noir-repo/yarn.lock @@ -221,20 +221,20 @@ __metadata: languageName: node linkType: hard -"@aztec/bb.js@portal:../../../../barretenberg/ts::locator=integration-tests%40workspace%3Acompiler%2Fintegration-tests": - version: 0.0.0-use.local - resolution: "@aztec/bb.js@portal:../../../../barretenberg/ts::locator=integration-tests%40workspace%3Acompiler%2Fintegration-tests" +"@aztec/bb.js@npm:0.63.1": + version: 0.63.1 + resolution: "@aztec/bb.js@npm:0.63.1" dependencies: comlink: ^4.4.1 commander: ^10.0.1 debug: ^4.3.4 fflate: ^0.8.0 - pako: ^2.1.0 tslib: ^2.4.0 bin: - bb.js: ./dest/node/main.js + bb.js: dest/node/main.js + checksum: b80730f1cb87e4d2ca21d991a42950bc069367896db309ab3f909c5f53efa9291538d51e35bc3c6d2eea042ca33c279ae59eb3f5d844a24336c7bb9664c2404b languageName: node - linkType: soft + linkType: hard "@babel/code-frame@npm:^7.0.0, @babel/code-frame@npm:^7.10.4, @babel/code-frame@npm:^7.12.11, @babel/code-frame@npm:^7.16.0, @babel/code-frame@npm:^7.22.13, @babel/code-frame@npm:^7.23.5, @babel/code-frame@npm:^7.8.3": version: 7.23.5 @@ -14123,7 +14123,7 @@ __metadata: version: 0.0.0-use.local resolution: "integration-tests@workspace:compiler/integration-tests" dependencies: - "@aztec/bb.js": "portal:../../../../barretenberg/ts" + "@aztec/bb.js": 0.63.1 "@noir-lang/noir_js": "workspace:*" "@noir-lang/noir_wasm": "workspace:*" "@nomicfoundation/hardhat-chai-matchers": ^2.0.0