Skip to content

Commit

Permalink
fix: Reproduce and fix bytecode blowup (#6972)
Browse files · Browse the repository at this point in the history
Co-authored-by: Tom French <[email protected]>
  • Loading branch information
aakoshh and TomAFrench authored Jan 9, 2025
1 parent 526c84e commit 724547d
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 9 deletions.
3 changes: 2 additions & 1 deletion compiler/noirc_evaluator/src/ssa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,11 @@ fn optimize_all(builder: SsaBuilder, options: &SsaEvaluatorOptions) -> Result<Ss
"Unrolling",
)?
.run_pass(Ssa::simplify_cfg, "Simplifying (2nd)")
.run_pass(Ssa::mem2reg, "Mem2Reg (2nd)")
.run_pass(Ssa::flatten_cfg, "Flattening")
.run_pass(Ssa::remove_bit_shifts, "Removing Bit Shifts")
// Run mem2reg once more with the flattened CFG to catch any remaining loads/stores
.run_pass(Ssa::mem2reg, "Mem2Reg (2nd)")
.run_pass(Ssa::mem2reg, "Mem2Reg (3rd)")
// Run the inlining pass again to handle functions with `InlineType::NoPredicates`.
// Before flattening is run, we treat functions marked with the `InlineType::NoPredicates` as an entry point.
// This pass must come immediately following `mem2reg` as the succeeding passes
Expand Down
9 changes: 7 additions & 2 deletions compiler/noirc_evaluator/src/ssa/ir/dfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,10 +416,15 @@ impl DataFlowGraph {
pub(crate) fn get_value_max_num_bits(&self, value: ValueId) -> u32 {
match self[value] {
Value::Instruction { instruction, .. } => {
let value_bit_size = self.type_of_value(value).bit_size();
if let Instruction::Cast(original_value, _) = self[instruction] {
self.type_of_value(original_value).bit_size()
let original_bit_size = self.type_of_value(original_value).bit_size();
// We might have cast e.g. `u1` to `u8` to be able to do arithmetic,
// in which case we want to recover the original smaller bit size;
// OTOH if we cast down, then we don't need the higher original size.
value_bit_size.min(original_bit_size)
} else {
self.type_of_value(value).bit_size()
value_bit_size
}
}

Expand Down
11 changes: 7 additions & 4 deletions compiler/noirc_evaluator/src/ssa/opt/remove_bit_shifts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::ssa::{
basic_block::BasicBlockId,
call_stack::CallStackId,
dfg::InsertInstructionResult,
function::{Function, RuntimeType},
function::Function,
instruction::{Binary, BinaryOp, Endian, Instruction, InstructionId, Intrinsic},
types::{NumericType, Type},
value::ValueId,
Expand All @@ -32,7 +32,7 @@ impl Function {
/// The structure of this pass is simple:
/// Go through each block and re-insert all instructions.
pub(crate) fn remove_bit_shifts(&mut self) {
if matches!(self.runtime(), RuntimeType::Brillig(_)) {
if self.runtime().is_brillig() {
return;
}

Expand Down Expand Up @@ -120,8 +120,11 @@ impl Context<'_> {
let pow = self.numeric_constant(FieldElement::from(rhs_bit_size_pow_2), typ);

let max_lhs_bits = self.function.dfg.get_value_max_num_bits(lhs);

(max_lhs_bits + bit_shift_size, pow)
let max_bit_size = max_lhs_bits + bit_shift_size;
// There is no point trying to truncate to more than the Field size.
// A higher `max_lhs_bits` input can come from trying to left-shift a Field.
let max_bit_size = max_bit_size.min(NumericType::NativeField.bit_size());
(max_bit_size, pow)
} else {
// we use a predicate to nullify the result in case of overflow
let u8_type = NumericType::unsigned(8);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[package]
name = "encrypted_log_regression"
type = "bin"
authors = [""]
compiler_version = ">=0.31.0"

[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Using the smaller sizes defined in `main.nr`.
# The reason this program is in the `execution_success` directory is because
# `rebuild.sh` only goes over these programs, but all we really care about is
# any potential future bytecode size regression.
eph_pk_bytes = [1, 2, 3]
incoming_header_ciphertext = [1, 2]
incoming_body_ciphertext = [9, 8, 7, 6, 5, 4, 3, 2, 1]
flag = true
return = [1, 2, 3, 1, 2, 0, 9, 8, 7, 6, 5, 4, 3, 2, 1]
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// The code below is inspired by [compute_encrypted_log](https://github.com/AztecProtocol/aztec-packages/blob/b42756bc10175fea9eb60544759e9dbe41ae5e76/noir-projects/aztec-nr/aztec/src/encrypted_logs/payload.nr#L111)
// which resulted in a bytecode size blowup when compiled to ACIR, see https://github.com/noir-lang/noir/issues/6929
// The issue was around `encrypted_bytes[offset + i]` generating large amounts of gates, as per the `flamegraph.sh` tool in aztec-packages.
// The details around encryption and addresses have been stripped away, focusing on just copying bytes of equivalent size arrays.

// Original values which resulted in huge bytecode even on this example (500K long SSA)
// global PRIVATE_LOG_SIZE_IN_FIELDS: u32 = 18;
// global ENCRYPTED_PAYLOAD_SIZE_IN_BYTES: u32 = (PRIVATE_LOG_SIZE_IN_FIELDS - 1) * 31;
// global EPH_PK_SIZE: u32 = 32;
// global HEADER_SIZE: u32 = 48;
// global OVERHEAD_PADDING: u32 = 15;

// Using the same formulas with smaller numbers; the effect is the same, but the SSA is more manageable.
global PRIVATE_LOG_SIZE_IN_FIELDS: u32 = 4;
global ENCRYPTED_PAYLOAD_SIZE_IN_BYTES: u32 = (PRIVATE_LOG_SIZE_IN_FIELDS - 1) * 5;
global EPH_PK_SIZE: u32 = 3;
global HEADER_SIZE: u32 = 2;
global OVERHEAD_PADDING: u32 = 1;

// Unused because encryption didn't play a role:
// global OVERHEAD_SIZE: u32 = EPH_PK_SIZE + HEADER_SIZE + OVERHEAD_PADDING;
// global PLAINTEXT_LENGTH_SIZE: u32 = 2;
// global MAX_PRIVATE_LOG_PLAINTEXT_SIZE_IN_BYTES: u32 =
// ENCRYPTED_PAYLOAD_SIZE_IN_BYTES - OVERHEAD_SIZE - PLAINTEXT_LENGTH_SIZE - 1 /* aes padding */;

global BODY_SIZE: u32 =
ENCRYPTED_PAYLOAD_SIZE_IN_BYTES - EPH_PK_SIZE - HEADER_SIZE - OVERHEAD_PADDING;

fn main(
eph_pk_bytes: [u8; EPH_PK_SIZE],
incoming_header_ciphertext: [u8; HEADER_SIZE],
incoming_body_ciphertext: [u8; BODY_SIZE],
flag: bool,
) -> pub [u8; ENCRYPTED_PAYLOAD_SIZE_IN_BYTES] {
compute_encrypted_log(
eph_pk_bytes,
incoming_header_ciphertext,
incoming_body_ciphertext,
flag,
)
}

fn compute_encrypted_log<let M: u32>(
eph_pk_bytes: [u8; EPH_PK_SIZE],
incoming_header_ciphertext: [u8; HEADER_SIZE],
incoming_body_ciphertext: [u8; BODY_SIZE],
flag: bool,
) -> [u8; M] {
let mut encrypted_bytes = [0; M];
let mut offset = 0;

// NOTE: Adding a conditional variable can result in the array being fully copied, item by item,
// in each iteration in the second loop that copies incoming_body_ciphertext into encrypted_bytes.
// Depending on where we place the `flag` we either get the item-by-item copying (blowup),
// or just a single array item gets read and a new array constructed in each iteration (no blowup).

// If the `flag` is here then it blows up.
if flag {
// eph_pk
for i in 0..EPH_PK_SIZE {
encrypted_bytes[offset + i] = eph_pk_bytes[i];
}
offset += EPH_PK_SIZE;

// If the `flag` is here then it blows up.
// if flag {

// incoming_header
for i in 0..HEADER_SIZE {
encrypted_bytes[offset + i] = incoming_header_ciphertext[i];
}
offset += HEADER_SIZE;

// Padding.
offset += OVERHEAD_PADDING;

// If the `flag` is here then it does not blow up.
//if flag {
// incoming_body
// Then we fill in the rest as the incoming body ciphertext
let size = M - offset;

// NOTE: This made the bytecode size blowup disappear in aztec packages,
// but in this reproduction the size seems to be statically known regardless.
// let size = M - 32 - HEADER_SIZE - OVERHEAD_PADDING;

assert_eq(size, incoming_body_ciphertext.len(), "ciphertext length mismatch");
for i in 0..size {
encrypted_bytes[offset + i] = incoming_body_ciphertext[i];
}
}

encrypted_bytes
}
4 changes: 2 additions & 2 deletions test_programs/execution_success/sha2_byte/src/main.nr
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Test Noir implementations of SHA256 and SHA512 on a one-byte message.
fn main(x: Field, result256: [u8; 32], result512: [u8; 64]) {
let digest256 = std::sha256::digest([x as u8]);
let digest256 = std::hash::sha256([x as u8]);
assert(digest256 == result256);

let digest512 = std::sha512::digest([x as u8]);
let digest512 = std::hash::sha512::digest([x as u8]);
assert(digest512 == result512);
}

0 comments on commit 724547d

Please sign in to comment.