Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Reproduce and fix bytecode blowup #6972

Merged
merged 20 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion compiler/noirc_evaluator/src/ssa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,11 @@ fn optimize_all(builder: SsaBuilder, options: &SsaEvaluatorOptions) -> Result<Ss
"Unrolling",
)?
.run_pass(Ssa::simplify_cfg, "Simplifying (2nd)")
.run_pass(Ssa::mem2reg, "Mem2Reg (2nd)")
.run_pass(Ssa::flatten_cfg, "Flattening")
.run_pass(Ssa::remove_bit_shifts, "Removing Bit Shifts")
// Run mem2reg once more with the flattened CFG to catch any remaining loads/stores
.run_pass(Ssa::mem2reg, "Mem2Reg (2nd)")
.run_pass(Ssa::mem2reg, "Mem2Reg (3rd)")
// Run the inlining pass again to handle functions with `InlineType::NoPredicates`.
// Before flattening is run, we treat functions marked with the `InlineType::NoPredicates` as an entry point.
// This pass must come immediately following `mem2reg` as the succeeding passes
Expand Down
9 changes: 7 additions & 2 deletions compiler/noirc_evaluator/src/ssa/ir/dfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,10 +416,15 @@ impl DataFlowGraph {
pub(crate) fn get_value_max_num_bits(&self, value: ValueId) -> u32 {
match self[value] {
Value::Instruction { instruction, .. } => {
let value_bit_size = self.type_of_value(value).bit_size();
if let Instruction::Cast(original_value, _) = self[instruction] {
self.type_of_value(original_value).bit_size()
let original_bit_size = self.type_of_value(original_value).bit_size();
// We might have cast e.g. `u1` to `u8` to be able to do arithmetic,
// in which case we want to recover the original smaller bit size;
// OTOH if we cast down, then we don't need the higher original size.
value_bit_size.min(original_bit_size)
} else {
self.type_of_value(value).bit_size()
value_bit_size
}
}

Expand Down
11 changes: 7 additions & 4 deletions compiler/noirc_evaluator/src/ssa/opt/remove_bit_shifts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::ssa::{
basic_block::BasicBlockId,
call_stack::CallStackId,
dfg::InsertInstructionResult,
function::{Function, RuntimeType},
function::Function,
instruction::{Binary, BinaryOp, Endian, Instruction, InstructionId, Intrinsic},
types::{NumericType, Type},
value::ValueId,
Expand All @@ -32,7 +32,7 @@ impl Function {
/// The structure of this pass is simple:
/// Go through each block and re-insert all instructions.
pub(crate) fn remove_bit_shifts(&mut self) {
if matches!(self.runtime(), RuntimeType::Brillig(_)) {
if self.runtime().is_brillig() {
return;
}

Expand Down Expand Up @@ -120,8 +120,11 @@ impl Context<'_> {
let pow = self.numeric_constant(FieldElement::from(rhs_bit_size_pow_2), typ);

let max_lhs_bits = self.function.dfg.get_value_max_num_bits(lhs);

(max_lhs_bits + bit_shift_size, pow)
let max_bit_size = max_lhs_bits + bit_shift_size;
// There is no point trying to truncate to more than the Field size.
// A higher `max_lhs_bits` input can come from trying to left-shift a Field.
let max_bit_size = max_bit_size.min(NumericType::NativeField.bit_size());
(max_bit_size, pow)
} else {
// we use a predicate to nullify the result in case of overflow
let u8_type = NumericType::unsigned(8);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Nargo manifest for the bytecode-blowup regression reproduction
# (see noir-lang/noir issue #6929 / PR #6972).
[package]
name = "encrypted_log_regression"
type = "bin"
authors = [""]
# Minimum compiler version; no third-party dependencies are needed.
compiler_version = ">=0.31.0"

[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Using the smaller sizes defined in `main.nr`.
# The reason this program is in the `execution_success` directory is because
# `rebuild.sh` only goes over these programs, but all we really care about is
# any potential future bytecode size regression.
# Input lengths match the globals in `main.nr`:
# EPH_PK_SIZE = 3, HEADER_SIZE = 2, BODY_SIZE = 9.
eph_pk_bytes = [1, 2, 3]
incoming_header_ciphertext = [1, 2]
incoming_body_ciphertext = [9, 8, 7, 6, 5, 4, 3, 2, 1]
flag = true
# Expected output: eph_pk_bytes ++ header ++ one zero padding byte ++ body,
# i.e. ENCRYPTED_PAYLOAD_SIZE_IN_BYTES = 15 bytes in total.
return = [1, 2, 3, 1, 2, 0, 9, 8, 7, 6, 5, 4, 3, 2, 1]
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// The code below is inspired by [compute_encrypted_log](https://github.com/AztecProtocol/aztec-packages/blob/b42756bc10175fea9eb60544759e9dbe41ae5e76/noir-projects/aztec-nr/aztec/src/encrypted_logs/payload.nr#L111)
// which resulted in a bytecode size blowup when compiled to ACIR, see https://github.com/noir-lang/noir/issues/6929
// The issue was around `encrypted_bytes[offset + i]` generating large amounts of gates, as per the `flamegraph.sh` tool in aztec-packages.
// The details around encryption and addresses have been stripped away, focusing on just copying bytes of equivalent size arrays.

// Original values which resulted in huge bytecode even on this example (500K long SSA)
// global PRIVATE_LOG_SIZE_IN_FIELDS: u32 = 18;
// global ENCRYPTED_PAYLOAD_SIZE_IN_BYTES: u32 = (PRIVATE_LOG_SIZE_IN_FIELDS - 1) * 31;
// global EPH_PK_SIZE: u32 = 32;
// global HEADER_SIZE: u32 = 48;
// global OVERHEAD_PADDING: u32 = 15;

// Using the same formulas with smaller numbers; the effect is the same, but the SSA is more manageable.
global PRIVATE_LOG_SIZE_IN_FIELDS: u32 = 4;
// (4 - 1) * 5 = 15 total payload bytes.
global ENCRYPTED_PAYLOAD_SIZE_IN_BYTES: u32 = (PRIVATE_LOG_SIZE_IN_FIELDS - 1) * 5;
global EPH_PK_SIZE: u32 = 3;
global HEADER_SIZE: u32 = 2;
global OVERHEAD_PADDING: u32 = 1;

// Unused because encryption didn't play a role:
// global OVERHEAD_SIZE: u32 = EPH_PK_SIZE + HEADER_SIZE + OVERHEAD_PADDING;
// global PLAINTEXT_LENGTH_SIZE: u32 = 2;
// global MAX_PRIVATE_LOG_PLAINTEXT_SIZE_IN_BYTES: u32 =
//     ENCRYPTED_PAYLOAD_SIZE_IN_BYTES - OVERHEAD_SIZE - PLAINTEXT_LENGTH_SIZE - 1 /* aes padding */;

// Remaining bytes for the body: 15 - 3 - 2 - 1 = 9.
global BODY_SIZE: u32 =
    ENCRYPTED_PAYLOAD_SIZE_IN_BYTES - EPH_PK_SIZE - HEADER_SIZE - OVERHEAD_PADDING;

// Entry point: concatenates the ephemeral public key bytes, the header
// ciphertext, one padding byte, and the body ciphertext into a single
// ENCRYPTED_PAYLOAD_SIZE_IN_BYTES-long array. The generic size `M` of
// `compute_encrypted_log` is inferred from this return type.
// NOTE: this program is a bytecode-size regression fixture — keep its
// structure as-is (see the comments in `compute_encrypted_log`).
fn main(
    eph_pk_bytes: [u8; EPH_PK_SIZE],
    incoming_header_ciphertext: [u8; HEADER_SIZE],
    incoming_body_ciphertext: [u8; BODY_SIZE],
    flag: bool,
) -> pub [u8; ENCRYPTED_PAYLOAD_SIZE_IN_BYTES] {
    compute_encrypted_log(
        eph_pk_bytes,
        incoming_header_ciphertext,
        incoming_body_ciphertext,
        flag,
    )
}

// Copies the three input arrays into a fresh `[u8; M]` buffer in order
// (eph_pk, header, one skipped padding byte, body), with the whole copy
// guarded by `flag`. Asserts that the body exactly fills the remaining
// space. The placement of the `flag` branch is the subject of the
// regression test — do not restructure this function.
fn compute_encrypted_log<let M: u32>(
    eph_pk_bytes: [u8; EPH_PK_SIZE],
    incoming_header_ciphertext: [u8; HEADER_SIZE],
    incoming_body_ciphertext: [u8; BODY_SIZE],
    flag: bool,
) -> [u8; M] {
    let mut encrypted_bytes = [0; M];
    // Running write position into `encrypted_bytes`.
    let mut offset = 0;

    // NOTE: Adding a conditional variable can result in the array being fully copied, item by item,
    // in each iteration in the second loop that copies incoming_body_ciphertext into encrypted_bytes.
    // Depending on where we place the `flag` we either get the item-by-item copying (blowup),
    // or just a single array item gets read and a new array constructed in each iteration (no blowup).

    // If the `flag` is here then it blows up.
    if flag {
        // eph_pk
        for i in 0..EPH_PK_SIZE {
            encrypted_bytes[offset + i] = eph_pk_bytes[i];
        }
        offset += EPH_PK_SIZE;

        // If the `flag` is here then it blows up.
        // if flag {

        // incoming_header
        for i in 0..HEADER_SIZE {
            encrypted_bytes[offset + i] = incoming_header_ciphertext[i];
        }
        offset += HEADER_SIZE;

        // Padding.
        // Leaves the padding byte(s) as the `0` the buffer was initialized with.
        offset += OVERHEAD_PADDING;

        // If the `flag` is here then it does not blow up.
        //if flag {
        // incoming_body
        // Then we fill in the rest as the incoming body ciphertext
        let size = M - offset;

        // NOTE: This made the bytecode size blowup disappear in aztec packages,
        // but in this reproduction the size seems to be statically known regardless.
        // let size = M - 32 - HEADER_SIZE - OVERHEAD_PADDING;

        assert_eq(size, incoming_body_ciphertext.len(), "ciphertext length mismatch");
        for i in 0..size {
            encrypted_bytes[offset + i] = incoming_body_ciphertext[i];
        }
    }

    encrypted_bytes
}
4 changes: 2 additions & 2 deletions test_programs/execution_success/sha2_byte/src/main.nr
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Test Noir implementations of SHA256 and SHA512 on a one-byte message.
fn main(x: Field, result256: [u8; 32], result512: [u8; 64]) {
let digest256 = std::sha256::digest([x as u8]);
let digest256 = std::hash::sha256([x as u8]);
assert(digest256 == result256);

let digest512 = std::sha512::digest([x as u8]);
let digest512 = std::hash::sha512::digest([x as u8]);
assert(digest512 == result512);
}
Loading