From 1a2ca46af0d1c05813dbe28670a2bc39b79e4c9f Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Mon, 7 Oct 2024 18:58:11 +0100 Subject: [PATCH] feat(test): Fuzz test stdlib hash functions (#6233) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description ## Problem\* Resolves #6141 ## Summary\* Adds property based testing for the following hash function in the standard library: * Keccak256 * Sha256 * Sha512 In a followup I will carry on with Poseidon and Poseidon 2, and Schnorr signatures. I also wanted to run the code with the [interpreter](https://github.com/noir-lang/noir/blob/master/compiler/noirc_frontend/src/hir/comptime/tests.rs) but felt like it would complicate things in this PR. ## Additional Context ```console ❯ cargo test -p nargo_cli --test stdlib-props Finished test [optimized + debuginfo] target(s) in 0.23s Running tests/stdlib-props.rs (target/debug/deps/stdlib_props-cbab5917b839ebf4) running 4 tests test test_basic ... ok test test_sha256 ... ok test test_keccak256 ... ok test test_sha512 ... ok test result: ok. 4 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 20.25s ``` ## Documentation\* Check one: - [x] No documentation needed. - [ ] Documentation included in this PR. - [ ] **[For Experimental Features]** Documentation to be submitted in a separate PR. # PR Checklist\* - [x] I have tested the changes locally. - [ ] I have formatted the changes with [Prettier](https://prettier.io/) and/or `cargo fmt` on default settings. --------- Co-authored-by: TomAFrench Co-authored-by: Tom French <15848336+TomAFrench@users.noreply.github.com> --- Cargo.lock | 3 + Cargo.toml | 10 +- acvm-repo/blackbox_solver/Cargo.toml | 4 +- tooling/nargo_cli/Cargo.toml | 11 +- .../tests/stdlib-props.proptest-regressions | 7 + tooling/nargo_cli/tests/stdlib-props.rs | 261 ++++++++++++++++++ 6 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 tooling/nargo_cli/tests/stdlib-props.proptest-regressions create mode 100644 tooling/nargo_cli/tests/stdlib-props.rs diff --git a/Cargo.lock b/Cargo.lock index a5544310617..c22093d85e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2597,9 +2597,12 @@ dependencies = [ "pprof 0.13.0", "predicates 2.1.5", "prettytable-rs", + "proptest", "rayon", "serde", "serde_json", + "sha2", + "sha3", "similar-asserts", "tempfile", "termcolor", diff --git a/Cargo.toml b/Cargo.toml index 6741fb8a98e..d1864edab6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,8 +87,12 @@ bb_abstraction_leaks = { path = "tooling/bb_abstraction_leaks" } acvm_cli = { path = "tooling/acvm_cli" } # Arkworks -ark-bn254 = { version = "^0.4.0", default-features = false, features = ["curve"] } -ark-bls12-381 = { version = "^0.4.0", default-features = false, features = ["curve"] } +ark-bn254 = { version = "^0.4.0", default-features = false, features = [ + "curve", +] } +ark-bls12-381 = { version = "^0.4.0", default-features = false, features = [ + "curve", +] } grumpkin = { version = "0.1.0", package = "noir_grumpkin", features = ["std"] } ark-ec = { version = "^0.4.0", default-features = false } ark-ff = { version = "^0.4.0", default-features = false } @@ -153,6 +157,8 @@ rand = "0.8.5" proptest = "1.2.0" proptest-derive = "0.4.0" rayon = "1.8.0" +sha2 = { version = "0.10.6", features = ["compress"] } +sha3 = "0.10.6" im = { version = "15.1", features = ["serde"] } tracing = "0.1.40" diff --git a/acvm-repo/blackbox_solver/Cargo.toml b/acvm-repo/blackbox_solver/Cargo.toml index 51c782d9581..b57c9356198 100644 --- a/acvm-repo/blackbox_solver/Cargo.toml +++ b/acvm-repo/blackbox_solver/Cargo.toml @@ -22,8 +22,8 @@ num-bigint = "0.4" blake2 = "0.10.6" blake3 = "1.5.0" -sha2 = { version="0.10.6", features = ["compress",] } -sha3 = "0.10.6" +sha2.workspace = true +sha3.workspace = true keccak = "0.1.4" k256 = { version = "0.11.0", features = [ "ecdsa", diff --git a/tooling/nargo_cli/Cargo.toml b/tooling/nargo_cli/Cargo.toml index 284be56d247..acd5623871e 100644 --- a/tooling/nargo_cli/Cargo.toml +++ b/tooling/nargo_cli/Cargo.toml @@ -45,7 +45,12 @@ prettytable-rs = "0.10" rayon.workspace = true thiserror.workspace = true tower.workspace = true -async-lsp = { workspace = true, features = ["client-monitor", "stdio", "tracing", "tokio"] } +async-lsp = { workspace = true, features = [ + "client-monitor", + "stdio", + "tracing", + "tokio", +] } const_format.workspace = true similar-asserts.workspace = true termcolor = "1.1.2" @@ -75,9 +80,13 @@ fm.workspace = true criterion.workspace = true pprof.workspace = true paste = "1.0.14" +proptest.workspace = true +sha2.workspace = true +sha3.workspace = true iai = "0.1.1" test-binary = "3.0.2" + [[bench]] name = "criterion" harness = false diff --git a/tooling/nargo_cli/tests/stdlib-props.proptest-regressions b/tooling/nargo_cli/tests/stdlib-props.proptest-regressions new file mode 100644 index 00000000000..ab88db8b6c2 --- /dev/null +++ b/tooling/nargo_cli/tests/stdlib-props.proptest-regressions @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 88db0227d5547742f771c14b1679f6783570b46bf7cf9e6897ee1aca4bd5034d # shrinks to io = SnippetInputOutput { description: "force_brillig = false, max_len = 200", inputs: {"input": Vec([Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(1), Field(5), Field(20), Field(133), Field(233), Field(99), Field(2⁶), Field(196), Field(232), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0), Field(0)]), "message_size": Field(2⁶)}, expected_output: Vec([Field(102), Field(26), Field(94), Field(212), Field(102), Field(1), Field(215), Field(217), Field(167), Field(175), Field(158), Field(18), Field(20), Field(244), Field(158), Field(200), Field(2⁷), Field(186), Field(251), Field(243), Field(20), Field(207), Field(22), Field(3), Field(139), Field(81), Field(207), Field(2⁴), Field(50), Field(167), Field(1), Field(163)]) } diff --git a/tooling/nargo_cli/tests/stdlib-props.rs b/tooling/nargo_cli/tests/stdlib-props.rs new file mode 100644 index 00000000000..d1ea5bbfaf6 --- /dev/null +++ b/tooling/nargo_cli/tests/stdlib-props.rs @@ -0,0 +1,261 @@ +use std::{cell::RefCell, collections::BTreeMap, path::Path}; + +use acvm::{acir::native_types::WitnessStack, FieldElement}; +use nargo::{ + ops::{execute_program, DefaultForeignCallExecutor}, + parse_all, +}; +use noirc_abi::input_parser::InputValue; +use noirc_driver::{ + compile_main, file_manager_with_stdlib, prepare_crate, CompilationResult, CompileOptions, + CompiledProgram, CrateId, +}; +use noirc_frontend::hir::Context; +use proptest::prelude::*; +use sha3::Digest; + +/// Inputs and expected output of a snippet encoded in ABI format. +#[derive(Debug)] +struct SnippetInputOutput { + description: String, + inputs: BTreeMap, + expected_output: InputValue, +} +impl SnippetInputOutput { + fn new(inputs: Vec<(&str, InputValue)>, output: InputValue) -> Self { + Self { + description: "".to_string(), + inputs: inputs.into_iter().map(|(k, v)| (k.to_string(), v)).collect(), + expected_output: output, + } + } + + /// Attach some description to hint at the scenario we are testing. + fn with_description(mut self, description: String) -> Self { + self.description = description; + self + } +} + +/// Prepare a code snippet. +fn prepare_snippet(source: String) -> (Context<'static, 'static>, CrateId) { + let root = Path::new(""); + let file_name = Path::new("main.nr"); + let mut file_manager = file_manager_with_stdlib(root); + file_manager.add_file_with_source(file_name, source).expect( + "Adding source buffer to file manager should never fail when file manager is empty", + ); + let parsed_files = parse_all(&file_manager); + + let mut context = Context::new(file_manager, parsed_files); + let root_crate_id = prepare_crate(&mut context, file_name); + + (context, root_crate_id) +} + +/// Compile the main function in a code snippet. +/// +/// Use `force_brillig` to test it as an unconstrained function without having to change the code. +/// This is useful for methods that use the `runtime::is_unconstrained()` method to change their behavior. +fn prepare_and_compile_snippet( + source: String, + force_brillig: bool, +) -> CompilationResult { + let (mut context, root_crate_id) = prepare_snippet(source); + let options = CompileOptions { force_brillig, ..Default::default() }; + compile_main(&mut context, root_crate_id, &options, None) +} + +/// Compile a snippet and run property tests against it by generating random input/output pairs +/// according to the strategy, executing the snippet with the input, and asserting that the +/// output it returns is the one we expect. +fn run_snippet_proptest( + source: String, + force_brillig: bool, + strategy: BoxedStrategy, +) { + let program = match prepare_and_compile_snippet(source.clone(), force_brillig) { + Ok((program, _)) => program, + Err(e) => panic!("failed to compile program:\n{source}\n{e:?}"), + }; + + let blackbox_solver = bn254_blackbox_solver::Bn254BlackBoxSolver; + let foreign_call_executor = + RefCell::new(DefaultForeignCallExecutor::new(false, None, None, None)); + + // Generate multiple input/output + proptest!(ProptestConfig::with_cases(100), |(io in strategy)| { + let initial_witness = program.abi.encode(&io.inputs, None).expect("failed to encode"); + let mut foreign_call_executor = foreign_call_executor.borrow_mut(); + + let witness_stack: WitnessStack = execute_program( + &program.program, + initial_witness, + &blackbox_solver, + &mut *foreign_call_executor, + ) + .expect("failed to execute"); + + let main_witness = witness_stack.peek().expect("should have return value on witness stack"); + let main_witness = &main_witness.witness; + + let (_, return_value) = program.abi.decode(main_witness).expect("failed to decode"); + let return_value = return_value.expect("should decode a return value"); + + prop_assert_eq!(return_value, io.expected_output, "{}", io.description); + }); +} + +/// Run property tests on a code snippet which is assumed to execute a hashing function with the following signature: +/// +/// ```ignore +/// fn main(input: [u8; {max_len}], message_size: u32) -> pub [u8; 32] +/// ``` +/// +/// The calls are executed with and without forcing brillig, because it seems common for hash functions to run different +/// code paths based on `runtime::is_unconstrained()`. +fn run_hash_proptest( + // Different generic maximum input sizes to try. + max_lengths: &[usize], + // Some hash functions allow inputs which are less than the generic parameters, others don't. + variable_length: bool, + // Make the source code specialized for a given expected input size. + source: impl Fn(usize) -> String, + // Rust implementation of the hash function. + hash: fn(&[u8]) -> [u8; N], +) { + for max_len in max_lengths { + let max_len = *max_len; + // The maximum length is used to pick the generic version of the method. + let source = source(max_len); + // Hash functions runs differently depending on whether the code is unconstrained or not. + for force_brillig in [false, true] { + let length_strategy = + if variable_length { (0..=max_len).boxed() } else { Just(max_len).boxed() }; + // The actual input length can be up to the maximum. + let strategy = length_strategy + .prop_flat_map(|len| prop::collection::vec(any::(), len)) + .prop_map(move |mut msg| { + // The output is the hash of the data as it is. + let output = hash(&msg); + + // The input has to be padded to the maximum length. + let msg_size = msg.len(); + msg.resize(max_len, 0u8); + + let mut inputs = vec![("input", bytes_input(&msg))]; + + // Omit the `message_size` if the hash function doesn't support it. + if variable_length { + inputs.push(( + "message_size", + InputValue::Field(FieldElement::from(msg_size)), + )); + } + + SnippetInputOutput::new(inputs, bytes_input(&output)).with_description(format!( + "force_brillig = {force_brillig}, max_len = {max_len}" + )) + }) + .boxed(); + + run_snippet_proptest(source.clone(), force_brillig, strategy); + } + } +} + +/// This is just a simple test to check that property testing works. +#[test] +fn fuzz_basic() { + let program = "fn main(init: u32) -> pub u32 { + let mut x = init; + for i in 0 .. 6 { + x += i; + } + x + }"; + + let strategy = any::() + .prop_map(|init| { + let init = init / 2; + SnippetInputOutput::new( + vec![("init", InputValue::Field(init.into()))], + InputValue::Field((init + 15).into()), + ) + }) + .boxed(); + + run_snippet_proptest(program.to_string(), false, strategy); +} + +#[test] +fn fuzz_keccak256_equivalence() { + run_hash_proptest( + // XXX: Currently it fails with inputs >= 135 bytes + &[0, 1, 100, 134], + true, + |max_len| { + format!( + "fn main(input: [u8; {max_len}], message_size: u32) -> pub [u8; 32] {{ + std::hash::keccak256(input, message_size) + }}" + ) + }, + |data| sha3::Keccak256::digest(data).try_into().unwrap(), + ); +} + +#[test] +#[should_panic] // Remove once fixed +fn fuzz_keccak256_equivalence_over_135() { + run_hash_proptest( + &[135, 150], + true, + |max_len| { + format!( + "fn main(input: [u8; {max_len}], message_size: u32) -> pub [u8; 32] {{ + std::hash::keccak256(input, message_size) + }}" + ) + }, + |data| sha3::Keccak256::digest(data).try_into().unwrap(), + ); +} + +#[test] +fn fuzz_sha256_equivalence() { + run_hash_proptest( + &[0, 1, 200, 511, 512], + true, + |max_len| { + format!( + "fn main(input: [u8; {max_len}], message_size: u64) -> pub [u8; 32] {{ + std::hash::sha256_var(input, message_size) + }}" + ) + }, + |data| sha2::Sha256::digest(data).try_into().unwrap(), + ); +} + +#[test] +fn fuzz_sha512_equivalence() { + run_hash_proptest( + &[0, 1, 200], + false, + |max_len| { + format!( + "fn main(input: [u8; {max_len}]) -> pub [u8; 64] {{ + std::hash::sha512::digest(input) + }}" + ) + }, + |data| sha2::Sha512::digest(data).try_into().unwrap(), + ); +} + +fn bytes_input(bytes: &[u8]) -> InputValue { + InputValue::Vec( + bytes.iter().map(|b| InputValue::Field(FieldElement::from(*b as u32))).collect(), + ) +}