diff --git a/crates/wasm-smith/src/config.rs b/crates/wasm-smith/src/config.rs index b183530e4c..6efe88d914 100644 --- a/crates/wasm-smith/src/config.rs +++ b/crates/wasm-smith/src/config.rs @@ -481,6 +481,18 @@ define_config! { /// /// Defaults to `false`. pub threads_enabled: bool = false, + + /// Indicates whether wasm-smith is allowed to generate invalid function + /// bodies. + /// + /// When enabled this option will enable taking raw bytes from the input + /// byte stream and using them as a wasm function body. This means that + /// the output module is not guaranteed to be valid but can help tickle + /// various parts of validation/compilation in some circumstances as + /// well. + /// + /// Defaults to `false`. + pub allow_invalid_funcs: bool = false, } } @@ -617,6 +629,7 @@ impl<'a> Arbitrary<'a> for Config { tail_call_enabled: false, gc_enabled: false, generate_custom_sections: false, + allow_invalid_funcs: false, }) } } diff --git a/crates/wasm-smith/src/core.rs b/crates/wasm-smith/src/core.rs index 6a2836ae0e..ee1b8d2ed8 100644 --- a/crates/wasm-smith/src/core.rs +++ b/crates/wasm-smith/src/core.rs @@ -172,7 +172,7 @@ impl Module { duplicate_imports_behavior: DuplicateImportsBehavior, ) -> Result { let mut module = Module::empty(config, duplicate_imports_behavior); - module.build(u, false)?; + module.build(u)?; Ok(module) } @@ -213,30 +213,6 @@ impl Module { } } -/// Same as [`Module`], but may be invalid. -/// -/// This module generates function bodies differnetly than `Module` to try to -/// better explore wasm decoders and such. -#[derive(Debug)] -pub struct MaybeInvalidModule { - module: Module, -} - -impl MaybeInvalidModule { - /// Encode this Wasm module into bytes. - pub fn to_bytes(&self) -> Vec { - self.module.to_bytes() - } -} - -impl<'a> Arbitrary<'a> for MaybeInvalidModule { - fn arbitrary(u: &mut Unstructured<'a>) -> Result { - let mut module = Module::empty(Config::default(), DuplicateImportsBehavior::Allowed); - module.build(u, true)?; - Ok(MaybeInvalidModule { module }) - } -} - #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub(crate) struct RecGroup { pub(crate) types: Vec, @@ -398,7 +374,7 @@ pub(crate) enum GlobalInitExpr { } impl Module { - fn build(&mut self, u: &mut Unstructured, allow_invalid: bool) -> Result<()> { + fn build(&mut self, u: &mut Unstructured) -> Result<()> { self.valtypes = configured_valtypes(&self.config); // We attempt to figure out our available imports *before* creating the types section here, @@ -425,7 +401,7 @@ impl Module { self.arbitrary_start(u)?; self.arbitrary_elems(u)?; self.arbitrary_data(u)?; - self.arbitrary_code(u, allow_invalid)?; + self.arbitrary_code(u)?; Ok(()) } @@ -1795,11 +1771,11 @@ impl Module { ) } - fn arbitrary_code(&mut self, u: &mut Unstructured, allow_invalid: bool) -> Result<()> { + fn arbitrary_code(&mut self, u: &mut Unstructured) -> Result<()> { self.code.reserve(self.num_defined_funcs); let mut allocs = CodeBuilderAllocations::new(self); for (_, ty) in self.funcs[self.funcs.len() - self.num_defined_funcs..].iter() { - let body = self.arbitrary_func_body(u, ty, &mut allocs, allow_invalid)?; + let body = self.arbitrary_func_body(u, ty, &mut allocs)?; self.code.push(body); } allocs.finish(u, self)?; @@ -1811,11 +1787,10 @@ impl Module { u: &mut Unstructured, ty: &FuncType, allocs: &mut CodeBuilderAllocations, - allow_invalid: bool, ) -> Result { let mut locals = self.arbitrary_locals(u)?; let builder = allocs.builder(ty, &mut locals); - let instructions = if allow_invalid && u.arbitrary().unwrap_or(false) { + let instructions = if self.config.allow_invalid_funcs && u.arbitrary().unwrap_or(false) { Instructions::Arbitrary(arbitrary_vec_u8(u)?) } else { Instructions::Generated(builder.arbitrary(u, self)?) diff --git a/crates/wasm-smith/src/core/terminate.rs b/crates/wasm-smith/src/core/terminate.rs index adcfeed54f..813135c1a3 100644 --- a/crates/wasm-smith/src/core/terminate.rs +++ b/crates/wasm-smith/src/core/terminate.rs @@ -1,4 +1,5 @@ use super::*; +use anyhow::{bail, Result}; use std::mem; use wasm_encoder::BlockType; @@ -12,7 +13,14 @@ impl Module { /// /// The index of the fuel global is returned, so that you may control how /// much fuel the module is given. - pub fn ensure_termination(&mut self, default_fuel: u32) -> u32 { + /// + /// # Errors + /// + /// Returns an error if any function body was generated with + /// possibly-invalid bytes rather than being generated by wasm-smith. In + /// such a situation this pass does not parse the input bytes and inject + /// instructions, instead it returns an error. + pub fn ensure_termination(&mut self, default_fuel: u32) -> Result { let fuel_global = self.globals.len() as u32; self.globals.push(GlobalType { val_type: ValType::I32, @@ -41,10 +49,12 @@ impl Module { let instrs = match &mut code.instructions { Instructions::Generated(list) => list, - // only present on modules contained within - // `MaybeInvalidModule`, which doesn't expose its internal - // `Module`. - Instructions::Arbitrary(_) => unreachable!(), + Instructions::Arbitrary(_) => { + bail!( + "failed to ensure that a function generated due to it \ + containing arbitrary instructions" + ) + } }; let mut new_insts = Vec::with_capacity(instrs.len() * 2); @@ -65,6 +75,6 @@ impl Module { *instrs = new_insts; } - fuel_global + Ok(fuel_global) } } diff --git a/crates/wasm-smith/src/lib.rs b/crates/wasm-smith/src/lib.rs index 132ccaf2ca..b985791aae 100644 --- a/crates/wasm-smith/src/lib.rs +++ b/crates/wasm-smith/src/lib.rs @@ -58,7 +58,7 @@ mod component; mod config; mod core; -pub use crate::core::{InstructionKind, InstructionKinds, MaybeInvalidModule, Module}; +pub use crate::core::{InstructionKind, InstructionKinds, Module}; use arbitrary::{Result, Unstructured}; pub use component::Component; pub use config::{Config, MemoryOffsetChoices}; diff --git a/src/bin/wasm-tools/smith.rs b/src/bin/wasm-tools/smith.rs index dc1c32f133..2a6a1aaf04 100644 --- a/src/bin/wasm-tools/smith.rs +++ b/src/bin/wasm-tools/smith.rs @@ -1,10 +1,9 @@ use anyhow::{Context, Result}; -use arbitrary::Arbitrary; use clap::Parser; use std::io::{stdin, Read}; use std::path::PathBuf; use std::process; -use wasm_smith::{MaybeInvalidModule, Module}; +use wasm_smith::Module; /// A WebAssembly test case generator. /// @@ -52,23 +51,22 @@ pub struct Opts { /// and in function prologues. When the fuel reaches 0, a trap is raised to /// terminate execution. Control the default amount of fuel with the /// `--fuel` flag. - #[clap(long = "ensure-termination")] + /// + /// Note that when combined with `--allow-invalid-funcs` this subcommand may + /// return an error because arbitrary function bodies taken from input bytes + /// cannot be validated to ensure they always terminate. + #[clap(long)] ensure_termination: bool, - /// Indicates that the generated module may contain invalid wasm functions, - /// taken raw from the input DNA. - #[clap(long = "maybe-invalid")] - maybe_invalid: bool, - /// The default amount of fuel used with `--ensure-termination`. /// /// This is roughly the number of loop iterations and function calls that /// will be executed before a trap is raised to prevent infinite loops. - #[clap(short = 'f', long = "fuel")] + #[clap(short, long)] fuel: Option, /// JSON configuration file with settings to control the wasm output. - #[clap(short = 'c', long = "config")] + #[clap(short, long)] config: Option, #[clap(flatten)] @@ -98,36 +96,31 @@ impl Opts { }; let mut u = arbitrary::Unstructured::new(&seed); - let wasm_bytes = if self.maybe_invalid { - MaybeInvalidModule::arbitrary(&mut u) - .unwrap_or_else(|e| { - eprintln!("error: failed to generate module: {}", e); - process::exit(2); - }) - .to_bytes() - } else { - let json = match &self.config { - Some(path) => { - let json = std::fs::read_to_string(&path).with_context(|| { - format!("failed to read json config: {}", path.display()) - })?; - serde_json::from_str(&json).with_context(|| { - format!("failed to decode json config: {}", path.display()) - })? - } - None => wasm_smith::InternalOptionalConfig::default(), - }; - let config = self.module_config.clone().or(json); - let config = wasm_smith::Config::try_from(config)?; - let mut module = Module::new(config, &mut u).unwrap_or_else(|e| { - eprintln!("error: failed to generate module: {}", e); - process::exit(2); - }); - if self.ensure_termination { - module.ensure_termination(self.fuel.unwrap_or(100)); + let json = match &self.config { + Some(path) => { + let json = std::fs::read_to_string(&path) + .with_context(|| format!("failed to read json config: {}", path.display()))?; + serde_json::from_str(&json) + .with_context(|| format!("failed to decode json config: {}", path.display()))? } - module.to_bytes() + None => wasm_smith::InternalOptionalConfig::default(), }; + let config = self.module_config.clone().or(json); + let config = wasm_smith::Config::try_from(config)?; + let mut module = Module::new(config, &mut u).unwrap_or_else(|e| { + eprintln!("error: failed to generate module: {}", e); + process::exit(2); + }); + if self.ensure_termination { + module + .ensure_termination(self.fuel.unwrap_or(100)) + .context( + "failed to ensure module always terminates, \ + `--ensure-termination` is incompatible with \ + `--allow-invalid-funcs`", + )?; + } + let wasm_bytes = module.to_bytes(); self.output.output(wasm_tools::Output::Wasm { bytes: &wasm_bytes,