Skip to content

Commit

Permalink
Fold MaybeInvalidModule into Config
Browse files Browse the repository at this point in the history
This commit removes the top-level `MaybeInvalidModule` type and instead
folds it into a configuration option inside of `Config`. This has the
benefit of being able to still configure all the other `Config` options
during generating possibly-invalid modules at the cost that we can no
longer statically rule out success of `ensure_termination`. This error
is now propagated upwards and documented in a few places.

Closes bytecodealliance#1410
  • Loading branch information
alexcrichton committed Feb 10, 2024
1 parent f43b418 commit 882dadc
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 76 deletions.
13 changes: 13 additions & 0 deletions crates/wasm-smith/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,18 @@ define_config! {
///
/// Defaults to `false`.
pub threads_enabled: bool = false,

/// Indicates whether wasm-smith is allowed to generate invalid function
/// bodies.
///
/// When enabled this option will enable taking raw bytes from the input
/// byte stream and using them as a wasm function body. This means that
/// the output module is not guaranteed to be valid but can help tickle
/// various parts of validation/compilation in some circumstances as
/// well.
///
/// Defaults to `false`.
pub allow_invalid_funcs: bool = false,
}
}

Expand Down Expand Up @@ -617,6 +629,7 @@ impl<'a> Arbitrary<'a> for Config {
tail_call_enabled: false,
gc_enabled: false,
generate_custom_sections: false,
allow_invalid_funcs: false,
})
}
}
37 changes: 6 additions & 31 deletions crates/wasm-smith/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ impl Module {
duplicate_imports_behavior: DuplicateImportsBehavior,
) -> Result<Self> {
let mut module = Module::empty(config, duplicate_imports_behavior);
module.build(u, false)?;
module.build(u)?;
Ok(module)
}

Expand Down Expand Up @@ -213,30 +213,6 @@ impl Module {
}
}

/// Same as [`Module`], but may be invalid.
///
/// This module generates function bodies differnetly than `Module` to try to
/// better explore wasm decoders and such.
#[derive(Debug)]
pub struct MaybeInvalidModule {
module: Module,
}

impl MaybeInvalidModule {
/// Encode this Wasm module into bytes.
pub fn to_bytes(&self) -> Vec<u8> {
self.module.to_bytes()
}
}

impl<'a> Arbitrary<'a> for MaybeInvalidModule {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
let mut module = Module::empty(Config::default(), DuplicateImportsBehavior::Allowed);
module.build(u, true)?;
Ok(MaybeInvalidModule { module })
}
}

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct RecGroup {
pub(crate) types: Vec<SubType>,
Expand Down Expand Up @@ -398,7 +374,7 @@ pub(crate) enum GlobalInitExpr {
}

impl Module {
fn build(&mut self, u: &mut Unstructured, allow_invalid: bool) -> Result<()> {
fn build(&mut self, u: &mut Unstructured) -> Result<()> {
self.valtypes = configured_valtypes(&self.config);

// We attempt to figure out our available imports *before* creating the types section here,
Expand All @@ -425,7 +401,7 @@ impl Module {
self.arbitrary_start(u)?;
self.arbitrary_elems(u)?;
self.arbitrary_data(u)?;
self.arbitrary_code(u, allow_invalid)?;
self.arbitrary_code(u)?;
Ok(())
}

Expand Down Expand Up @@ -1795,11 +1771,11 @@ impl Module {
)
}

fn arbitrary_code(&mut self, u: &mut Unstructured, allow_invalid: bool) -> Result<()> {
fn arbitrary_code(&mut self, u: &mut Unstructured) -> Result<()> {
self.code.reserve(self.num_defined_funcs);
let mut allocs = CodeBuilderAllocations::new(self);
for (_, ty) in self.funcs[self.funcs.len() - self.num_defined_funcs..].iter() {
let body = self.arbitrary_func_body(u, ty, &mut allocs, allow_invalid)?;
let body = self.arbitrary_func_body(u, ty, &mut allocs)?;
self.code.push(body);
}
allocs.finish(u, self)?;
Expand All @@ -1811,11 +1787,10 @@ impl Module {
u: &mut Unstructured,
ty: &FuncType,
allocs: &mut CodeBuilderAllocations,
allow_invalid: bool,
) -> Result<Code> {
let mut locals = self.arbitrary_locals(u)?;
let builder = allocs.builder(ty, &mut locals);
let instructions = if allow_invalid && u.arbitrary().unwrap_or(false) {
let instructions = if self.config.allow_invalid_funcs && u.arbitrary().unwrap_or(false) {
Instructions::Arbitrary(arbitrary_vec_u8(u)?)
} else {
Instructions::Generated(builder.arbitrary(u, self)?)
Expand Down
22 changes: 16 additions & 6 deletions crates/wasm-smith/src/core/terminate.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use anyhow::{bail, Result};
use std::mem;
use wasm_encoder::BlockType;

Expand All @@ -12,7 +13,14 @@ impl Module {
///
/// The index of the fuel global is returned, so that you may control how
/// much fuel the module is given.
pub fn ensure_termination(&mut self, default_fuel: u32) -> u32 {
///
/// # Errors
///
/// Returns an error if any function body was generated with
/// possibly-invalid bytes rather than being generated by wasm-smith. In
/// such a situation this pass does not parse the input bytes and inject
/// instructions, instead it returns an error.
pub fn ensure_termination(&mut self, default_fuel: u32) -> Result<u32> {
let fuel_global = self.globals.len() as u32;
self.globals.push(GlobalType {
val_type: ValType::I32,
Expand Down Expand Up @@ -41,10 +49,12 @@ impl Module {

let instrs = match &mut code.instructions {
Instructions::Generated(list) => list,
// only present on modules contained within
// `MaybeInvalidModule`, which doesn't expose its internal
// `Module`.
Instructions::Arbitrary(_) => unreachable!(),
Instructions::Arbitrary(_) => {
bail!(
"failed to ensure that a function generated due to it \
containing arbitrary instructions"
)
}
};
let mut new_insts = Vec::with_capacity(instrs.len() * 2);

Expand All @@ -65,6 +75,6 @@ impl Module {
*instrs = new_insts;
}

fuel_global
Ok(fuel_global)
}
}
2 changes: 1 addition & 1 deletion crates/wasm-smith/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ mod component;
mod config;
mod core;

pub use crate::core::{InstructionKind, InstructionKinds, MaybeInvalidModule, Module};
pub use crate::core::{InstructionKind, InstructionKinds, Module};
use arbitrary::{Result, Unstructured};
pub use component::Component;
pub use config::{Config, MemoryOffsetChoices};
Expand Down
69 changes: 31 additions & 38 deletions src/bin/wasm-tools/smith.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use anyhow::{Context, Result};
use arbitrary::Arbitrary;
use clap::Parser;
use std::io::{stdin, Read};
use std::path::PathBuf;
use std::process;
use wasm_smith::{MaybeInvalidModule, Module};
use wasm_smith::Module;

/// A WebAssembly test case generator.
///
Expand Down Expand Up @@ -52,23 +51,22 @@ pub struct Opts {
/// and in function prologues. When the fuel reaches 0, a trap is raised to
/// terminate execution. Control the default amount of fuel with the
/// `--fuel` flag.
#[clap(long = "ensure-termination")]
///
/// Note that when combined with `--allow-invalid-funcs` this subcommand may
/// return an error because arbitrary function bodies taken from input bytes
/// cannot be validated to ensure they always terminate.
#[clap(long)]
ensure_termination: bool,

/// Indicates that the generated module may contain invalid wasm functions,
/// taken raw from the input DNA.
#[clap(long = "maybe-invalid")]
maybe_invalid: bool,

/// The default amount of fuel used with `--ensure-termination`.
///
/// This is roughly the number of loop iterations and function calls that
/// will be executed before a trap is raised to prevent infinite loops.
#[clap(short = 'f', long = "fuel")]
#[clap(short, long)]
fuel: Option<u32>,

/// JSON configuration file with settings to control the wasm output.
#[clap(short = 'c', long = "config")]
#[clap(short, long)]
config: Option<PathBuf>,

#[clap(flatten)]
Expand Down Expand Up @@ -98,36 +96,31 @@ impl Opts {
};

let mut u = arbitrary::Unstructured::new(&seed);
let wasm_bytes = if self.maybe_invalid {
MaybeInvalidModule::arbitrary(&mut u)
.unwrap_or_else(|e| {
eprintln!("error: failed to generate module: {}", e);
process::exit(2);
})
.to_bytes()
} else {
let json = match &self.config {
Some(path) => {
let json = std::fs::read_to_string(&path).with_context(|| {
format!("failed to read json config: {}", path.display())
})?;
serde_json::from_str(&json).with_context(|| {
format!("failed to decode json config: {}", path.display())
})?
}
None => wasm_smith::InternalOptionalConfig::default(),
};
let config = self.module_config.clone().or(json);
let config = wasm_smith::Config::try_from(config)?;
let mut module = Module::new(config, &mut u).unwrap_or_else(|e| {
eprintln!("error: failed to generate module: {}", e);
process::exit(2);
});
if self.ensure_termination {
module.ensure_termination(self.fuel.unwrap_or(100));
let json = match &self.config {
Some(path) => {
let json = std::fs::read_to_string(&path)
.with_context(|| format!("failed to read json config: {}", path.display()))?;
serde_json::from_str(&json)
.with_context(|| format!("failed to decode json config: {}", path.display()))?
}
module.to_bytes()
None => wasm_smith::InternalOptionalConfig::default(),
};
let config = self.module_config.clone().or(json);
let config = wasm_smith::Config::try_from(config)?;
let mut module = Module::new(config, &mut u).unwrap_or_else(|e| {
eprintln!("error: failed to generate module: {}", e);
process::exit(2);
});
if self.ensure_termination {
module
.ensure_termination(self.fuel.unwrap_or(100))
.context(
"failed to ensure module always terminates, \
`--ensure-termination` is incompatible with \
`--allow-invalid-funcs`",
)?;
}
let wasm_bytes = module.to_bytes();

self.output.output(wasm_tools::Output::Wasm {
bytes: &wasm_bytes,
Expand Down

0 comments on commit 882dadc

Please sign in to comment.