Skip to content

Commit

Permalink
Support decoding wasm-encoded WIT packages in wit-parser (bytecodeall…
Browse files Browse the repository at this point in the history
…iance#1408)

* Make serde optional in `wit-parser`

Try to keep its minimal build profile pretty slim.

* Move decoding from wit-component to wit-parser

* Move text detection from wit-component to wat

* Merge `parse_wit_from_path` directly as methods on `Resolve`

This commit replaces the `wit_component::parse_wit_from_path` function with
a suite of methods directly on `Resolve`. The direct equivalent is
now `Resolve::push_path`.

* Fix CI configuration

* Review comments

* More review comments
  • Loading branch information
alexcrichton authored Feb 12, 2024
1 parent 66f63fe commit d24d9c4
Show file tree
Hide file tree
Showing 48 changed files with 869 additions and 536 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ jobs:
- run: cargo check --no-default-features --features metadata
- run: cargo check --no-default-features --features wit-smith
- run: cargo check --no-default-features --features addr2line
- run: cargo check --no-default-features -p wit-parser
- run: cargo check --no-default-features -p wit-parser --features wat
- run: cargo check --no-default-features -p wit-parser --features serde
- run: cargo check --no-default-features -p wit-parser --features decoding
- run: cargo check --no-default-features -p wit-parser --features serde,decoding,wat
- run: |
if cargo tree -p wasm-smith --no-default-features -e no-dev | grep wasmparser; then
echo wasm-smith without default features should not depend on wasmparser
Expand Down
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ cpp_demangle = { version = "0.4.0", optional = true }

# Dependencies of `component`
wit-component = { workspace = true, optional = true, features = ['dummy-module', 'wat', 'semver-check'] }
wit-parser = { workspace = true, optional = true }
wit-parser = { workspace = true, optional = true, features = ['decoding', 'wat', 'serde'] }
wast = { workspace = true, optional = true }

# Dependencies of `metadata`
Expand Down
2 changes: 1 addition & 1 deletion crates/fuzz-stats/src/bin/failed-instantiations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ impl State {
config.gc_enabled = false;

let mut wasm = wasm_smith::Module::new(config, &mut u)?;
wasm.ensure_termination(10_000);
wasm.ensure_termination(10_000).unwrap();
let wasm = wasm.to_bytes();

// We install a resource limiter in the store which limits the store to
Expand Down
68 changes: 68 additions & 0 deletions crates/wat/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ use std::borrow::Cow;
use std::fmt;
use std::path::{Path, PathBuf};
use std::str;
use wast::lexer::{Lexer, TokenKind};
use wast::parser::{self, ParseBuffer};

/// Parses a file on disk as a [WebAssembly Text format][wat] file, or a binary
Expand Down Expand Up @@ -221,6 +222,73 @@ fn _parse_str(wat: &str) -> Result<Vec<u8>> {
ast.encode().map_err(|e| Error::cvt(e, wat))
}

/// Classification of a buffer of input bytes, as produced by
/// [`Detect::from_bytes`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Detect {
    /// The bytes appear to be WebAssembly in its text format.
    WasmText,
    /// The bytes appear to be WebAssembly in its binary format.
    WasmBinary,
    /// The bytes do not appear to be WebAssembly in either format.
    Unknown,
}

impl Detect {
    /// Quickly classifies `bytes` as binary wasm, textual wasm, or neither.
    ///
    /// Input that begins with the `\0asm` magic bytes is reported as
    /// [`Detect::WasmBinary`]. Otherwise, if the input is valid UTF-8, it is
    /// lexed as a `*.wat` file just far enough to skip leading whitespace and
    /// comments: when the first remaining token is a left-paren the input is
    /// reported as [`Detect::WasmText`]. Anything else (probably a `*.wit`
    /// file) is reported as [`Detect::Unknown`].
    ///
    /// # Examples
    ///
    /// ```
    /// use wat::Detect;
    ///
    /// assert_eq!(Detect::from_bytes(r#"
    /// (module
    /// (type (;0;) (func))
    /// (func (;0;) (type 0)
    /// nop
    /// )
    /// )
    /// "#), Detect::WasmText);
    /// ```
    pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Detect {
        let raw = bytes.as_ref();
        if raw.starts_with(b"\0asm") {
            return Detect::WasmBinary;
        }
        let text = match std::str::from_utf8(raw) {
            Ok(text) => text,
            Err(_) => return Detect::Unknown,
        };

        // Locate the first token that is not whitespace or a comment. A lexing
        // error also ends the search, and is then treated as "not wasm" below.
        let lexer = Lexer::new(text);
        let first_significant = lexer
            .iter(0)
            .map(|token| token.map(|t| t.kind))
            .find(|kind| {
                !matches!(
                    kind,
                    Ok(TokenKind::Whitespace)
                        | Ok(TokenKind::BlockComment)
                        | Ok(TokenKind::LineComment)
                )
            });

        match first_significant {
            Some(Ok(TokenKind::LParen)) => Detect::WasmText,
            _ => Detect::Unknown,
        }
    }

    /// Returns `true` for [`Detect::WasmText`] and [`Detect::WasmBinary`], and
    /// `false` for [`Detect::Unknown`].
    pub fn is_wasm(&self) -> bool {
        matches!(self, Detect::WasmText | Detect::WasmBinary)
    }
}

/// A convenience type definition for `Result` where the error is [`Error`]
pub type Result<T> = std::result::Result<T, Error>;

Expand Down
2 changes: 1 addition & 1 deletion crates/wit-component/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ workspace = true
wasmparser = { workspace = true }
wasm-encoder = { workspace = true }
wasm-metadata = { workspace = true }
wit-parser = { workspace = true }
wit-parser = { workspace = true, features = ['decoding', 'serde'] }
anyhow = { workspace = true }
log = "0.4.17"
bitflags = "2.3.3"
Expand Down
1 change: 0 additions & 1 deletion crates/wit-component/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ use wit_parser::{

const INDIRECT_TABLE_NAME: &str = "$imports";

pub mod docs;
mod wit;
pub use wit::{encode, encode_world};

Expand Down
12 changes: 5 additions & 7 deletions crates/wit-component/src/encoding/wit/v1.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use crate::encoding::{
docs::PackageDocs,
types::{FunctionKey, ValtypeEncoder},
};
use crate::encoding::types::{FunctionKey, ValtypeEncoder};
use anyhow::Result;
use indexmap::IndexSet;
use std::collections::HashMap;
Expand Down Expand Up @@ -36,9 +33,10 @@ pub fn encode_component(resolve: &Resolve, package: PackageId) -> Result<Compone
encoder.run()?;

let package_docs = PackageDocs::extract(resolve, package);
encoder
.component
.raw_custom_section(&package_docs.raw_custom_section()?);
encoder.component.custom_section(&CustomSection {
name: PackageDocs::SECTION_NAME.into(),
data: package_docs.encode()?.into(),
});

Ok(encoder.component)
}
Expand Down
12 changes: 5 additions & 7 deletions crates/wit-component/src/encoding/wit/v2.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use crate::encoding::{
docs::PackageDocs,
types::{FunctionKey, ValtypeEncoder},
};
use crate::encoding::types::{FunctionKey, ValtypeEncoder};
use anyhow::Result;
use indexmap::IndexSet;
use std::collections::HashMap;
Expand Down Expand Up @@ -36,9 +33,10 @@ pub fn encode_component(resolve: &Resolve, package: PackageId) -> Result<Compone
encoder.run()?;

let package_docs = PackageDocs::extract(resolve, package);
encoder
.component
.raw_custom_section(&package_docs.raw_custom_section()?);
encoder.component.custom_section(&CustomSection {
name: PackageDocs::SECTION_NAME.into(),
data: package_docs.encode()?.into(),
});

Ok(encoder.component)
}
Expand Down
129 changes: 1 addition & 128 deletions crates/wit-component/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,18 @@ use anyhow::{bail, Result};
use wasm_encoder::{CanonicalOption, Encode, Section};
use wit_parser::{Resolve, WorldId};

mod decoding;
mod encoding;
mod gc;
mod linking;
mod printing;
mod targets;
mod validation;

pub use decoding::{decode, decode_reader, DecodedWasm};
pub use encoding::{encode, ComponentEncoder};
pub use linking::Linker;
pub use printing::*;
pub use targets::*;
pub use wit_parser::decoding::{decode, decode_reader, DecodedWasm};

pub mod metadata;

Expand Down Expand Up @@ -88,132 +87,6 @@ pub(crate) fn base_producers() -> wasm_metadata::Producers {
producer
}

/// Parse a WIT package from the input `path`.
///
/// The input `path` can be one of:
///
/// * A directory containing a WIT package with an optional `deps` directory for
/// any dependent WIT packages it references.
/// * A single standalone WIT file with no dependencies.
/// * A wasm-encoded WIT package as a single file in the wasm binary format.
/// * A wasm-encoded WIT package as a single file in the wasm text format.
///
/// The `Resolve` containing WIT information along with the `PackageId` of what
/// was parsed is returned if successful.
///
/// NOTE(review): when the `wat` feature is disabled, every non-directory input
/// is handed straight to `decode` with no fallback to parsing WIT text, so the
/// standalone-WIT-file and wasm-text cases above appear to require the `wat`
/// feature -- confirm.
///
/// # Errors
///
/// Returns an error if the path cannot be read or resolved, if the file
/// decodes to a component rather than a WIT package, or (with the `wat`
/// feature) if a non-wasm file is not valid UTF-8 or fails to parse as WIT.
pub fn parse_wit_from_path(
path: impl AsRef<std::path::Path>,
) -> Result<(Resolve, wit_parser::PackageId)> {
use anyhow::Context;

// Fresh resolver used by the directory and WIT-text paths; the wasm-decoding
// paths return the `Resolve` produced by `decode` instead.
let mut resolver = Resolve::default();
let id = match path.as_ref() {
// Directories can be directly fed into the resolver
p if p.is_dir() => {
// `push_dir` yields a tuple; only its first element is kept as the id.
resolver
.push_dir(p)
.with_context(|| {
format!(
"failed to resolve directory while parsing WIT for path [{}]",
p.display()
)
})?
.0
}
// Non-directory files (including symlinks) can be either:
// - Wasm modules (binary or WAT) that are WIT packages
// - WIT files
#[cfg(not(feature = "wat"))]
p => {
let file_contents = std::fs::read(p)
.with_context(|| format!("failed to parse WIT from path [{}]", p.display()))?;
// Without the `wat` feature the bytes are always treated as encoded wasm.
match decode(&file_contents)? {
DecodedWasm::Component(..) => {
bail!("specified path is a component, not a wit package")
}
// Both arms leave the function, so this match arm never produces an `id`.
DecodedWasm::WitPackage(resolve, pkg) => return Ok((resolve, pkg)),
}
}
#[cfg(feature = "wat")]
p => {
use wit_parser::UnresolvedPackage;

let file_contents = std::fs::read(p)
.with_context(|| format!("failed to parse WIT from path [{}]", p.display()))?;

// Check if the bytes represent a Wasm module (either binary or WAT encoded)
if is_wasm_binary_or_wat(&file_contents) {
// Convert text to binary if needed, recording the file path on any
// parse error via `set_path`.
let bytes = wat::parse_bytes(&file_contents).map_err(|mut e| {
e.set_path(p);
e
})?;
match decode(&bytes)? {
DecodedWasm::Component(..) => {
bail!("specified path is a component, not a wit package")
}
// Decoding produced its own `Resolve`; return it rather than `resolver`.
DecodedWasm::WitPackage(resolve, pkg) => return Ok((resolve, pkg)),
}
} else {
// If the bytes are not a WASM module, they should be WIT that can be parsed
// into a package by the resolver
let text = match std::str::from_utf8(&file_contents) {
Ok(s) => s,
Err(_) => bail!("input file is not valid utf-8"),
};
let pkg = UnresolvedPackage::parse(p, text)?;
resolver.push(pkg)?
}
}
};
Ok((resolver, id))
}

/// Quickly checks whether the supplied bytes look like a Wasm module, in
/// either the binary or the text (WAT) encoding.
///
/// Input starting with the `\0asm` magic bytes is accepted immediately, as is
/// input that is not valid UTF-8. Otherwise the input is lexed as a `*.wat`
/// file just far enough to skip leading whitespace and comments: if the first
/// remaining token is a left-paren this returns `true`. If not, the input is
/// probably `*.wit` instead and this returns `false`.
///
/// # Examples
///
/// ```
/// # use wit_component::is_wasm_binary_or_wat;
/// assert!(is_wasm_binary_or_wat(r#"
/// (module
/// (type (;0;) (func))
/// (func (;0;) (type 0)
/// nop
/// )
/// )
/// "#));
/// ```
#[cfg(feature = "wat")]
pub fn is_wasm_binary_or_wat(bytes: impl AsRef<[u8]>) -> bool {
    use wast::lexer::{Lexer, TokenKind};

    let raw = bytes.as_ref();
    if raw.starts_with(b"\0asm") {
        return true;
    }
    // Input that is not valid UTF-8 is reported as wasm rather than as WIT.
    let text = match std::str::from_utf8(raw) {
        Ok(text) => text,
        Err(_) => return true,
    };

    // Locate the first token that is not whitespace or a comment; a lexing
    // error also ends the search and yields `false` below.
    let lexer = Lexer::new(text);
    let first_significant = lexer
        .iter(0)
        .map(|token| token.map(|t| t.kind))
        .find(|kind| {
            !matches!(
                kind,
                Ok(TokenKind::Whitespace)
                    | Ok(TokenKind::BlockComment)
                    | Ok(TokenKind::LineComment)
            )
        });

    matches!(first_significant, Some(Ok(TokenKind::LParen)))
}

/// Embed component metadata in a buffer of bytes that contains a Wasm module
pub fn embed_component_metadata(
bytes: &mut Vec<u8>,
Expand Down
Loading

0 comments on commit d24d9c4

Please sign in to comment.