diff --git a/crates/wasm-metadata/src/lib.rs b/crates/wasm-metadata/src/lib.rs index 8b08229d71..287fd6e3b7 100644 --- a/crates/wasm-metadata/src/lib.rs +++ b/crates/wasm-metadata/src/lib.rs @@ -6,6 +6,7 @@ pub use add_metadata::AddMetadata; pub use metadata::Metadata; pub use names::{ComponentNames, ModuleNames}; pub use oci_annotations::{Author, Description, Licenses, Source}; +pub use payload::Payload; pub use producers::{Producers, ProducersField}; pub(crate) use rewrite::rewrite_wasm; @@ -14,6 +15,7 @@ mod add_metadata; mod metadata; mod names; mod oci_annotations; +mod payload; mod producers; mod rewrite; diff --git a/crates/wasm-metadata/src/metadata.rs b/crates/wasm-metadata/src/metadata.rs index e9dc316318..fcac7ff6b7 100644 --- a/crates/wasm-metadata/src/metadata.rs +++ b/crates/wasm-metadata/src/metadata.rs @@ -1,237 +1,24 @@ -use anyhow::Result; use serde_derive::Serialize; -use std::fmt; use std::ops::Range; -use wasmparser::{KnownCustom, Parser, Payload::*}; -use crate::{Author, ComponentNames, Description, Licenses, ModuleNames, Producers, Source}; +use crate::{Author, Description, Licenses, Producers, Source}; -/// A tree of the metadata found in a WebAssembly binary. -#[derive(Debug, Serialize)] +/// Metadata associated with a Wasm Component or Module +#[derive(Debug, Serialize, Default)] #[serde(rename_all = "lowercase")] -pub enum Metadata { - /// Metadata found inside a WebAssembly component. - Component { - /// The component name, if any. Found in the component-name section. - name: Option, - /// The component's producers section, if any. - producers: Option, - /// The component's author section, if any. - author: Option, - /// Human-readable description of the binary - description: Option, - /// License(s) under which contained software is distributed as an SPDX License Expression. - licenses: Option, - /// URL to get source code for building the image - source: Option, - /// All child modules and components inside the component. 
- children: Vec>, - /// Byte range of the module in the parent binary - range: Range, - }, - /// Metadata found inside a WebAssembly module. - Module { - /// The module name, if any. Found in the name section. - name: Option, - /// The module's producers section, if any. - producers: Option, - /// The component's author section, if any. - author: Option, - /// Human-readable description of the binary - description: Option, - /// License(s) under which contained software is distributed as an SPDX License Expression. - licenses: Option, - /// URL to get source code for building the image - source: Option, - /// Byte range of the module in the parent binary - range: Range, - }, -} - -impl Metadata { - /// Parse metadata from a WebAssembly binary. Supports both core WebAssembly modules, and - /// WebAssembly components. - pub fn from_binary(input: &[u8]) -> Result { - let mut metadata = Vec::new(); - - for payload in Parser::new(0).parse_all(&input) { - match payload? { - Version { encoding, .. } => { - if metadata.is_empty() { - match encoding { - wasmparser::Encoding::Module => { - metadata.push(Metadata::empty_module(0..input.len())) - } - wasmparser::Encoding::Component => { - metadata.push(Metadata::empty_component(0..input.len())) - } - } - } - } - ModuleSection { - unchecked_range: range, - .. - } => metadata.push(Metadata::empty_module(range)), - ComponentSection { - unchecked_range: range, - .. - } => metadata.push(Metadata::empty_component(range)), - End { .. 
} => { - let finished = metadata.pop().expect("non-empty metadata stack"); - if metadata.is_empty() { - return Ok(finished); - } else { - metadata.last_mut().unwrap().push_child(finished); - } - } - CustomSection(c) => match c.as_known() { - KnownCustom::Name(_) => { - let names = ModuleNames::from_bytes(c.data(), c.data_offset())?; - if let Some(name) = names.get_name() { - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_name(&name); - } - } - KnownCustom::ComponentName(_) => { - let names = ComponentNames::from_bytes(c.data(), c.data_offset())?; - if let Some(name) = names.get_name() { - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_name(name); - } - } - KnownCustom::Producers(_) => { - let producers = Producers::from_bytes(c.data(), c.data_offset())?; - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_producers(producers); - } - KnownCustom::Unknown if c.name() == "author" => { - let a = Author::parse_custom_section(&c)?; - match metadata.last_mut().expect("non-empty metadata stack") { - Metadata::Module { author, .. } => *author = Some(a), - Metadata::Component { author, .. } => *author = Some(a), - } - } - KnownCustom::Unknown if c.name() == "description" => { - let a = Description::parse_custom_section(&c)?; - match metadata.last_mut().expect("non-empty metadata stack") { - Metadata::Module { description, .. } => *description = Some(a), - Metadata::Component { description, .. } => *description = Some(a), - } - } - KnownCustom::Unknown if c.name() == "licenses" => { - let a = Licenses::parse_custom_section(&c)?; - match metadata.last_mut().expect("non-empty metadata stack") { - Metadata::Module { licenses, .. } => *licenses = Some(a), - Metadata::Component { licenses, .. } => *licenses = Some(a), - } - } - KnownCustom::Unknown if c.name() == "source" => { - let a = Source::parse_custom_section(&c)?; - match metadata.last_mut().expect("non-empty metadata stack") { - Metadata::Module { source, .. 
} => *source = Some(a), - Metadata::Component { source, .. } => *source = Some(a), - } - } - _ => {} - }, - _ => {} - } - } - Err(anyhow::anyhow!( - "malformed wasm binary, should have reached end" - )) - } - - fn empty_component(range: Range) -> Self { - Metadata::Component { - name: None, - producers: None, - author: None, - description: None, - licenses: None, - source: None, - children: Vec::new(), - range, - } - } - - fn empty_module(range: Range) -> Self { - Metadata::Module { - name: None, - producers: None, - author: None, - description: None, - licenses: None, - source: None, - range, - } - } - fn set_name(&mut self, n: &str) { - match self { - Metadata::Module { name, .. } => *name = Some(n.to_owned()), - Metadata::Component { name, .. } => *name = Some(n.to_owned()), - } - } - fn set_producers(&mut self, p: Producers) { - match self { - Metadata::Module { producers, .. } => *producers = Some(p), - Metadata::Component { producers, .. } => *producers = Some(p), - } - } - fn push_child(&mut self, child: Self) { - match self { - Metadata::Module { .. } => panic!("module shouldnt have children"), - Metadata::Component { children, .. } => children.push(Box::new(child)), - } - } - - fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { - let spaces = std::iter::repeat(" ").take(indent).collect::(); - match self { - Metadata::Module { - name, producers, .. - } => { - if let Some(name) = name { - writeln!(f, "{spaces}module {name}:")?; - } else { - writeln!(f, "{spaces}module:")?; - } - if let Some(producers) = producers { - producers.display(f, indent + 4)?; - } - Ok(()) - } - Metadata::Component { - name, - producers, - children, - .. 
- } => { - if let Some(name) = name { - writeln!(f, "{spaces}component {name}:")?; - } else { - writeln!(f, "{spaces}component:")?; - } - if let Some(producers) = producers { - producers.display(f, indent + 4)?; - } - for c in children { - c.display(f, indent + 4)?; - } - Ok(()) - } - } - } -} - -impl fmt::Display for Metadata { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.display(f, 0) - } +pub struct Metadata { + /// The component name, if any. Found in the component-name section. + pub name: Option, + /// The component's producers section, if any. + pub producers: Option, + /// The component's author section, if any. + pub author: Option, + /// Human-readable description of the binary + pub description: Option, + /// License(s) under which contained software is distributed as an SPDX License Expression. + pub licenses: Option, + /// URL to get source code for building the image + pub source: Option, + /// Byte range of the module in the parent binary + pub range: Range, } diff --git a/crates/wasm-metadata/src/payload.rs b/crates/wasm-metadata/src/payload.rs new file mode 100644 index 0000000000..5560373d8c --- /dev/null +++ b/crates/wasm-metadata/src/payload.rs @@ -0,0 +1,219 @@ +use std::fmt::{self, Display}; +use std::ops::Range; + +use anyhow::Result; +use serde_derive::Serialize; +use wasmparser::{KnownCustom, Parser, Payload::*}; + +use crate::{ + Author, ComponentNames, Description, Licenses, Metadata, ModuleNames, Producers, Source, +}; + +/// Data representing either a Wasm Component or module +/// +/// Each payload has additional [`Metadata`] associated with it, +/// but if it's a Component it may have also additional `Payloads` associated +/// with it. 
+#[derive(Debug, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum Payload { + /// A representation of a Wasm Component + Component { + /// The metadata associated with the Component + metadata: Metadata, + /// The metadata of nested Components or Modules + children: Vec, + }, + /// A representation of a Wasm Module + Module(Metadata), +} + +impl Payload { + /// Parse metadata from a WebAssembly binary. Supports both core WebAssembly modules, and + /// WebAssembly components. + pub fn from_binary(input: &[u8]) -> Result { + let mut output = Vec::new(); + + for payload in Parser::new(0).parse_all(&input) { + match payload? { + Version { encoding, .. } => { + if output.is_empty() { + match encoding { + wasmparser::Encoding::Module => { + output.push(Self::empty_module(0..input.len())) + } + wasmparser::Encoding::Component => { + output.push(Self::empty_component(0..input.len())) + } + } + } + } + ModuleSection { + unchecked_range: range, + .. + } => output.push(Self::empty_module(range)), + ComponentSection { + unchecked_range: range, + .. + } => output.push(Self::empty_component(range)), + End { .. 
} => { + let finished = output.pop().expect("non-empty metadata stack"); + if output.is_empty() { + return Ok(finished); + } else { + output.last_mut().unwrap().push_child(finished); + } + } + CustomSection(c) => match c.as_known() { + KnownCustom::Name(_) => { + let names = ModuleNames::from_bytes(c.data(), c.data_offset())?; + if let Some(name) = names.get_name() { + output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut() + .name = Some(name.clone()); + } + } + KnownCustom::ComponentName(_) => { + let names = ComponentNames::from_bytes(c.data(), c.data_offset())?; + if let Some(name) = names.get_name() { + output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut() + .name = Some(name.clone()); + } + } + KnownCustom::Producers(_) => { + let producers = Producers::from_bytes(c.data(), c.data_offset())?; + output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut() + .producers = Some(producers); + } + KnownCustom::Unknown if c.name() == "author" => { + let a = Author::parse_custom_section(&c)?; + let Metadata { author, .. } = output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut(); + *author = Some(a); + } + KnownCustom::Unknown if c.name() == "description" => { + let a = Description::parse_custom_section(&c)?; + let Metadata { description, .. } = output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut(); + *description = Some(a); + } + KnownCustom::Unknown if c.name() == "licenses" => { + let a = Licenses::parse_custom_section(&c)?; + let Metadata { licenses, .. } = output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut(); + *licenses = Some(a); + } + KnownCustom::Unknown if c.name() == "source" => { + let a = Source::parse_custom_section(&c)?; + let Metadata { source, .. 
} = output + .last_mut() + .expect("non-empty metadata stack") + .metadata_mut(); + *source = Some(a); + } + _ => {} + }, + _ => {} + } + } + Err(anyhow::anyhow!( + "malformed wasm binary, should have reached end" + )) + } + + /// Get a reference to the metadata + pub fn metadata(&self) -> &Metadata { + match self { + Payload::Component { metadata, .. } => metadata, + Payload::Module(metadata) => metadata, + } + } + + /// Get a mutable reference to the metadata + pub fn metadata_mut(&mut self) -> &mut Metadata { + match self { + Payload::Component { metadata, .. } => metadata, + Payload::Module(metadata) => metadata, + } + } + + fn empty_component(range: Range) -> Self { + let mut this = Self::Component { + metadata: Metadata::default(), + children: vec![], + }; + this.metadata_mut().range = range; + this + } + + fn empty_module(range: Range) -> Self { + let mut this = Self::Module(Metadata::default()); + this.metadata_mut().range = range; + this + } + + fn push_child(&mut self, child: Self) { + match self { + Self::Module { .. } => panic!("module shouldnt have children"), + Self::Component { children, .. } => children.push(child), + } + } + + fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { + let spaces = std::iter::repeat(" ").take(indent).collect::(); + match self { + Self::Module(Metadata { + name, producers, .. + }) => { + if let Some(name) = name { + writeln!(f, "{spaces}module {name}:")?; + } else { + writeln!(f, "{spaces}module:")?; + } + if let Some(producers) = producers { + producers.display(f, indent + 4)?; + } + Ok(()) + } + Self::Component { + children, + metadata: Metadata { + name, producers, .. 
+ }, + } => { + if let Some(name) = name { + writeln!(f, "{spaces}component {name}:")?; + } else { + writeln!(f, "{spaces}component:")?; + } + if let Some(producers) = producers { + producers.display(f, indent + 4)?; + } + for c in children { + c.display(f, indent + 4)?; + } + Ok(()) + } + } + } +} + +impl Display for Payload { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.display(f, 0) + } +} diff --git a/crates/wasm-metadata/src/producers.rs b/crates/wasm-metadata/src/producers.rs index 2e0124c929..fca295ad99 100644 --- a/crates/wasm-metadata/src/producers.rs +++ b/crates/wasm-metadata/src/producers.rs @@ -191,7 +191,7 @@ impl<'a> ProducersField<'a> { #[cfg(test)] mod test { use super::*; - use crate::Metadata; + use crate::{Metadata, Payload}; use wasm_encoder::Module; #[test] @@ -203,11 +203,10 @@ mod test { let module = producers.add_to_wasm(&module).unwrap(); - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { + match Payload::from_binary(&module).unwrap() { + Payload::Module(Metadata { name, producers, .. - } => { + }) => { assert_eq!(name, None); let producers = producers.expect("some producers"); assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); @@ -232,11 +231,10 @@ mod test { producers.add("language", "waaat", ""); let module = producers.add_to_wasm(&module).unwrap(); - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { + match Payload::from_binary(&module).unwrap() { + Payload::Module(Metadata { name, producers, .. - } => { + }) => { assert_eq!(name, None); let producers = producers.expect("some producers"); assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); @@ -261,9 +259,8 @@ mod test { producers.add("processed-by", "baz", "420"); let module = producers.add_to_wasm(&module).unwrap(); - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { producers, .. 
} => { + match Payload::from_binary(&module).unwrap() { + Payload::Module(Metadata { producers, .. }) => { let producers = producers.expect("some producers"); assert_eq!( producers.get("processed-by").unwrap().get("baz").unwrap(), diff --git a/crates/wasm-metadata/tests/component.rs b/crates/wasm-metadata/tests/component.rs index 61ed32c68b..6eba7d465c 100644 --- a/crates/wasm-metadata/tests/component.rs +++ b/crates/wasm-metadata/tests/component.rs @@ -20,17 +20,19 @@ fn add_to_empty_component() { }; let component = add.to_wasm(&component).unwrap(); - let metadata = Metadata::from_binary(&component).unwrap(); - match metadata { - Metadata::Component { - name, - producers, - author, - description, - licenses, - source, + match Payload::from_binary(&component).unwrap() { + Payload::Component { children, - range, + metadata: + Metadata { + name, + producers, + author, + description, + licenses, + source, + range, + }, } => { assert!(children.is_empty()); assert_eq!(name, Some("foo".to_owned())); @@ -96,13 +98,12 @@ fn add_to_nested_component() { }; let component = add.to_wasm(&component).unwrap(); - let metadata = Metadata::from_binary(&component).unwrap(); - match metadata { - Metadata::Component { - name, - producers, + match Payload::from_binary(&component).unwrap() { + Payload::Component { children, - .. + metadata: Metadata { + name, producers, .. 
+ }, } => { // Check that the component metadata is in the component assert_eq!(name, Some("gussie".to_owned())); @@ -113,9 +114,9 @@ fn add_to_nested_component() { ); // Check that there is a single child with the metadata set for the module assert_eq!(children.len(), 1); - let child = children.get(0).unwrap(); - match &**child { - Metadata::Module { + + match children.get(0).unwrap() { + Payload::Module(Metadata { name, producers, author, @@ -123,7 +124,7 @@ fn add_to_nested_component() { source, range, description, - } => { + }) => { assert_eq!(name, &Some("foo".to_owned())); let producers = producers.as_ref().expect("some producers"); assert_eq!( diff --git a/crates/wasm-metadata/tests/module.rs b/crates/wasm-metadata/tests/module.rs index 1d5b46f4c5..91c064d3d2 100644 --- a/crates/wasm-metadata/tests/module.rs +++ b/crates/wasm-metadata/tests/module.rs @@ -20,17 +20,16 @@ fn add_to_empty_module() { }; let module = add.to_wasm(&module).unwrap(); - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { + match Payload::from_binary(&module).unwrap() { + Payload::Module(Metadata { name, producers, author, - description, licenses, source, range, - } => { + description, + }) => { assert_eq!(name, Some("foo".to_owned())); let producers = producers.expect("some producers"); assert_eq!( diff --git a/crates/wit-component/tests/components.rs b/crates/wit-component/tests/components.rs index 3684b8ec33..4a335bbbff 100644 --- a/crates/wit-component/tests/components.rs +++ b/crates/wit-component/tests/components.rs @@ -3,6 +3,7 @@ use libtest_mimic::{Arguments, Trial}; use pretty_assertions::assert_eq; use std::{borrow::Cow, fs, path::Path}; use wasm_encoder::{Encode, Section}; +use wasm_metadata::{Metadata, Payload}; use wit_component::{ComponentEncoder, DecodedWasm, Linker, StringEncoding, WitPrinter}; use wit_parser::{PackageId, Resolve, UnresolvedPackageGroup}; @@ -167,11 +168,10 @@ fn run_test(path: &Path) -> Result<()> { 
.context("failed to parse printed WIT")?; // Check that the producer data got piped through properly - let metadata = wasm_metadata::Metadata::from_binary(&bytes)?; - match metadata { + match Payload::from_binary(&bytes).unwrap() { // Depends on the ComponentEncoder always putting the first module as the 0th child: - wasm_metadata::Metadata::Component { children, .. } => match children[0].as_ref() { - wasm_metadata::Metadata::Module { producers, .. } => { + Payload::Component { children, .. } => match &children[0] { + Payload::Module(Metadata { producers, .. }) => { let producers = producers.as_ref().expect("child module has producers"); let processed_by = producers .get("processed-by") diff --git a/src/bin/wasm-tools/metadata.rs b/src/bin/wasm-tools/metadata.rs index e4849dbc0d..3e0ff6e13e 100644 --- a/src/bin/wasm-tools/metadata.rs +++ b/src/bin/wasm-tools/metadata.rs @@ -44,7 +44,7 @@ impl ShowOpts { let input = self.io.parse_input_wasm()?; let mut output = self.io.output_writer()?; - let metadata = wasm_metadata::Metadata::from_binary(&input)?; + let metadata = wasm_metadata::Payload::from_binary(&input)?; if self.json { write!(output, "{}", serde_json::to_string(&metadata)?)?; } else {