From 33a8bd03e6eeb0f060cd738a8e4b40bc792591d7 Mon Sep 17 00:00:00 2001 From: Yosh <2467194+yoshuawuyts@users.noreply.github.com> Date: Fri, 29 Nov 2024 00:26:53 +0100 Subject: [PATCH] [wasm-metadata] Split `lib.rs` into separate files (#1917) * split tests into their own file * move producers * move component_names * move module_names * continue splitting tests * group names in a subdir * group names in subdir * move metadata to its own file * refactor remaining parts * add doc comment * inline module tests into their respective modules --- crates/wasm-metadata/src/add_metadata.rs | 61 + crates/wasm-metadata/src/lib.rs | 1526 +------------------ crates/wasm-metadata/src/metadata.rs | 219 +++ crates/wasm-metadata/src/names/component.rs | 96 ++ crates/wasm-metadata/src/names/mod.rs | 5 + crates/wasm-metadata/src/names/module.rs | 91 ++ crates/wasm-metadata/src/producers.rs | 277 ++++ crates/wasm-metadata/src/registry.rs | 376 +++++ crates/wasm-metadata/src/rewrite.rs | 130 ++ crates/wasm-metadata/src/utils.rs | 21 + crates/wasm-metadata/tests/component.rs | 185 +++ crates/wasm-metadata/tests/module.rs | 101 ++ 12 files changed, 1576 insertions(+), 1512 deletions(-) create mode 100644 crates/wasm-metadata/src/add_metadata.rs create mode 100644 crates/wasm-metadata/src/metadata.rs create mode 100644 crates/wasm-metadata/src/names/component.rs create mode 100644 crates/wasm-metadata/src/names/mod.rs create mode 100644 crates/wasm-metadata/src/names/module.rs create mode 100644 crates/wasm-metadata/src/producers.rs create mode 100644 crates/wasm-metadata/src/registry.rs create mode 100644 crates/wasm-metadata/src/rewrite.rs create mode 100644 crates/wasm-metadata/src/utils.rs create mode 100644 crates/wasm-metadata/tests/component.rs create mode 100644 crates/wasm-metadata/tests/module.rs diff --git a/crates/wasm-metadata/src/add_metadata.rs b/crates/wasm-metadata/src/add_metadata.rs new file mode 100644 index 0000000000..d8c52da491 --- /dev/null +++ 
b/crates/wasm-metadata/src/add_metadata.rs @@ -0,0 +1,61 @@ +use crate::{rewrite_wasm, Producers, RegistryMetadata}; + +use anyhow::Result; + +/// Add metadata (module name, producers) to a WebAssembly file. +/// +/// Supports both core WebAssembly modules and components. In components, +/// metadata will be added to the outermost component. +#[cfg_attr(feature = "clap", derive(clap::Parser))] +#[derive(Debug, Clone, Default)] +pub struct AddMetadata { + /// Add a module or component name to the names section + #[cfg_attr(feature = "clap", clap(long, value_name = "NAME"))] + pub name: Option, + + /// Add a programming language to the producers section + #[cfg_attr(feature = "clap", clap(long, value_name = "NAME"))] + pub language: Vec, + + /// Add a tool and its version to the producers section + #[cfg_attr(feature = "clap", clap(long = "processed-by", value_parser = parse_key_value, value_name="NAME=VERSION"))] + pub processed_by: Vec<(String, String)>, + + /// Add an SDK and its version to the producers section + #[cfg_attr(feature="clap", clap(long, value_parser = parse_key_value, value_name="NAME=VERSION"))] + pub sdk: Vec<(String, String)>, + + /// Add an registry metadata to the registry-metadata section + #[cfg_attr(feature="clap", clap(long, value_parser = parse_registry_metadata_value, value_name="PATH"))] + pub registry_metadata: Option, +} + +#[cfg(feature = "clap")] +pub(crate) fn parse_key_value(s: &str) -> Result<(String, String)> { + s.split_once('=') + .map(|(k, v)| (k.to_owned(), v.to_owned())) + .ok_or_else(|| anyhow::anyhow!("expected KEY=VALUE")) +} + +#[cfg(feature = "clap")] +pub(crate) fn parse_registry_metadata_value(s: &str) -> Result { + let contents = std::fs::read(s)?; + + let registry_metadata = RegistryMetadata::from_bytes(&contents, 0)?; + + Ok(registry_metadata) +} + +impl AddMetadata { + /// Process a WebAssembly binary. Supports both core WebAssembly modules, and WebAssembly + /// components. 
The module and component will have, at very least, an empty name and producers + /// section created. + pub fn to_wasm(&self, input: &[u8]) -> Result> { + rewrite_wasm( + &self.name, + &Producers::from_meta(self), + self.registry_metadata.as_ref(), + input, + ) + } +} diff --git a/crates/wasm-metadata/src/lib.rs b/crates/wasm-metadata/src/lib.rs index 358224d552..991ee98324 100644 --- a/crates/wasm-metadata/src/lib.rs +++ b/crates/wasm-metadata/src/lib.rs @@ -1,1516 +1,18 @@ -use anyhow::Result; -use indexmap::{map::Entry, IndexMap}; -use serde_derive::{Deserialize, Serialize}; -use spdx::Expression; -use std::borrow::Cow; -use std::fmt; -use std::fmt::Display; -use std::mem; -use std::ops::Range; -use wasm_encoder::{ComponentSection as _, ComponentSectionId, Encode, Section}; -use wasmparser::{ - BinaryReader, ComponentNameSectionReader, KnownCustom, NameSectionReader, Parser, Payload::*, - ProducersSectionReader, -}; +//! Read and manipulate WebAssembly metadata -/// A representation of a WebAssembly producers section. -/// -/// Spec: -#[derive(Debug, Serialize)] -pub struct Producers( - #[serde(serialize_with = "indexmap::map::serde_seq::serialize")] - IndexMap>, -); +pub use add_metadata::AddMetadata; +pub use metadata::Metadata; +pub use names::{ComponentNames, ModuleNames}; +pub use producers::{Producers, ProducersField}; +pub use registry::{CustomLicense, Link, LinkType, RegistryMetadata}; -impl Default for Producers { - fn default() -> Self { - Self::empty() - } -} +pub(crate) use rewrite::rewrite_wasm; -impl Producers { - /// Creates an empty producers section - pub fn empty() -> Self { - Producers(IndexMap::new()) - } +mod add_metadata; +mod metadata; +mod names; +mod producers; +mod registry; +mod rewrite; - /// Indicates if section is empty - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Read the producers section from a Wasm binary. Supports both core - /// Modules and Components. 
In the component case, only returns the - /// producers section in the outer component, ignoring all interior - /// components and modules. - pub fn from_wasm(bytes: &[u8]) -> Result> { - let mut depth = 0; - for payload in Parser::new(0).parse_all(bytes) { - let payload = payload?; - use wasmparser::Payload::*; - match payload { - ModuleSection { .. } | ComponentSection { .. } => depth += 1, - End { .. } => depth -= 1, - CustomSection(c) if depth == 0 => { - if let KnownCustom::Producers(_) = c.as_known() { - let producers = Self::from_bytes(c.data(), c.data_offset())?; - return Ok(Some(producers)); - } - } - _ => {} - } - } - Ok(None) - } - /// Read the producers section from a Wasm binary. - pub fn from_bytes(bytes: &[u8], offset: usize) -> Result { - let reader = BinaryReader::new(bytes, offset); - let section = ProducersSectionReader::new(reader)?; - let mut fields = IndexMap::new(); - for field in section.into_iter() { - let field = field?; - let mut values = IndexMap::new(); - for value in field.values.into_iter() { - let value = value?; - values.insert(value.name.to_owned(), value.version.to_owned()); - } - fields.insert(field.name.to_owned(), values); - } - Ok(Producers(fields)) - } - /// Add a name & version value to a field. - /// - /// The spec says expected field names are "language", "processed-by", and "sdk". - /// The version value should be left blank for languages. - pub fn add(&mut self, field: &str, name: &str, version: &str) { - match self.0.entry(field.to_string()) { - Entry::Occupied(e) => { - e.into_mut().insert(name.to_owned(), version.to_owned()); - } - Entry::Vacant(e) => { - let mut m = IndexMap::new(); - m.insert(name.to_owned(), version.to_owned()); - e.insert(m); - } - } - } - - /// Add all values found in another `Producers` section. Values in `other` take - /// precedence. 
- pub fn merge(&mut self, other: &Self) { - for (field, values) in other.iter() { - for (name, version) in values.iter() { - self.add(field, name, version); - } - } - } - - /// Get the contents of a field - pub fn get<'a>(&'a self, field: &str) -> Option> { - self.0.get(&field.to_owned()).map(ProducersField) - } - - /// Iterate through all fields - pub fn iter<'a>(&'a self) -> impl Iterator)> + 'a { - self.0 - .iter() - .map(|(name, field)| (name, ProducersField(field))) - } - - /// Construct the fields specified by [`AddMetadata`] - fn from_meta(add: &AddMetadata) -> Self { - let mut s = Self::empty(); - for lang in add.language.iter() { - s.add("language", &lang, ""); - } - for (name, version) in add.processed_by.iter() { - s.add("processed-by", &name, &version); - } - for (name, version) in add.sdk.iter() { - s.add("sdk", &name, &version); - } - s - } - - /// Serialize into [`wasm_encoder::ProducersSection`]. - fn section(&self) -> wasm_encoder::ProducersSection { - let mut section = wasm_encoder::ProducersSection::new(); - for (fieldname, fieldvalues) in self.0.iter() { - let mut field = wasm_encoder::ProducersField::new(); - for (name, version) in fieldvalues { - field.value(&name, &version); - } - section.field(&fieldname, &field); - } - section - } - - /// Serialize into the raw bytes of a wasm custom section. - pub fn raw_custom_section(&self) -> Vec { - let mut ret = Vec::new(); - self.section().encode(&mut ret); - ret - } - - /// Merge into an existing wasm module. Rewrites the module with this producers section - /// merged into its existing one, or adds this producers section if none is present. 
- pub fn add_to_wasm(&self, input: &[u8]) -> Result> { - rewrite_wasm(&None, self, None, input) - } - - fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { - let indent = std::iter::repeat(" ").take(indent).collect::(); - for (fieldname, fieldvalues) in self.0.iter() { - writeln!(f, "{indent}{fieldname}:")?; - for (name, version) in fieldvalues { - if version.is_empty() { - writeln!(f, "{indent} {name}")?; - } else { - writeln!(f, "{indent} {name}: {version}")?; - } - } - } - Ok(()) - } -} - -impl fmt::Display for Producers { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.display(f, 0) - } -} - -/// Contents of a producers field -pub struct ProducersField<'a>(&'a IndexMap); - -impl<'a> ProducersField<'a> { - /// Get the version associated with a name in the field - pub fn get(&self, name: &str) -> Option<&'a String> { - self.0.get(&name.to_owned()) - } - /// Iterate through all name-version pairs in the field - pub fn iter(&self) -> impl Iterator + 'a { - self.0.iter() - } -} - -/// Add metadata (module name, producers) to a WebAssembly file. -/// -/// Supports both core WebAssembly modules and components. In components, -/// metadata will be added to the outermost component. 
-#[cfg_attr(feature = "clap", derive(clap::Parser))] -#[derive(Debug, Clone, Default)] -pub struct AddMetadata { - /// Add a module or component name to the names section - #[cfg_attr(feature = "clap", clap(long, value_name = "NAME"))] - pub name: Option, - - /// Add a programming language to the producers section - #[cfg_attr(feature = "clap", clap(long, value_name = "NAME"))] - pub language: Vec, - - /// Add a tool and its version to the producers section - #[cfg_attr(feature = "clap", clap(long = "processed-by", value_parser = parse_key_value, value_name="NAME=VERSION"))] - pub processed_by: Vec<(String, String)>, - - /// Add an SDK and its version to the producers section - #[cfg_attr(feature="clap", clap(long, value_parser = parse_key_value, value_name="NAME=VERSION"))] - pub sdk: Vec<(String, String)>, - - /// Add an registry metadata to the registry-metadata section - #[cfg_attr(feature="clap", clap(long, value_parser = parse_registry_metadata_value, value_name="PATH"))] - pub registry_metadata: Option, -} - -#[cfg(feature = "clap")] -fn parse_key_value(s: &str) -> Result<(String, String)> { - s.split_once('=') - .map(|(k, v)| (k.to_owned(), v.to_owned())) - .ok_or_else(|| anyhow::anyhow!("expected KEY=VALUE")) -} - -#[cfg(feature = "clap")] -fn parse_registry_metadata_value(s: &str) -> Result { - let contents = std::fs::read(s)?; - - let registry_metadata = RegistryMetadata::from_bytes(&contents, 0)?; - - Ok(registry_metadata) -} - -impl AddMetadata { - /// Process a WebAssembly binary. Supports both core WebAssembly modules, and WebAssembly - /// components. The module and component will have, at very least, an empty name and producers - /// section created. 
- pub fn to_wasm(&self, input: &[u8]) -> Result> { - rewrite_wasm( - &self.name, - &Producers::from_meta(self), - self.registry_metadata.as_ref(), - input, - ) - } -} - -fn rewrite_wasm( - add_name: &Option, - add_producers: &Producers, - add_registry_metadata: Option<&RegistryMetadata>, - input: &[u8], -) -> Result> { - let mut producers_found = false; - let mut names_found = false; - let mut stack = Vec::new(); - let mut output = Vec::new(); - for payload in Parser::new(0).parse_all(&input) { - let payload = payload?; - - // Track nesting depth, so that we don't mess with inner producer sections: - match payload { - Version { encoding, .. } => { - output.extend_from_slice(match encoding { - wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER, - wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER, - }); - } - ModuleSection { .. } | ComponentSection { .. } => { - stack.push(mem::take(&mut output)); - continue; - } - End { .. } => { - let mut parent = match stack.pop() { - Some(c) => c, - None => break, - }; - if output.starts_with(&wasm_encoder::Component::HEADER) { - parent.push(ComponentSectionId::Component as u8); - output.encode(&mut parent); - } else { - parent.push(ComponentSectionId::CoreModule as u8); - output.encode(&mut parent); - } - output = parent; - } - _ => {} - } - - // Only rewrite the outermost custom sections - if let CustomSection(c) = &payload { - if stack.len() == 0 { - match c.as_known() { - KnownCustom::Producers(_) => { - producers_found = true; - let mut producers = Producers::from_bytes(c.data(), c.data_offset())?; - // Add to the section according to the command line flags: - producers.merge(&add_producers); - // Encode into output: - producers.section().append_to(&mut output); - continue; - } - KnownCustom::Name(_) => { - names_found = true; - let mut names = ModuleNames::from_bytes(c.data(), c.data_offset())?; - names.merge(&ModuleNames::from_name(add_name)); - - names.section()?.as_custom().append_to(&mut 
output); - continue; - } - KnownCustom::ComponentName(_) => { - names_found = true; - let mut names = ComponentNames::from_bytes(c.data(), c.data_offset())?; - names.merge(&ComponentNames::from_name(add_name)); - names.section()?.as_custom().append_to(&mut output); - continue; - } - KnownCustom::Unknown if c.name() == "registry-metadata" => { - // Pass section through if a new registry metadata isn't provided, otherwise ignore and overwrite with new - if add_registry_metadata.is_none() { - let registry: RegistryMetadata = - RegistryMetadata::from_bytes(&c.data(), 0)?; - - let registry_metadata = wasm_encoder::CustomSection { - name: Cow::Borrowed("registry-metadata"), - data: Cow::Owned(serde_json::to_vec(®istry)?), - }; - registry_metadata.append_to(&mut output); - continue; - } - } - _ => {} - } - } - } - // All other sections get passed through unmodified: - if let Some((id, range)) = payload.as_section() { - wasm_encoder::RawSection { - id, - data: &input[range], - } - .append_to(&mut output); - } - } - if !names_found && add_name.is_some() { - if output.starts_with(&wasm_encoder::Component::HEADER) { - let names = ComponentNames::from_name(add_name); - names.section()?.append_to_component(&mut output); - } else { - let names = ModuleNames::from_name(add_name); - names.section()?.append_to(&mut output) - } - } - if !producers_found && !add_producers.is_empty() { - let mut producers = Producers::empty(); - // Add to the section according to the command line flags: - producers.merge(add_producers); - // Encode into output: - producers.section().append_to(&mut output); - } - if add_registry_metadata.is_some() { - let registry_metadata = wasm_encoder::CustomSection { - name: Cow::Borrowed("registry-metadata"), - data: Cow::Owned(serde_json::to_vec(&add_registry_metadata)?), - }; - registry_metadata.append_to(&mut output); - } - Ok(output) -} - -/// A tree of the metadata found in a WebAssembly binary. 
-#[derive(Debug, Serialize)] -#[serde(rename_all = "lowercase")] -pub enum Metadata { - /// Metadata found inside a WebAssembly component. - Component { - /// The component name, if any. Found in the component-name section. - name: Option, - /// The component's producers section, if any. - producers: Option, - /// The component's registry metadata section, if any. - registry_metadata: Option, - /// All child modules and components inside the component. - children: Vec>, - /// Byte range of the module in the parent binary - range: Range, - }, - /// Metadata found inside a WebAssembly module. - Module { - /// The module name, if any. Found in the name section. - name: Option, - /// The module's producers section, if any. - producers: Option, - /// The module's registry metadata section, if any. - registry_metadata: Option, - /// Byte range of the module in the parent binary - range: Range, - }, -} - -impl Metadata { - /// Parse metadata from a WebAssembly binary. Supports both core WebAssembly modules, and - /// WebAssembly components. - pub fn from_binary(input: &[u8]) -> Result { - let mut metadata = Vec::new(); - - for payload in Parser::new(0).parse_all(&input) { - match payload? { - Version { encoding, .. } => { - if metadata.is_empty() { - match encoding { - wasmparser::Encoding::Module => { - metadata.push(Metadata::empty_module(0..input.len())) - } - wasmparser::Encoding::Component => { - metadata.push(Metadata::empty_component(0..input.len())) - } - } - } - } - ModuleSection { - unchecked_range: range, - .. - } => metadata.push(Metadata::empty_module(range)), - ComponentSection { - unchecked_range: range, - .. - } => metadata.push(Metadata::empty_component(range)), - End { .. 
} => { - let finished = metadata.pop().expect("non-empty metadata stack"); - if metadata.is_empty() { - return Ok(finished); - } else { - metadata.last_mut().unwrap().push_child(finished); - } - } - CustomSection(c) => match c.as_known() { - KnownCustom::Name(_) => { - let names = ModuleNames::from_bytes(c.data(), c.data_offset())?; - if let Some(name) = names.get_name() { - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_name(&name); - } - } - KnownCustom::ComponentName(_) => { - let names = ComponentNames::from_bytes(c.data(), c.data_offset())?; - if let Some(name) = names.get_name() { - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_name(name); - } - } - KnownCustom::Producers(_) => { - let producers = Producers::from_bytes(c.data(), c.data_offset())?; - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_producers(producers); - } - KnownCustom::Unknown if c.name() == "registry-metadata" => { - let registry: RegistryMetadata = - RegistryMetadata::from_bytes(&c.data(), 0)?; - metadata - .last_mut() - .expect("non-empty metadata stack") - .set_registry_metadata(registry); - } - _ => {} - }, - _ => {} - } - } - Err(anyhow::anyhow!( - "malformed wasm binary, should have reached end" - )) - } - - fn empty_component(range: Range) -> Self { - Metadata::Component { - name: None, - producers: None, - registry_metadata: None, - children: Vec::new(), - range, - } - } - - fn empty_module(range: Range) -> Self { - Metadata::Module { - name: None, - producers: None, - registry_metadata: None, - range, - } - } - fn set_name(&mut self, n: &str) { - match self { - Metadata::Module { name, .. } => *name = Some(n.to_owned()), - Metadata::Component { name, .. } => *name = Some(n.to_owned()), - } - } - fn set_producers(&mut self, p: Producers) { - match self { - Metadata::Module { producers, .. } => *producers = Some(p), - Metadata::Component { producers, .. 
} => *producers = Some(p), - } - } - fn set_registry_metadata(&mut self, r: RegistryMetadata) { - match self { - Metadata::Module { - registry_metadata, .. - } => *registry_metadata = Some(r), - Metadata::Component { - registry_metadata, .. - } => *registry_metadata = Some(r), - } - } - fn push_child(&mut self, child: Self) { - match self { - Metadata::Module { .. } => panic!("module shouldnt have children"), - Metadata::Component { children, .. } => children.push(Box::new(child)), - } - } - - fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { - let spaces = std::iter::repeat(" ").take(indent).collect::(); - match self { - Metadata::Module { - name, - producers, - registry_metadata, - .. - } => { - if let Some(name) = name { - writeln!(f, "{spaces}module {name}:")?; - } else { - writeln!(f, "{spaces}module:")?; - } - if let Some(producers) = producers { - producers.display(f, indent + 4)?; - } - if let Some(registry_metadata) = registry_metadata { - registry_metadata.display(f, indent + 4)?; - } - Ok(()) - } - Metadata::Component { - name, - producers, - registry_metadata, - children, - .. - } => { - if let Some(name) = name { - writeln!(f, "{spaces}component {name}:")?; - } else { - writeln!(f, "{spaces}component:")?; - } - if let Some(producers) = producers { - producers.display(f, indent + 4)?; - } - if let Some(registry_metadata) = registry_metadata { - registry_metadata.display(f, indent + 4)?; - } - for c in children { - c.display(f, indent + 4)?; - } - Ok(()) - } - } - } -} - -impl fmt::Display for Metadata { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.display(f, 0) - } -} - -/// Helper for rewriting a module's name section with a new module name. -pub struct ModuleNames<'a> { - module_name: Option, - names: Vec>, -} - -impl<'a> ModuleNames<'a> { - /// Create an empty name section. 
- pub fn empty() -> Self { - ModuleNames { - module_name: None, - names: Vec::new(), - } - } - /// Read a name section from a WebAssembly binary. Records the module name, and all other - /// contents of name section, for later serialization. - pub fn from_bytes(bytes: &'a [u8], offset: usize) -> Result> { - let reader = BinaryReader::new(bytes, offset); - let section = NameSectionReader::new(reader); - let mut s = Self::empty(); - for name in section.into_iter() { - let name = name?; - match name { - wasmparser::Name::Module { name, .. } => s.module_name = Some(name.to_owned()), - _ => s.names.push(name), - } - } - Ok(s) - } - /// Update module section according to [`AddMetadata`] - fn from_name(name: &Option) -> Self { - let mut s = Self::empty(); - s.module_name = name.clone(); - s - } - - /// Merge with another section - fn merge(&mut self, other: &Self) { - if other.module_name.is_some() { - self.module_name = other.module_name.clone(); - } - self.names.extend_from_slice(&other.names); - } - - /// Set module name - pub fn set_name(&mut self, name: &str) { - self.module_name = Some(name.to_owned()) - } - /// Get module name - pub fn get_name(&self) -> Option<&String> { - self.module_name.as_ref() - } - /// Serialize into [`wasm_encoder::NameSection`]. - fn section(&self) -> Result { - let mut section = wasm_encoder::NameSection::new(); - if let Some(module_name) = &self.module_name { - section.module(&module_name); - } - for n in self.names.iter() { - match n { - wasmparser::Name::Module { .. 
} => unreachable!(), - wasmparser::Name::Function(m) => section.functions(&name_map(&m)?), - wasmparser::Name::Local(m) => section.locals(&indirect_name_map(&m)?), - wasmparser::Name::Label(m) => section.labels(&indirect_name_map(&m)?), - wasmparser::Name::Type(m) => section.types(&name_map(&m)?), - wasmparser::Name::Table(m) => section.tables(&name_map(&m)?), - wasmparser::Name::Memory(m) => section.memories(&name_map(&m)?), - wasmparser::Name::Global(m) => section.globals(&name_map(&m)?), - wasmparser::Name::Element(m) => section.elements(&name_map(&m)?), - wasmparser::Name::Data(m) => section.data(&name_map(&m)?), - wasmparser::Name::Field(m) => section.fields(&indirect_name_map(&m)?), - wasmparser::Name::Tag(m) => section.tags(&name_map(&m)?), - wasmparser::Name::Unknown { .. } => {} // wasm-encoder doesn't support it - } - } - Ok(section) - } - - /// Serialize into the raw bytes of a wasm custom section. - pub fn raw_custom_section(&self) -> Result> { - let mut ret = Vec::new(); - self.section()?.encode(&mut ret); - Ok(ret) - } -} - -/// Helper for rewriting a component's component-name section with a new component name. -pub struct ComponentNames<'a> { - component_name: Option, - names: Vec>, -} - -impl<'a> ComponentNames<'a> { - /// Create an empty component-name section. - pub fn empty() -> Self { - ComponentNames { - component_name: None, - names: Vec::new(), - } - } - /// Read a component-name section from a WebAssembly binary. Records the component name, as - /// well as all other component name fields for later serialization. - pub fn from_bytes(bytes: &'a [u8], offset: usize) -> Result> { - let reader = BinaryReader::new(bytes, offset); - let section = ComponentNameSectionReader::new(reader); - let mut s = Self::empty(); - for name in section.into_iter() { - let name = name?; - match name { - wasmparser::ComponentName::Component { name, .. 
} => { - s.component_name = Some(name.to_owned()) - } - _ => s.names.push(name), - } - } - Ok(s) - } - /// Set component name according to [`AddMetadata`] - fn from_name(name: &Option) -> Self { - let mut s = Self::empty(); - s.component_name = name.clone(); - s - } - - /// Merge with another section - fn merge(&mut self, other: &Self) { - if other.component_name.is_some() { - self.component_name = other.component_name.clone(); - } - self.names.extend_from_slice(&other.names); - } - - /// Set component name - pub fn set_name(&mut self, name: &str) { - self.component_name = Some(name.to_owned()) - } - /// Get component name - pub fn get_name(&self) -> Option<&String> { - self.component_name.as_ref() - } - /// Serialize into [`wasm_encoder::ComponentNameSection`] - fn section(&self) -> Result { - let mut section = wasm_encoder::ComponentNameSection::new(); - if let Some(component_name) = &self.component_name { - section.component(&component_name); - } - for n in self.names.iter() { - match n { - wasmparser::ComponentName::Component { .. } => unreachable!(), - wasmparser::ComponentName::CoreFuncs(m) => section.core_funcs(&name_map(&m)?), - wasmparser::ComponentName::CoreGlobals(m) => section.core_globals(&name_map(&m)?), - wasmparser::ComponentName::CoreMemories(m) => section.core_memories(&name_map(&m)?), - wasmparser::ComponentName::CoreTables(m) => section.core_tables(&name_map(&m)?), - wasmparser::ComponentName::CoreModules(m) => section.core_modules(&name_map(&m)?), - wasmparser::ComponentName::CoreInstances(m) => { - section.core_instances(&name_map(&m)?) 
- } - wasmparser::ComponentName::CoreTypes(m) => section.core_types(&name_map(&m)?), - wasmparser::ComponentName::Types(m) => section.types(&name_map(&m)?), - wasmparser::ComponentName::Instances(m) => section.instances(&name_map(&m)?), - wasmparser::ComponentName::Components(m) => section.components(&name_map(&m)?), - wasmparser::ComponentName::Funcs(m) => section.funcs(&name_map(&m)?), - wasmparser::ComponentName::Values(m) => section.values(&name_map(&m)?), - wasmparser::ComponentName::Unknown { .. } => {} // wasm-encoder doesn't support it - } - } - Ok(section) - } - - /// Serialize into the raw bytes of a wasm custom section. - pub fn raw_custom_section(&self) -> Result> { - let mut ret = Vec::new(); - self.section()?.encode(&mut ret); - Ok(ret) - } -} - -fn name_map(map: &wasmparser::NameMap<'_>) -> Result { - let mut out = wasm_encoder::NameMap::new(); - for m in map.clone().into_iter() { - let m = m?; - out.append(m.index, m.name); - } - Ok(out) -} - -fn indirect_name_map( - map: &wasmparser::IndirectNameMap<'_>, -) -> Result { - let mut out = wasm_encoder::IndirectNameMap::new(); - for m in map.clone().into_iter() { - let m = m?; - out.append(m.index, &name_map(&m.names)?); - } - Ok(out) -} - -#[derive(Debug, Deserialize, Serialize, Clone, Default, PartialEq)] -pub struct RegistryMetadata { - /// List of authors who has created this package. - #[serde(skip_serializing_if = "Option::is_none")] - authors: Option>, - - /// Package description in markdown format. - #[serde(skip_serializing_if = "Option::is_none")] - description: Option, - - /// SPDX License Expression - /// - /// SPDX License List: - #[serde(skip_serializing_if = "Option::is_none")] - license: Option, - - /// A list of custom licenses that should be referenced to from the license expression. - /// - #[serde(skip_serializing_if = "Option::is_none")] - custom_licenses: Option>, - - /// A list of links that can contain predefined link types or custom links for use with tooling or registries. 
- #[serde(skip_serializing_if = "Option::is_none")] - links: Option>, - - /// A list of categories that a package should be listed under when uploaded to a registry. - #[serde(skip_serializing_if = "Option::is_none")] - categories: Option>, -} - -const LICENSE_REF: &str = "LicenseRef-"; - -impl RegistryMetadata { - /// Merge into an existing wasm module. Rewrites the module with this registry-metadata section - /// overwriting its existing one, or adds this registry-metadata section if none is present. - pub fn add_to_wasm(&self, input: &[u8]) -> Result> { - rewrite_wasm(&None, &Producers::empty(), Some(&self), input) - } - - pub fn from_wasm(bytes: &[u8]) -> Result> { - let mut depth = 0; - for payload in Parser::new(0).parse_all(bytes) { - let payload = payload?; - use wasmparser::Payload::*; - match payload { - ModuleSection { .. } | ComponentSection { .. } => depth += 1, - End { .. } => depth -= 1, - CustomSection(c) if c.name() == "registry-metadata" && depth == 0 => { - let registry = RegistryMetadata::from_bytes(&c.data(), 0)?; - return Ok(Some(registry)); - } - _ => {} - } - } - Ok(None) - } - - /// Gets the registry-matadata from a slice of bytes - pub fn from_bytes(bytes: &[u8], offset: usize) -> Result { - let registry: RegistryMetadata = serde_json::from_slice(&bytes[offset..])?; - return Ok(registry); - } - - pub fn validate(&self) -> Result<()> { - fn validate_expression(expression: &str) -> Result> { - let expression = Expression::parse(expression)?; - - let mut licenses = Vec::new(); - - for license in expression.iter() { - match license { - spdx::expression::ExprNode::Op(_) => continue, - spdx::expression::ExprNode::Req(req) => { - if let spdx::LicenseItem::Spdx { .. 
} = req.req.license { - // Continue if it's a license that exists on the Spdx license list - continue; - } - - let license_id = req.req.to_string(); - - // Strip "LicenseRef-", convert to lowercase and then append - if let Some(id) = license_id.strip_prefix(LICENSE_REF) { - licenses.push(id.to_lowercase()); - } - } - } - } - - Ok(licenses) - } - - match (&self.license, &self.custom_licenses) { - (None, Some(custom_licenses)) => { - let ids = custom_licenses - .iter() - .map(|license| license.id.clone()) - .collect::>() - .join(", "); - - return Err(anyhow::anyhow!( - "{ids} are defined but nevered referenced in license expression" - )); - } - (Some(license), Some(custom_licenses)) => { - let licenses = validate_expression(license.as_str())?; - - if !licenses.is_empty() { - for license in &licenses { - let mut match_found = false; - for custom_license in custom_licenses { - // Ignore license id casing - if custom_license.id.to_lowercase() == *license { - match_found = true; - } - } - - if !match_found { - return Err(anyhow::anyhow!( - "No matching reference for license '{license}' was defined" - )); - } - } - } - } - (Some(license), None) => { - let licenses = validate_expression(license.as_str())?; - - if !licenses.is_empty() { - return Err(anyhow::anyhow!( - "Reference to custom license exists but no custom license was given" - )); - } - } - (None, None) => {} - } - - Ok(()) - } - - /// Get authors - pub fn get_authors(&self) -> Option<&Vec> { - self.authors.as_ref() - } - - /// Set authors - pub fn set_authors(&mut self, authors: Option>) { - self.authors = authors; - } - - /// Get description - pub fn get_description(&self) -> Option<&String> { - self.description.as_ref() - } - - /// Set description - pub fn set_description(&mut self, description: Option) { - self.description = description; - } - - /// Get license - pub fn get_license(&self) -> Option<&String> { - self.license.as_ref() - } - - /// Set license - pub fn set_license(&mut self, license: Option) { - 
self.license = license; - } - - /// Get custom_licenses - pub fn get_custom_licenses(&self) -> Option<&Vec> { - self.custom_licenses.as_ref() - } - - /// Set custom_licenses - pub fn set_custom_licenses(&mut self, custom_licenses: Option>) { - self.custom_licenses = custom_licenses; - } - - /// Get links - pub fn get_links(&self) -> Option<&Vec> { - self.links.as_ref() - } - - /// Set links - pub fn set_links(&mut self, links: Option>) { - self.links = links; - } - - /// Get categories - pub fn get_categories(&self) -> Option<&Vec> { - self.categories.as_ref() - } - - /// Set categories - pub fn set_categories(&mut self, categories: Option>) { - self.categories = categories; - } - - fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { - let spaces = std::iter::repeat(" ").take(indent).collect::(); - - if let Some(authors) = &self.authors { - writeln!(f, "{spaces}authors:")?; - for author in authors { - writeln!(f, "{spaces} {author}")?; - } - } - - if let Some(license) = &self.license { - writeln!(f, "{spaces}license:")?; - writeln!(f, "{spaces} {license}")?; - } - - if let Some(links) = &self.links { - writeln!(f, "{spaces}links:")?; - for link in links { - writeln!(f, "{spaces} {link}")?; - } - } - - if let Some(categories) = &self.categories { - writeln!(f, "{spaces}categories:")?; - for category in categories { - writeln!(f, "{spaces} {category}")?; - } - } - - if let Some(description) = &self.description { - writeln!(f, "{spaces}description:")?; - writeln!(f, "{spaces} {description}")?; - } - - if let Some(custom_licenses) = &self.custom_licenses { - writeln!(f, "{spaces}custom_licenses:")?; - for license in custom_licenses { - license.display(f, indent + 4)?; - } - } - - Ok(()) - } -} - -impl Display for RegistryMetadata { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.display(f, 0) - } -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct Link { - pub ty: LinkType, - pub value: String, -} - 
-impl Display for Link { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}: {}", self.ty, self.value) - } -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub enum LinkType { - Documentation, - Homepage, - Repository, - Funding, - #[serde(untagged)] - Custom(String), -} - -impl Display for LinkType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - LinkType::Documentation => "Documentation", - LinkType::Homepage => "Homepage", - LinkType::Repository => "Repository", - LinkType::Funding => "Funding", - LinkType::Custom(s) => s.as_str(), - }; - - write!(f, "{s}") - } -} - -#[derive(Debug, Deserialize, Serialize, Default, Clone, PartialEq)] -pub struct CustomLicense { - /// License Identifier - /// Provides a locally unique identifier to refer to licenses that are not found on the SPDX License List. - /// - pub id: String, - - /// License Name - /// Provide a common name of the license that is not on the SPDX list. - /// - pub name: String, - - /// Extracted Text - /// Provides a copy of the actual text of the license reference extracted from the package or file that is associated with the License Identifier to aid in future analysis. - /// - pub text: String, - - /// License Cross Reference - /// Provides a pointer to the official source of a license that is not included in the SPDX License List, that is referenced by the License Identifier. 
- /// - #[serde(skip_serializing_if = "Option::is_none")] - pub reference: Option, -} - -impl CustomLicense { - fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { - let spaces = std::iter::repeat(" ").take(indent).collect::(); - - writeln!(f, "{spaces}{}:", self.id)?; - writeln!(f, "{spaces} name: {}", self.name)?; - - if let Some(reference) = &self.reference { - writeln!(f, "{spaces} reference: {reference}")?; - } - - writeln!(f, "{spaces} text:")?; - writeln!(f, "{spaces} {}", self.text)?; - - Ok(()) - } -} - -impl Display for CustomLicense { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.display(f, 0) - } -} - -#[cfg(test)] -mod test { - use std::vec; - - use super::*; - #[test] - fn add_to_empty_module() { - let wat = "(module)"; - let module = wat::parse_str(wat).unwrap(); - let add = AddMetadata { - name: Some("foo".to_owned()), - language: vec!["bar".to_owned()], - processed_by: vec![("baz".to_owned(), "1.0".to_owned())], - sdk: vec![], - registry_metadata: Some(RegistryMetadata { - authors: Some(vec!["foo".to_owned()]), - description: Some("foo bar baz".to_owned()), - license: Some("MIT OR LicenseRef-FOO".to_owned()), - custom_licenses: Some(vec![CustomLicense { - id: "FOO".to_owned(), - name: "Foo".to_owned(), - text: "Foo License".to_owned(), - reference: Some("https://exaple.com/license/foo".to_owned()), - }]), - links: Some(vec![ - Link { - ty: LinkType::Custom("CustomFoo".to_owned()), - value: "https://example.com/custom".to_owned(), - }, - Link { - ty: LinkType::Homepage, - value: "https://example.com".to_owned(), - }, - ]), - categories: Some(vec!["Tools".to_owned()]), - }), - }; - let module = add.to_wasm(&module).unwrap(); - - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { - name, - producers, - registry_metadata, - range, - } => { - assert_eq!(name, Some("foo".to_owned())); - let producers = producers.expect("some producers"); - 
assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); - assert_eq!( - producers.get("processed-by").unwrap().get("baz").unwrap(), - "1.0" - ); - - let registry_metadata = registry_metadata.unwrap(); - - assert!(registry_metadata.validate().is_ok()); - - assert_eq!(registry_metadata.authors.unwrap(), vec!["foo".to_owned()]); - assert_eq!( - registry_metadata.description.unwrap(), - "foo bar baz".to_owned() - ); - - assert_eq!( - registry_metadata.license.unwrap(), - "MIT OR LicenseRef-FOO".to_owned() - ); - assert_eq!( - registry_metadata.custom_licenses.unwrap(), - vec![CustomLicense { - id: "FOO".to_owned(), - name: "Foo".to_owned(), - text: "Foo License".to_owned(), - reference: Some("https://exaple.com/license/foo".to_owned()), - }] - ); - assert_eq!( - registry_metadata.links.unwrap(), - vec![ - Link { - ty: LinkType::Custom("CustomFoo".to_owned()), - value: "https://example.com/custom".to_owned(), - }, - Link { - ty: LinkType::Homepage, - value: "https://example.com".to_owned(), - }, - ] - ); - assert_eq!( - registry_metadata.categories.unwrap(), - vec!["Tools".to_owned()] - ); - - assert_eq!(range.start, 0); - assert_eq!(range.end, 422); - } - _ => panic!("metadata should be module"), - } - } - - #[test] - fn add_to_empty_component() { - let wat = "(component)"; - let component = wat::parse_str(wat).unwrap(); - let add = AddMetadata { - name: Some("foo".to_owned()), - language: vec!["bar".to_owned()], - processed_by: vec![("baz".to_owned(), "1.0".to_owned())], - sdk: vec![], - registry_metadata: Some(RegistryMetadata { - authors: Some(vec!["foo".to_owned()]), - description: Some("foo bar baz".to_owned()), - license: Some("MIT OR LicenseRef-FOO".to_owned()), - custom_licenses: Some(vec![CustomLicense { - id: "FOO".to_owned(), - name: "Foo".to_owned(), - text: "Foo License".to_owned(), - reference: Some("https://exaple.com/license/foo".to_owned()), - }]), - links: Some(vec![ - Link { - ty: LinkType::Custom("CustomFoo".to_owned()), - value: 
"https://example.com/custom".to_owned(), - }, - Link { - ty: LinkType::Homepage, - value: "https://example.com".to_owned(), - }, - ]), - categories: Some(vec!["Tools".to_owned()]), - }), - }; - let component = add.to_wasm(&component).unwrap(); - - let metadata = Metadata::from_binary(&component).unwrap(); - match metadata { - Metadata::Component { - name, - producers, - registry_metadata, - children, - range, - } => { - assert!(children.is_empty()); - assert_eq!(name, Some("foo".to_owned())); - let producers = producers.expect("some producers"); - assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); - assert_eq!( - producers.get("processed-by").unwrap().get("baz").unwrap(), - "1.0" - ); - - let registry_metadata = registry_metadata.unwrap(); - - assert!(registry_metadata.validate().is_ok()); - - assert_eq!(registry_metadata.authors.unwrap(), vec!["foo".to_owned()]); - assert_eq!( - registry_metadata.description.unwrap(), - "foo bar baz".to_owned() - ); - - assert_eq!( - registry_metadata.license.unwrap(), - "MIT OR LicenseRef-FOO".to_owned() - ); - assert_eq!( - registry_metadata.custom_licenses.unwrap(), - vec![CustomLicense { - id: "FOO".to_owned(), - name: "Foo".to_owned(), - text: "Foo License".to_owned(), - reference: Some("https://exaple.com/license/foo".to_owned()), - }] - ); - assert_eq!( - registry_metadata.links.unwrap(), - vec![ - Link { - ty: LinkType::Custom("CustomFoo".to_owned()), - value: "https://example.com/custom".to_owned(), - }, - Link { - ty: LinkType::Homepage, - value: "https://example.com".to_owned(), - }, - ] - ); - assert_eq!( - registry_metadata.categories.unwrap(), - vec!["Tools".to_owned()] - ); - - assert_eq!(range.start, 0); - assert_eq!(range.end, 432); - } - _ => panic!("metadata should be component"), - } - } - - #[test] - fn add_to_nested_component() { - // Create the same old module, stick some metadata into it - let wat = "(module)"; - let module = wat::parse_str(wat).unwrap(); - let add = AddMetadata { - 
name: Some("foo".to_owned()), - language: vec!["bar".to_owned()], - processed_by: vec![("baz".to_owned(), "1.0".to_owned())], - sdk: vec![], - registry_metadata: Some(RegistryMetadata { - authors: Some(vec!["Foo".to_owned()]), - ..Default::default() - }), - }; - let module = add.to_wasm(&module).unwrap(); - - // Stick that module inside a component. - let mut component = wasm_encoder::Component::new(); - component.section(&wasm_encoder::RawSection { - id: wasm_encoder::ComponentSectionId::CoreModule.into(), - data: &module, - }); - let component = component.finish(); - - // Add some different metadata to the component. - let add = AddMetadata { - name: Some("gussie".to_owned()), - sdk: vec![("willa".to_owned(), "sparky".to_owned())], - ..Default::default() - }; - let component = add.to_wasm(&component).unwrap(); - - let metadata = Metadata::from_binary(&component).unwrap(); - match metadata { - Metadata::Component { - name, - producers, - children, - .. - } => { - // Check that the component metadata is in the component - assert_eq!(name, Some("gussie".to_owned())); - let producers = producers.as_ref().expect("some producers"); - assert_eq!( - producers.get("sdk").unwrap().get("willa").unwrap(), - &"sparky".to_owned() - ); - // Check that there is a single child with the metadata set for the module - assert_eq!(children.len(), 1); - let child = children.get(0).unwrap(); - match &**child { - Metadata::Module { - name, - producers, - registry_metadata, - range, - } => { - assert_eq!(name, &Some("foo".to_owned())); - let producers = producers.as_ref().expect("some producers"); - assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); - assert_eq!( - producers.get("processed-by").unwrap().get("baz").unwrap(), - "1.0" - ); - - let registry_metadata = registry_metadata.as_ref().unwrap(); - assert_eq!( - registry_metadata.authors.as_ref().unwrap(), - &["Foo".to_owned()] - ); - - assert_eq!(range.start, 10); - assert_eq!(range.end, 120); - } - _ => 
panic!("child is a module"), - } - } - _ => panic!("root should be component"), - } - } - - #[test] - fn producers_empty_module() { - let wat = "(module)"; - let module = wat::parse_str(wat).unwrap(); - let mut producers = Producers::empty(); - producers.add("language", "bar", ""); - producers.add("processed-by", "baz", "1.0"); - - let module = producers.add_to_wasm(&module).unwrap(); - - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { - name, producers, .. - } => { - assert_eq!(name, None); - let producers = producers.expect("some producers"); - assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); - assert_eq!( - producers.get("processed-by").unwrap().get("baz").unwrap(), - "1.0" - ); - } - _ => panic!("metadata should be module"), - } - } - - #[test] - fn producers_add_another_field() { - let wat = "(module)"; - let module = wat::parse_str(wat).unwrap(); - let mut producers = Producers::empty(); - producers.add("language", "bar", ""); - producers.add("processed-by", "baz", "1.0"); - let module = producers.add_to_wasm(&module).unwrap(); - - let mut producers = Producers::empty(); - producers.add("language", "waaat", ""); - let module = producers.add_to_wasm(&module).unwrap(); - - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { - name, producers, .. 
- } => { - assert_eq!(name, None); - let producers = producers.expect("some producers"); - assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), ""); - assert_eq!(producers.get("language").unwrap().get("waaat").unwrap(), ""); - assert_eq!( - producers.get("processed-by").unwrap().get("baz").unwrap(), - "1.0" - ); - } - _ => panic!("metadata should be module"), - } - } - - #[test] - fn producers_overwrite_field() { - let wat = "(module)"; - let module = wat::parse_str(wat).unwrap(); - let mut producers = Producers::empty(); - producers.add("processed-by", "baz", "1.0"); - let module = producers.add_to_wasm(&module).unwrap(); - - let mut producers = Producers::empty(); - producers.add("processed-by", "baz", "420"); - let module = producers.add_to_wasm(&module).unwrap(); - - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { producers, .. } => { - let producers = producers.expect("some producers"); - assert_eq!( - producers.get("processed-by").unwrap().get("baz").unwrap(), - "420" - ); - } - _ => panic!("metadata should be module"), - } - } - - #[test] - fn overwrite_registry_metadata() { - let wat = "(module)"; - let module = wat::parse_str(wat).unwrap(); - let registry_metadata = RegistryMetadata { - authors: Some(vec!["Foo".to_owned()]), - ..Default::default() - }; - let module = registry_metadata.add_to_wasm(&module).unwrap(); - - let registry_metadata = RegistryMetadata { - authors: Some(vec!["Bar".to_owned()]), - ..Default::default() - }; - let module = registry_metadata.add_to_wasm(&module).unwrap(); - - let metadata = Metadata::from_binary(&module).unwrap(); - match metadata { - Metadata::Module { - registry_metadata, .. 
- } => { - let registry_metadata = registry_metadata.expect("some registry_metadata"); - assert_eq!(registry_metadata.authors.unwrap(), vec!["Bar".to_owned()]); - } - _ => panic!("metadata should be module"), - } - } -} +pub(crate) mod utils; diff --git a/crates/wasm-metadata/src/metadata.rs b/crates/wasm-metadata/src/metadata.rs new file mode 100644 index 0000000000..17845ac170 --- /dev/null +++ b/crates/wasm-metadata/src/metadata.rs @@ -0,0 +1,219 @@ +use anyhow::Result; +use serde_derive::Serialize; +use std::fmt; +use std::ops::Range; +use wasmparser::{KnownCustom, Parser, Payload::*}; + +use crate::{ComponentNames, ModuleNames, Producers, RegistryMetadata}; + +/// A tree of the metadata found in a WebAssembly binary. +#[derive(Debug, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum Metadata { + /// Metadata found inside a WebAssembly component. + Component { + /// The component name, if any. Found in the component-name section. + name: Option, + /// The component's producers section, if any. + producers: Option, + /// The component's registry metadata section, if any. + registry_metadata: Option, + /// All child modules and components inside the component. + children: Vec>, + /// Byte range of the module in the parent binary + range: Range, + }, + /// Metadata found inside a WebAssembly module. + Module { + /// The module name, if any. Found in the name section. + name: Option, + /// The module's producers section, if any. + producers: Option, + /// The module's registry metadata section, if any. + registry_metadata: Option, + /// Byte range of the module in the parent binary + range: Range, + }, +} + +impl Metadata { + /// Parse metadata from a WebAssembly binary. Supports both core WebAssembly modules, and + /// WebAssembly components. + pub fn from_binary(input: &[u8]) -> Result { + let mut metadata = Vec::new(); + + for payload in Parser::new(0).parse_all(&input) { + match payload? { + Version { encoding, .. 
} => { + if metadata.is_empty() { + match encoding { + wasmparser::Encoding::Module => { + metadata.push(Metadata::empty_module(0..input.len())) + } + wasmparser::Encoding::Component => { + metadata.push(Metadata::empty_component(0..input.len())) + } + } + } + } + ModuleSection { + unchecked_range: range, + .. + } => metadata.push(Metadata::empty_module(range)), + ComponentSection { + unchecked_range: range, + .. + } => metadata.push(Metadata::empty_component(range)), + End { .. } => { + let finished = metadata.pop().expect("non-empty metadata stack"); + if metadata.is_empty() { + return Ok(finished); + } else { + metadata.last_mut().unwrap().push_child(finished); + } + } + CustomSection(c) => match c.as_known() { + KnownCustom::Name(_) => { + let names = ModuleNames::from_bytes(c.data(), c.data_offset())?; + if let Some(name) = names.get_name() { + metadata + .last_mut() + .expect("non-empty metadata stack") + .set_name(&name); + } + } + KnownCustom::ComponentName(_) => { + let names = ComponentNames::from_bytes(c.data(), c.data_offset())?; + if let Some(name) = names.get_name() { + metadata + .last_mut() + .expect("non-empty metadata stack") + .set_name(name); + } + } + KnownCustom::Producers(_) => { + let producers = Producers::from_bytes(c.data(), c.data_offset())?; + metadata + .last_mut() + .expect("non-empty metadata stack") + .set_producers(producers); + } + KnownCustom::Unknown if c.name() == "registry-metadata" => { + let registry: RegistryMetadata = + RegistryMetadata::from_bytes(&c.data(), 0)?; + metadata + .last_mut() + .expect("non-empty metadata stack") + .set_registry_metadata(registry); + } + _ => {} + }, + _ => {} + } + } + Err(anyhow::anyhow!( + "malformed wasm binary, should have reached end" + )) + } + + fn empty_component(range: Range) -> Self { + Metadata::Component { + name: None, + producers: None, + registry_metadata: None, + children: Vec::new(), + range, + } + } + + fn empty_module(range: Range) -> Self { + Metadata::Module { + name: 
None, + producers: None, + registry_metadata: None, + range, + } + } + fn set_name(&mut self, n: &str) { + match self { + Metadata::Module { name, .. } => *name = Some(n.to_owned()), + Metadata::Component { name, .. } => *name = Some(n.to_owned()), + } + } + fn set_producers(&mut self, p: Producers) { + match self { + Metadata::Module { producers, .. } => *producers = Some(p), + Metadata::Component { producers, .. } => *producers = Some(p), + } + } + fn set_registry_metadata(&mut self, r: RegistryMetadata) { + match self { + Metadata::Module { + registry_metadata, .. + } => *registry_metadata = Some(r), + Metadata::Component { + registry_metadata, .. + } => *registry_metadata = Some(r), + } + } + fn push_child(&mut self, child: Self) { + match self { + Metadata::Module { .. } => panic!("module shouldnt have children"), + Metadata::Component { children, .. } => children.push(Box::new(child)), + } + } + + fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { + let spaces = std::iter::repeat(" ").take(indent).collect::(); + match self { + Metadata::Module { + name, + producers, + registry_metadata, + .. + } => { + if let Some(name) = name { + writeln!(f, "{spaces}module {name}:")?; + } else { + writeln!(f, "{spaces}module:")?; + } + if let Some(producers) = producers { + producers.display(f, indent + 4)?; + } + if let Some(registry_metadata) = registry_metadata { + registry_metadata.display(f, indent + 4)?; + } + Ok(()) + } + Metadata::Component { + name, + producers, + registry_metadata, + children, + .. 
+ } => { + if let Some(name) = name { + writeln!(f, "{spaces}component {name}:")?; + } else { + writeln!(f, "{spaces}component:")?; + } + if let Some(producers) = producers { + producers.display(f, indent + 4)?; + } + if let Some(registry_metadata) = registry_metadata { + registry_metadata.display(f, indent + 4)?; + } + for c in children { + c.display(f, indent + 4)?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for Metadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.display(f, 0) + } +} diff --git a/crates/wasm-metadata/src/names/component.rs b/crates/wasm-metadata/src/names/component.rs new file mode 100644 index 0000000000..38b31e7ab0 --- /dev/null +++ b/crates/wasm-metadata/src/names/component.rs @@ -0,0 +1,96 @@ +use anyhow::Result; +use wasm_encoder::Encode; +use wasmparser::{BinaryReader, ComponentNameSectionReader}; + +use crate::utils::name_map; + +/// Helper for rewriting a component's component-name section with a new component name. +pub struct ComponentNames<'a> { + component_name: Option, + names: Vec>, +} + +impl<'a> ComponentNames<'a> { + /// Create an empty component-name section. + pub fn empty() -> Self { + ComponentNames { + component_name: None, + names: Vec::new(), + } + } + /// Read a component-name section from a WebAssembly binary. Records the component name, as + /// well as all other component name fields for later serialization. + pub fn from_bytes(bytes: &'a [u8], offset: usize) -> Result> { + let reader = BinaryReader::new(bytes, offset); + let section = ComponentNameSectionReader::new(reader); + let mut s = Self::empty(); + for name in section.into_iter() { + let name = name?; + match name { + wasmparser::ComponentName::Component { name, .. 
} => { + s.component_name = Some(name.to_owned()) + } + _ => s.names.push(name), + } + } + Ok(s) + } + /// Set component name according to [`AddMetadata`] + pub(crate) fn from_name(name: &Option) -> Self { + let mut s = Self::empty(); + s.component_name = name.clone(); + s + } + + /// Merge with another section + pub(crate) fn merge(&mut self, other: &Self) { + if other.component_name.is_some() { + self.component_name = other.component_name.clone(); + } + self.names.extend_from_slice(&other.names); + } + + /// Set component name + pub fn set_name(&mut self, name: &str) { + self.component_name = Some(name.to_owned()) + } + /// Get component name + pub fn get_name(&self) -> Option<&String> { + self.component_name.as_ref() + } + /// Serialize into [`wasm_encoder::ComponentNameSection`] + pub(crate) fn section(&self) -> Result { + let mut section = wasm_encoder::ComponentNameSection::new(); + if let Some(component_name) = &self.component_name { + section.component(&component_name); + } + for n in self.names.iter() { + match n { + wasmparser::ComponentName::Component { .. } => unreachable!(), + wasmparser::ComponentName::CoreFuncs(m) => section.core_funcs(&name_map(&m)?), + wasmparser::ComponentName::CoreGlobals(m) => section.core_globals(&name_map(&m)?), + wasmparser::ComponentName::CoreMemories(m) => section.core_memories(&name_map(&m)?), + wasmparser::ComponentName::CoreTables(m) => section.core_tables(&name_map(&m)?), + wasmparser::ComponentName::CoreModules(m) => section.core_modules(&name_map(&m)?), + wasmparser::ComponentName::CoreInstances(m) => { + section.core_instances(&name_map(&m)?) 
+ } + wasmparser::ComponentName::CoreTypes(m) => section.core_types(&name_map(&m)?), + wasmparser::ComponentName::Types(m) => section.types(&name_map(&m)?), + wasmparser::ComponentName::Instances(m) => section.instances(&name_map(&m)?), + wasmparser::ComponentName::Components(m) => section.components(&name_map(&m)?), + wasmparser::ComponentName::Funcs(m) => section.funcs(&name_map(&m)?), + wasmparser::ComponentName::Values(m) => section.values(&name_map(&m)?), + wasmparser::ComponentName::Unknown { .. } => {} // wasm-encoder doesn't support it + } + } + Ok(section) + } + + /// Serialize into the raw bytes of a wasm custom section. + pub fn raw_custom_section(&self) -> Result> { + let mut ret = Vec::new(); + self.section()?.encode(&mut ret); + Ok(ret) + } +} diff --git a/crates/wasm-metadata/src/names/mod.rs b/crates/wasm-metadata/src/names/mod.rs new file mode 100644 index 0000000000..1545dac32a --- /dev/null +++ b/crates/wasm-metadata/src/names/mod.rs @@ -0,0 +1,5 @@ +mod component; +mod module; + +pub use component::ComponentNames; +pub use module::ModuleNames; diff --git a/crates/wasm-metadata/src/names/module.rs b/crates/wasm-metadata/src/names/module.rs new file mode 100644 index 0000000000..43e7cc4410 --- /dev/null +++ b/crates/wasm-metadata/src/names/module.rs @@ -0,0 +1,91 @@ +use anyhow::Result; +use wasm_encoder::Encode; +use wasmparser::{BinaryReader, NameSectionReader}; + +use crate::utils::{indirect_name_map, name_map}; + +/// Helper for rewriting a module's name section with a new module name. +pub struct ModuleNames<'a> { + module_name: Option, + names: Vec>, +} + +impl<'a> ModuleNames<'a> { + /// Create an empty name section. + pub fn empty() -> Self { + ModuleNames { + module_name: None, + names: Vec::new(), + } + } + /// Read a name section from a WebAssembly binary. Records the module name, and all other + /// contents of name section, for later serialization. 
+ pub fn from_bytes(bytes: &'a [u8], offset: usize) -> Result> { + let reader = BinaryReader::new(bytes, offset); + let section = NameSectionReader::new(reader); + let mut s = Self::empty(); + for name in section.into_iter() { + let name = name?; + match name { + wasmparser::Name::Module { name, .. } => s.module_name = Some(name.to_owned()), + _ => s.names.push(name), + } + } + Ok(s) + } + /// Update module section according to [`AddMetadata`] + pub(crate) fn from_name(name: &Option) -> Self { + let mut s = Self::empty(); + s.module_name = name.clone(); + s + } + + /// Merge with another section + pub(crate) fn merge(&mut self, other: &Self) { + if other.module_name.is_some() { + self.module_name = other.module_name.clone(); + } + self.names.extend_from_slice(&other.names); + } + + /// Set module name + pub fn set_name(&mut self, name: &str) { + self.module_name = Some(name.to_owned()) + } + /// Get module name + pub fn get_name(&self) -> Option<&String> { + self.module_name.as_ref() + } + /// Serialize into [`wasm_encoder::NameSection`]. + pub(crate) fn section(&self) -> Result { + let mut section = wasm_encoder::NameSection::new(); + if let Some(module_name) = &self.module_name { + section.module(&module_name); + } + for n in self.names.iter() { + match n { + wasmparser::Name::Module { .. 
} => unreachable!(), + wasmparser::Name::Function(m) => section.functions(&name_map(&m)?), + wasmparser::Name::Local(m) => section.locals(&indirect_name_map(&m)?), + wasmparser::Name::Label(m) => section.labels(&indirect_name_map(&m)?), + wasmparser::Name::Type(m) => section.types(&name_map(&m)?), + wasmparser::Name::Table(m) => section.tables(&name_map(&m)?), + wasmparser::Name::Memory(m) => section.memories(&name_map(&m)?), + wasmparser::Name::Global(m) => section.globals(&name_map(&m)?), + wasmparser::Name::Element(m) => section.elements(&name_map(&m)?), + wasmparser::Name::Data(m) => section.data(&name_map(&m)?), + wasmparser::Name::Field(m) => section.fields(&indirect_name_map(&m)?), + wasmparser::Name::Tag(m) => section.tags(&name_map(&m)?), + wasmparser::Name::Unknown { .. } => {} // wasm-encoder doesn't support it + } + } + Ok(section) + } + + /// Serialize into the raw bytes of a wasm custom section. + pub fn raw_custom_section(&self) -> Result> { + let mut ret = Vec::new(); + self.section()?.encode(&mut ret); + Ok(ret) + } +} diff --git a/crates/wasm-metadata/src/producers.rs b/crates/wasm-metadata/src/producers.rs new file mode 100644 index 0000000000..5583be4bdf --- /dev/null +++ b/crates/wasm-metadata/src/producers.rs @@ -0,0 +1,277 @@ +use anyhow::Result; +use indexmap::{map::Entry, IndexMap}; +use serde_derive::Serialize; +use std::fmt; +use wasm_encoder::Encode; +use wasmparser::{BinaryReader, KnownCustom, Parser, ProducersSectionReader}; + +use crate::{rewrite_wasm, AddMetadata}; +/// A representation of a WebAssembly producers section. 
+/// +/// Spec: +#[derive(Debug, Serialize)] +pub struct Producers( + #[serde(serialize_with = "indexmap::map::serde_seq::serialize")] + IndexMap>, +); + +impl Default for Producers { + fn default() -> Self { + Self::empty() + } +} + +impl Producers { + /// Creates an empty producers section + pub fn empty() -> Self { + Producers(IndexMap::new()) + } + + /// Indicates if section is empty + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Read the producers section from a Wasm binary. Supports both core + /// Modules and Components. In the component case, only returns the + /// producers section in the outer component, ignoring all interior + /// components and modules. + pub fn from_wasm(bytes: &[u8]) -> Result> { + let mut depth = 0; + for payload in Parser::new(0).parse_all(bytes) { + let payload = payload?; + use wasmparser::Payload::*; + match payload { + ModuleSection { .. } | ComponentSection { .. } => depth += 1, + End { .. } => depth -= 1, + CustomSection(c) if depth == 0 => { + if let KnownCustom::Producers(_) = c.as_known() { + let producers = Self::from_bytes(c.data(), c.data_offset())?; + return Ok(Some(producers)); + } + } + _ => {} + } + } + Ok(None) + } + /// Read the producers section from a Wasm binary. + pub fn from_bytes(bytes: &[u8], offset: usize) -> Result { + let reader = BinaryReader::new(bytes, offset); + let section = ProducersSectionReader::new(reader)?; + let mut fields = IndexMap::new(); + for field in section.into_iter() { + let field = field?; + let mut values = IndexMap::new(); + for value in field.values.into_iter() { + let value = value?; + values.insert(value.name.to_owned(), value.version.to_owned()); + } + fields.insert(field.name.to_owned(), values); + } + Ok(Producers(fields)) + } + /// Add a name & version value to a field. + /// + /// The spec says expected field names are "language", "processed-by", and "sdk". + /// The version value should be left blank for languages. 
+ pub fn add(&mut self, field: &str, name: &str, version: &str) { + match self.0.entry(field.to_string()) { + Entry::Occupied(e) => { + e.into_mut().insert(name.to_owned(), version.to_owned()); + } + Entry::Vacant(e) => { + let mut m = IndexMap::new(); + m.insert(name.to_owned(), version.to_owned()); + e.insert(m); + } + } + } + + /// Add all values found in another `Producers` section. Values in `other` take + /// precedence. + pub fn merge(&mut self, other: &Self) { + for (field, values) in other.iter() { + for (name, version) in values.iter() { + self.add(field, name, version); + } + } + } + + /// Get the contents of a field + pub fn get<'a>(&'a self, field: &str) -> Option> { + self.0.get(&field.to_owned()).map(ProducersField) + } + + /// Iterate through all fields + pub fn iter<'a>(&'a self) -> impl Iterator)> + 'a { + self.0 + .iter() + .map(|(name, field)| (name, ProducersField(field))) + } + + /// Construct the fields specified by [`AddMetadata`] + pub(crate) fn from_meta(add: &AddMetadata) -> Self { + let mut s = Self::empty(); + for lang in add.language.iter() { + s.add("language", &lang, ""); + } + for (name, version) in add.processed_by.iter() { + s.add("processed-by", &name, &version); + } + for (name, version) in add.sdk.iter() { + s.add("sdk", &name, &version); + } + s + } + + /// Serialize into [`wasm_encoder::ProducersSection`]. + pub(crate) fn section(&self) -> wasm_encoder::ProducersSection { + let mut section = wasm_encoder::ProducersSection::new(); + for (fieldname, fieldvalues) in self.0.iter() { + let mut field = wasm_encoder::ProducersField::new(); + for (name, version) in fieldvalues { + field.value(&name, &version); + } + section.field(&fieldname, &field); + } + section + } + + /// Serialize into the raw bytes of a wasm custom section. + pub fn raw_custom_section(&self) -> Vec { + let mut ret = Vec::new(); + self.section().encode(&mut ret); + ret + } + + /// Merge into an existing wasm module. 
Rewrites the module with this producers section + /// merged into its existing one, or adds this producers section if none is present. + pub fn add_to_wasm(&self, input: &[u8]) -> Result> { + rewrite_wasm(&None, self, None, input) + } + + pub(crate) fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { + let indent = std::iter::repeat(" ").take(indent).collect::(); + for (fieldname, fieldvalues) in self.0.iter() { + writeln!(f, "{indent}{fieldname}:")?; + for (name, version) in fieldvalues { + if version.is_empty() { + writeln!(f, "{indent} {name}")?; + } else { + writeln!(f, "{indent} {name}: {version}")?; + } + } + } + Ok(()) + } +} + +impl fmt::Display for Producers { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.display(f, 0) + } +} + +/// Contents of a producers field +pub struct ProducersField<'a>(&'a IndexMap); + +impl<'a> ProducersField<'a> { + /// Get the version associated with a name in the field + pub fn get(&self, name: &str) -> Option<&'a String> { + self.0.get(&name.to_owned()) + } + /// Iterate through all name-version pairs in the field + pub fn iter(&self) -> impl Iterator + 'a { + self.0.iter() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Metadata; + + #[test] + fn producers_empty_module() { + let wat = "(module)"; + let module = wat::parse_str(wat).unwrap(); + let mut producers = Producers::empty(); + producers.add("language", "bar", ""); + producers.add("processed-by", "baz", "1.0"); + + let module = producers.add_to_wasm(&module).unwrap(); + + let metadata = Metadata::from_binary(&module).unwrap(); + match metadata { + Metadata::Module { + name, producers, .. 
#[cfg(test)]
mod test {
    use super::*;
    use crate::Metadata;

    /// Binary encoding of an empty core module.
    fn empty_module() -> Vec<u8> {
        wat::parse_str("(module)").unwrap()
    }

    /// Parse `wasm` and return the module name together with its producers
    /// section, panicking if it is not a core module or has no producers.
    fn module_name_and_producers(wasm: &[u8]) -> (Option<String>, Producers) {
        match Metadata::from_binary(wasm).unwrap() {
            Metadata::Module {
                name, producers, ..
            } => (name, producers.expect("some producers")),
            _ => panic!("metadata should be module"),
        }
    }

    #[test]
    fn producers_empty_module() {
        let mut added = Producers::empty();
        added.add("language", "bar", "");
        added.add("processed-by", "baz", "1.0");
        let module = added.add_to_wasm(&empty_module()).unwrap();

        let (name, producers) = module_name_and_producers(&module);
        assert_eq!(name, None);
        assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), "");
        assert_eq!(
            producers.get("processed-by").unwrap().get("baz").unwrap(),
            "1.0"
        );
    }

    #[test]
    fn producers_add_another_field() {
        let mut first = Producers::empty();
        first.add("language", "bar", "");
        first.add("processed-by", "baz", "1.0");
        let module = first.add_to_wasm(&empty_module()).unwrap();

        // A second pass must extend the existing section, not replace it.
        let mut second = Producers::empty();
        second.add("language", "waaat", "");
        let module = second.add_to_wasm(&module).unwrap();

        let (name, producers) = module_name_and_producers(&module);
        assert_eq!(name, None);
        assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), "");
        assert_eq!(producers.get("language").unwrap().get("waaat").unwrap(), "");
        assert_eq!(
            producers.get("processed-by").unwrap().get("baz").unwrap(),
            "1.0"
        );
    }

    #[test]
    fn producers_overwrite_field() {
        let mut first = Producers::empty();
        first.add("processed-by", "baz", "1.0");
        let module = first.add_to_wasm(&empty_module()).unwrap();

        // Re-adding the same name must replace its version.
        let mut second = Producers::empty();
        second.add("processed-by", "baz", "420");
        let module = second.add_to_wasm(&module).unwrap();

        let (_, producers) = module_name_and_producers(&module);
        assert_eq!(
            producers.get("processed-by").unwrap().get("baz").unwrap(),
            "420"
        );
    }
}
} => { + let producers = producers.expect("some producers"); + assert_eq!( + producers.get("processed-by").unwrap().get("baz").unwrap(), + "420" + ); + } + _ => panic!("metadata should be module"), + } + } +} diff --git a/crates/wasm-metadata/src/registry.rs b/crates/wasm-metadata/src/registry.rs new file mode 100644 index 0000000000..4c0b54a4e1 --- /dev/null +++ b/crates/wasm-metadata/src/registry.rs @@ -0,0 +1,376 @@ +use anyhow::Result; +use serde_derive::{Deserialize, Serialize}; +use spdx::Expression; +use std::fmt; +use std::fmt::Display; +use wasmparser::Parser; + +use crate::{rewrite_wasm, Producers}; + +#[derive(Debug, Deserialize, Serialize, Clone, Default, PartialEq)] +pub struct RegistryMetadata { + /// List of authors who has created this package. + #[serde(skip_serializing_if = "Option::is_none")] + pub authors: Option>, + + /// Package description in markdown format. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + + /// SPDX License Expression + /// + /// SPDX License List: + #[serde(skip_serializing_if = "Option::is_none")] + pub license: Option, + + /// A list of custom licenses that should be referenced to from the license expression. + /// + #[serde(skip_serializing_if = "Option::is_none")] + pub custom_licenses: Option>, + + /// A list of links that can contain predefined link types or custom links for use with tooling or registries. + #[serde(skip_serializing_if = "Option::is_none")] + pub links: Option>, + + /// A list of categories that a package should be listed under when uploaded to a registry. + #[serde(skip_serializing_if = "Option::is_none")] + pub categories: Option>, +} + +const LICENSE_REF: &str = "LicenseRef-"; + +impl RegistryMetadata { + /// Merge into an existing wasm module. Rewrites the module with this registry-metadata section + /// overwriting its existing one, or adds this registry-metadata section if none is present. 
+ pub fn add_to_wasm(&self, input: &[u8]) -> Result> { + rewrite_wasm(&None, &Producers::empty(), Some(&self), input) + } + + pub fn from_wasm(bytes: &[u8]) -> Result> { + let mut depth = 0; + for payload in Parser::new(0).parse_all(bytes) { + let payload = payload?; + use wasmparser::Payload::*; + match payload { + ModuleSection { .. } | ComponentSection { .. } => depth += 1, + End { .. } => depth -= 1, + CustomSection(c) if c.name() == "registry-metadata" && depth == 0 => { + let registry = RegistryMetadata::from_bytes(&c.data(), 0)?; + return Ok(Some(registry)); + } + _ => {} + } + } + Ok(None) + } + + /// Gets the registry-matadata from a slice of bytes + pub fn from_bytes(bytes: &[u8], offset: usize) -> Result { + let registry: RegistryMetadata = serde_json::from_slice(&bytes[offset..])?; + return Ok(registry); + } + + pub fn validate(&self) -> Result<()> { + fn validate_expression(expression: &str) -> Result> { + let expression = Expression::parse(expression)?; + + let mut licenses = Vec::new(); + + for license in expression.iter() { + match license { + spdx::expression::ExprNode::Op(_) => continue, + spdx::expression::ExprNode::Req(req) => { + if let spdx::LicenseItem::Spdx { .. 
} = req.req.license { + // Continue if it's a license that exists on the Spdx license list + continue; + } + + let license_id = req.req.to_string(); + + // Strip "LicenseRef-", convert to lowercase and then append + if let Some(id) = license_id.strip_prefix(LICENSE_REF) { + licenses.push(id.to_lowercase()); + } + } + } + } + + Ok(licenses) + } + + match (&self.license, &self.custom_licenses) { + (None, Some(custom_licenses)) => { + let ids = custom_licenses + .iter() + .map(|license| license.id.clone()) + .collect::>() + .join(", "); + + return Err(anyhow::anyhow!( + "{ids} are defined but nevered referenced in license expression" + )); + } + (Some(license), Some(custom_licenses)) => { + let licenses = validate_expression(license.as_str())?; + + if !licenses.is_empty() { + for license in &licenses { + let mut match_found = false; + for custom_license in custom_licenses { + // Ignore license id casing + if custom_license.id.to_lowercase() == *license { + match_found = true; + } + } + + if !match_found { + return Err(anyhow::anyhow!( + "No matching reference for license '{license}' was defined" + )); + } + } + } + } + (Some(license), None) => { + let licenses = validate_expression(license.as_str())?; + + if !licenses.is_empty() { + return Err(anyhow::anyhow!( + "Reference to custom license exists but no custom license was given" + )); + } + } + (None, None) => {} + } + + Ok(()) + } + + /// Get authors + pub fn get_authors(&self) -> Option<&Vec> { + self.authors.as_ref() + } + + /// Set authors + pub fn set_authors(&mut self, authors: Option>) { + self.authors = authors; + } + + /// Get description + pub fn get_description(&self) -> Option<&String> { + self.description.as_ref() + } + + /// Set description + pub fn set_description(&mut self, description: Option) { + self.description = description; + } + + /// Get license + pub fn get_license(&self) -> Option<&String> { + self.license.as_ref() + } + + /// Set license + pub fn set_license(&mut self, license: Option) { + 
self.license = license; + } + + /// Get custom_licenses + pub fn get_custom_licenses(&self) -> Option<&Vec> { + self.custom_licenses.as_ref() + } + + /// Set custom_licenses + pub fn set_custom_licenses(&mut self, custom_licenses: Option>) { + self.custom_licenses = custom_licenses; + } + + /// Get links + pub fn get_links(&self) -> Option<&Vec> { + self.links.as_ref() + } + + /// Set links + pub fn set_links(&mut self, links: Option>) { + self.links = links; + } + + /// Get categories + pub fn get_categories(&self) -> Option<&Vec> { + self.categories.as_ref() + } + + /// Set categories + pub fn set_categories(&mut self, categories: Option>) { + self.categories = categories; + } + + pub(crate) fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { + let spaces = std::iter::repeat(" ").take(indent).collect::(); + + if let Some(authors) = &self.authors { + writeln!(f, "{spaces}authors:")?; + for author in authors { + writeln!(f, "{spaces} {author}")?; + } + } + + if let Some(license) = &self.license { + writeln!(f, "{spaces}license:")?; + writeln!(f, "{spaces} {license}")?; + } + + if let Some(links) = &self.links { + writeln!(f, "{spaces}links:")?; + for link in links { + writeln!(f, "{spaces} {link}")?; + } + } + + if let Some(categories) = &self.categories { + writeln!(f, "{spaces}categories:")?; + for category in categories { + writeln!(f, "{spaces} {category}")?; + } + } + + if let Some(description) = &self.description { + writeln!(f, "{spaces}description:")?; + writeln!(f, "{spaces} {description}")?; + } + + if let Some(custom_licenses) = &self.custom_licenses { + writeln!(f, "{spaces}custom_licenses:")?; + for license in custom_licenses { + license.display(f, indent + 4)?; + } + } + + Ok(()) + } +} + +impl Display for RegistryMetadata { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.display(f, 0) + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct Link { + pub ty: LinkType, + pub value: 
String, +} + +impl Display for Link { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.ty, self.value) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub enum LinkType { + Documentation, + Homepage, + Repository, + Funding, + #[serde(untagged)] + Custom(String), +} + +impl Display for LinkType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + LinkType::Documentation => "Documentation", + LinkType::Homepage => "Homepage", + LinkType::Repository => "Repository", + LinkType::Funding => "Funding", + LinkType::Custom(s) => s.as_str(), + }; + + write!(f, "{s}") + } +} + +#[derive(Debug, Deserialize, Serialize, Default, Clone, PartialEq)] +pub struct CustomLicense { + /// License Identifier + /// Provides a locally unique identifier to refer to licenses that are not found on the SPDX License List. + /// + pub id: String, + + /// License Name + /// Provide a common name of the license that is not on the SPDX list. + /// + pub name: String, + + /// Extracted Text + /// Provides a copy of the actual text of the license reference extracted from the package or file that is associated with the License Identifier to aid in future analysis. + /// + pub text: String, + + /// License Cross Reference + /// Provides a pointer to the official source of a license that is not included in the SPDX License List, that is referenced by the License Identifier. 
+ /// + #[serde(skip_serializing_if = "Option::is_none")] + pub reference: Option, +} + +impl CustomLicense { + fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result { + let spaces = std::iter::repeat(" ").take(indent).collect::(); + + writeln!(f, "{spaces}{}:", self.id)?; + writeln!(f, "{spaces} name: {}", self.name)?; + + if let Some(reference) = &self.reference { + writeln!(f, "{spaces} reference: {reference}")?; + } + + writeln!(f, "{spaces} text:")?; + writeln!(f, "{spaces} {}", self.text)?; + + Ok(()) + } +} + +impl Display for CustomLicense { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.display(f, 0) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Metadata; + + #[test] + fn overwrite_registry_metadata() { + let wat = "(module)"; + let module = wat::parse_str(wat).unwrap(); + let registry_metadata = RegistryMetadata { + authors: Some(vec!["Foo".to_owned()]), + ..Default::default() + }; + let module = registry_metadata.add_to_wasm(&module).unwrap(); + + let registry_metadata = RegistryMetadata { + authors: Some(vec!["Bar".to_owned()]), + ..Default::default() + }; + let module = registry_metadata.add_to_wasm(&module).unwrap(); + + let metadata = Metadata::from_binary(&module).unwrap(); + match metadata { + Metadata::Module { + registry_metadata, .. 
+ } => { + let registry_metadata = registry_metadata.expect("some registry_metadata"); + assert_eq!(registry_metadata.authors.unwrap(), vec!["Bar".to_owned()]); + } + _ => panic!("metadata should be module"), + } + } +} diff --git a/crates/wasm-metadata/src/rewrite.rs b/crates/wasm-metadata/src/rewrite.rs new file mode 100644 index 0000000000..77d351729e --- /dev/null +++ b/crates/wasm-metadata/src/rewrite.rs @@ -0,0 +1,130 @@ +use crate::{ComponentNames, ModuleNames, Producers, RegistryMetadata}; +use anyhow::Result; +use std::borrow::Cow; +use std::mem; +use wasm_encoder::ComponentSection as _; +use wasm_encoder::{ComponentSectionId, Encode, Section}; +use wasmparser::{KnownCustom, Parser, Payload::*}; + +pub(crate) fn rewrite_wasm( + add_name: &Option, + add_producers: &Producers, + add_registry_metadata: Option<&RegistryMetadata>, + input: &[u8], +) -> Result> { + let mut producers_found = false; + let mut names_found = false; + let mut stack = Vec::new(); + let mut output = Vec::new(); + for payload in Parser::new(0).parse_all(&input) { + let payload = payload?; + + // Track nesting depth, so that we don't mess with inner producer sections: + match payload { + Version { encoding, .. } => { + output.extend_from_slice(match encoding { + wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER, + wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER, + }); + } + ModuleSection { .. } | ComponentSection { .. } => { + stack.push(mem::take(&mut output)); + continue; + } + End { .. 
} => { + let mut parent = match stack.pop() { + Some(c) => c, + None => break, + }; + if output.starts_with(&wasm_encoder::Component::HEADER) { + parent.push(ComponentSectionId::Component as u8); + output.encode(&mut parent); + } else { + parent.push(ComponentSectionId::CoreModule as u8); + output.encode(&mut parent); + } + output = parent; + } + _ => {} + } + + // Only rewrite the outermost custom sections + if let CustomSection(c) = &payload { + if stack.len() == 0 { + match c.as_known() { + KnownCustom::Producers(_) => { + producers_found = true; + let mut producers = Producers::from_bytes(c.data(), c.data_offset())?; + // Add to the section according to the command line flags: + producers.merge(&add_producers); + // Encode into output: + producers.section().append_to(&mut output); + continue; + } + KnownCustom::Name(_) => { + names_found = true; + let mut names = ModuleNames::from_bytes(c.data(), c.data_offset())?; + names.merge(&ModuleNames::from_name(add_name)); + + names.section()?.as_custom().append_to(&mut output); + continue; + } + KnownCustom::ComponentName(_) => { + names_found = true; + let mut names = ComponentNames::from_bytes(c.data(), c.data_offset())?; + names.merge(&ComponentNames::from_name(add_name)); + names.section()?.as_custom().append_to(&mut output); + continue; + } + KnownCustom::Unknown if c.name() == "registry-metadata" => { + // Pass section through if a new registry metadata isn't provided, otherwise ignore and overwrite with new + if add_registry_metadata.is_none() { + let registry: RegistryMetadata = + RegistryMetadata::from_bytes(&c.data(), 0)?; + + let registry_metadata = wasm_encoder::CustomSection { + name: Cow::Borrowed("registry-metadata"), + data: Cow::Owned(serde_json::to_vec(®istry)?), + }; + registry_metadata.append_to(&mut output); + continue; + } + } + _ => {} + } + } + } + // All other sections get passed through unmodified: + if let Some((id, range)) = payload.as_section() { + wasm_encoder::RawSection { + id, + data: 
&input[range], + } + .append_to(&mut output); + } + } + if !names_found && add_name.is_some() { + if output.starts_with(&wasm_encoder::Component::HEADER) { + let names = ComponentNames::from_name(add_name); + names.section()?.append_to_component(&mut output); + } else { + let names = ModuleNames::from_name(add_name); + names.section()?.append_to(&mut output) + } + } + if !producers_found && !add_producers.is_empty() { + let mut producers = Producers::empty(); + // Add to the section according to the command line flags: + producers.merge(add_producers); + // Encode into output: + producers.section().append_to(&mut output); + } + if add_registry_metadata.is_some() { + let registry_metadata = wasm_encoder::CustomSection { + name: Cow::Borrowed("registry-metadata"), + data: Cow::Owned(serde_json::to_vec(&add_registry_metadata)?), + }; + registry_metadata.append_to(&mut output); + } + Ok(output) +} diff --git a/crates/wasm-metadata/src/utils.rs b/crates/wasm-metadata/src/utils.rs new file mode 100644 index 0000000000..b002571b5c --- /dev/null +++ b/crates/wasm-metadata/src/utils.rs @@ -0,0 +1,21 @@ +use anyhow::Result; + +pub(crate) fn name_map(map: &wasmparser::NameMap<'_>) -> Result { + let mut out = wasm_encoder::NameMap::new(); + for m in map.clone().into_iter() { + let m = m?; + out.append(m.index, m.name); + } + Ok(out) +} + +pub(crate) fn indirect_name_map( + map: &wasmparser::IndirectNameMap<'_>, +) -> Result { + let mut out = wasm_encoder::IndirectNameMap::new(); + for m in map.clone().into_iter() { + let m = m?; + out.append(m.index, &name_map(&m.names)?); + } + Ok(out) +} diff --git a/crates/wasm-metadata/tests/component.rs b/crates/wasm-metadata/tests/component.rs new file mode 100644 index 0000000000..3c282c9ed6 --- /dev/null +++ b/crates/wasm-metadata/tests/component.rs @@ -0,0 +1,185 @@ +use std::vec; + +use wasm_metadata::*; + +#[test] +fn add_to_empty_component() { + let wat = "(component)"; + let component = wat::parse_str(wat).unwrap(); + let add = 
/// Adding every kind of metadata to an empty component must round-trip
/// through `Metadata::from_binary`.
#[test]
fn add_to_empty_component() {
    // Built once: used as the input and as the expected read-back value.
    let registry = RegistryMetadata {
        authors: Some(vec!["foo".to_owned()]),
        description: Some("foo bar baz".to_owned()),
        license: Some("MIT OR LicenseRef-FOO".to_owned()),
        custom_licenses: Some(vec![CustomLicense {
            id: "FOO".to_owned(),
            name: "Foo".to_owned(),
            text: "Foo License".to_owned(),
            reference: Some("https://exaple.com/license/foo".to_owned()),
        }]),
        links: Some(vec![
            Link {
                ty: LinkType::Custom("CustomFoo".to_owned()),
                value: "https://example.com/custom".to_owned(),
            },
            Link {
                ty: LinkType::Homepage,
                value: "https://example.com".to_owned(),
            },
        ]),
        categories: Some(vec!["Tools".to_owned()]),
    };
    let add = AddMetadata {
        name: Some("foo".to_owned()),
        language: vec!["bar".to_owned()],
        processed_by: vec![("baz".to_owned(), "1.0".to_owned())],
        sdk: vec![],
        registry_metadata: Some(registry.clone()),
    };
    let component = wat::parse_str("(component)").unwrap();
    let component = add.to_wasm(&component).unwrap();

    let Metadata::Component {
        name,
        producers,
        registry_metadata,
        children,
        range,
    } = Metadata::from_binary(&component).unwrap()
    else {
        panic!("metadata should be component")
    };

    assert!(children.is_empty());
    assert_eq!(name, Some("foo".to_owned()));

    let producers = producers.expect("some producers");
    assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), "");
    assert_eq!(
        producers.get("processed-by").unwrap().get("baz").unwrap(),
        "1.0"
    );

    let registry_metadata = registry_metadata.unwrap();
    assert!(registry_metadata.validate().is_ok());
    // Covers authors, description, license, custom_licenses, links and
    // categories in one comparison.
    assert_eq!(registry_metadata, registry);

    assert_eq!(range.start, 0);
    assert_eq!(range.end, 432);
}
/// Metadata added to a component must not disturb the metadata of a core
/// module embedded inside it.
#[test]
fn add_to_nested_component() {
    // Create the same old module, stick some metadata into it
    let inner_add = AddMetadata {
        name: Some("foo".to_owned()),
        language: vec!["bar".to_owned()],
        processed_by: vec![("baz".to_owned(), "1.0".to_owned())],
        sdk: vec![],
        registry_metadata: Some(RegistryMetadata {
            authors: Some(vec!["Foo".to_owned()]),
            ..Default::default()
        }),
    };
    let module = wat::parse_str("(module)").unwrap();
    let module = inner_add.to_wasm(&module).unwrap();

    // Stick that module inside a component.
    let mut builder = wasm_encoder::Component::new();
    builder.section(&wasm_encoder::RawSection {
        id: wasm_encoder::ComponentSectionId::CoreModule.into(),
        data: &module,
    });
    let component = builder.finish();

    // Add some different metadata to the component.
    let outer_add = AddMetadata {
        name: Some("gussie".to_owned()),
        sdk: vec![("willa".to_owned(), "sparky".to_owned())],
        ..Default::default()
    };
    let component = outer_add.to_wasm(&component).unwrap();

    let Metadata::Component {
        name,
        producers,
        children,
        ..
    } = Metadata::from_binary(&component).unwrap()
    else {
        panic!("root should be component")
    };

    // Check that the component metadata is in the component
    assert_eq!(name, Some("gussie".to_owned()));
    let producers = producers.as_ref().expect("some producers");
    assert_eq!(
        producers.get("sdk").unwrap().get("willa").unwrap(),
        &"sparky".to_owned()
    );

    // Check that there is a single child with the metadata set for the module
    assert_eq!(children.len(), 1);
    let child = children.get(0).unwrap();
    let Metadata::Module {
        name,
        producers,
        registry_metadata,
        range,
    } = &**child
    else {
        panic!("child is a module")
    };

    assert_eq!(name, &Some("foo".to_owned()));
    let producers = producers.as_ref().expect("some producers");
    assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), "");
    assert_eq!(
        producers.get("processed-by").unwrap().get("baz").unwrap(),
        "1.0"
    );

    let registry_metadata = registry_metadata.as_ref().unwrap();
    assert_eq!(
        registry_metadata.authors.as_ref().unwrap(),
        &["Foo".to_owned()]
    );

    assert_eq!(range.start, 10);
    assert_eq!(range.end, 120);
}
/// Adding every kind of metadata to an empty core module must round-trip
/// through `Metadata::from_binary`.
#[test]
fn add_to_empty_module() {
    // Built once: used as the input and as the expected read-back value.
    let registry = RegistryMetadata {
        authors: Some(vec!["foo".to_owned()]),
        description: Some("foo bar baz".to_owned()),
        license: Some("MIT OR LicenseRef-FOO".to_owned()),
        custom_licenses: Some(vec![CustomLicense {
            id: "FOO".to_owned(),
            name: "Foo".to_owned(),
            text: "Foo License".to_owned(),
            reference: Some("https://exaple.com/license/foo".to_owned()),
        }]),
        links: Some(vec![
            Link {
                ty: LinkType::Custom("CustomFoo".to_owned()),
                value: "https://example.com/custom".to_owned(),
            },
            Link {
                ty: LinkType::Homepage,
                value: "https://example.com".to_owned(),
            },
        ]),
        categories: Some(vec!["Tools".to_owned()]),
    };
    let add = AddMetadata {
        name: Some("foo".to_owned()),
        language: vec!["bar".to_owned()],
        processed_by: vec![("baz".to_owned(), "1.0".to_owned())],
        sdk: vec![],
        registry_metadata: Some(registry.clone()),
    };
    let module = wat::parse_str("(module)").unwrap();
    let module = add.to_wasm(&module).unwrap();

    let Metadata::Module {
        name,
        producers,
        registry_metadata,
        range,
    } = Metadata::from_binary(&module).unwrap()
    else {
        panic!("metadata should be module")
    };

    assert_eq!(name, Some("foo".to_owned()));

    let producers = producers.expect("some producers");
    assert_eq!(producers.get("language").unwrap().get("bar").unwrap(), "");
    assert_eq!(
        producers.get("processed-by").unwrap().get("baz").unwrap(),
        "1.0"
    );

    let registry_metadata = registry_metadata.unwrap();
    assert!(registry_metadata.validate().is_ok());
    // Covers authors, description, license, custom_licenses, links and
    // categories in one comparison.
    assert_eq!(registry_metadata, registry);

    assert_eq!(range.start, 0);
    assert_eq!(range.end, 422);
}