From 51a2f8be16ed98aed9e07b8cc11d4b415b3df51a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 25 Sep 2024 20:46:48 -0700 Subject: [PATCH] Decouple import/export encodings from core names This commit decouples the string encodings listed for imports/exports from their core wasm names to instead being registered with WIT-level constructs instead. Previously the parsing phase of a module would register a string encoding for core wasm import/export names but this subverted the logic of validation where detection of how exactly an import lines up with WIT-level items is determined. The goal of this commit is to decouple this relation. Worlds are encoding into custom sections with a known string encoding for all imports/exports of that world. This can possibly differ for different parts of an application to theoretically enable one interface to be imported with UTF-8 and another with UTF-16. This means that encodings are tracked per-import/export rather than per-world. Previously this process would assume that there is a single name for an import's/export's encoding but with new detection and names coming down the line this is no longer going to be the case. For example with the new names in WebAssembly/component-model#378 there are new names to be supported meaning that there's not one single name to register encodings with. To help bridge this gap the abstraction here is changed to where metadata for a module records string encodings on a WIT level, for example per WIT import/export, instead of per core wasm import/export. Then during encoding of a component the WIT level constructs are matched up instead of the core names to determine the string encoding in the lift/lower operation. The end goal is that the connection between core wasm names and WIT names continues to be decoupled where validation is the only location concerned about this. --- crates/wit-component/src/encoding.rs | 60 +++---- crates/wit-component/src/encoding/world.rs | 4 +- crates/wit-component/src/metadata.rs | 172 +++++++++++++-------- crates/wit-component/src/validation.rs | 43 +++--- 4 files changed, 162 insertions(+), 117 deletions(-) diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs index 1917e3db25..b5f0b7d938 100644 --- a/crates/wit-component/src/encoding.rs +++ b/crates/wit-component/src/encoding.rs @@ -655,13 +655,19 @@ impl<'a> EncodingState<'a> { .root_import_type_encoder(None) .encode_func_type(resolve, func)?; let core_name = world_func_core_names[&func.name]; - let idx = self.encode_lift(module, &core_name, None, func, ty)?; + let idx = self.encode_lift(module, &core_name, export_name, func, ty)?; self.component .export(&export_string, ComponentExportKind::Func, idx, None); } WorldItem::Interface { id, .. } => { let core_names = interface_func_core_names.get(id); - self.encode_interface_export(&export_string, module, *id, core_names)?; + self.encode_interface_export( + &export_string, + module, + export_name, + *id, + core_names, + )?; } WorldItem::Type(_) => unreachable!(), } @@ -674,6 +680,7 @@ impl<'a> EncodingState<'a> { &mut self, export_name: &str, module: CustomModule<'_>, + key: &WorldKey, export: InterfaceId, interface_func_core_names: Option<&IndexMap<&str, &str>>, ) -> Result<()> { @@ -691,9 +698,7 @@ impl<'a> EncodingState<'a> { for (_, func) in &resolve.interfaces[export].functions { let core_name = interface_func_core_names.unwrap()[func.name.as_str()]; let ty = root.encode_func_type(resolve, func)?; - let func_index = root - .state - .encode_lift(module, &core_name, Some(export), func, ty)?; + let func_index = root.state.encode_lift(module, &core_name, key, func, ty)?; imports.push(( import_func_name(func), ComponentExportKind::Func, @@ -986,7 +991,7 @@ impl<'a> EncodingState<'a> { &mut self, module: CustomModule<'_>, core_name: &str, - interface: Option, + key: &WorldKey, func: &Function, ty: u32, ) -> Result { @@ -997,16 +1002,19 @@ impl<'a> EncodingState<'a> { let options = RequiredOptions::for_export(resolve, func); - let encoding = metadata.export_encodings[core_name]; + let encoding = metadata + .export_encodings + .get(resolve, key, &func.name) + .unwrap(); let exports = self.info.exports_for(module); let realloc_index = exports - .export_realloc_for(interface, func) + .export_realloc_for(key, func) .map(|name| self.core_alias_export(instance_index, name, ExportKind::Func)); let mut options = options .into_iter(encoding, self.memory_index, realloc_index)? .collect::>(); - if let Some(post_return) = exports.post_return(interface, func) { + if let Some(post_return) = exports.post_return(key, func) { let post_return = self.core_alias_export(instance_index, post_return, ExportKind::Func); options.push(CanonicalOption::PostReturn(post_return)); } @@ -1379,7 +1387,7 @@ impl<'a> EncodingState<'a> { log::trace!("attempting to materialize import of `{module}::{field}` for {for_module:?}"); let resolve = &self.info.encoder.metadata.resolve; let name_tmp; - let (key, name) = match import { + let (key, name, interface_key) = match import { // Main module dependencies on an adapter in use are done with an // indirection here, so load the shim function and use that. Import::AdapterExport(_) => { @@ -1446,18 +1454,17 @@ impl<'a> EncodingState<'a> { // through to the code below. This is where these are connected to a // WIT `ImportedInterface` one way or another with the name that was // detected during validation. - Import::ImportedResourceDrop(key, id) => { + Import::ImportedResourceDrop(key, iface, id) => { let ty = &resolve.types[*id]; let name = ty.name.as_ref().unwrap(); name_tmp = format!("{RESOURCE_DROP}{name}"); - (key.as_ref(), &name_tmp) + (key, &name_tmp, iface.map(|_| resolve.name_world_key(key))) } - Import::WorldFunc(name) => (None, name), - Import::InterfaceFunc(key, _, name) => (Some(key), name), + Import::WorldFunc(key, name) => (key, name, None), + Import::InterfaceFunc(key, _, name) => (key, name, Some(resolve.name_world_key(key))), }; - let interface = key.map(|key| resolve.name_world_key(key)); - let import = &self.info.import_map[&interface]; + let import = &self.info.import_map[&interface_key]; let (index, _, lowering) = import.lowerings.get_full(name).unwrap(); let metadata = self.info.module_metadata_for(for_module); @@ -1480,12 +1487,12 @@ impl<'a> EncodingState<'a> { // created, so the specific export is loaded here and used as an // import. Lowering::Indirect { .. } => { - let encoding = metadata.import_encodings[&(module.to_string(), field.to_string())]; + let encoding = metadata.import_encodings.get(resolve, key, name).unwrap(); self.core_alias_export( self.shim_instance_index .expect("shim should be instantiated"), &shims.shims[&ShimKind::IndirectLowering { - interface: interface.clone(), + interface: interface_key, index, realloc: for_module, encoding, @@ -1696,7 +1703,7 @@ impl<'a> Shims<'a> { let resolve = &world.encoder.metadata.resolve; for (module, field, import) in module_imports.imports() { - let (key, name) = match import { + let (key, name, interface_key) = match import { // These imports don't require shims, they can be satisfied // as-needed when required. Import::ImportedResourceDrop(..) @@ -1746,11 +1753,12 @@ impl<'a> Shims<'a> { // WIT-level functions may require an indirection, so yield some // metadata out of this `match` to the loop below to figure that // out. - Import::InterfaceFunc(key, _, name) => (Some(key), name), - Import::WorldFunc(name) => (None, name), + Import::InterfaceFunc(key, _, name) => { + (key, name, Some(resolve.name_world_key(key))) + } + Import::WorldFunc(key, name) => (key, name, None), }; - let key = key.map(|key| resolve.name_world_key(key)); - let interface = &world.import_map[&key]; + let interface = &world.import_map[&interface_key]; let (index, _, lowering) = interface.lowerings.get_full(name).unwrap(); let shim_name = self.shims.len().to_string(); match lowering { @@ -1760,9 +1768,9 @@ impl<'a> Shims<'a> { log::debug!( "shim {shim_name} is import `{module}::{field}` lowering {index} `{name}`", ); - let encoding = *metadata + let encoding = metadata .import_encodings - .get(&(module.to_string(), field.to_string())) + .get(resolve, key, name) .ok_or_else(|| { anyhow::anyhow!( "missing component metadata for import of \ @@ -1774,7 +1782,7 @@ impl<'a> Shims<'a> { debug_name: format!("indirect-{module}-{field}"), options: *options, kind: ShimKind::IndirectLowering { - interface: key, + interface: interface_key, index, realloc: for_module, encoding, diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs index d969e39798..18943abd39 100644 --- a/crates/wit-component/src/encoding/world.rs +++ b/crates/wit-component/src/encoding/world.rs @@ -250,7 +250,7 @@ impl<'a> ComponentWorld<'a> { .chain(self.info.imports.imports()) { match import { - Import::WorldFunc(name) => { + Import::WorldFunc(_, name) => { required .interface_funcs .entry(None) @@ -264,7 +264,7 @@ impl<'a> ComponentWorld<'a> { .or_default() .insert(name); } - Import::ImportedResourceDrop(_, id) => { + Import::ImportedResourceDrop(_, _, id) => { required.resource_drops.insert(*id); } _ => {} diff --git a/crates/wit-component/src/metadata.rs b/crates/wit-component/src/metadata.rs index 18b2608def..7eaf5a68d4 100644 --- a/crates/wit-component/src/metadata.rs +++ b/crates/wit-component/src/metadata.rs @@ -41,7 +41,6 @@ //! The dual of `encode` is the `decode_custom_section` fucntion which decodes //! the three arguments originally passed to `encode`. -use crate::validation::BARE_FUNC_MODULE_NAME; use crate::{DecodedWasm, StringEncoding}; use anyhow::{bail, Context, Result}; use indexmap::{IndexMap, IndexSet}; @@ -112,11 +111,105 @@ impl Default for Bindgen { pub struct ModuleMetadata { /// Per-function options imported into the core wasm module, currently only /// related to string encoding. - pub import_encodings: IndexMap<(String, String), StringEncoding>, + pub import_encodings: EncodingMap, /// Per-function options exported from the core wasm module, currently only /// related to string encoding. - pub export_encodings: IndexMap, + pub export_encodings: EncodingMap, +} + +/// Internal map that keeps track of encodings for various world imports and +/// exports. +/// +/// Stored in [`ModuleMetadata`]. +#[derive(Default)] +pub struct EncodingMap { + /// A map of an "identifying string" for world items to what string + /// encoding the import or export is using. + /// + /// The keys of this map are created by `EncodingMap::key` and are + /// specifically chosen to be able to be looked up during both insertion and + /// fetching. Note that in particular this map does not use `*Id` types such + /// as `InterfaceId` from `wit_parser`. This is due to the fact that during + /// world merging new interfaces are created for named imports (e.g. `import + /// x: interface { ... }`) as inline interfaces are copied from one world to + /// another. Additionally during world merging different interfaces at the + /// same version may be deduplicated. + /// + /// For these reasons a string-based key is chosen to avoid juggling IDs + /// through the world merging process. Additionally versions are chopped off + /// for now to help with a problem such as: + /// + /// * The main module imports a:b/c@0.1.0 + /// * An adapter imports a:b/c@0.1.1 + /// * The final world uses a:b/c@0.1.1, but the main module has no + /// encoding listed for that exact item. + /// + /// By chopping off versions this is able to get everything registered + /// correctly even in the fact of merging interfaces and worlds. + encodings: IndexMap, +} + +impl EncodingMap { + fn insert_all( + &mut self, + resolve: &Resolve, + set: &IndexMap, + encoding: StringEncoding, + ) { + for (name, item) in set { + match item { + WorldItem::Function(func) => { + let key = self.key(resolve, name, &func.name); + self.encodings.insert(key, encoding); + } + WorldItem::Interface { id, .. } => { + for (func, _) in resolve.interfaces[*id].functions.iter() { + let key = self.key(resolve, name, func); + self.encodings.insert(key, encoding); + } + } + WorldItem::Type(_) => {} + } + } + } + + /// Looks up the encoding of the function `func` which is scoped under `key` + /// in the world in question. + pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option { + let key = self.key(resolve, key, func); + self.encodings.get(&key).copied() + } + + fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String { + format!( + "{}/{func}", + match key { + WorldKey::Name(name) => name.to_string(), + WorldKey::Interface(id) => { + let iface = &resolve.interfaces[*id]; + let pkg = &resolve.packages[iface.package.unwrap()]; + format!( + "{}:{}/{}", + pkg.name.namespace, + pkg.name.name, + iface.name.as_ref().unwrap() + ) + } + } + ) + } + + fn merge(&mut self, other: EncodingMap) -> Result<()> { + for (key, encoding) in other.encodings { + if let Some(prev) = self.encodings.insert(key.clone(), encoding) { + if prev != encoding { + bail!("conflicting string encodings specified for `{key}`"); + } + } + } + Ok(()) + } } /// This function will parse the core `wasm` binary given as input and return a @@ -313,38 +406,18 @@ impl Bindgen { producers, } = other; - let world = self + let remap = self .resolve .merge(resolve) - .context("failed to merge WIT package sets together")? - .map_world(world, None)?; + .context("failed to merge WIT package sets together")?; + let world = remap.map_world(world, None)?; let exports = self.resolve.worlds[world].exports.keys().cloned().collect(); self.resolve .merge_worlds(world, self.world) .context("failed to merge worlds from two documents")?; - for (name, encoding) in export_encodings { - let prev = self - .metadata - .export_encodings - .insert(name.clone(), encoding); - if let Some(prev) = prev { - if prev != encoding { - bail!("conflicting string encodings specified for export `{name}`"); - } - } - } - for ((module, name), encoding) in import_encodings { - let prev = self - .metadata - .import_encodings - .insert((module.clone(), name.clone()), encoding); - if let Some(prev) = prev { - if prev != encoding { - bail!("conflicting string encodings specified for import `{module}::{name}`"); - } - } - } + self.metadata.import_encodings.merge(import_encodings)?; + self.metadata.export_encodings.merge(export_encodings)?; if let Some(producers) = producers { if let Some(mine) = &mut self.producers { mine.merge(&producers); @@ -364,45 +437,10 @@ impl ModuleMetadata { let mut ret = ModuleMetadata::default(); let world = &resolve.worlds[world]; - for (name, item) in world.imports.iter() { - let name = resolve.name_world_key(name); - match item { - WorldItem::Function(_) => { - let prev = ret - .import_encodings - .insert((BARE_FUNC_MODULE_NAME.to_string(), name.clone()), encoding); - assert!(prev.is_none()); - } - WorldItem::Interface { id, .. } => { - for (func, _) in resolve.interfaces[*id].functions.iter() { - let prev = ret - .import_encodings - .insert((name.clone(), func.clone()), encoding); - assert!(prev.is_none()); - } - } - WorldItem::Type(_) => {} - } - } - - for (name, item) in world.exports.iter() { - let name = resolve.name_world_key(name); - match item { - WorldItem::Function(func) => { - let name = func.core_export_name(None).into_owned(); - let prev = ret.export_encodings.insert(name.clone(), encoding); - assert!(prev.is_none()); - } - WorldItem::Interface { id, .. } => { - for (_, func) in resolve.interfaces[*id].functions.iter() { - let name = func.core_export_name(Some(&name)).into_owned(); - let prev = ret.export_encodings.insert(name, encoding); - assert!(prev.is_none()); - } - } - WorldItem::Type(_) => {} - } - } + ret.export_encodings + .insert_all(resolve, &world.exports, encoding); + ret.import_encodings + .insert_all(resolve, &world.imports, encoding); ret } diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 3bb7dfdddf..489a1ab120 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -140,7 +140,7 @@ pub enum ImportInstance { pub enum Import { /// A top-level world function, with the name provided here, is imported /// into the module. - WorldFunc(String), + WorldFunc(WorldKey, String), /// An interface's function is imported into the module. /// @@ -154,7 +154,7 @@ pub enum Import { /// The key provided indicates whether it's for the top-level types of the /// world (`None`) or an interface (`Some` with the name of the interface). /// The `TypeId` is what resource is being dropped. - ImportedResourceDrop(Option, TypeId), + ImportedResourceDrop(WorldKey, Option, TypeId), /// A `canon resource.drop` intrinsic for an exported item is being /// imported. @@ -210,7 +210,7 @@ impl ImportMap { ImportInstance::Names(names) => names.get(func), _ => None, }); - matches!(item, Some(Import::WorldFunc(_))) + matches!(item, Some(Import::WorldFunc(_, _))) } /// Returns whether the interface function specified is imported. @@ -224,7 +224,7 @@ impl ImportMap { /// Returns whether the specified resource's drop method is needed to import. pub fn uses_imported_resource_drop(&self, resource: TypeId) -> bool { self.imports().any(|(_, _, import)| match import { - Import::ImportedResourceDrop(_, id) => resource == *id, + Import::ImportedResourceDrop(_, _, id) => resource == *id, _ => false, }) } @@ -337,12 +337,12 @@ impl ImportMap { let key = WorldKey::Name(name.to_string()); if let Some(WorldItem::Function(func)) = world.imports.get(&key) { validate_func(resolve, ty, func, AbiVariant::GuestImport)?; - return Ok(Import::WorldFunc(func.name.clone())); + return Ok(Import::WorldFunc(key, func.name.clone())); } let get_resource = resource_test_for_world(resolve, world_id); if let Some(id) = valid_resource_drop(name, ty, get_resource)? { - return Ok(Import::ImportedResourceDrop(None, id)); + return Ok(Import::ImportedResourceDrop(key, None, id)); } match world.imports.get(&key) { @@ -397,7 +397,7 @@ impl ImportMap { })?; Ok(Import::InterfaceFunc(key, id, f.name.clone())) } else if let Some(ty) = valid_resource_drop(import.name, ty, get_resource)? { - Ok(Import::ImportedResourceDrop(Some(key), ty)) + Ok(Import::ImportedResourceDrop(key, Some(id), ty)) } else { bail!( "import interface `{}` is missing function \ @@ -576,13 +576,13 @@ pub enum Export { WorldFunc(String), /// A post-return for a top-level function of a world. - WorldFuncPostReturn(String), + WorldFuncPostReturn(WorldKey), /// An export of a function in an interface. InterfaceFunc(InterfaceId, String), /// A post-return for the above function. - InterfaceFuncPostReturn(InterfaceId, String), + InterfaceFuncPostReturn(WorldKey, String), /// A destructor for an exported resource. ResourceDtor(TypeId), @@ -676,11 +676,14 @@ impl ExportMap { format!("failed to validate export for `{key}`") })?; match id { - Some(id) => { - return Ok(Some(Export::InterfaceFuncPostReturn(id, f.name.clone()))); + Some(_id) => { + return Ok(Some(Export::InterfaceFuncPostReturn( + key.clone(), + f.name.clone(), + ))); } None => { - return Ok(Some(Export::WorldFuncPostReturn(f.name.clone()))); + return Ok(Some(Export::WorldFuncPostReturn(key.clone()))); } } } @@ -765,25 +768,21 @@ impl ExportMap { /// Returns the name of the post-return export, if any, for the `interface` /// and `func` combo. - pub fn post_return(&self, interface: Option, func: &Function) -> Option<&str> { - self.find(|m| match (m, interface) { - (Export::WorldFuncPostReturn(f), None) => func.name == *f, - (Export::InterfaceFuncPostReturn(i, f), Some(id)) => *i == id && func.name == *f, + pub fn post_return(&self, key: &WorldKey, func: &Function) -> Option<&str> { + self.find(|m| match m { + Export::WorldFuncPostReturn(k) => k == key, + Export::InterfaceFuncPostReturn(k, f) => k == key && func.name == *f, _ => false, }) } /// Returns the realloc that the exported function `interface` and `func` /// are using. - pub fn export_realloc_for( - &self, - interface: Option, - func: &Function, - ) -> Option<&str> { + pub fn export_realloc_for(&self, key: &WorldKey, func: &Function) -> Option<&str> { // TODO: This realloc detection should probably be improved with // some sort of scheme to have per-function reallocs like // `cabi_realloc_{name}` or something like that. - let _ = (interface, func); + let _ = (key, func); if let Some(name) = self.find(|m| matches!(m, Export::GeneralPurposeExportRealloc)) { return Some(name);