Skip to content

Commit

Permalink
Decouple import/export encodings from core names
Browse files Browse the repository at this point in the history
This commit decouples the string encodings listed for imports/exports
from their core wasm names so that they are registered with WIT-level
constructs instead. Previously the parsing phase of a module would
register a string encoding for core wasm import/export names but this
subverted the logic of validation where detection of how exactly an
import lines up with WIT-level items is determined. The goal of this
commit is to decouple this relation.

Worlds are encoded into custom sections with a known string encoding
for all imports/exports of that world. This can possibly differ for
different parts of an application to theoretically enable one interface
to be imported with UTF-8 and another with UTF-16. This means that
encodings are tracked per-import/export rather than per-world.
Previously this process would assume that there is a single name for an
import's/export's encoding but with new detection and names coming down
the line this is no longer going to be the case. For example with the
new names in WebAssembly/component-model#378 there are new names to be
supported meaning that there's not one single name to register encodings
with.

To help bridge this gap the abstraction here is changed so that
metadata for a module records string encodings on a WIT level, for
example per WIT import/export, instead of per core wasm import/export.
Then during encoding of a component the WIT level constructs are matched
up instead of the core names to determine the string encoding in the
lift/lower operation.

The end goal is that the connection between core wasm names and WIT
names continues to be decoupled where validation is the only location
concerned about this.
  • Loading branch information
alexcrichton committed Sep 28, 2024
1 parent 464aeab commit 51a2f8b
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 117 deletions.
60 changes: 34 additions & 26 deletions crates/wit-component/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,13 +655,19 @@ impl<'a> EncodingState<'a> {
.root_import_type_encoder(None)
.encode_func_type(resolve, func)?;
let core_name = world_func_core_names[&func.name];
let idx = self.encode_lift(module, &core_name, None, func, ty)?;
let idx = self.encode_lift(module, &core_name, export_name, func, ty)?;
self.component
.export(&export_string, ComponentExportKind::Func, idx, None);
}
WorldItem::Interface { id, .. } => {
let core_names = interface_func_core_names.get(id);
self.encode_interface_export(&export_string, module, *id, core_names)?;
self.encode_interface_export(
&export_string,
module,
export_name,
*id,
core_names,
)?;
}
WorldItem::Type(_) => unreachable!(),
}
Expand All @@ -674,6 +680,7 @@ impl<'a> EncodingState<'a> {
&mut self,
export_name: &str,
module: CustomModule<'_>,
key: &WorldKey,
export: InterfaceId,
interface_func_core_names: Option<&IndexMap<&str, &str>>,
) -> Result<()> {
Expand All @@ -691,9 +698,7 @@ impl<'a> EncodingState<'a> {
for (_, func) in &resolve.interfaces[export].functions {
let core_name = interface_func_core_names.unwrap()[func.name.as_str()];
let ty = root.encode_func_type(resolve, func)?;
let func_index = root
.state
.encode_lift(module, &core_name, Some(export), func, ty)?;
let func_index = root.state.encode_lift(module, &core_name, key, func, ty)?;
imports.push((
import_func_name(func),
ComponentExportKind::Func,
Expand Down Expand Up @@ -986,7 +991,7 @@ impl<'a> EncodingState<'a> {
&mut self,
module: CustomModule<'_>,
core_name: &str,
interface: Option<InterfaceId>,
key: &WorldKey,
func: &Function,
ty: u32,
) -> Result<u32> {
Expand All @@ -997,16 +1002,19 @@ impl<'a> EncodingState<'a> {

let options = RequiredOptions::for_export(resolve, func);

let encoding = metadata.export_encodings[core_name];
let encoding = metadata
.export_encodings
.get(resolve, key, &func.name)
.unwrap();
let exports = self.info.exports_for(module);
let realloc_index = exports
.export_realloc_for(interface, func)
.export_realloc_for(key, func)
.map(|name| self.core_alias_export(instance_index, name, ExportKind::Func));
let mut options = options
.into_iter(encoding, self.memory_index, realloc_index)?
.collect::<Vec<_>>();

if let Some(post_return) = exports.post_return(interface, func) {
if let Some(post_return) = exports.post_return(key, func) {
let post_return = self.core_alias_export(instance_index, post_return, ExportKind::Func);
options.push(CanonicalOption::PostReturn(post_return));
}
Expand Down Expand Up @@ -1379,7 +1387,7 @@ impl<'a> EncodingState<'a> {
log::trace!("attempting to materialize import of `{module}::{field}` for {for_module:?}");
let resolve = &self.info.encoder.metadata.resolve;
let name_tmp;
let (key, name) = match import {
let (key, name, interface_key) = match import {
// Main module dependencies on an adapter in use are done with an
// indirection here, so load the shim function and use that.
Import::AdapterExport(_) => {
Expand Down Expand Up @@ -1446,18 +1454,17 @@ impl<'a> EncodingState<'a> {
// through to the code below. This is where these are connected to a
// WIT `ImportedInterface` one way or another with the name that was
// detected during validation.
Import::ImportedResourceDrop(key, id) => {
Import::ImportedResourceDrop(key, iface, id) => {
let ty = &resolve.types[*id];
let name = ty.name.as_ref().unwrap();
name_tmp = format!("{RESOURCE_DROP}{name}");
(key.as_ref(), &name_tmp)
(key, &name_tmp, iface.map(|_| resolve.name_world_key(key)))
}
Import::WorldFunc(name) => (None, name),
Import::InterfaceFunc(key, _, name) => (Some(key), name),
Import::WorldFunc(key, name) => (key, name, None),
Import::InterfaceFunc(key, _, name) => (key, name, Some(resolve.name_world_key(key))),
};

let interface = key.map(|key| resolve.name_world_key(key));
let import = &self.info.import_map[&interface];
let import = &self.info.import_map[&interface_key];
let (index, _, lowering) = import.lowerings.get_full(name).unwrap();
let metadata = self.info.module_metadata_for(for_module);

Expand All @@ -1480,12 +1487,12 @@ impl<'a> EncodingState<'a> {
// created, so the specific export is loaded here and used as an
// import.
Lowering::Indirect { .. } => {
let encoding = metadata.import_encodings[&(module.to_string(), field.to_string())];
let encoding = metadata.import_encodings.get(resolve, key, name).unwrap();
self.core_alias_export(
self.shim_instance_index
.expect("shim should be instantiated"),
&shims.shims[&ShimKind::IndirectLowering {
interface: interface.clone(),
interface: interface_key,
index,
realloc: for_module,
encoding,
Expand Down Expand Up @@ -1696,7 +1703,7 @@ impl<'a> Shims<'a> {
let resolve = &world.encoder.metadata.resolve;

for (module, field, import) in module_imports.imports() {
let (key, name) = match import {
let (key, name, interface_key) = match import {
// These imports don't require shims, they can be satisfied
// as-needed when required.
Import::ImportedResourceDrop(..)
Expand Down Expand Up @@ -1746,11 +1753,12 @@ impl<'a> Shims<'a> {
// WIT-level functions may require an indirection, so yield some
// metadata out of this `match` to the loop below to figure that
// out.
Import::InterfaceFunc(key, _, name) => (Some(key), name),
Import::WorldFunc(name) => (None, name),
Import::InterfaceFunc(key, _, name) => {
(key, name, Some(resolve.name_world_key(key)))
}
Import::WorldFunc(key, name) => (key, name, None),
};
let key = key.map(|key| resolve.name_world_key(key));
let interface = &world.import_map[&key];
let interface = &world.import_map[&interface_key];
let (index, _, lowering) = interface.lowerings.get_full(name).unwrap();
let shim_name = self.shims.len().to_string();
match lowering {
Expand All @@ -1760,9 +1768,9 @@ impl<'a> Shims<'a> {
log::debug!(
"shim {shim_name} is import `{module}::{field}` lowering {index} `{name}`",
);
let encoding = *metadata
let encoding = metadata
.import_encodings
.get(&(module.to_string(), field.to_string()))
.get(resolve, key, name)
.ok_or_else(|| {
anyhow::anyhow!(
"missing component metadata for import of \
Expand All @@ -1774,7 +1782,7 @@ impl<'a> Shims<'a> {
debug_name: format!("indirect-{module}-{field}"),
options: *options,
kind: ShimKind::IndirectLowering {
interface: key,
interface: interface_key,
index,
realloc: for_module,
encoding,
Expand Down
4 changes: 2 additions & 2 deletions crates/wit-component/src/encoding/world.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ impl<'a> ComponentWorld<'a> {
.chain(self.info.imports.imports())
{
match import {
Import::WorldFunc(name) => {
Import::WorldFunc(_, name) => {
required
.interface_funcs
.entry(None)
Expand All @@ -264,7 +264,7 @@ impl<'a> ComponentWorld<'a> {
.or_default()
.insert(name);
}
Import::ImportedResourceDrop(_, id) => {
Import::ImportedResourceDrop(_, _, id) => {
required.resource_drops.insert(*id);
}
_ => {}
Expand Down
172 changes: 105 additions & 67 deletions crates/wit-component/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
//! The dual of `encode` is the `decode_custom_section` function which decodes
//! the three arguments originally passed to `encode`.
use crate::validation::BARE_FUNC_MODULE_NAME;
use crate::{DecodedWasm, StringEncoding};
use anyhow::{bail, Context, Result};
use indexmap::{IndexMap, IndexSet};
Expand Down Expand Up @@ -112,11 +111,105 @@ impl Default for Bindgen {
pub struct ModuleMetadata {
/// Per-function options imported into the core wasm module, currently only
/// related to string encoding.
pub import_encodings: IndexMap<(String, String), StringEncoding>,
pub import_encodings: EncodingMap,

/// Per-function options exported from the core wasm module, currently only
/// related to string encoding.
pub export_encodings: IndexMap<String, StringEncoding>,
pub export_encodings: EncodingMap,
}

/// Internal map that keeps track of encodings for various world imports and
/// exports.
///
/// Stored in [`ModuleMetadata`].
#[derive(Default)]
pub struct EncodingMap {
/// A map of an "identifying string" for world items to what string
/// encoding the import or export is using.
///
/// The keys of this map are created by `EncodingMap::key` and are
/// specifically chosen to be able to be looked up during both insertion and
/// fetching. Note that in particular this map does not use `*Id` types such
/// as `InterfaceId` from `wit_parser`. This is due to the fact that during
/// world merging new interfaces are created for named imports (e.g. `import
/// x: interface { ... }`) as inline interfaces are copied from one world to
/// another. Additionally during world merging different interfaces at the
/// same version may be deduplicated.
///
/// For these reasons a string-based key is chosen to avoid juggling IDs
/// through the world merging process. Additionally versions are chopped off
/// for now to help with a problem such as:
///
/// * The main module imports a:b/c@0.1.0
/// * An adapter imports a:b/c@0.1.1
/// * The final world uses a:b/c@0.1.1, but the main module has no
/// encoding listed for that exact item.
///
/// By chopping off versions this is able to get everything registered
/// correctly even in the face of merging interfaces and worlds.
encodings: IndexMap<String, StringEncoding>,
}

impl EncodingMap {
    /// Registers `encoding` for every function reachable from the world
    /// items in `set`.
    ///
    /// A freestanding function is recorded under its own world key, and each
    /// function of an interface item is recorded under that interface's world
    /// key. Type items contribute nothing. An existing entry with the same
    /// identifying string is overwritten.
    fn insert_all(
        &mut self,
        resolve: &Resolve,
        set: &IndexMap<WorldKey, WorldItem>,
        encoding: StringEncoding,
    ) {
        for (world_key, item) in set {
            match item {
                WorldItem::Type(_) => {}
                WorldItem::Function(func) => {
                    let ident = self.key(resolve, world_key, &func.name);
                    self.encodings.insert(ident, encoding);
                }
                WorldItem::Interface { id, .. } => {
                    for func_name in resolve.interfaces[*id].functions.keys() {
                        let ident = self.key(resolve, world_key, func_name);
                        self.encodings.insert(ident, encoding);
                    }
                }
            }
        }
    }

    /// Returns the string encoding recorded for the function `func` scoped
    /// under the world key `key`, or `None` if nothing was registered for it.
    pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option<StringEncoding> {
        let ident = self.key(resolve, key, func);
        self.encodings.get(&ident).copied()
    }

    /// Builds the identifying string used as a map key: `<scope>/<func>`.
    ///
    /// For a named world key the scope is the name itself. For an interface
    /// key the scope is `<namespace>:<package>/<interface>`; no version is
    /// included in the string.
    ///
    /// # Panics
    ///
    /// Panics if an interface key refers to an interface that has no package
    /// or no name.
    fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String {
        let scope = match key {
            WorldKey::Name(name) => name.to_string(),
            WorldKey::Interface(id) => {
                let iface = &resolve.interfaces[*id];
                // Resolve the package before the name so the failure order
                // for malformed interfaces stays the same.
                let pkg = &resolve.packages[iface.package.unwrap()];
                let iface_name = iface.name.as_ref().unwrap();
                format!("{}:{}/{}", pkg.name.namespace, pkg.name.name, iface_name)
            }
        };
        format!("{scope}/{func}")
    }

    /// Folds every entry of `other` into this map.
    ///
    /// # Errors
    ///
    /// Returns an error when `other` lists an encoding for a key that is
    /// already present here with a different encoding. The new value has
    /// already been written into the map by the time the conflict is
    /// reported.
    fn merge(&mut self, other: EncodingMap) -> Result<()> {
        for (key, encoding) in other.encodings {
            match self.encodings.insert(key.clone(), encoding) {
                Some(prev) if prev != encoding => {
                    bail!("conflicting string encodings specified for `{key}`");
                }
                _ => {}
            }
        }
        Ok(())
    }
}

/// This function will parse the core `wasm` binary given as input and return a
Expand Down Expand Up @@ -313,38 +406,18 @@ impl Bindgen {
producers,
} = other;

let world = self
let remap = self
.resolve
.merge(resolve)
.context("failed to merge WIT package sets together")?
.map_world(world, None)?;
.context("failed to merge WIT package sets together")?;
let world = remap.map_world(world, None)?;
let exports = self.resolve.worlds[world].exports.keys().cloned().collect();
self.resolve
.merge_worlds(world, self.world)
.context("failed to merge worlds from two documents")?;

for (name, encoding) in export_encodings {
let prev = self
.metadata
.export_encodings
.insert(name.clone(), encoding);
if let Some(prev) = prev {
if prev != encoding {
bail!("conflicting string encodings specified for export `{name}`");
}
}
}
for ((module, name), encoding) in import_encodings {
let prev = self
.metadata
.import_encodings
.insert((module.clone(), name.clone()), encoding);
if let Some(prev) = prev {
if prev != encoding {
bail!("conflicting string encodings specified for import `{module}::{name}`");
}
}
}
self.metadata.import_encodings.merge(import_encodings)?;
self.metadata.export_encodings.merge(export_encodings)?;
if let Some(producers) = producers {
if let Some(mine) = &mut self.producers {
mine.merge(&producers);
Expand All @@ -364,45 +437,10 @@ impl ModuleMetadata {
let mut ret = ModuleMetadata::default();

let world = &resolve.worlds[world];
for (name, item) in world.imports.iter() {
let name = resolve.name_world_key(name);
match item {
WorldItem::Function(_) => {
let prev = ret
.import_encodings
.insert((BARE_FUNC_MODULE_NAME.to_string(), name.clone()), encoding);
assert!(prev.is_none());
}
WorldItem::Interface { id, .. } => {
for (func, _) in resolve.interfaces[*id].functions.iter() {
let prev = ret
.import_encodings
.insert((name.clone(), func.clone()), encoding);
assert!(prev.is_none());
}
}
WorldItem::Type(_) => {}
}
}

for (name, item) in world.exports.iter() {
let name = resolve.name_world_key(name);
match item {
WorldItem::Function(func) => {
let name = func.core_export_name(None).into_owned();
let prev = ret.export_encodings.insert(name.clone(), encoding);
assert!(prev.is_none());
}
WorldItem::Interface { id, .. } => {
for (_, func) in resolve.interfaces[*id].functions.iter() {
let name = func.core_export_name(Some(&name)).into_owned();
let prev = ret.export_encodings.insert(name, encoding);
assert!(prev.is_none());
}
}
WorldItem::Type(_) => {}
}
}
ret.export_encodings
.insert_all(resolve, &world.exports, encoding);
ret.import_encodings
.insert_all(resolve, &world.imports, encoding);

ret
}
Expand Down
Loading

0 comments on commit 51a2f8b

Please sign in to comment.