Skip to content

Commit

Permalink
Canonicalize types earlier in validation (#1299)
Browse files Browse the repository at this point in the history
* Canonicalize types earlier in validation

This commit refactors the internals of validation to centralize
canonicalization in one location. This means that stages such as
operator validation no longer need to perform canonicalization.
Additionally types are always stored in their canonical forms.

The refactoring here is done when a `RecGroup` is inserted into a
`TypeList`. The rec-group-local offsets are used to intern the group but
when actually adding to a `TypeList` all rec-group-local offsets are
updated to an ID-based form. This is then plumbed in many other
locations as well by changing many `t: ValType` validators to `t: &mut
ValType` to internally canonicalize.

This is originally motivated by trying to update Wasmtime to the latest
`wasmparser` but many APIs which previously returned `&FuncType` for
example returned `FuncType` instead due to the
canonicalization-on-the-fly. This change means that the old
`&FuncType`-style APIs can return because types are always canonicalized
at-rest.

This change additionally changes the behavior of one test. It was
previously considered valid and is now considered valid. I've confirmed
that `wasm-opt` considers the test invalid, so I've updated the test to
`assert_invalid` instead.

* Fix test compile

* Remove unused arguments

* Review comments
  • Loading branch information
alexcrichton authored Nov 20, 2023
1 parent acee342 commit 230ce7f
Show file tree
Hide file tree
Showing 14 changed files with 429 additions and 585 deletions.
4 changes: 0 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion crates/wasmparser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ edition.workspace = true
exclude = ["benches/*.wasm"]

[dependencies]
hashbrown = { version = "0.14.2", default-features = false, features = ["ahash"] }
indexmap = { workspace = true }
semver = { workspace = true }

Expand Down
77 changes: 75 additions & 2 deletions crates/wasmparser/src/readers/core/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
* limitations under the License.
*/

use std::fmt::{self, Debug, Write};

use crate::limits::{
MAX_WASM_FUNCTION_PARAMS, MAX_WASM_FUNCTION_RETURNS, MAX_WASM_STRUCT_FIELDS,
MAX_WASM_SUPERTYPES, MAX_WASM_TYPES,
};
use crate::types::CoreTypeId;
use crate::{BinaryReader, BinaryReaderError, FromReader, Result, SectionLimited};
use std::fmt::{self, Debug, Write};
use std::hash::{Hash, Hasher};

mod matches;
pub(crate) use self::matches::{Matches, WithRecGroup};
Expand Down Expand Up @@ -384,6 +384,20 @@ impl RecGroup {
}
}

impl Hash for RecGroup {
fn hash<H: Hasher>(&self, hasher: &mut H) {
self.types().hash(hasher)
}
}

impl PartialEq for RecGroup {
fn eq(&self, other: &RecGroup) -> bool {
self.types() == other.types()
}
}

impl Eq for RecGroup {}

/// Represents a subtype of possible other types in a WebAssembly module.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct SubType {
Expand Down Expand Up @@ -416,6 +430,35 @@ impl SubType {
pub fn unwrap_struct(&self) -> &StructType {
self.composite_type.unwrap_struct()
}

/// Maps any `UnpackedIndex` via the specified closure.
pub(crate) fn remap_indices(
&mut self,
f: &mut dyn FnMut(&mut PackedIndex) -> Result<()>,
) -> Result<()> {
if let Some(idx) = &mut self.supertype_idx {
f(idx)?;
}
match &mut self.composite_type {
CompositeType::Func(ty) => {
for ty in ty.params_mut() {
ty.remap_indices(f)?;
}
for ty in ty.results_mut() {
ty.remap_indices(f)?;
}
}
CompositeType::Array(ty) => {
ty.0.remap_indices(f)?;
}
CompositeType::Struct(ty) => {
for field in ty.fields.iter_mut() {
field.remap_indices(f)?;
}
}
}
Ok(())
}
}

/// Represents a composite type in a WebAssembly module.
Expand Down Expand Up @@ -562,6 +605,19 @@ pub struct FieldType {
pub mutable: bool,
}

impl FieldType {
/// Maps any `UnpackedIndex` via the specified closure.
pub(crate) fn remap_indices(
&mut self,
f: &mut dyn FnMut(&mut PackedIndex) -> Result<()>,
) -> Result<()> {
match &mut self.element_type {
StorageType::I8 | StorageType::I16 => Ok(()),
StorageType::Val(ty) => ty.remap_indices(f),
}
}
}

/// Represents storage types introduced in the GC spec for array and struct fields.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum StorageType {
Expand Down Expand Up @@ -640,6 +696,23 @@ impl ValType {
Self::Ref(rt) => rt.is_nullable(),
}
}

/// Maps any `UnpackedIndex` via the specified closure.
pub(crate) fn remap_indices(
&mut self,
map: &mut dyn FnMut(&mut PackedIndex) -> Result<()>,
) -> Result<()> {
match self {
ValType::Ref(r) => {
if let Some(mut idx) = r.type_index() {
map(&mut idx)?;
*r = RefType::concrete(r.is_nullable(), idx);
}
}
ValType::I32 | ValType::I64 | ValType::F32 | ValType::F64 | ValType::V128 => {}
}
Ok(())
}
}

/// A reference type.
Expand Down
Loading

0 comments on commit 230ce7f

Please sign in to comment.