diff --git a/Cargo.lock b/Cargo.lock index 182b3eb594..3bba059320 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -742,9 +742,6 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" -dependencies = [ - "ahash", -] [[package]] name = "heck" @@ -1930,7 +1927,6 @@ dependencies = [ "anyhow", "criterion", "env_logger", - "hashbrown 0.14.2", "indexmap 2.1.0", "log", "once_cell", diff --git a/crates/wasmparser/Cargo.toml b/crates/wasmparser/Cargo.toml index 6a119523da..5a61918cb8 100644 --- a/crates/wasmparser/Cargo.toml +++ b/crates/wasmparser/Cargo.toml @@ -13,7 +13,6 @@ edition.workspace = true exclude = ["benches/*.wasm"] [dependencies] -hashbrown = { version = "0.14.2", default-features = false, features = ["ahash"] } indexmap = { workspace = true } semver = { workspace = true } diff --git a/crates/wasmparser/src/readers/core/types.rs b/crates/wasmparser/src/readers/core/types.rs index 25744e1b62..5c189da003 100644 --- a/crates/wasmparser/src/readers/core/types.rs +++ b/crates/wasmparser/src/readers/core/types.rs @@ -13,14 +13,14 @@ * limitations under the License. */ -use std::fmt::{self, Debug, Write}; - use crate::limits::{ MAX_WASM_FUNCTION_PARAMS, MAX_WASM_FUNCTION_RETURNS, MAX_WASM_STRUCT_FIELDS, MAX_WASM_SUPERTYPES, MAX_WASM_TYPES, }; use crate::types::CoreTypeId; use crate::{BinaryReader, BinaryReaderError, FromReader, Result, SectionLimited}; +use std::fmt::{self, Debug, Write}; +use std::hash::{Hash, Hasher}; mod matches; pub(crate) use self::matches::{Matches, WithRecGroup}; @@ -384,6 +384,20 @@ impl RecGroup { } } +impl Hash for RecGroup { + fn hash(&self, hasher: &mut H) { + self.types().hash(hasher) + } +} + +impl PartialEq for RecGroup { + fn eq(&self, other: &RecGroup) -> bool { + self.types() == other.types() + } +} + +impl Eq for RecGroup {} + /// Represents a subtype of possible other types in a WebAssembly module. #[derive(Debug, Clone, Hash, PartialEq, Eq)] pub struct SubType { @@ -416,6 +430,35 @@ impl SubType { pub fn unwrap_struct(&self) -> &StructType { self.composite_type.unwrap_struct() } + + /// Maps any `UnpackedIndex` via the specified closure. + pub(crate) fn remap_indices( + &mut self, + f: &mut dyn FnMut(&mut PackedIndex) -> Result<()>, + ) -> Result<()> { + if let Some(idx) = &mut self.supertype_idx { + f(idx)?; + } + match &mut self.composite_type { + CompositeType::Func(ty) => { + for ty in ty.params_mut() { + ty.remap_indices(f)?; + } + for ty in ty.results_mut() { + ty.remap_indices(f)?; + } + } + CompositeType::Array(ty) => { + ty.0.remap_indices(f)?; + } + CompositeType::Struct(ty) => { + for field in ty.fields.iter_mut() { + field.remap_indices(f)?; + } + } + } + Ok(()) + } } /// Represents a composite type in a WebAssembly module. @@ -562,6 +605,19 @@ pub struct FieldType { pub mutable: bool, } +impl FieldType { + /// Maps any `UnpackedIndex` via the specified closure. + pub(crate) fn remap_indices( + &mut self, + f: &mut dyn FnMut(&mut PackedIndex) -> Result<()>, + ) -> Result<()> { + match &mut self.element_type { + StorageType::I8 | StorageType::I16 => Ok(()), + StorageType::Val(ty) => ty.remap_indices(f), + } + } +} + /// Represents storage types introduced in the GC spec for array and struct fields. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum StorageType { @@ -640,6 +696,23 @@ impl ValType { Self::Ref(rt) => rt.is_nullable(), } } + + /// Maps any `UnpackedIndex` via the specified closure. + pub(crate) fn remap_indices( + &mut self, + map: &mut dyn FnMut(&mut PackedIndex) -> Result<()>, + ) -> Result<()> { + match self { + ValType::Ref(r) => { + if let Some(mut idx) = r.type_index() { + map(&mut idx)?; + *r = RefType::concrete(r.is_nullable(), idx); + } + } + ValType::I32 | ValType::I64 | ValType::F32 | ValType::F64 | ValType::V128 => {} + } + Ok(()) + } } /// A reference type. diff --git a/crates/wasmparser/src/resources.rs b/crates/wasmparser/src/resources.rs index 7b20e0e97c..adcfd0df1a 100644 --- a/crates/wasmparser/src/resources.rs +++ b/crates/wasmparser/src/resources.rs @@ -13,6 +13,7 @@ * limitations under the License. */ +use crate::types::CoreTypeId; use crate::{ BinaryReaderError, FuncType, GlobalType, HeapType, MemoryType, RefType, TableType, ValType, WasmFeatures, @@ -48,7 +49,7 @@ pub trait WasmFuncType: Clone { fn output_at(&self, at: u32) -> Option; /// Returns the list of inputs as an iterator. - fn inputs(self) -> WasmFuncTypeInputs + fn inputs(&self) -> WasmFuncTypeInputs<'_, Self> where Self: Sized, { @@ -60,7 +61,7 @@ pub trait WasmFuncType: Clone { } /// Returns the list of outputs as an iterator. - fn outputs(self) -> WasmFuncTypeOutputs + fn outputs(&self) -> WasmFuncTypeOutputs<'_, Self> where Self: Sized, { @@ -92,14 +93,14 @@ where /// Iterator over the inputs of a Wasm function type. #[derive(Clone)] -pub struct WasmFuncTypeInputs { +pub struct WasmFuncTypeInputs<'a, T> { /// The iterated-over function type. - func_type: T, + func_type: &'a T, /// The range we're iterating over. range: Range, } -impl Iterator for WasmFuncTypeInputs +impl Iterator for WasmFuncTypeInputs<'_, T> where T: WasmFuncType, { @@ -116,7 +117,7 @@ where } } -impl DoubleEndedIterator for WasmFuncTypeInputs +impl DoubleEndedIterator for WasmFuncTypeInputs<'_, T> where T: WasmFuncType, { @@ -127,7 +128,7 @@ where } } -impl ExactSizeIterator for WasmFuncTypeInputs +impl ExactSizeIterator for WasmFuncTypeInputs<'_, T> where T: WasmFuncType, { @@ -138,14 +139,14 @@ where /// Iterator over the outputs of a Wasm function type. #[derive(Clone)] -pub struct WasmFuncTypeOutputs { +pub struct WasmFuncTypeOutputs<'a, T> { /// The iterated-over function type. - func_type: T, + func_type: &'a T, /// The range we're iterating over. range: Range, } -impl Iterator for WasmFuncTypeOutputs +impl Iterator for WasmFuncTypeOutputs<'_, T> where T: WasmFuncType, { @@ -162,7 +163,7 @@ where } } -impl DoubleEndedIterator for WasmFuncTypeOutputs +impl DoubleEndedIterator for WasmFuncTypeOutputs<'_, T> where T: WasmFuncType, { @@ -173,7 +174,7 @@ where } } -impl ExactSizeIterator for WasmFuncTypeOutputs +impl ExactSizeIterator for WasmFuncTypeOutputs<'_, T> where T: WasmFuncType, { @@ -205,7 +206,7 @@ pub trait WasmModuleResources { /// Returns the tag at given index. /// /// The tag's function type must be canonicalized. - fn tag_at(&self, at: u32) -> Option; + fn tag_at(&self, at: u32) -> Option<&Self::FuncType>; /// Returns the global variable at given index. /// @@ -215,20 +216,16 @@ pub trait WasmModuleResources { /// Returns the `FuncType` associated with the given type index. /// /// The function type must be canonicalized. - fn func_type_at(&self, type_idx: u32) -> Option; + fn func_type_at(&self, type_idx: u32) -> Option<&Self::FuncType>; - /// Returns the type index associated with the given function + /// Returns the type id associated with the given function /// index. - /// - /// ```ignore - /// type_of_function = func_type_at(type_index_of_function) - /// ``` - fn type_index_of_function(&self, func_idx: u32) -> Option; + fn type_id_of_function(&self, func_idx: u32) -> Option; /// Returns the `FuncType` associated with the given function index. /// /// The function type must be canonicalized. - fn type_of_function(&self, func_idx: u32) -> Option; + fn type_of_function(&self, func_idx: u32) -> Option<&Self::FuncType>; /// Returns the element type at the given index. /// @@ -238,39 +235,41 @@ pub trait WasmModuleResources { /// Is `a` a subtype of `b`? fn is_subtype(&self, a: ValType, b: ValType) -> bool; - /// Check a value type. + /// Check and canonicalize a value type. /// - /// This requires using func_type_at to check references + /// This will validate that `t` is valid under the `features` provided and + /// then additionally validate the structure of `t`. For example any type + /// references that `t` makes are validated and canonicalized. fn check_value_type( &self, - t: ValType, + t: &mut ValType, features: &WasmFeatures, offset: usize, - ) -> Result<(), BinaryReaderError>; + ) -> Result<(), BinaryReaderError> { + features + .check_value_type(*t) + .map_err(|s| BinaryReaderError::new(s, offset))?; + match t { + ValType::Ref(r) => { + let nullable = r.is_nullable(); + let mut hty = r.heap_type(); + self.check_heap_type(&mut hty, offset)?; + *r = RefType::new(nullable, hty).unwrap(); + Ok(()) + } + ValType::I32 | ValType::I64 | ValType::F32 | ValType::F64 | ValType::V128 => Ok(()), + } + } - /// Checks that a `HeapType` is valid, notably its function index if one is - /// used. + /// Checks that a `HeapType` is valid and then additionally place it in its + /// canonical form. + /// + /// Similar to `check_value_type` but for heap types. fn check_heap_type( &self, - heap_type: HeapType, - features: &WasmFeatures, + heap_type: &mut HeapType, offset: usize, - ) -> Result<(), BinaryReaderError> { - // Delegate to the generic value type validation which will have the - // same validity checks. - self.check_value_type( - RefType::new(true, heap_type) - .ok_or_else(|| { - BinaryReaderError::new( - "heap type index beyond this crate's implementation limits", - offset, - ) - })? - .into(), - features, - offset, - ) - } + ) -> Result<(), BinaryReaderError>; /// Returns the number of elements. fn element_count(&self) -> u32; @@ -281,9 +280,6 @@ pub trait WasmModuleResources { /// Returns whether the function index is referenced in the module anywhere /// outside of the start/function sections. fn is_function_referenced(&self, idx: u32) -> bool; - - /// Canonicalize the given value type in place. - fn canonicalize_valtype(&self, valtype: &mut ValType); } impl WasmModuleResources for &'_ T @@ -298,28 +294,23 @@ where fn memory_at(&self, at: u32) -> Option { T::memory_at(self, at) } - fn tag_at(&self, at: u32) -> Option { + fn tag_at(&self, at: u32) -> Option<&Self::FuncType> { T::tag_at(self, at) } fn global_at(&self, at: u32) -> Option { T::global_at(self, at) } - fn func_type_at(&self, at: u32) -> Option { + fn func_type_at(&self, at: u32) -> Option<&Self::FuncType> { T::func_type_at(self, at) } - fn type_index_of_function(&self, func_idx: u32) -> Option { - T::type_index_of_function(self, func_idx) + fn type_id_of_function(&self, func_idx: u32) -> Option { + T::type_id_of_function(self, func_idx) } - fn type_of_function(&self, func_idx: u32) -> Option { + fn type_of_function(&self, func_idx: u32) -> Option<&Self::FuncType> { T::type_of_function(self, func_idx) } - fn check_value_type( - &self, - t: ValType, - features: &WasmFeatures, - offset: usize, - ) -> Result<(), BinaryReaderError> { - T::check_value_type(self, t, features, offset) + fn check_heap_type(&self, t: &mut HeapType, offset: usize) -> Result<(), BinaryReaderError> { + T::check_heap_type(self, t, offset) } fn element_type_at(&self, at: u32) -> Option { T::element_type_at(self, at) @@ -337,9 +328,6 @@ where fn is_function_referenced(&self, idx: u32) -> bool { T::is_function_referenced(self, idx) } - fn canonicalize_valtype(&self, valtype: &mut ValType) { - T::canonicalize_valtype(self, valtype) - } } impl WasmModuleResources for std::sync::Arc @@ -356,7 +344,7 @@ where T::memory_at(self, at) } - fn tag_at(&self, at: u32) -> Option { + fn tag_at(&self, at: u32) -> Option<&Self::FuncType> { T::tag_at(self, at) } @@ -364,25 +352,20 @@ where T::global_at(self, at) } - fn func_type_at(&self, type_idx: u32) -> Option { + fn func_type_at(&self, type_idx: u32) -> Option<&Self::FuncType> { T::func_type_at(self, type_idx) } - fn type_index_of_function(&self, func_idx: u32) -> Option { - T::type_index_of_function(self, func_idx) + fn type_id_of_function(&self, func_idx: u32) -> Option { + T::type_id_of_function(self, func_idx) } - fn type_of_function(&self, func_idx: u32) -> Option { + fn type_of_function(&self, func_idx: u32) -> Option<&Self::FuncType> { T::type_of_function(self, func_idx) } - fn check_value_type( - &self, - t: ValType, - features: &WasmFeatures, - offset: usize, - ) -> Result<(), BinaryReaderError> { - T::check_value_type(self, t, features, offset) + fn check_heap_type(&self, t: &mut HeapType, offset: usize) -> Result<(), BinaryReaderError> { + T::check_heap_type(self, t, offset) } fn element_type_at(&self, at: u32) -> Option { @@ -404,10 +387,6 @@ where fn is_function_referenced(&self, idx: u32) -> bool { T::is_function_referenced(self, idx) } - - fn canonicalize_valtype(&self, valtype: &mut ValType) { - T::canonicalize_valtype(self, valtype) - } } impl WasmFuncType for FuncType { diff --git a/crates/wasmparser/src/validator.rs b/crates/wasmparser/src/validator.rs index ebb4744315..12beb46526 100644 --- a/crates/wasmparser/src/validator.rs +++ b/crates/wasmparser/src/validator.rs @@ -15,7 +15,7 @@ use crate::{ limits::*, BinaryReaderError, Encoding, FromReader, FunctionBody, HeapType, Parser, Payload, - Result, SectionLimited, ValType, WASM_COMPONENT_VERSION, WASM_MODULE_VERSION, + RefType, Result, SectionLimited, ValType, WASM_COMPONENT_VERSION, WASM_MODULE_VERSION, }; use std::mem; use std::ops::Range; @@ -292,58 +292,61 @@ impl WasmFeatures { Err("floating-point support is disabled") } } - ValType::Ref(r) => { - if !self.reference_types { - return Err("reference types support is not enabled"); + ValType::Ref(r) => self.check_ref_type(r), + ValType::V128 => { + if self.simd { + Ok(()) + } else { + Err("SIMD support is not enabled") } - match (r.heap_type(), r.is_nullable()) { - // funcref/externref only require `reference-types` - (HeapType::Func, true) | (HeapType::Extern, true) => Ok(()), - - // non-nullable func/extern references requires the - // `function-references` proposal - (HeapType::Func | HeapType::Extern, false) => { - if self.function_references { - Ok(()) - } else { - Err("function references required for non-nullable types") - } - } - // indexed types require at least the function-references - // proposal - (HeapType::Concrete(_), _) => { - if self.function_references { - Ok(()) - } else { - Err("function references required for index reference types") - } - } + } + } + } - // types added in the gc proposal - ( - HeapType::Any - | HeapType::None - | HeapType::Eq - | HeapType::Struct - | HeapType::Array - | HeapType::I31 - | HeapType::NoExtern - | HeapType::NoFunc, - _, - ) => { - if self.gc { - Ok(()) - } else { - Err("heap types not supported without the gc feature") - } - } + pub(crate) fn check_ref_type(&self, r: RefType) -> Result<(), &'static str> { + if !self.reference_types { + return Err("reference types support is not enabled"); + } + match (r.heap_type(), r.is_nullable()) { + // funcref/externref only require `reference-types`. + (HeapType::Func, true) | (HeapType::Extern, true) => Ok(()), + + // Non-nullable func/extern references requires the + // `function-references` proposal. + (HeapType::Func | HeapType::Extern, false) => { + if self.function_references { + Ok(()) + } else { + Err("function references required for non-nullable types") } } - ValType::V128 => { - if self.simd { + + // Indexed types require either the function-references or gc + // proposal as gc implies function references here. + (HeapType::Concrete(_), _) => { + if self.function_references || self.gc { Ok(()) } else { - Err("SIMD support is not enabled") + Err("function references required for index reference types") + } + } + + // These types were added in the gc proposal. + ( + HeapType::Any + | HeapType::None + | HeapType::Eq + | HeapType::Struct + | HeapType::Array + | HeapType::I31 + | HeapType::NoExtern + | HeapType::NoFunc, + _, + ) => { + if self.gc { + Ok(()) + } else { + Err("heap types not supported without the gc feature") } } } diff --git a/crates/wasmparser/src/validator/component.rs b/crates/wasmparser/src/validator/component.rs index d4035b1720..808e939213 100644 --- a/crates/wasmparser/src/validator/component.rs +++ b/crates/wasmparser/src/validator/component.rs @@ -1485,8 +1485,8 @@ impl ComponentState { crate::ModuleTypeDeclaration::Type(ty) => { state.add_types(RecGroup::implicit(ty), features, types, offset, true)?; } - crate::ModuleTypeDeclaration::Export { name, ty } => { - let ty = state.check_type_ref(&ty, features, types, offset)?; + crate::ModuleTypeDeclaration::Export { name, mut ty } => { + let ty = state.check_type_ref(&mut ty, features, types, offset)?; state.add_export(name, ty, features, offset, true, types)?; } crate::ModuleTypeDeclaration::OuterAlias { kind, count, index } => { diff --git a/crates/wasmparser/src/validator/core.rs b/crates/wasmparser/src/validator/core.rs index 0c1cd5228a..651f085738 100644 --- a/crates/wasmparser/src/validator/core.rs +++ b/crates/wasmparser/src/validator/core.rs @@ -3,15 +3,13 @@ mod canonical; -use self::{ - arc::MaybeOwned, - canonical::{canonicalize_and_intern_rec_group, TypeCanonicalizer}, -}; +use self::{arc::MaybeOwned, canonical::canonicalize_and_intern_rec_group}; use super::{ check_max, combine_type_sizes, operators::{ty_to_str, OperatorValidator, OperatorValidatorAllocations}, types::{CoreTypeId, EntityType, RecGroupId, TypeAlloc, TypeList}, }; +use crate::validator::types::TypeIdentifier; use crate::{ limits::*, BinaryReaderError, CompositeType, ConstExpr, Data, DataKind, Element, ElementKind, ExternalKind, FuncType, Global, GlobalType, HeapType, MemoryType, PackedIndex, RecGroup, @@ -128,13 +126,13 @@ impl ModuleState { pub fn add_global( &mut self, - global: Global, + mut global: Global, features: &WasmFeatures, types: &TypeList, offset: usize, ) -> Result<()> { self.module - .check_global_type(&global.ty, features, offset)?; + .check_global_type(&mut global.ty, features, offset)?; self.check_const_expr(&global.init_expr, global.ty.content_type, features, types)?; self.module.assert_mut().globals.push(global.ty); Ok(()) @@ -142,12 +140,13 @@ impl ModuleState { pub fn add_table( &mut self, - table: Table<'_>, + mut table: Table<'_>, features: &WasmFeatures, types: &TypeList, offset: usize, ) -> Result<()> { - self.module.check_table_type(&table.ty, features, offset)?; + self.module + .check_table_type(&mut table.ty, features, offset)?; match &table.init { TableInit::RefNull => { @@ -191,18 +190,17 @@ impl ModuleState { pub fn add_element_segment( &mut self, - e: Element, + mut e: Element, features: &WasmFeatures, types: &TypeList, offset: usize, ) -> Result<()> { // the `funcref` value type is allowed all the way back to the MVP, so // don't check it here - let element_ty = match &e.items { + let element_ty = match &mut e.items { crate::ElementItems::Functions(_) => RefType::FUNC, crate::ElementItems::Expressions(ty, _) => { - self.module - .check_value_type(ValType::Ref(*ty), features, offset)?; + self.module.check_ref_type(ty, features, offset)?; *ty } }; @@ -560,8 +558,6 @@ impl Module { canonicalize_and_intern_rec_group(features, types, self, rec_group, offset)?; let range = &types[rec_group_id]; - - use crate::validator::types::TypeIdentifier; let start = range.start.index(); let end = range.end.index(); @@ -613,10 +609,15 @@ impl Module { features: &WasmFeatures, offset: usize, ) -> Result<()> { + let check = |ty: &ValType| { + features + .check_value_type(*ty) + .map_err(|e| BinaryReaderError::new(e, offset)) + }; match ty { CompositeType::Func(t) => { for ty in t.params().iter().chain(t.results()) { - self.check_value_type(*ty, features, offset)?; + check(ty)?; } if t.results().len() > 1 && !features.multi_value { return Err(BinaryReaderError::new( @@ -632,11 +633,9 @@ impl Module { offset, )); } - match t.0.element_type { + match &t.0.element_type { StorageType::I8 | StorageType::I16 => {} - StorageType::Val(value_type) => { - self.check_value_type(value_type, features, offset)?; - } + StorageType::Val(value_type) => check(value_type)?, }; } CompositeType::Struct(t) => { @@ -647,7 +646,10 @@ impl Module { )); } for ty in t.fields.iter() { - self.check_storage_type(ty.element_type, features, offset)?; + match &ty.element_type { + StorageType::I8 | StorageType::I16 => {} + StorageType::Val(value_type) => check(value_type)?, + } } } } @@ -656,12 +658,12 @@ impl Module { pub fn add_import( &mut self, - import: crate::Import, + mut import: crate::Import, features: &WasmFeatures, types: &TypeList, offset: usize, ) -> Result<()> { - let entity = self.check_type_ref(&import.ty, features, types, offset)?; + let entity = self.check_type_ref(&mut import.ty, features, types, offset)?; let (len, max, desc) = match import.ty { TypeRef::Func(type_index) => { @@ -796,7 +798,7 @@ impl Module { pub fn check_type_ref( &self, - type_ref: &TypeRef, + type_ref: &mut TypeRef, features: &WasmFeatures, types: &TypeList, offset: usize, @@ -827,14 +829,14 @@ impl Module { fn check_table_type( &self, - ty: &TableType, + ty: &mut TableType, features: &WasmFeatures, offset: usize, ) -> Result<()> { // the `funcref` value type is allowed all the way back to the MVP, so // don't check it here if ty.element_type != RefType::FUNCREF { - self.check_value_type(ValType::Ref(ty.element_type), features, offset)? + self.check_ref_type(&mut ty.element_type, features, offset)? } self.check_limits(ty.initial, ty.maximum, offset)?; @@ -916,37 +918,40 @@ impl Module { .collect::>() } - fn check_storage_type( + fn check_value_type( &self, - ty: StorageType, + ty: &mut ValType, features: &WasmFeatures, offset: usize, ) -> Result<()> { + // The above only checks the value type for features. + // We must check it if it's a reference. match ty { - StorageType::I8 | StorageType::I16 => {} - StorageType::Val(value_type) => { - self.check_value_type(value_type, features, offset)?; - } + ValType::Ref(rt) => self.check_ref_type(rt, features, offset), + _ => features + .check_value_type(*ty) + .map_err(|e| BinaryReaderError::new(e, offset)), } - Ok(()) } - fn check_value_type(&self, ty: ValType, features: &WasmFeatures, offset: usize) -> Result<()> { + fn check_ref_type( + &self, + ty: &mut RefType, + features: &WasmFeatures, + offset: usize, + ) -> Result<()> { features - .check_value_type(ty) + .check_ref_type(*ty) .map_err(|e| BinaryReaderError::new(e, offset))?; - - // The above only checks the value type for features. - // We must check it if it's a reference. - match ty { - ValType::Ref(rt) => self.check_ref_type(rt, offset), - _ => Ok(()), - } + let mut hty = ty.heap_type(); + self.check_heap_type(&mut hty, offset)?; + *ty = RefType::new(ty.is_nullable(), hty).unwrap(); + Ok(()) } - fn check_ref_type(&self, ty: RefType, offset: usize) -> Result<()> { + fn check_heap_type(&self, ty: &mut HeapType, offset: usize) -> Result<()> { // Check that the heap type is valid - match ty.heap_type() { + let type_index = match ty { HeapType::Func | HeapType::Extern | HeapType::Any @@ -956,21 +961,19 @@ impl Module { | HeapType::Eq | HeapType::Struct | HeapType::Array - | HeapType::I31 => Ok(()), - HeapType::Concrete(type_index) => { - match type_index { - UnpackedIndex::Module(idx) => { - let _ = self.type_id_at(idx, offset)?; - Ok(()) - } - UnpackedIndex::RecGroup(_) | UnpackedIndex::Id(_) => { - // If the type index has already been canonicalized, - // then we already checked that it was in bounds and - // valid at that time. - Ok(()) - } - } + | HeapType::I31 => return Ok(()), + HeapType::Concrete(type_index) => type_index, + }; + match type_index { + UnpackedIndex::Module(idx) => { + let id = self.type_id_at(*idx, offset)?; + *type_index = UnpackedIndex::Id(id); + Ok(()) } + // Types at this stage should not be canonicalized. All + // canonicalized types should already be validated meaning they + // shouldn't be double-checked here again. + UnpackedIndex::RecGroup(_) | UnpackedIndex::Id(_) => unreachable!(), } } @@ -999,11 +1002,11 @@ impl Module { fn check_global_type( &self, - ty: &GlobalType, + ty: &mut GlobalType, features: &WasmFeatures, offset: usize, ) -> Result<()> { - self.check_value_type(ty.content_type, features, offset) + self.check_value_type(&mut ty.content_type, features, offset) } fn check_limits(&self, initial: T, maximum: Option, offset: usize) -> Result<()> @@ -1151,94 +1154,53 @@ struct OperatorValidatorResources<'a> { types: &'a TypeList, } -fn canonicalize_valtype_impl( - module: &Module, - valtype: &mut ValType, - referenced_from: Option<(&TypeList, CoreTypeId)>, -) { - let mut canonicalizer = TypeCanonicalizer::new(module, usize::MAX); - canonicalizer.with_only_ids(); - - if let Some((types, id)) = referenced_from { - let group_id = types.rec_group_id_of(id); - canonicalizer.within_rec_group(types, group_id); - } - - canonicalizer - .canonicalize_val_type(valtype) - .expect("already checked type references are in-bounds at this point"); -} - impl WasmModuleResources for OperatorValidatorResources<'_> { type FuncType = crate::FuncType; fn table_at(&self, at: u32) -> Option { - let mut ty = self.module.tables.get(at as usize).cloned()?; - let mut val_ty = ValType::Ref(ty.element_type); - self.canonicalize_valtype(&mut val_ty); - ty.element_type = match val_ty { - ValType::Ref(r) => r, - _ => unreachable!(), - }; - Some(ty) + self.module.tables.get(at as usize).cloned() } fn memory_at(&self, at: u32) -> Option { self.module.memories.get(at as usize).cloned() } - fn tag_at(&self, at: u32) -> Option { + fn tag_at(&self, at: u32) -> Option<&Self::FuncType> { let type_id = *self.module.tags.get(at as usize)?; - let mut f = self.types[type_id].unwrap_func().clone(); - for ty in f.params_mut() { - canonicalize_valtype_impl(self.module, ty, Some((self.types, type_id))); - } - for ty in f.results_mut() { - canonicalize_valtype_impl(self.module, ty, Some((self.types, type_id))); - } - Some(f) + Some(self.types[type_id].unwrap_func()) } fn global_at(&self, at: u32) -> Option { - let mut ty = self.module.globals.get(at as usize).cloned()?; - self.canonicalize_valtype(&mut ty.content_type); - Some(ty) + self.module.globals.get(at as usize).cloned() } - fn func_type_at(&self, at: u32) -> Option { + fn func_type_at(&self, at: u32) -> Option<&Self::FuncType> { let id = *self.module.types.get(at as usize)?; - let mut f = match &self.types[id].composite_type { - CompositeType::Func(f) => f.clone(), - _ => return None, - }; - for ty in f.params_mut() { - canonicalize_valtype_impl(self.module, ty, Some((self.types, id))); - } - for ty in f.results_mut() { - canonicalize_valtype_impl(self.module, ty, Some((self.types, id))); + match &self.types[id].composite_type { + CompositeType::Func(f) => Some(f), + _ => None, } - Some(f) } - fn type_index_of_function(&self, at: u32) -> Option { - self.module.functions.get(at as usize).cloned() + fn type_id_of_function(&self, at: u32) -> Option { + let type_index = self.module.functions.get(at as usize)?; + self.module.types.get(*type_index as usize).copied() } - fn type_of_function(&self, at: u32) -> Option { - self.func_type_at(self.type_index_of_function(at)?.into()) + fn type_of_function(&self, at: u32) -> Option<&Self::FuncType> { + let type_index = self.module.functions.get(at as usize)?; + self.func_type_at(*type_index) } - fn check_value_type(&self, t: ValType, features: &WasmFeatures, offset: usize) -> Result<()> { - self.module.check_value_type(t, features, offset) + fn check_heap_type(&self, t: &mut HeapType, offset: usize) -> Result<()> { + self.module.check_heap_type(t, offset) } fn element_type_at(&self, at: u32) -> Option { self.module.element_types.get(at as usize).cloned() } - fn is_subtype(&self, mut a: ValType, mut b: ValType) -> bool { - self.canonicalize_valtype(&mut a); - self.canonicalize_valtype(&mut b); + fn is_subtype(&self, a: ValType, b: ValType) -> bool { self.types.valtype_is_subtype(a, b) } @@ -1253,10 +1215,6 @@ impl WasmModuleResources for OperatorValidatorResources<'_> { fn is_function_referenced(&self, idx: u32) -> bool { self.module.function_references.contains(&idx) } - - fn canonicalize_valtype(&self, valtype: &mut ValType) { - canonicalize_valtype_impl(self.module, valtype, None); - } } /// The implementation of [`WasmModuleResources`] used by @@ -1267,77 +1225,54 @@ impl WasmModuleResources for ValidatorResources { type FuncType = crate::FuncType; fn table_at(&self, at: u32) -> Option { - let mut ty = self.0.tables.get(at as usize).cloned()?; - let mut val_ty = ValType::Ref(ty.element_type); - self.canonicalize_valtype(&mut val_ty); - ty.element_type = match val_ty { - ValType::Ref(r) => r, - _ => unreachable!(), - }; - Some(ty) + self.0.tables.get(at as usize).cloned() } fn memory_at(&self, at: u32) -> Option { self.0.memories.get(at as usize).cloned() } - fn tag_at(&self, at: u32) -> Option { + fn tag_at(&self, at: u32) -> Option<&Self::FuncType> { let id = *self.0.tags.get(at as usize)?; let types = self.0.snapshot.as_ref().unwrap(); - let mut f = match &types[id].composite_type { - CompositeType::Func(f) => f.clone(), - _ => return None, - }; - for ty in f.params_mut() { - canonicalize_valtype_impl(&self.0, ty, Some((types, id))); + match &types[id].composite_type { + CompositeType::Func(f) => Some(f), + _ => None, } - for ty in f.results_mut() { - canonicalize_valtype_impl(&self.0, ty, Some((types, id))); - } - Some(f) } fn global_at(&self, at: u32) -> Option { - let mut ty = self.0.globals.get(at as usize).cloned()?; - self.canonicalize_valtype(&mut ty.content_type); - Some(ty) + self.0.globals.get(at as usize).cloned() } - fn func_type_at(&self, at: u32) -> Option { + fn func_type_at(&self, at: u32) -> Option<&Self::FuncType> { let id = *self.0.types.get(at as usize)?; let types = self.0.snapshot.as_ref().unwrap(); - let mut f = match &types[id].composite_type { - CompositeType::Func(f) => f.clone(), - _ => return None, - }; - for ty in f.params_mut() { - canonicalize_valtype_impl(&self.0, ty, Some((types, id))); - } - for ty in f.results_mut() { - canonicalize_valtype_impl(&self.0, ty, Some((types, id))); + match &types[id].composite_type { + CompositeType::Func(f) => Some(f), + _ => None, } - Some(f) } - fn type_index_of_function(&self, at: u32) -> Option { - self.0.functions.get(at as usize).cloned() + fn type_id_of_function(&self, at: u32) -> Option { + let type_index = *self.0.functions.get(at as usize)?; + self.0.types.get(type_index as usize).copied() } - fn type_of_function(&self, at: u32) -> Option { - self.func_type_at(self.type_index_of_function(at)?) + fn type_of_function(&self, at: u32) -> Option<&Self::FuncType> { + let type_index = *self.0.functions.get(at as usize)?; + self.func_type_at(type_index) } - fn check_value_type(&self, t: ValType, features: &WasmFeatures, offset: usize) -> Result<()> { - self.0.check_value_type(t, features, offset) + fn check_heap_type(&self, t: &mut HeapType, offset: usize) -> Result<()> { + self.0.check_heap_type(t, offset) } fn element_type_at(&self, at: u32) -> Option { self.0.element_types.get(at as usize).cloned() } - fn is_subtype(&self, mut a: ValType, mut b: ValType) -> bool { - self.canonicalize_valtype(&mut a); - self.canonicalize_valtype(&mut b); + fn is_subtype(&self, a: ValType, b: ValType) -> bool { self.0.snapshot.as_ref().unwrap().valtype_is_subtype(a, b) } @@ -1352,10 +1287,6 @@ impl WasmModuleResources for ValidatorResources { fn is_function_referenced(&self, idx: u32) -> bool { self.0.function_references.contains(&idx) } - - fn canonicalize_valtype(&self, valtype: &mut ValType) { - canonicalize_valtype_impl(&self.0, valtype, None); - } } const _: () = { diff --git a/crates/wasmparser/src/validator/core/canonical.rs b/crates/wasmparser/src/validator/core/canonical.rs index a94e40b624..6e04148f7c 100644 --- a/crates/wasmparser/src/validator/core/canonical.rs +++ b/crates/wasmparser/src/validator/core/canonical.rs @@ -67,11 +67,10 @@ //! perform additional expensive checks to see if the types match or not //! (since the whole point of canonicalization is to avoid that!). -use super::{Module, RecGroupId, TypeAlloc, TypeList}; +use super::{Module, RecGroupId, TypeAlloc}; use crate::{ types::{CoreTypeId, TypeIdentifier}, - ArrayType, CompositeType, FieldType, FuncType, HeapType, PackedIndex, RecGroup, RefType, - Result, StorageType, StructType, SubType, UnpackedIndex, ValType, WasmFeatures, + PackedIndex, RecGroup, Result, UnpackedIndex, WasmFeatures, }; /// Canonicalize the rec group and return its id and whether it is a new group @@ -140,19 +139,6 @@ impl<'a> TypeCanonicalizer<'a> { self } - /// Configure canonicalization to transform all type indices into - /// `CoreTypeId`s, even from within the same rec group. - pub fn with_only_ids(&mut self) -> &mut Self { - self.mode = CanonicalizationMode::OnlyIds; - self - } - - pub fn within_rec_group(&mut self, types: &TypeList, rec_group_id: RecGroupId) -> &mut Self { - assert_eq!(self.mode, CanonicalizationMode::OnlyIds); - self.within_rec_group = Some(types[rec_group_id].clone()); - self - } - fn allow_gc(&self) -> bool { self.features.map_or(true, |f| f.gc) } @@ -167,7 +153,14 @@ impl<'a> TypeCanonicalizer<'a> { for (rec_group_index, ty) in rec_group.types_mut().iter_mut().enumerate() { let rec_group_index = u32::try_from(rec_group_index).unwrap(); let type_index = self.rec_group_start + rec_group_index; - self.canonicalize_sub_type(ty, type_index)?; + + if let Some(sup) = ty.supertype_idx.as_mut() { + if sup.as_module_index().map_or(false, |i| i >= type_index) { + bail!(self.offset, "supertypes must be defined before subtypes"); + } + } + + ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?; } Ok(()) @@ -237,86 +230,4 @@ impl<'a> TypeCanonicalizer<'a> { }, } } - - fn canonicalize_sub_type(&self, ty: &mut SubType, index: u32) -> Result<()> { - if let Some(sup) = ty.supertype_idx.as_mut() { - if sup.as_module_index().map_or(false, |i| i >= index) { - bail!(self.offset, "supertypes must be defined before subtypes"); - } - - self.canonicalize_type_index(sup)?; - } - - self.canonicalize_composite_type(&mut ty.composite_type) - } - - fn canonicalize_composite_type(&self, ty: &mut CompositeType) -> Result<()> { - match ty { - CompositeType::Func(f) => self.canonicalize_func_type(f), - CompositeType::Array(a) => self.canonicalize_array_type(a), - CompositeType::Struct(s) => self.canonicalize_struct_type(s), - } - } - - fn canonicalize_func_type(&self, ty: &mut FuncType) -> Result<()> { - for ty in ty.params_mut() { - self.canonicalize_val_type(ty)?; - } - for ty in ty.results_mut() { - self.canonicalize_val_type(ty)?; - } - Ok(()) - } - - fn canonicalize_array_type(&self, ty: &mut ArrayType) -> Result<()> { - self.canonicalize_field_type(&mut ty.0) - } - - fn canonicalize_struct_type(&self, ty: &mut StructType) -> Result<()> { - for ty in ty.fields.iter_mut() { - self.canonicalize_field_type(ty)?; - } - Ok(()) - } - - fn canonicalize_field_type(&self, ty: &mut FieldType) -> Result<()> { - self.canonicalize_storage_type(&mut ty.element_type) - } - - fn canonicalize_storage_type(&self, ty: &mut StorageType) -> Result<()> { - match ty { - StorageType::I8 | StorageType::I16 => Ok(()), - StorageType::Val(ty) => self.canonicalize_val_type(ty), - } - } - - pub fn canonicalize_val_type(&self, ty: &mut ValType) -> Result<()> { - match ty { - ValType::I32 | ValType::I64 | ValType::F32 | ValType::F64 | ValType::V128 => Ok(()), - ValType::Ref(ty) => self.canonicalize_ref_type(ty), - } - } - - fn canonicalize_ref_type(&self, ty: &mut RefType) -> Result<()> { - match ty.heap_type() { - HeapType::Concrete(unpacked_index) => { - let mut packed_index = unpacked_index - .pack() - .expect("it was just packed in the `RefType` so we know it fits"); - self.canonicalize_type_index(&mut packed_index)?; - *ty = RefType::concrete(ty.is_nullable(), packed_index); - Ok(()) - } - HeapType::Func - | HeapType::Extern - | HeapType::Any - | HeapType::None - | HeapType::NoExtern - | HeapType::NoFunc - | HeapType::Eq - | HeapType::Struct - | HeapType::Array - | HeapType::I31 => Ok(()), - } - } } diff --git a/crates/wasmparser/src/validator/func.rs b/crates/wasmparser/src/validator/func.rs index 0fdf23504f..4b3ee67722 100644 --- a/crates/wasmparser/src/validator/func.rs +++ b/crates/wasmparser/src/validator/func.rs @@ -248,7 +248,8 @@ impl FuncValidator { #[cfg(test)] mod tests { use super::*; - use crate::WasmFuncType; + use crate::types::CoreTypeId; + use crate::{HeapType, WasmFuncType}; struct EmptyResources; @@ -261,27 +262,22 @@ mod tests { fn memory_at(&self, _at: u32) -> Option { todo!() } - fn tag_at(&self, _at: u32) -> Option { + fn tag_at(&self, _at: u32) -> Option<&Self::FuncType> { todo!() } fn global_at(&self, _at: u32) -> Option { todo!() } - fn func_type_at(&self, _type_idx: u32) -> Option { - Some(EmptyFuncType) + fn func_type_at(&self, _type_idx: u32) -> Option<&Self::FuncType> { + Some(&EmptyFuncType) } - fn type_index_of_function(&self, _at: u32) -> Option { + fn type_id_of_function(&self, _at: u32) -> Option { todo!() } - fn type_of_function(&self, _func_idx: u32) -> Option { + fn type_of_function(&self, _func_idx: u32) -> Option<&Self::FuncType> { todo!() } - fn check_value_type( - &self, - _t: ValType, - _features: &WasmFeatures, - _offset: usize, - ) -> Result<()> { + fn check_heap_type(&self, _t: &mut HeapType, _offset: usize) -> Result<()> { Ok(()) } fn element_type_at(&self, _at: u32) -> Option { @@ -299,9 +295,6 @@ mod tests { fn is_function_referenced(&self, _idx: u32) -> bool { todo!() } - fn canonicalize_valtype(&self, _valtype: &mut ValType) { - todo!() - } } #[derive(Clone)] diff --git a/crates/wasmparser/src/validator/operators.rs b/crates/wasmparser/src/validator/operators.rs index 3fe32da306..314d26245e 100644 --- a/crates/wasmparser/src/validator/operators.rs +++ b/crates/wasmparser/src/validator/operators.rs @@ -24,8 +24,8 @@ use crate::{ limits::MAX_WASM_FUNCTION_LOCALS, BinaryReaderError, BlockType, BrTable, HeapType, Ieee32, - Ieee64, MemArg, PackedIndex, RefType, Result, UnpackedIndex, ValType, VisitOperator, - WasmFeatures, WasmFuncType, WasmModuleResources, V128, + Ieee64, MemArg, RefType, Result, UnpackedIndex, ValType, VisitOperator, WasmFeatures, + WasmFuncType, WasmModuleResources, V128, }; use std::ops::{Deref, DerefMut}; @@ -245,8 +245,7 @@ impl OperatorValidator { } .func_type_at(ty)? .inputs(); - for mut ty in params { - resources.canonicalize_valtype(&mut ty); + for ty in params { ret.locals.define(1, ty); ret.local_inits.push(true); } @@ -279,8 +278,7 @@ impl OperatorValidator { mut ty: ValType, resources: &impl WasmModuleResources, ) -> Result<()> { - resources.check_value_type(ty, &self.features, offset)?; - resources.canonicalize_valtype(&mut ty); + resources.check_value_type(&mut ty, &self.features, offset)?; if count == 0 { return Ok(()); } @@ -711,7 +709,7 @@ where } /// Validates a block type, primarily with various in-flight proposals. - fn check_block_type(&self, ty: BlockType) -> Result<()> { + fn check_block_type(&self, ty: &mut BlockType) -> Result<()> { match ty { BlockType::Empty => Ok(()), BlockType::Type(t) => self @@ -725,7 +723,7 @@ where when multi-value is not enabled", ); } - self.func_type_at(idx)?; + self.func_type_at(*idx)?; Ok(()) } } @@ -734,7 +732,7 @@ where /// Validates a `call` instruction, ensuring that the function index is /// in-bounds and the right types are on the stack to call the function. fn check_call(&mut self, function_index: u32) -> Result<()> { - let ty = match self.resources.type_index_of_function(function_index) { + let ty = match self.resources.type_of_function(function_index) { Some(i) => i, None => { bail!( @@ -746,7 +744,7 @@ where self.check_call_ty(ty) } - fn check_call_ty(&mut self, type_index: u32) -> Result<()> { + fn check_call_type_index(&mut self, type_index: u32) -> Result<()> { let ty = match self.resources.func_type_at(type_index) { Some(i) => i, None => { @@ -756,7 +754,11 @@ where ); } }; - for ty in ty.clone().inputs().rev() { + self.check_call_ty(ty) + } + + fn check_call_ty(&mut self, ty: &R::FuncType) -> Result<()> { + for ty in ty.inputs().rev() { debug_assert_type_indices_are_ids(ty); self.pop_operand(Some(ty))?; } @@ -968,13 +970,13 @@ where Ok(()) } - fn func_type_at(&self, at: u32) -> Result { + fn func_type_at(&self, at: u32) -> Result<&'resources R::FuncType> { self.resources .func_type_at(at) .ok_or_else(|| format_err!(self.offset, "unknown type: type index out of bounds")) } - fn tag_at(&self, at: u32) -> Result { + fn tag_at(&self, at: u32) -> Result<&'resources R::FuncType> { self.resources .tag_at(at) .ok_or_else(|| format_err!(self.offset, "unknown tag {}: tag index out of bounds", at)) @@ -990,10 +992,7 @@ where fn results(&self, ty: BlockType) -> Result + 'resources> { Ok(match ty { BlockType::Empty => Either::B(None.into_iter()), - BlockType::Type(mut t) => { - self.resources.canonicalize_valtype(&mut t); - Either::B(Some(t).into_iter()) - } + BlockType::Type(t) => Either::B(Some(t).into_iter()), BlockType::FuncType(t) => Either::A(self.func_type_at(t)?.outputs()), }) } @@ -1108,24 +1107,24 @@ where self.unreachable()?; Ok(()) } - fn visit_block(&mut self, ty: BlockType) -> Self::Output { - self.check_block_type(ty)?; + fn visit_block(&mut self, mut ty: BlockType) -> Self::Output { + self.check_block_type(&mut ty)?; for ty in self.params(ty)?.rev() { self.pop_operand(Some(ty))?; } self.push_ctrl(FrameKind::Block, ty)?; Ok(()) } - fn visit_loop(&mut self, ty: BlockType) -> Self::Output { - self.check_block_type(ty)?; + fn visit_loop(&mut self, mut ty: BlockType) -> Self::Output { + self.check_block_type(&mut ty)?; for ty in self.params(ty)?.rev() { self.pop_operand(Some(ty))?; } self.push_ctrl(FrameKind::Loop, ty)?; Ok(()) } - fn visit_if(&mut self, ty: BlockType) -> Self::Output { - self.check_block_type(ty)?; + fn visit_if(&mut self, mut ty: BlockType) -> Self::Output { + self.check_block_type(&mut ty)?; self.pop_operand(Some(ValType::I32))?; for ty in self.params(ty)?.rev() { self.pop_operand(Some(ty))?; @@ -1141,8 +1140,8 @@ where self.push_ctrl(FrameKind::Else, frame.block_type)?; Ok(()) } - fn visit_try(&mut self, ty: BlockType) -> Self::Output { - self.check_block_type(ty)?; + fn visit_try(&mut self, mut ty: BlockType) -> Self::Output { + self.check_block_type(&mut ty)?; for ty in self.params(ty)?.rev() { self.pop_operand(Some(ty))?; } @@ -1315,23 +1314,13 @@ where } fn visit_call_ref(&mut self, type_index: u32) -> Self::Output { let unpacked_index = UnpackedIndex::Module(type_index); - let hty = HeapType::Concrete(unpacked_index); - self.resources - .check_heap_type(hty, &self.features, self.offset)?; + let mut hty = HeapType::Concrete(unpacked_index); + self.resources.check_heap_type(&mut hty, self.offset)?; // If `None` is popped then that means a "bottom" type was popped which // is always considered equivalent to the `hty` tag. if let Some(rt) = self.pop_ref()? { - let expected = RefType::concrete( - true, - unpacked_index.pack().ok_or_else(|| { - BinaryReaderError::new( - "implementation limit: type index too large", - self.offset, - ) - })?, - ); - let mut expected = ValType::Ref(expected); - self.resources.canonicalize_valtype(&mut expected); + let expected = RefType::new(true, hty).expect("hty should be previously validated"); + let expected = ValType::Ref(expected); if !self.resources.is_subtype(ValType::Ref(rt), expected) { bail!( self.offset, @@ -1339,7 +1328,7 @@ where ); } } - self.check_call_ty(type_index) + self.check_call_type_index(type_index) } fn visit_return_call_ref(&mut self, type_index: u32) -> Self::Output { self.visit_call_ref(type_index)?; @@ -1408,8 +1397,7 @@ where } fn visit_typed_select(&mut self, mut ty: ValType) -> Self::Output { self.resources - .check_value_type(ty, &self.features, self.offset)?; - self.resources.canonicalize_valtype(&mut ty); + .check_value_type(&mut ty, &self.features, self.offset)?; self.pop_operand(Some(ValType::I32))?; self.pop_operand(Some(ty))?; self.pop_operand(Some(ty))?; @@ -2254,13 +2242,12 @@ where fn visit_atomic_fence(&mut self) -> Self::Output { Ok(()) } - fn visit_ref_null(&mut self, heap_type: HeapType) -> Self::Output { + fn visit_ref_null(&mut self, mut heap_type: HeapType) -> Self::Output { self.resources - .check_heap_type(heap_type, &self.features, self.offset)?; - let mut ty = ValType::Ref( + .check_heap_type(&mut heap_type, self.offset)?; + let ty = ValType::Ref( RefType::new(true, heap_type).expect("existing heap types should be within our limits"), ); - self.resources.canonicalize_valtype(&mut ty); self.push_operand(ty)?; Ok(()) } @@ -2331,8 +2318,8 @@ where Ok(()) } fn visit_ref_func(&mut self, function_index: u32) -> Self::Output { - let type_index = match self.resources.type_index_of_function(function_index) { - Some(idx) => idx, + let type_id = match self.resources.type_id_of_function(function_index) { + Some(id) => id, None => bail!( self.offset, "unknown function {}: function index out of bounds", @@ -2343,18 +2330,13 @@ where bail!(self.offset, "undeclared function reference"); } - // FIXME(#924) this should not be conditional based on enabled - // proposals. - if self.features.function_references { - let index = PackedIndex::from_module_index(type_index).ok_or_else(|| { + let index = UnpackedIndex::Id(type_id); + let ty = ValType::Ref( + RefType::new(false, HeapType::Concrete(index)).ok_or_else(|| { BinaryReaderError::new("implementation limit: type index too large", self.offset) - })?; - let mut ty = ValType::Ref(RefType::concrete(false, index)); - self.resources.canonicalize_valtype(&mut ty); - self.push_operand(ty)?; - } else { - self.push_operand(ValType::FUNCREF)?; - } + })?, + ); + self.push_operand(ty)?; Ok(()) } fn visit_v128_load(&mut self, memarg: MemArg) -> Self::Output { diff --git a/crates/wasmparser/src/validator/types.rs b/crates/wasmparser/src/validator/types.rs index ca01534c6f..1a67346cab 100644 --- a/crates/wasmparser/src/validator/types.rs +++ b/crates/wasmparser/src/validator/types.rs @@ -11,9 +11,9 @@ use crate::{ TypeRef, UnpackedIndex, ValType, WithRecGroup, }; use indexmap::{IndexMap, IndexSet}; -use std::collections::HashMap; -use std::collections::HashSet; -use std::ops::Index; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; +use std::ops::{Index, Range}; use std::sync::atomic::{AtomicU64, Ordering}; use std::{ borrow::Borrow, @@ -595,12 +595,12 @@ impl ComponentAnyTypeId { define_type_id!( RecGroupId, - std::ops::Range, + Range, rec_group_elements, "recursion group" ); -impl TypeData for std::ops::Range { +impl TypeData for Range { type Id = RecGroupId; fn type_info(&self, _types: &TypeList) -> TypeInfo { @@ -2403,12 +2403,12 @@ pub struct TypeList { core_type_to_supertype: SnapshotList>, // A primary map from `RecGroupId` to the range of the rec group's elements // within `core_types`. - rec_group_elements: SnapshotList>, + rec_group_elements: SnapshotList>, // A hash map from rec group elements to their canonical `RecGroupId`. // - // This hash map is queried by the full `RecGroup` structure but actually - // only stores the range of the rec group's elements as a key. - canonical_rec_groups: hashbrown::HashTable<(std::ops::Range, RecGroupId)>, + // This is `None` when a list is "committed" meaning that no more insertions + // can happen. + canonical_rec_groups: Option>, // Component model types. components: SnapshotList, @@ -2466,93 +2466,61 @@ impl TypeList { /// and return its associated id and whether this was a new recursion group /// or not. pub fn intern_canonical_rec_group(&mut self, rec_group: RecGroup) -> (bool, RecGroupId) { - /// Hasher for the elements in a rec group. - /// - /// Doesn't take a slice because a `SnapshotList` doesn't necessarily - /// hold its elements in a contiguous slice. - fn rec_group_hasher<'a, I>(rec_group_elems: impl IntoIterator) -> u64 - where - I: ExactSizeIterator, - { - let iter = rec_group_elems.into_iter(); - let mut state = std::collections::hash_map::DefaultHasher::default(); - std::hash::Hash::hash(&iter.len(), &mut state); - for ty in iter { - std::hash::Hash::hash(ty, &mut state); - } - state.finish() - } - - let hash = rec_group_hasher(rec_group.types()); - - let entry = self.canonical_rec_groups.find_entry(hash, |(range, _)| { - let len = range.end.index() - range.start.index(); - if len != rec_group.types().len() { - return false; - } - - (range.start.index()..range.end.index()) - .map(|i| &self.core_types[i]) - .zip(rec_group.types()) - .all(|(canon_ty, new_ty)| canon_ty == new_ty) - }); - - let (is_new, occupied_entry) = match entry { - // Occupied: use the existing entry. - Ok(entry) => (false, entry), - - // Absent: intern the types, record their range, add a new canonical - // rec group for that range, insert it into the hash table, and - // return the new entry. - Err(absent_entry) => { - let table = absent_entry.into_table(); - - let rec_group_id = self.rec_group_elements.len(); - let rec_group_id = u32::try_from(rec_group_id).unwrap(); - let rec_group_id = RecGroupId::from_index(rec_group_id); - - let start = self.core_types.len(); - let start = u32::try_from(start).unwrap(); - let start = CoreTypeId::from_index(start); - - for ty in rec_group.into_types() { - debug_assert_eq!(self.core_types.len(), self.core_type_to_supertype.len()); - debug_assert_eq!(self.core_types.len(), self.core_type_to_rec_group.len()); - - self.core_type_to_supertype.push(ty.supertype_idx.map( - |idx| match idx.unpack() { - UnpackedIndex::RecGroup(offset) => { - CoreTypeId::from_index(start.index + offset) - } - UnpackedIndex::Id(id) => id, - UnpackedIndex::Module(_) => unreachable!("in canonical form"), - }, - )); - self.core_types.push(ty); - self.core_type_to_rec_group.push(rec_group_id); - } + let canonical_rec_groups = self + .canonical_rec_groups + .as_mut() + .expect("cannot intern into a committed list"); + let entry = match canonical_rec_groups.entry(rec_group) { + Entry::Occupied(e) => return (false, *e.get()), + Entry::Vacant(e) => e, + }; - let end = self.core_types.len(); - let end = u32::try_from(end).unwrap(); - let end = CoreTypeId::from_index(end); + let rec_group_id = self.rec_group_elements.len(); + let rec_group_id = u32::try_from(rec_group_id).unwrap(); + let rec_group_id = RecGroupId::from_index(rec_group_id); + + let start = self.core_types.len(); + let start = u32::try_from(start).unwrap(); + let start = CoreTypeId::from_index(start); + + for ty in entry.key().types() { + debug_assert_eq!(self.core_types.len(), self.core_type_to_supertype.len()); + debug_assert_eq!(self.core_types.len(), self.core_type_to_rec_group.len()); + + self.core_type_to_supertype + .push(ty.supertype_idx.map(|idx| match idx.unpack() { + UnpackedIndex::RecGroup(offset) => CoreTypeId::from_index(start.index + offset), + UnpackedIndex::Id(id) => id, + UnpackedIndex::Module(_) => unreachable!("in canonical form"), + })); + let mut ty = ty.clone(); + ty.remap_indices(&mut |index| { + match index.unpack() { + UnpackedIndex::Id(_) => {} + UnpackedIndex::Module(_) => unreachable!(), + UnpackedIndex::RecGroup(offset) => { + *index = UnpackedIndex::Id(CoreTypeId::from_index(start.index + offset)) + .pack() + .unwrap(); + } + }; + Ok(()) + }) + .expect("cannot fail"); + self.core_types.push(ty); + self.core_type_to_rec_group.push(rec_group_id); + } - let range = start..end; + let end = self.core_types.len(); + let end = u32::try_from(end).unwrap(); + let end = CoreTypeId::from_index(end); - self.rec_group_elements.push(range.clone()); + let range = start..end; - let occupied_entry = table.insert_unique(hash, (range, rec_group_id), |entry| { - let range = &entry.0; - let start = range.start.index(); - let end = range.end.index(); - rec_group_hasher((start..end).map(|i| &self.core_types[i])) - }); + self.rec_group_elements.push(range.clone()); - (true, occupied_entry) - } - }; - - let rec_group_id = occupied_entry.get().1; - (is_new, rec_group_id) + entry.insert(rec_group_id); + return (true, rec_group_id); } /// Get the `CoreTypeId` for a local index into a rec group. @@ -2790,7 +2758,7 @@ impl TypeList { core_type_to_rec_group: core_type_to_rec_group.len(), core_type_to_supertype: core_type_to_supertype.len(), rec_group_elements: rec_group_elements.len(), - canonical_rec_groups: canonical_rec_groups.len(), + canonical_rec_groups: canonical_rec_groups.as_ref().map(|m| m.len()).unwrap_or(0), } } @@ -2825,12 +2793,14 @@ impl TypeList { core_type_to_supertype.truncate(checkpoint.core_type_to_supertype); rec_group_elements.truncate(checkpoint.rec_group_elements); - assert_eq!( - canonical_rec_groups.len(), - checkpoint.canonical_rec_groups, - "checkpointing does not support resetting `canonical_rec_groups` (it would require a \ - proper immutable and persistent hash map) so adding new groups is disallowed" - ); + if let Some(canonical_rec_groups) = canonical_rec_groups { + assert_eq!( + canonical_rec_groups.len(), + checkpoint.canonical_rec_groups, + "checkpointing does not support resetting `canonical_rec_groups` (it would require a \ + proper immutable and persistent hash map) so adding new groups is disallowed" + ); + } } pub fn commit(&mut self) -> TypeList { @@ -2860,7 +2830,7 @@ impl TypeList { core_type_to_rec_group: self.core_type_to_rec_group.commit(), core_type_to_supertype: self.core_type_to_supertype.commit(), rec_group_elements: self.rec_group_elements.commit(), - canonical_rec_groups: self.canonical_rec_groups.clone(), + canonical_rec_groups: None, } } @@ -2939,11 +2909,13 @@ pub(crate) struct TypeAlloc { impl Default for TypeAlloc { fn default() -> TypeAlloc { static NEXT_GLOBAL_ID: AtomicU64 = AtomicU64::new(0); - TypeAlloc { + let mut ret = TypeAlloc { list: TypeList::default(), globally_unique_id: NEXT_GLOBAL_ID.fetch_add(1, Ordering::Relaxed), next_resource_id: 0, - } + }; + ret.list.canonical_rec_groups = Some(Default::default()); + ret } } diff --git a/tests/local/gc/gc-rec-groups-invalid.wast b/tests/local/gc/gc-rec-groups-invalid.wast index b2de1893c3..ffbffef199 100644 --- a/tests/local/gc/gc-rec-groups-invalid.wast +++ b/tests/local/gc/gc-rec-groups-invalid.wast @@ -14,3 +14,17 @@ ) "unknown type" ) + +(assert_invalid + (module + (rec + (type $t1 (struct (field (ref $t2)))) + (type $t2 (sub (struct (field (ref $t1))))) + ) + + (rec + (type $t3 (struct (field (ref $t4)))) + (type $t4 (sub $t2 (struct (field (ref $t3))))) + ) + ) + "subtype must match supertype") diff --git a/tests/local/gc/gc-rec-sub.wat b/tests/local/gc/gc-rec-sub.wat index 2a29625cc3..40d83ffeed 100644 --- a/tests/local/gc/gc-rec-sub.wat +++ b/tests/local/gc/gc-rec-sub.wat @@ -45,10 +45,5 @@ (type $t2 (sub (struct (field (ref $t1))))) ) - (rec - (type $t3 (struct (field (ref $t4)))) - (type $t4 (sub $t2 (struct (field (ref $t3))))) - ) - (rec) ) diff --git a/tests/snapshots/local/gc/gc-rec-sub.wat.print b/tests/snapshots/local/gc/gc-rec-sub.wat.print index 9bce1a12c9..61319c59ec 100644 --- a/tests/snapshots/local/gc/gc-rec-sub.wat.print +++ b/tests/snapshots/local/gc/gc-rec-sub.wat.print @@ -40,9 +40,5 @@ (type $t1 (;17;) (struct (field (ref 18)))) (type $t2 (;18;) (sub (struct (field (ref 17))))) ) - (rec - (type $t3 (;19;) (struct (field (ref 20)))) - (type $t4 (;20;) (sub $t2 (;18;) (struct (field (ref 19))))) - ) (rec) ) \ No newline at end of file