Skip to content

Commit

Permalink
Reimplement fast-path validation of MVP types (#1930)
Browse files Browse the repository at this point in the history
* Add a benchmark for validating with older features

Benchmark the "fast path" in type interning.

* Reimplement fast-path validation of MVP types

This commit fixes an issue from #1906 which is preventing the upgrade of
wasm-tools in Wasmtime. That commit implemented a fast path by skipping
a function and reimplementing parts of it internally, but that then
caused Wasmtime to panic when other data structures weren't filled out.
The intention was that the user-facing interface doesn't change
depending on features, so this is an attempt at fixing the mistake in
that commit.

The fix here is to remove the fast path added and restructure it
differently. Instead now the fast and normal paths have much less
divergence which should prevent this issue from re-surfacing. This is
15% slower than `main` but it doesn't have the same bug as `main` so for
now that may be the best that can be done.

* Fix dead code warning
  • Loading branch information
alexcrichton authored Dec 2, 2024
1 parent 95a3882 commit f51bdbb
Show file tree
Hide file tree
Showing 9 changed files with 166 additions and 91 deletions.
11 changes: 11 additions & 0 deletions crates/wasmparser/benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ fn define_benchmarks(c: &mut Criterion) {
fn validator() -> Validator {
Validator::new_with_features(WasmFeatures::all())
}
fn old_validator() -> Validator {
Validator::new_with_features(WasmFeatures::WASM2)
}

let test_inputs = once_cell::unsync::Lazy::new(collect_benchmark_inputs);

Expand Down Expand Up @@ -330,6 +333,14 @@ fn define_benchmarks(c: &mut Criterion) {
validator().validate_all(&wasm).unwrap();
})
});
if old_validator().validate_all(&wasm).is_ok() {
c.bench_function(&format!("validate-old/{name}"), |b| {
Lazy::force(&wasm);
b.iter(|| {
old_validator().validate_all(&wasm).unwrap();
})
});
}
c.bench_function(&format!("parse/{name}"), |b| {
Lazy::force(&wasm);
b.iter(|| {
Expand Down
38 changes: 38 additions & 0 deletions crates/wasmparser/src/features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,41 @@ impl From<WasmFeatures> for WasmFeaturesInflated {
features.inflate()
}
}

impl WasmFeatures {
/// Returns whether any types considered valid with this set of
/// `WasmFeatures` requires any type canonicalization/interning internally.
#[cfg(feature = "validate")]
pub(crate) fn needs_type_canonicalization(&self) -> bool {
#[cfg(feature = "features")]
{
// Types from the function-references proposal and beyond (namely
// GC) need type canonicalization for inter-type references and for
// rec-groups to work. Prior proposals have no such inter-type
// references structurally and as such can hit various fast paths in
// the validator to do a bit less work when processing the type
// section.
//
// None of these proposals below support inter-type references. If
// `self` contains any proposal outside of this set then it requires
// type canonicalization.
const FAST_VALIDATION_FEATURES: WasmFeatures = WasmFeatures::WASM2
.union(WasmFeatures::CUSTOM_PAGE_SIZES)
.union(WasmFeatures::EXTENDED_CONST)
.union(WasmFeatures::MEMORY64)
.union(WasmFeatures::MULTI_MEMORY)
.union(WasmFeatures::RELAXED_SIMD)
.union(WasmFeatures::TAIL_CALL)
.union(WasmFeatures::THREADS)
.union(WasmFeatures::WIDE_ARITHMETIC);
!FAST_VALIDATION_FEATURES.contains(*self)
}
#[cfg(not(feature = "features"))]
{
// GC/function-references are enabled by default, so
// canonicalization is required if feature flags aren't enabled at
// runtime.
true
}
}
}
60 changes: 0 additions & 60 deletions crates/wasmparser/src/validator/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -587,69 +587,9 @@ impl Module {
offset,
)?;
}
if self.try_fast_validation(&rec_group, features, types, offset)? {
return Ok(());
}
self.canonicalize_and_intern_rec_group(features, types, rec_group, offset)
}

#[cfg(not(feature = "features"))]
fn try_fast_validation(
&mut self,
_rec_group: &RecGroup,
_features: &WasmFeatures,
_types: &mut TypeAlloc,
_offset: usize,
) -> Result<bool> {
Ok(false)
}

/// Performs fast type section validation if possible.
///
/// - Returns `Ok(true)` if fast validation was performed, else returns `Ok(false)`.
/// - Returns `Err(_)` if a type section validation error was encountered.
///
/// # Note
///
/// Fast type section validation can only be performed on a
/// statically known subset of `WasmFeatures`.
#[cfg(feature = "features")]
fn try_fast_validation(
&mut self,
rec_group: &RecGroup,
features: &WasmFeatures,
types: &mut TypeAlloc,
offset: usize,
) -> Result<bool> {
/// The subset of `WasmFeatures` for which we know that the
/// fast type section validation can be safely applied.
///
/// Fast type section validation does not have to canonicalize
/// (deduplicate) types and does not have to perform sub-typing
/// checks.
const FAST_VALIDATION_FEATURES: WasmFeatures = WasmFeatures::WASM2
.union(WasmFeatures::CUSTOM_PAGE_SIZES)
.union(WasmFeatures::EXTENDED_CONST)
.union(WasmFeatures::MEMORY64)
.union(WasmFeatures::MULTI_MEMORY)
.union(WasmFeatures::RELAXED_SIMD)
.union(WasmFeatures::TAIL_CALL)
.union(WasmFeatures::THREADS)
.union(WasmFeatures::WIDE_ARITHMETIC);
if !FAST_VALIDATION_FEATURES.contains(*features) {
return Ok(false);
}
if rec_group.is_explicit_rec_group() {
bail!(offset, "requires `gc` proposal to be enabled")
}
for ty in rec_group.types() {
let id = types.push(ty.clone());
self.add_type_id(id);
self.check_composite_type(&ty.composite_type, features, &types, offset)?;
}
Ok(true)
}

pub fn add_import(
&mut self,
mut import: crate::Import,
Expand Down
11 changes: 7 additions & 4 deletions crates/wasmparser/src/validator/core/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,13 @@ pub(crate) trait InternRecGroup {
"rec group usage requires `gc` proposal to be enabled"
);
}
TypeCanonicalizer::new(self, offset)
.with_features(features)
.canonicalize_rec_group(&mut rec_group)?;
let (is_new, rec_group_id) = types.intern_canonical_rec_group(rec_group);
if features.needs_type_canonicalization() {
TypeCanonicalizer::new(self, offset)
.with_features(features)
.canonicalize_rec_group(&mut rec_group)?;
}
let (is_new, rec_group_id) =
types.intern_canonical_rec_group(features.needs_type_canonicalization(), rec_group);
let range = &types[rec_group_id];
let start = range.start.index();
let end = range.end.index();
Expand Down
71 changes: 44 additions & 27 deletions crates/wasmparser/src/validator/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -868,45 +868,63 @@ impl TypeList {
/// Intern the given recursion group (that has already been canonicalized)
/// and return its associated id and whether this was a new recursion group
/// or not.
pub fn intern_canonical_rec_group(&mut self, rec_group: RecGroup) -> (bool, RecGroupId) {
let canonical_rec_groups = self
.canonical_rec_groups
.as_mut()
.expect("cannot intern into a committed list");
let entry = match canonical_rec_groups.entry(rec_group) {
Entry::Occupied(e) => return (false, *e.get()),
Entry::Vacant(e) => e,
};

///
/// If the `needs_type_canonicalization` flag is provided then the type will
/// be intern'd here and its indices will be canonicalized to `CoreTypeId`
/// from the previous `RecGroup`-based indices.
///
/// If the `needs_type_canonicalization` flag is `false` then it must be
/// required that `RecGroup` doesn't have any rec-group-relative references
/// and it will additionally not be intern'd.
pub fn intern_canonical_rec_group(
&mut self,
needs_type_canonicalization: bool,
mut rec_group: RecGroup,
) -> (bool, RecGroupId) {
let rec_group_id = self.rec_group_elements.len();
let rec_group_id = u32::try_from(rec_group_id).unwrap();
let rec_group_id = RecGroupId::from_index(rec_group_id);

if needs_type_canonicalization {
let canonical_rec_groups = self
.canonical_rec_groups
.as_mut()
.expect("cannot intern into a committed list");
let entry = match canonical_rec_groups.entry(rec_group) {
Entry::Occupied(e) => return (false, *e.get()),
Entry::Vacant(e) => e,
};
rec_group = entry.key().clone();
entry.insert(rec_group_id);
}

let start = self.core_types.len();
let start = u32::try_from(start).unwrap();
let start = CoreTypeId::from_index(start);

for ty in entry.key().types() {
for mut ty in rec_group.into_types() {
debug_assert_eq!(self.core_types.len(), self.core_type_to_supertype.len());
debug_assert_eq!(self.core_types.len(), self.core_type_to_rec_group.len());

self.core_type_to_supertype
.push(ty.supertype_idx.map(|idx| match idx.unpack() {
UnpackedIndex::RecGroup(offset) => CoreTypeId::from_index(start.index + offset),
UnpackedIndex::Id(id) => id,
UnpackedIndex::Module(_) => unreachable!("in canonical form"),
}));
let mut ty = ty.clone();
ty.remap_indices(&mut |index| {
match index.unpack() {
UnpackedIndex::Id(_) => {}
UnpackedIndex::Module(_) => unreachable!(),
.push(ty.supertype_idx.and_then(|idx| match idx.unpack() {
UnpackedIndex::RecGroup(offset) => {
*index = UnpackedIndex::Id(CoreTypeId::from_index(start.index + offset))
.pack()
.unwrap();
Some(CoreTypeId::from_index(start.index + offset))
}
};
UnpackedIndex::Id(id) => Some(id),
// Only invalid wasm has this, at this point, so defer the
// error to later.
UnpackedIndex::Module(_) => None,
}));
ty.remap_indices(&mut |index| {
// Note that `UnpackedIndex::Id` is unmodified and
// `UnpackedIndex::Module` means that this is invalid wasm which
// will get an error returned later.
if let UnpackedIndex::RecGroup(offset) = index.unpack() {
*index = UnpackedIndex::Id(CoreTypeId::from_index(start.index + offset))
.pack()
.unwrap();
}
Ok(())
})
.expect("cannot fail");
Expand All @@ -922,15 +940,14 @@ impl TypeList {

self.rec_group_elements.push(range.clone());

entry.insert(rec_group_id);
return (true, rec_group_id);
}

/// Helper for interning a sub type as a rec group; see
/// [`Self::intern_canonical_rec_group`].
pub fn intern_sub_type(&mut self, sub_ty: SubType, offset: usize) -> CoreTypeId {
let (_is_new, group_id) =
self.intern_canonical_rec_group(RecGroup::implicit(offset, sub_ty));
self.intern_canonical_rec_group(false, RecGroup::implicit(offset, sub_ty));
self[group_id].start
}

Expand Down
12 changes: 12 additions & 0 deletions tests/local/missing-features/rec-group.wast
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,15 @@
(assert_invalid
(module (rec (type (func)) (type (func))))
"requires `gc` proposal to be enabled")

(assert_invalid
(module (type $t (func (param (ref $t)))))
"reference types support is not enabled")

(assert_invalid
(module (type $t (sub (func))))
"gc proposal must be enabled")

(assert_invalid
(module (type $t (func)) (type (sub $t (func))))
"gc proposal must be enabled")
12 changes: 12 additions & 0 deletions tests/local/missing-features/reference-types/gc.wast
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,15 @@
(assert_invalid
(module (func ref.null array drop))
"heap types not supported without the gc feature")

(assert_invalid
(module (type $t (func (param (ref $t)))))
"function references required for index")

(assert_invalid
(module (type $t (sub (func))))
"gc proposal must be enabled")

(assert_invalid
(module (type $t (func)) (type (sub $t (func))))
"gc proposal must be enabled")
21 changes: 21 additions & 0 deletions tests/snapshots/local/missing-features/rec-group.wast.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,27 @@
"filename": "rec-group.2.wasm",
"module_type": "binary",
"text": "requires `gc` proposal to be enabled"
},
{
"type": "assert_invalid",
"line": 14,
"filename": "rec-group.3.wasm",
"module_type": "binary",
"text": "reference types support is not enabled"
},
{
"type": "assert_invalid",
"line": 18,
"filename": "rec-group.4.wasm",
"module_type": "binary",
"text": "gc proposal must be enabled"
},
{
"type": "assert_invalid",
"line": 22,
"filename": "rec-group.5.wasm",
"module_type": "binary",
"text": "gc proposal must be enabled"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,27 @@
"filename": "gc.10.wasm",
"module_type": "binary",
"text": "heap types not supported without the gc feature"
},
{
"type": "assert_invalid",
"line": 36,
"filename": "gc.11.wasm",
"module_type": "binary",
"text": "function references required for index"
},
{
"type": "assert_invalid",
"line": 40,
"filename": "gc.12.wasm",
"module_type": "binary",
"text": "gc proposal must be enabled"
},
{
"type": "assert_invalid",
"line": 44,
"filename": "gc.13.wasm",
"module_type": "binary",
"text": "gc proposal must be enabled"
}
]
}

0 comments on commit f51bdbb

Please sign in to comment.