Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store and restore hash from a compiled module #4654

Merged
merged 18 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/api/src/js/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ impl Module {
type_hints,
name,
#[cfg(feature = "js-serializable-module")]
raw_bytes: Some(binary),
raw_bytes: Some(binary.clone()),
maminrayej marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
3 changes: 2 additions & 1 deletion lib/api/src/jsc/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ impl Module {
///
pub(crate) unsafe fn from_js_module(module: JSObject, binary: impl IntoBytes) -> Self {
let binary = binary.into_bytes();

// The module is now validated, so we can safely parse its types
let info = crate::module_info_polyfill::translate_module(&binary[..])
let mut info = crate::module_info_polyfill::translate_module(&binary[..])
maminrayej marked this conversation as resolved.
Show resolved Hide resolved
.unwrap()
.info;

Expand Down
12 changes: 5 additions & 7 deletions lib/cli/src/commands/run/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use wasmer::{
use wasmer_compiler::ArtifactBuild;
use wasmer_config::package::PackageSource as PackageSpecifier;
use wasmer_registry::{wasmer_env::WasmerEnv, Package};
use wasmer_types::ModuleHash;
#[cfg(feature = "journal")]
use wasmer_wasix::journal::{LogFileJournal, SnapshotTrigger};
use wasmer_wasix::{
Expand All @@ -43,9 +44,7 @@ use wasmer_wasix::{
MappedCommand, MappedDirectory, Runner,
},
runtime::{
module_cache::{CacheError, ModuleHash},
package_loader::PackageLoader,
resolver::QueryError,
module_cache::CacheError, package_loader::PackageLoader, resolver::QueryError,
task_manager::VirtualTaskManagerExt,
},
Runtime, WasiError,
Expand Down Expand Up @@ -712,10 +711,9 @@ impl ExecutableTarget {
let engine = runtime.engine();
pb.set_message("Deserializing pre-compiled WebAssembly module");
let module = unsafe { Module::deserialize_from_file(&engine, path)? };
let module_hash = {
let wasm = std::fs::read(path)?;
ModuleHash::xxhash(wasm)
};

// FIXME: decide what to do when the artifact does not carry a hash; for now an all-zero XXHash is used as a placeholder.
let module_hash = module.info().hash.unwrap_or(ModuleHash::XXHash([0u8; 8]));

Ok(ExecutableTarget::WebAssembly {
module,
Expand Down
3 changes: 2 additions & 1 deletion lib/cli/src/commands/run/wasi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use virtual_fs::{DeviceFile, FileSystem, PassthruFileSystem, RootFileSystemBuild
use wasmer::{Engine, Function, Instance, Memory32, Memory64, Module, RuntimeError, Store, Value};
use wasmer_config::package::PackageSource as PackageSpecifier;
use wasmer_registry::wasmer_env::WasmerEnv;
use wasmer_types::ModuleHash;
#[cfg(feature = "journal")]
use wasmer_wasix::journal::{LogFileJournal, SnapshotTrigger};
use wasmer_wasix::{
Expand All @@ -27,7 +28,7 @@ use wasmer_wasix::{
rewind_ext,
runners::{MappedCommand, MappedDirectory},
runtime::{
module_cache::{FileSystemCache, ModuleCache, ModuleHash},
module_cache::{FileSystemCache, ModuleCache},
package_loader::{BuiltinPackageLoader, PackageLoader},
resolver::{FileSystemSource, InMemorySource, MultiSource, Source, WapmSource, WebSource},
task_manager::{
Expand Down
1 change: 1 addition & 0 deletions lib/compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ serde = { version = "1.0", features = ["derive"], optional = true }
thiserror = "1.0"
serde_bytes = { version = "0.11", optional = true }
smallvec = "1.6"
xxhash-rust = { version = "0.8.10", features = ["xxh64"] }

backtrace = "0.3"
memmap2 = "0.5"
Expand Down
5 changes: 4 additions & 1 deletion lib/compiler/src/translator/environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ use crate::wasmparser::{Operator, ValType};
use std::convert::{TryFrom, TryInto};
use std::ops::Range;
use wasmer_types::entity::PrimaryMap;
use wasmer_types::FunctionType;
use wasmer_types::WasmResult;
use wasmer_types::{
CustomSectionIndex, DataIndex, DataInitializer, DataInitializerLocation, ElemIndex,
ExportIndex, FunctionIndex, GlobalIndex, GlobalInit, GlobalType, ImportIndex,
LocalFunctionIndex, MemoryIndex, MemoryType, ModuleInfo, SignatureIndex, TableIndex,
TableInitializer, TableType,
};
use wasmer_types::{FunctionType, ModuleHash};

/// Contains function data: bytecode and its offset in the module.
#[derive(Hash)]
Expand Down Expand Up @@ -89,6 +89,9 @@ impl<'data> ModuleEnvironment<'data> {
assert!(self.module_translation_state.is_none());
let module_translation_state = translate_module(data, &mut self)?;
self.module_translation_state = Some(module_translation_state);

self.module.hash = Some(ModuleHash::xxhash(data));
maminrayej marked this conversation as resolved.
Show resolved Hide resolved

Ok(self)
}

Expand Down
11 changes: 11 additions & 0 deletions lib/types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ enum-iterator = "0.7.0"
target-lexicon = { version = "0.12.2", default-features = false }
enumset.workspace = true
bytecheck = "0.6.8"
rand = "0.8"
maminrayej marked this conversation as resolved.
Show resolved Hide resolved
webc = { workspace = true, default-features = false }
xxhash-rust = { version = "0.8.8", features = ["xxh64"] }
sha2 = { version = "0.10" }
hex = { version = "^0.4" }

# `rand` uses `getrandom` transitively, and to be able to
# compile the project for `js`, we need to enable this feature
[dependencies.getrandom]
version = "*"
features = ["js"]

[dev-dependencies]
memoffset.workspace = true
Expand Down
2 changes: 2 additions & 0 deletions lib/types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ mod initializers;
mod libcalls;
mod memory;
mod module;
mod module_hash;
mod serialize;
mod stack;
mod store_id;
Expand Down Expand Up @@ -97,6 +98,7 @@ pub use crate::initializers::{
};
pub use crate::memory::{Memory32, Memory64, MemorySize};
pub use crate::module::{ExportsIterator, ImportKey, ImportsIterator, ModuleInfo};
pub use crate::module_hash::ModuleHash;
pub use crate::units::{
Bytes, PageCountOutOfRange, Pages, WASM_MAX_PAGES, WASM_MIN_PAGES, WASM_PAGE_SIZE,
};
Expand Down
20 changes: 18 additions & 2 deletions lib/types/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ use crate::entity::{EntityRef, PrimaryMap};
use crate::{
CustomSectionIndex, DataIndex, ElemIndex, ExportIndex, ExportType, ExternType, FunctionIndex,
FunctionType, GlobalIndex, GlobalInit, GlobalType, ImportIndex, ImportType, LocalFunctionIndex,
LocalGlobalIndex, LocalMemoryIndex, LocalTableIndex, MemoryIndex, MemoryType, SignatureIndex,
TableIndex, TableInitializer, TableType,
LocalGlobalIndex, LocalMemoryIndex, LocalTableIndex, MemoryIndex, MemoryType, ModuleHash,
SignatureIndex, TableIndex, TableInitializer, TableType,
};

use indexmap::IndexMap;
use rkyv::{
de::SharedDeserializeRegistry, ser::ScratchSpace, ser::Serializer,
Expand Down Expand Up @@ -99,6 +100,10 @@ mod serde_imports {

/// A translated WebAssembly module, excluding the function bodies and
/// memory initializers.
///
/// IMPORTANT: since this struct will be serialized as part of the compiled module artifact,
/// if you change this struct, do not forget to update [`MetadataHeader::version`](crate::serialize::MetadataHeader)
/// to make sure we don't break compatibility between versions.
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ModuleInfo {
Expand All @@ -111,6 +116,9 @@ pub struct ModuleInfo {
#[cfg_attr(feature = "enable-serde", serde(skip_serializing, skip_deserializing))]
pub id: ModuleId,

/// The hash of the module, if one is available.
pub hash: Option<ModuleHash>,

/// The name of this wasm module, often found in the wasm file.
pub name: Option<String>,

Expand Down Expand Up @@ -182,6 +190,7 @@ pub struct ModuleInfo {
#[archive_attr(derive(CheckBytes, Debug))]
pub struct ArchivableModuleInfo {
name: Option<String>,
hash: Option<ModuleHash>,
imports: IndexMap<ImportKey, ImportIndex>,
exports: IndexMap<String, ExportIndex>,
start_function: Option<FunctionIndex>,
Expand All @@ -207,6 +216,7 @@ impl From<ModuleInfo> for ArchivableModuleInfo {
fn from(it: ModuleInfo) -> Self {
Self {
name: it.name,
hash: it.hash,
imports: it.imports,
exports: it.exports,
start_function: it.start_function,
Expand Down Expand Up @@ -235,6 +245,7 @@ impl From<ArchivableModuleInfo> for ModuleInfo {
Self {
id: Default::default(),
name: it.name,
hash: it.hash,
imports: it.imports,
exports: it.exports,
start_function: it.start_function,
Expand Down Expand Up @@ -325,6 +336,11 @@ impl ModuleInfo {
Default::default()
}

/// Returns the module hash if available.
///
/// This is a copy of the `hash` field; it is `None` when no hash is stored
/// in this module info.
pub fn hash(&self) -> Option<ModuleHash> {
self.hash
}

/// Get the given passive element, if it exists.
pub fn get_passive_element(&self, index: ElemIndex) -> Option<&[FunctionIndex]> {
self.passive_elements.get(&index).map(|es| &**es)
Expand Down
126 changes: 126 additions & 0 deletions lib/types/src/module_hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use std::fmt::{self, Display, Formatter};

use rand::RngCore;
use rkyv::{Archive, CheckBytes, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
use sha2::Digest;

/// The hash of a WebAssembly module.
///
/// Each variant wraps the raw digest bytes produced by one hashing scheme.
///
/// NOTE(review): this type is a field of the archived module info, so adding,
/// removing or reordering variants presumably changes the serialized artifact
/// format — confirm whether the metadata version needs a bump before doing so.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(RkyvSerialize, RkyvDeserialize, Archive)]
#[archive_attr(derive(CheckBytes, Debug))]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ModuleHash {
    /// An 8-byte xxhash digest.
    XXHash([u8; 8]),

    /// A 32-byte sha256 digest.
    Sha256([u8; 32]),
}

impl ModuleHash {
/// Create a new [`ModuleHash`] from the raw xxhash hash.
pub fn xxhash_from_bytes(key: [u8; 8]) -> Self {
Self::XXHash(key)
}

/// Create a new [`ModuleHash`] from the raw sha256 hash.
pub fn sha256_from_bytes(key: [u8; 32]) -> Self {
Self::Sha256(key)
}

/// Creates a random xxhash for the module
pub fn xxhash_random() -> Self {
let mut rand = rand::thread_rng();
let mut key = [0u8; 8];
rand.fill_bytes(&mut key);
Self::xxhash_from_bytes(key)
}

/// Creates a random sha256 hash for the module
pub fn sha256_random() -> Self {
let mut rand = rand::thread_rng();
let mut key = [0u8; 32];
rand.fill_bytes(&mut key);
Self::sha256_from_bytes(key)
}
maminrayej marked this conversation as resolved.
Show resolved Hide resolved

/// Parse a XXHash hash from a hex-encoded string.
pub fn xxhash_parse_hex(hex_str: &str) -> Result<Self, hex::FromHexError> {
let mut hash = [0_u8; 8];
hex::decode_to_slice(hex_str, &mut hash)?;
Ok(Self::xxhash_from_bytes(hash))
}

/// Parse a Sha256 hash from a hex-encoded string.
pub fn sha256_parse_hex(hex_str: &str) -> Result<Self, hex::FromHexError> {
let mut hash = [0_u8; 32];
hex::decode_to_slice(hex_str, &mut hash)?;
Ok(Self::sha256_from_bytes(hash))
}

/// Generate a new [`ModuleCache`] based on the XXHash hash of some bytes.
pub fn xxhash(wasm: impl AsRef<[u8]>) -> Self {
let wasm = wasm.as_ref();

let hash = xxhash_rust::xxh64::xxh64(wasm, 0);

Self::XXHash(hash.to_ne_bytes())
}

/// Generate a new [`ModuleCache`] based on the Sha256 hash of some bytes.
pub fn sha256(wasm: impl AsRef<[u8]>) -> Self {
let wasm = wasm.as_ref();

let hash: [u8; 32] = sha2::Sha256::digest(wasm).into();

Self::Sha256(hash)
}

/// Get the raw hash.
pub fn as_bytes(&self) -> &[u8] {
match self {
Self::XXHash(bytes) => bytes.as_slice(),
Self::Sha256(bytes) => bytes.as_slice(),
}
}
}

/// Converts a webc atom signature into the matching [`ModuleHash`] variant.
impl From<webc::metadata::AtomSignature> for ModuleHash {
    fn from(value: webc::metadata::AtomSignature) -> Self {
        // Sha256 is the only signature kind handled here, so the pattern is
        // irrefutable; the digest bytes are carried over unchanged.
        let webc::metadata::AtomSignature::Sha256(digest) = value;
        Self::Sha256(digest)
    }
}

/// Renders the hash as uppercase hexadecimal: two digits per byte, written in
/// storage order with no prefix or separators.
impl Display for ModuleHash {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Both variants are printed the same way, so view the digest as a
        // plain byte slice first.
        let digest: &[u8] = match self {
            Self::XXHash(raw) => raw,
            Self::Sha256(raw) => raw,
        };

        // Stops at, and returns, the first formatting error.
        digest
            .iter()
            .try_for_each(|octet| write!(f, "{octet:02X}"))
    }
}
2 changes: 1 addition & 1 deletion lib/types/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ pub struct MetadataHeader {
impl MetadataHeader {
/// Current ABI version. Increment this any time breaking changes are made
/// to the format of the serialized data.
pub const CURRENT_VERSION: u32 = 6;
pub const CURRENT_VERSION: u32 = 7;

/// Magic number to identify wasmer metadata.
const MAGIC: [u8; 8] = *b"WASMER\0\0";
Expand Down
6 changes: 2 additions & 4 deletions lib/wasix/src/bin_factory/binary_package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@ use wasmer_config::package::{PackageHash, PackageId, PackageSource};
use webc::{compat::SharedBytes, Container};

use crate::{
runtime::{
module_cache::ModuleHash,
resolver::{PackageInfo, ResolveError},
},
runtime::resolver::{PackageInfo, ResolveError},
Runtime,
};
use wasmer_types::ModuleHash;

#[derive(Derivative, Clone)]
#[derivative(Debug)]
Expand Down
Loading
Loading