From 8384f3a3479e071175194710ea71873e9cabd3d3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 27 Apr 2021 10:55:12 -0500 Subject: [PATCH] Bring back `Module::deserialize` (#2858) * Bring back `Module::deserialize` I thought I was being clever suggesting that `Module::deserialize` was removed from #2791 by funneling all module constructors into `Module::new`. As our studious fuzzers have found, though, this means that `Module::new` is not safe currently to pass arbitrary user-defined input into. Now one might pretty reasonable expect to be able to do that, however, being a WebAssembly engine and all. This PR as a result separates the `deserialize` part of `Module::new` back into `Module::deserialize`. This means that binary blobs created with `Module::serialize` and `Engine::precompile_module` will need to be passed to `Module::deserialize` to "rehydrate" them back into a `Module`. This restores the property that it should be safe to pass arbitrary input to `Module::new` since it's always expected to be a wasm module. This also means that fuzzing will no longer attempt to fuzz `Module::deserialize` which isn't something we want to do anyway. * Fix an example * Mark `Module::deserialize` as `unsafe` --- crates/c-api/include/wasmtime.h | 6 +- crates/c-api/src/module.rs | 11 ++-- crates/wasmtime/src/engine.rs | 5 +- crates/wasmtime/src/module.rs | 63 +++++++++++++++------ crates/wasmtime/src/module/serialization.rs | 12 ++-- examples/serialize.rs | 7 ++- src/commands/compile.rs | 3 +- tests/all/module.rs | 52 +++++++++-------- tests/all/module_linking.rs | 4 +- tests/all/module_serialize.rs | 10 ++-- 10 files changed, 109 insertions(+), 64 deletions(-) diff --git a/crates/c-api/include/wasmtime.h b/crates/c-api/include/wasmtime.h index 8babeabdff05..f163f923bbd3 100644 --- a/crates/c-api/include/wasmtime.h +++ b/crates/c-api/include/wasmtime.h @@ -992,9 +992,13 @@ WASM_API_EXTERN own wasmtime_error_t* wasmtime_module_serialize( /** * \brief Build a module from serialized data. - * * + * * This function does not take ownership of any of its arguments, but the * returned error and module are owned by the caller. + * + * This function is not safe to receive arbitrary user input. See the Rust + * documentation for more information on what inputs are safe to pass in here + * (e.g. only that of #wasmtime_module_serialize) */ WASM_API_EXTERN own wasmtime_error_t *wasmtime_module_deserialize( wasm_engine_t *engine, diff --git a/crates/c-api/src/module.rs b/crates/c-api/src/module.rs index 1757191026f6..4b60c67e73cb 100644 --- a/crates/c-api/src/module.rs +++ b/crates/c-api/src/module.rs @@ -185,10 +185,13 @@ pub extern "C" fn wasmtime_module_deserialize( binary: &wasm_byte_vec_t, ret: &mut *mut wasm_module_t, ) -> Option> { - handle_result(Module::new(&engine.engine, binary.as_slice()), |module| { - let module = Box::new(wasm_module_t::new(module)); - *ret = Box::into_raw(module); - }) + handle_result( + unsafe { Module::deserialize(&engine.engine, binary.as_slice()) }, + |module| { + let module = Box::new(wasm_module_t::new(module)); + *ret = Box::into_raw(module); + }, + ) } #[no_mangle] diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index acafaa5797a3..8876eb042dfb 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -160,8 +160,9 @@ impl Engine { /// Note that the `wat` feature is enabled by default. /// /// This method may be used to compile a module for use with a different target - /// host. The output of this method may be used with [`Module::new`](crate::Module::new) - /// on hosts compatible with the [`Config`] associated with this [`Engine`]. + /// host. The output of this method may be used with + /// [`Module::deserialize`](crate::Module::deserialize) on hosts compatible + /// with the [`Config`] associated with this [`Engine`]. /// /// The output of this method is safe to send to another host machine for later /// execution. As the output is already a compiled module, translation and code diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index e9ae9579e326..4964189d7bb7 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -121,9 +121,6 @@ impl Module { /// This is only supported when the `wat` feature of this crate is enabled. /// If this is supplied then the text format will be parsed before validation. /// Note that the `wat` feature is enabled by default. - /// * A module serialized with [`Module::serialize`]. - /// * A module compiled with [`Engine::precompile_module`] or the - /// `wasmtime compile` command. /// /// The data for the wasm module must be loaded in-memory if it's present /// elsewhere, for example on disk. This requires that the entire binary is @@ -182,11 +179,6 @@ impl Module { /// ``` pub fn new(engine: &Engine, bytes: impl AsRef<[u8]>) -> Result { let bytes = bytes.as_ref(); - - if let Some(module) = SerializedModule::from_bytes(bytes)? { - return module.into_module(engine); - } - #[cfg(feature = "wat")] let bytes = wat::parse_bytes(bytes)?; Self::from_binary(engine, &bytes) @@ -258,10 +250,10 @@ impl Module { /// data. /// /// This is similar to [`Module::new`] except that it requires that the - /// `binary` input is a WebAssembly binary or a compiled module, the - /// text format is not supported by this function. It's generally - /// recommended to use [`Module::new`], but if it's required to not - /// support the text format this function can be used instead. + /// `binary` input is a WebAssembly binary, the text format is not supported + /// by this function. It's generally recommended to use [`Module::new`], but + /// if it's required to not support the text format this function can be + /// used instead. /// /// # Examples /// @@ -286,10 +278,6 @@ impl Module { /// # } /// ``` pub fn from_binary(engine: &Engine, binary: &[u8]) -> Result { - if let Some(module) = SerializedModule::from_bytes(binary)? { - return module.into_module(engine); - } - // Check to see that the config's target matches the host let target = engine.config().isa_flags.triple(); if *target != target_lexicon::Triple::host() { @@ -329,6 +317,49 @@ impl Module { Self::from_parts(engine, modules, main_module, Arc::new(types), &[]) } + /// Deserializes an in-memory compiled module previously created with + /// [`Module::serialize`] or [`Engine::precompile_module`]. + /// + /// This function will deserialize the binary blobs emitted by + /// [`Module::serialize`] and [`Engine::precompile_module`] back into an + /// in-memory [`Module`] that's ready to be instantiated. + /// + /// # Unsafety + /// + /// This function is marked as `unsafe` because if fed invalid input or used + /// improperly this could lead to memory safety vulnerabilities. This method + /// should not, for example, be exposed to arbitrary user input. + /// + /// The structure of the binary blob read here is only lightly validated + /// internally in `wasmtime`. This is intended to be an efficient + /// "rehydration" for a [`Module`] which has very few runtime checks beyond + /// deserialization. Arbitrary input could, for example, replace valid + /// compiled code with any other valid compiled code, meaning that this can + /// trivially be used to execute arbitrary code otherwise. + /// + /// For these reasons this function is `unsafe`. This function is only + /// designed to receive the previous input from [`Module::serialize`] and + /// [`Engine::precompile_module`]. If the exact output of those functions + /// (unmodified) is passed to this function then calls to this function can + /// be considered safe. It is the caller's responsibility to provide the + /// guarantee that only previously-serialized bytes are being passed in + /// here. + /// + /// Note that this function is designed to be safe receiving output from + /// *any* compiled version of `wasmtime` itself. This means that it is safe + /// to feed output from older versions of Wasmtime into this function, in + /// addition to newer versions of wasmtime (from the future!). These inputs + /// will deterministically and safely produce an `Err`. This function only + /// successfully accepts inputs from the same version of `wasmtime`, but the + /// safety guarantee only applies to externally-defined blobs of bytes, not + /// those defined by any version of wasmtime. (this means that if you cache + /// blobs across versions of wasmtime you can be safely guaranteed that + /// future versions of wasmtime will reject old cache entries). + pub unsafe fn deserialize(engine: &Engine, bytes: impl AsRef<[u8]>) -> Result { + let module = SerializedModule::from_bytes(bytes.as_ref())?; + module.into_module(engine) + } + fn from_parts( engine: &Engine, mut modules: Vec>, diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs index 8b3620a75db7..e566d01ed5ff 100644 --- a/crates/wasmtime/src/module/serialization.rs +++ b/crates/wasmtime/src/module/serialization.rs @@ -329,9 +329,9 @@ impl<'a> SerializedModule<'a> { Ok(bytes) } - pub fn from_bytes(bytes: &[u8]) -> Result> { + pub fn from_bytes(bytes: &[u8]) -> Result { if !bytes.starts_with(HEADER) { - return Ok(None); + bail!("bytes are not a compatible serialized wasmtime module"); } let bytes = &bytes[HEADER.len()..]; @@ -353,11 +353,9 @@ impl<'a> SerializedModule<'a> { ); } - Ok(Some( - bincode_options() - .deserialize::>(&bytes[1 + version_len..]) - .context("deserialize compilation artifacts")?, - )) + Ok(bincode_options() + .deserialize::>(&bytes[1 + version_len..]) + .context("deserialize compilation artifacts")?) } fn check_triple(&self, isa: &dyn TargetIsa) -> Result<()> { diff --git a/examples/serialize.rs b/examples/serialize.rs index 3cd709e7489d..70a875c3bf20 100644 --- a/examples/serialize.rs +++ b/examples/serialize.rs @@ -29,9 +29,12 @@ fn deserialize(buffer: &[u8]) -> Result<()> { println!("Initializing..."); let store = Store::default(); - // Compile the wasm binary into an in-memory instance of a `Module`. + // Compile the wasm binary into an in-memory instance of a `Module`. Note + // that this is `unsafe` because it is our responsibility for guaranteeing + // that these bytes are valid precompiled module bytes. We know that from + // the structure of this example program. println!("Deserialize module..."); - let module = Module::new(store.engine(), buffer)?; + let module = unsafe { Module::deserialize(store.engine(), buffer)? }; // Here we handle the imports of the module, which in this case is our // `HelloCallback` type and its associated implementation of `Callback. diff --git a/src/commands/compile.rs b/src/commands/compile.rs index 7b9a7ebcc20e..1ad611623c16 100644 --- a/src/commands/compile.rs +++ b/src/commands/compile.rs @@ -126,7 +126,8 @@ mod test { command.execute()?; let engine = Engine::default(); - let module = Module::from_file(&engine, output_path)?; + let contents = std::fs::read(output_path)?; + let module = unsafe { Module::deserialize(&engine, contents)? }; let store = Store::new(&engine); let instance = Instance::new(&store, &module, &[])?; let f = instance.get_typed_func::("f")?; diff --git a/tests/all/module.rs b/tests/all/module.rs index 279c91e829f4..f6b3b5b0d051 100644 --- a/tests/all/module.rs +++ b/tests/all/module.rs @@ -27,35 +27,37 @@ fn caches_across_engines() { .serialize() .unwrap(); - let res = Module::new(&Engine::new(&Config::new()).unwrap(), &bytes); - assert!(res.is_ok()); + unsafe { + let res = Module::deserialize(&Engine::new(&Config::new()).unwrap(), &bytes); + assert!(res.is_ok()); - // differ in shared cranelift flags - let res = Module::new( - &Engine::new(Config::new().cranelift_nan_canonicalization(true)).unwrap(), - &bytes, - ); - assert!(res.is_err()); - - // differ in cranelift settings - let res = Module::new( - &Engine::new(Config::new().cranelift_opt_level(OptLevel::None)).unwrap(), - &bytes, - ); - assert!(res.is_err()); + // differ in shared cranelift flags + let res = Module::deserialize( + &Engine::new(Config::new().cranelift_nan_canonicalization(true)).unwrap(), + &bytes, + ); + assert!(res.is_err()); - // Missing required cpu flags - if cfg!(target_arch = "x86_64") { - let res = Module::new( - &Engine::new( - Config::new() - .target(&target_lexicon::Triple::host().to_string()) - .unwrap(), - ) - .unwrap(), + // differ in cranelift settings + let res = Module::deserialize( + &Engine::new(Config::new().cranelift_opt_level(OptLevel::None)).unwrap(), &bytes, ); assert!(res.is_err()); + + // Missing required cpu flags + if cfg!(target_arch = "x86_64") { + let res = Module::deserialize( + &Engine::new( + Config::new() + .target(&target_lexicon::Triple::host().to_string()) + .unwrap(), + ) + .unwrap(), + &bytes, + ); + assert!(res.is_err()); + } } } @@ -66,7 +68,7 @@ fn aot_compiles() -> Result<()> { "(module (func (export \"f\") (param i32) (result i32) local.get 0))".as_bytes(), )?; - let module = Module::from_binary(&engine, &bytes)?; + let module = unsafe { Module::deserialize(&engine, &bytes)? }; let store = Store::new(&engine); let instance = Instance::new(&store, &module, &[])?; diff --git a/tests/all/module_linking.rs b/tests/all/module_linking.rs index 357caa982f39..28fb802fae47 100644 --- a/tests/all/module_linking.rs +++ b/tests/all/module_linking.rs @@ -39,7 +39,9 @@ fn compile() -> Result<()> { assert_eq!(m.imports().len(), 0); assert_eq!(m.exports().len(), 0); let bytes = m.serialize()?; - Module::new(&engine, &bytes)?; + unsafe { + Module::deserialize(&engine, &bytes)?; + } assert_eq!(m.imports().len(), 0); assert_eq!(m.exports().len(), 0); Ok(()) diff --git a/tests/all/module_serialize.rs b/tests/all/module_serialize.rs index 4409659fcd75..7f68b48a58ca 100644 --- a/tests/all/module_serialize.rs +++ b/tests/all/module_serialize.rs @@ -6,8 +6,8 @@ fn serialize(engine: &Engine, wat: &'static str) -> Result> { Ok(module.serialize()?) } -fn deserialize_and_instantiate(store: &Store, buffer: &[u8]) -> Result { - let module = Module::new(store.engine(), buffer)?; +unsafe fn deserialize_and_instantiate(store: &Store, buffer: &[u8]) -> Result { + let module = Module::deserialize(store.engine(), buffer)?; Ok(Instance::new(&store, &module, &[])?) } @@ -17,7 +17,7 @@ fn test_version_mismatch() -> Result<()> { let mut buffer = serialize(&engine, "(module)")?; buffer[13 /* header length */ + 1 /* version length */] = 'x' as u8; - match Module::new(&engine, &buffer) { + match unsafe { Module::deserialize(&engine, &buffer) } { Ok(_) => bail!("expected deserialization to fail"), Err(e) => assert!(e .to_string() @@ -35,7 +35,7 @@ fn test_module_serialize_simple() -> Result<()> { )?; let store = Store::default(); - let instance = deserialize_and_instantiate(&store, &buffer)?; + let instance = unsafe { deserialize_and_instantiate(&store, &buffer)? }; let run = instance.get_typed_func::<(), i32>("run")?; let result = run.call(())?; @@ -53,7 +53,7 @@ fn test_module_serialize_fail() -> Result<()> { let mut config = Config::new(); config.cranelift_opt_level(OptLevel::None); let store = Store::new(&Engine::new(&config)?); - match deserialize_and_instantiate(&store, &buffer) { + match unsafe { deserialize_and_instantiate(&store, &buffer) } { Ok(_) => bail!("expected failure at deserialization"), Err(_) => (), }