From 6f53aaf301981c64b374c0fd2ea775657b0adf78 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald
Date: Mon, 11 Jul 2022 17:20:45 -0700
Subject: [PATCH] fuzzing: Add a fuzz target to check that our stack traces are correct

We generate Wasm modules that keep track of their own stack as they call and
return between functions, and then we periodically check that if the host
captures a backtrace, it matches what the Wasm module has recorded.
---
 crates/fuzzing/src/generators.rs        |   2 +
 crates/fuzzing/src/generators/stacks.rs | 334 ++++++++++++++++++++++++
 crates/fuzzing/src/oracles.rs           |   2 +
 crates/fuzzing/src/oracles/stacks.rs    | 100 ++++++++
 fuzz/Cargo.toml                         |   6 +
 fuzz/fuzz_targets/stacks.rs             |  10 +
 6 files changed, 454 insertions(+)
 create mode 100644 crates/fuzzing/src/generators/stacks.rs
 create mode 100644 crates/fuzzing/src/oracles/stacks.rs
 create mode 100644 fuzz/fuzz_targets/stacks.rs

diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs
index c0b9c9caf68e..83492b2fda77 100644
--- a/crates/fuzzing/src/generators.rs
+++ b/crates/fuzzing/src/generators.rs
@@ -17,6 +17,7 @@ mod memory;
 mod module_config;
 mod single_inst_module;
 mod spec_test;
+mod stacks;
 pub mod table_ops;
 
 pub use codegen_settings::CodegenSettings;
@@ -27,3 +28,4 @@ pub use memory::{MemoryConfig, NormalMemoryConfig, UnalignedMemory, UnalignedMem
 pub use module_config::ModuleConfig;
 pub use single_inst_module::SingleInstModule;
 pub use spec_test::SpecTest;
+pub use stacks::Stacks;
diff --git a/crates/fuzzing/src/generators/stacks.rs b/crates/fuzzing/src/generators/stacks.rs
new file mode 100644
index 000000000000..06e41d2f4f26
--- /dev/null
+++ b/crates/fuzzing/src/generators/stacks.rs
@@ -0,0 +1,334 @@
+//! Generate a Wasm program that keeps track of its current stack frames.
+//!
+//! We can then compare the stack trace we observe in Wasmtime to what the Wasm
+//! program believes its stack should be. Any discrepancies between the two
+//! points to a bug in either this test case generator or Wasmtime's stack
+//! walker.
+
+use std::mem;
+
+use arbitrary::{Arbitrary, Result, Unstructured};
+use wasm_encoder::Instruction;
+
+/// The maximum number of functions, not counting `run` and `get_stack`, per module.
+const MAX_FUNCS: usize = 20;
+
+/// Generate a Wasm module that keeps track of its current call stack, to
+/// compare to the host.
+#[derive(Debug)]
+pub struct Stacks {
+    funcs: Vec<Function>,
+    inputs: Vec<u8>,
+}
+
+/// A generated Wasm function: the operations it performs, in order.
+#[derive(Debug, Default)]
+struct Function {
+    ops: Vec<Op>,
+}
+
+/// An operation that a generated function can perform.
+#[derive(Arbitrary, Debug, Clone, Copy)]
+enum Op {
+    /// Call the imported `host.check_stack` function with our recorded stack.
+    CheckStackInHost,
+    /// Call the generated function with this index into `Stacks::funcs`.
+    Call(u32),
+}
+
+impl<'a> Arbitrary<'a> for Stacks {
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        let funcs = Self::arbitrary_funcs(u)?;
+        let n = u.len();
+        let inputs = u.bytes(n)?.to_vec();
+        Ok(Stacks { funcs, inputs })
+    }
+}
+
+impl Stacks {
+    /// Generate the functions' bodies. Function 0 is the entry point that `run`
+    /// calls, and every `Op::Call` index is in bounds of the returned vector.
+    fn arbitrary_funcs(u: &mut Unstructured) -> Result<Vec<Function>> {
+        let mut funcs = vec![Function::default()];
+
+        // The indices of functions within `funcs` that we still need to
+        // generate.
+        let mut work_list = vec![0];
+
+        while let Some(f) = work_list.pop() {
+            let mut ops = u.arbitrary::<Vec<Op>>()?;
+            for op in &mut ops {
+                if let Op::Call(idx) = op {
+                    if u.is_empty() || funcs.len() >= MAX_FUNCS || u.ratio(4, 5)? {
+                        // Call an existing function.
+                        *idx %= u32::try_from(funcs.len()).unwrap();
+                    } else {
+                        // Call a new function...
+                        *idx = u32::try_from(funcs.len()).unwrap();
+                        // ...which means we also need to eventually define it.
+                        work_list.push(funcs.len());
+                        funcs.push(Function::default());
+                    }
+                }
+            }
+            funcs[f].ops = ops;
+        }
+
+        Ok(funcs)
+    }
+
+    /// Get the input values to run the Wasm module with.
+    pub fn inputs(&self) -> &[u8] {
+        &self.inputs
+    }
+
+    /// Get this test case's Wasm module.
+    ///
+    /// The Wasm module imports a function `host.check_stack: [i32 i32] -> []`
+    /// from the host. This function is given an array (as pointer and length)
+    /// of `u32`s. This is the Wasm program's understanding of its current
+    /// stack. The host can check this against its own understanding of the Wasm
+    /// stack to find bugs.
+    ///
+    /// The Wasm module exports two functions:
+    ///
+    /// 1. `run: [i32] -> []`: This function should be called with each of the
+    ///    input values to run this generated test case.
+    ///
+    /// 2. `get_stack: [] -> [i32 i32]`: Get the pointer and length of the `u32`
+    ///    array of this Wasm's understanding of its stack. This is useful for
+    ///    checking whether the host's view of the stack at a trap matches the
+    ///    Wasm program's understanding.
+    pub fn wasm(&self) -> Vec<u8> {
+        let mut module = wasm_encoder::Module::new();
+
+        let mut types = wasm_encoder::TypeSection::new();
+        let check_stack_type = types.len();
+        types.function(
+            vec![wasm_encoder::ValType::I32, wasm_encoder::ValType::I32],
+            vec![],
+        );
+        let run_type = types.len();
+        types.function(vec![wasm_encoder::ValType::I32], vec![]);
+        let get_stack_type = types.len();
+        types.function(
+            vec![],
+            vec![wasm_encoder::ValType::I32, wasm_encoder::ValType::I32],
+        );
+        let null_type = types.len();
+        types.function(vec![], vec![]);
+        section(&mut module, types);
+
+        let mut imports = wasm_encoder::ImportSection::new();
+        let check_stack_func = 0;
+        imports.import(
+            "host",
+            "check_stack",
+            wasm_encoder::EntityType::Function(check_stack_type),
+        );
+        let num_imported_funcs = 1;
+        section(&mut module, imports);
+
+        let mut funcs = wasm_encoder::FunctionSection::new();
+        for _ in &self.funcs {
+            funcs.function(null_type);
+        }
+        let run_func = funcs.len() + num_imported_funcs;
+        funcs.function(run_type);
+        let get_stack_func = funcs.len() + num_imported_funcs;
+        funcs.function(get_stack_type);
+        section(&mut module, funcs);
+
+        let mut mems = wasm_encoder::MemorySection::new();
+        let memory = mems.len();
+        mems.memory(wasm_encoder::MemoryType {
+            minimum: 1,
+            maximum: Some(1),
+            memory64: false,
+            shared: false,
+        });
+        section(&mut module, mems);
+
+        let mut globals = wasm_encoder::GlobalSection::new();
+        let fuel_global = globals.len();
+        globals.global(
+            wasm_encoder::GlobalType {
+                val_type: wasm_encoder::ValType::I32,
+                mutable: true,
+            },
+            &Instruction::I32Const(0),
+        );
+        let stack_len_global = globals.len();
+        globals.global(
+            wasm_encoder::GlobalType {
+                val_type: wasm_encoder::ValType::I32,
+                mutable: true,
+            },
+            &Instruction::I32Const(0),
+        );
+        section(&mut module, globals);
+
+        let mut exports = wasm_encoder::ExportSection::new();
+        exports.export("run", wasm_encoder::ExportKind::Func, run_func);
+        exports.export("get_stack", wasm_encoder::ExportKind::Func, get_stack_func);
+        exports.export("memory", wasm_encoder::ExportKind::Memory, memory);
+        section(&mut module, exports);
+
+        let mut code = wasm_encoder::CodeSection::new();
+        for (func_index, func) in self.funcs.iter().enumerate() {
+            let mut body = wasm_encoder::Function::new(vec![]);
+
+            // Add this function to our internal stack.
+            //
+            // Note that we know our `stack_len_global` can't go beyond memory
+            // bounds because we limit fuel to at most `u8::MAX` and each stack
+            // entry is an `i32` and `u8::MAX * size_of(i32)` still fits in one
+            // page.
+            body.instruction(&Instruction::GlobalGet(stack_len_global))
+                .instruction(&Instruction::I32Const(
+                    (num_imported_funcs + u32::try_from(func_index).unwrap()) as i32,
+                ))
+                .instruction(&Instruction::I32Store(wasm_encoder::MemArg {
+                    offset: 0,
+                    align: 0,
+                    memory_index: memory,
+                }))
+                .instruction(&Instruction::GlobalGet(stack_len_global))
+                .instruction(&Instruction::I32Const(mem::size_of::<u32>() as i32))
+                .instruction(&Instruction::I32Add)
+                .instruction(&Instruction::GlobalSet(stack_len_global));
+
+            // Trap if we are out of fuel.
+            body.instruction(&Instruction::GlobalGet(fuel_global))
+                .instruction(&Instruction::I32Eqz)
+                .instruction(&Instruction::If(wasm_encoder::BlockType::Empty))
+                .instruction(&Instruction::Unreachable)
+                .instruction(&Instruction::End);
+
+            // Decrement fuel.
+            body.instruction(&Instruction::GlobalGet(fuel_global))
+                .instruction(&Instruction::I32Const(1))
+                .instruction(&Instruction::I32Sub)
+                .instruction(&Instruction::GlobalSet(fuel_global));
+
+            // Perform our specified operations.
+            for op in &func.ops {
+                match op {
+                    Op::CheckStackInHost => {
+                        body.instruction(&Instruction::I32Const(0))
+                            .instruction(&Instruction::GlobalGet(stack_len_global))
+                            .instruction(&Instruction::Call(check_stack_func));
+                    }
+                    Op::Call(f) => {
+                        body.instruction(&Instruction::Call(f + num_imported_funcs));
+                    }
+                }
+            }
+
+            // Remove this function from our internal stack.
+            body.instruction(&Instruction::GlobalGet(stack_len_global))
+                .instruction(&Instruction::I32Const(mem::size_of::<u32>() as i32))
+                .instruction(&Instruction::I32Sub)
+                .instruction(&Instruction::GlobalSet(stack_len_global));
+
+            body.instruction(&Instruction::End);
+            function(&mut code, body);
+        }
+
+        let mut run_body = wasm_encoder::Function::new(vec![]);
+
+        // Add the `run` function to our internal stack.
+        //
+        // See above comments about overflow.
+        run_body
+            .instruction(&Instruction::GlobalGet(stack_len_global))
+            .instruction(&Instruction::I32Const(
+                u32::try_from(run_func).unwrap() as i32
+            ))
+            .instruction(&Instruction::I32Store(wasm_encoder::MemArg {
+                offset: 0,
+                align: 0,
+                memory_index: memory,
+            }))
+            .instruction(&Instruction::GlobalGet(stack_len_global))
+            .instruction(&Instruction::I32Const(mem::size_of::<u32>() as i32))
+            .instruction(&Instruction::I32Add)
+            .instruction(&Instruction::GlobalSet(stack_len_global));
+
+        // Initialize the fuel global and call the first locally defined
+        // function.
+        run_body
+            .instruction(&Instruction::LocalGet(0))
+            .instruction(&Instruction::GlobalSet(fuel_global))
+            .instruction(&Instruction::Call(num_imported_funcs));
+
+        // Remove the `run` function from our internal stack.
+        run_body
+            .instruction(&Instruction::GlobalGet(stack_len_global))
+            .instruction(&Instruction::I32Const(mem::size_of::<u32>() as i32))
+            .instruction(&Instruction::I32Sub)
+            .instruction(&Instruction::GlobalSet(stack_len_global));
+
+        run_body.instruction(&Instruction::End);
+        function(&mut code, run_body);
+
+        let mut get_stack_body = wasm_encoder::Function::new(vec![]);
+        get_stack_body
+            .instruction(&Instruction::I32Const(0))
+            .instruction(&Instruction::GlobalGet(stack_len_global))
+            .instruction(&Instruction::End);
+        function(&mut code, get_stack_body);
+
+        section(&mut module, code);
+
+        return module.finish();
+
+        // Helper that defines a section in the module and takes ownership of it
+        // so that it is dropped and its memory reclaimed after adding it to the
+        // module.
+        fn section(module: &mut wasm_encoder::Module, section: impl wasm_encoder::Section) {
+            module.section(&section);
+        }
+
+        // Helper that defines a function body in the code section and takes
+        // ownership of it so that it is dropped and its memory reclaimed after
+        // adding it to the module.
+        fn function(code: &mut wasm_encoder::CodeSection, func: wasm_encoder::Function) {
+            code.function(&func);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::prelude::*;
+    use wasmparser::Validator;
+
+    #[test]
+    fn stacks_generates_valid_wasm_modules() {
+        let mut rng = SmallRng::seed_from_u64(0);
+        let mut buf = vec![0; 2048];
+        for _ in 0..1024 {
+            rng.fill_bytes(&mut buf);
+            let u = Unstructured::new(&buf);
+            if let Ok(stacks) = Stacks::arbitrary_take_rest(u) {
+                let wasm = stacks.wasm();
+                validate(&wasm);
+            }
+        }
+    }
+
+    fn validate(wasm: &[u8]) {
+        let mut validator = Validator::new();
+        let err = match validator.validate_all(wasm) {
+            Ok(_) => return,
+            Err(e) => e,
+        };
+        drop(std::fs::write("test.wasm", wasm));
+        if let Ok(text) = wasmprinter::print_bytes(wasm) {
+            drop(std::fs::write("test.wat", &text));
+        }
+        panic!("wasm failed to validate: {}", err);
+    }
+}
diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs
index 443b339847be..b49076708929 100644
--- a/crates/fuzzing/src/oracles.rs
+++ b/crates/fuzzing/src/oracles.rs
@@ -11,10 +11,12 @@
 //! panicking.
 
pub mod dummy; +mod stacks; use crate::generators; use arbitrary::Arbitrary; use log::debug; +pub use stacks::check_stacks; use std::cell::Cell; use std::rc::Rc; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; diff --git a/crates/fuzzing/src/oracles/stacks.rs b/crates/fuzzing/src/oracles/stacks.rs new file mode 100644 index 000000000000..57715afa4c0d --- /dev/null +++ b/crates/fuzzing/src/oracles/stacks.rs @@ -0,0 +1,100 @@ +use crate::generators::Stacks; +use wasmtime::*; + +/// TODO FITZGEN +pub fn check_stacks(stacks: Stacks) { + let wasm = stacks.wasm(); + crate::oracles::log_wasm(&wasm); + + let engine = Engine::default(); + let module = Module::new(&engine, &wasm).expect("should compile okay"); + + let mut linker = Linker::new(&engine); + linker + .func_wrap( + "host", + "check_stack", + |mut caller: Caller<'_, ()>, ptr: u32, len: u32| { + let memory = caller + .get_export("memory") + .expect("should export `memory`") + .into_memory() + .expect("`memory` export should be a memory"); + + let trap = Trap::with_backtrace(&caller, "dummy"); + let host_trace = trap.trace().unwrap(); + + assert_stack_matches(&mut caller, memory, ptr, len, host_trace); + }, + ) + .unwrap(); + + for input in stacks.inputs().iter().copied() { + let mut store = Store::new(&engine, ()); + + let instance = linker + .instantiate(&mut store, &module) + .expect("should instantiate okay"); + + let run = instance + .get_typed_func::<(u32,), (), _>(&mut store, "run") + .expect("should export `run` function"); + + if let Err(trap) = run.call(&mut store, (input.into(),)) { + let get_stack = instance + .get_export(&mut store, "get_stack") + .expect("should export `get_stack`") + .into_func() + .expect("`get_stack` export should be a function") + .typed::<(), (u32, u32), _>(&store) + .expect("should have type [] -> [i32 i32]"); + + let (ptr, len) = get_stack + .call(&mut store, ()) + .expect("`get_stack` should not trap"); + + let memory = instance + .get_memory(&mut store, "memory") + 
.expect("should have `memory` export"); + + let host_trace = trap.trace().unwrap(); + + assert_stack_matches(&mut store, memory, ptr, len, host_trace); + } + } +} + +fn assert_stack_matches( + store: &mut impl AsContextMut, + memory: Memory, + ptr: u32, + len: u32, + host_trace: &[FrameInfo], +) { + let mut data = vec![0; len as usize]; + memory + .read(&mut *store, ptr as usize, &mut data) + .expect("should be in bounds"); + + let mut wasm_trace = vec![]; + for entry in data.chunks(4).rev() { + let mut bytes = [0; 4]; + bytes.copy_from_slice(entry); + let entry = u32::from_le_bytes(bytes); + wasm_trace.push(entry); + } + + log::debug!("Wasm thinks its stack is: {:?}", wasm_trace); + log::debug!( + "Host thinks the stack is: {:?}", + host_trace + .iter() + .map(|f| f.func_index()) + .collect::>() + ); + + assert_eq!(wasm_trace.len(), host_trace.len()); + for (wasm_entry, host_entry) in wasm_trace.into_iter().zip(host_trace) { + assert_eq!(wasm_entry, host_entry.func_index()); + } +} diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 90d851e6e5d9..31fc994553e9 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -79,6 +79,12 @@ path = "fuzz_targets/table_ops.rs" test = false doc = false +[[bin]] +name = "stacks" +path = "fuzz_targets/stacks.rs" +test = false +doc = false + [[bin]] name = "compile-maybe-invalid" path = "fuzz_targets/compile-maybe-invalid.rs" diff --git a/fuzz/fuzz_targets/stacks.rs b/fuzz/fuzz_targets/stacks.rs new file mode 100644 index 000000000000..08504c795f86 --- /dev/null +++ b/fuzz/fuzz_targets/stacks.rs @@ -0,0 +1,10 @@ +//! Check that we see the stack trace correctly. + +#![no_main] + +use libfuzzer_sys::fuzz_target; +use wasmtime_fuzzing::{generators::Stacks, oracles::check_stacks}; + +fuzz_target!(|stacks: Stacks| { + check_stacks(stacks); +});