From 10aded459d00d4e0311ec3a90e383a01f325199f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20M=C3=B6sl?= Date: Sun, 7 Mar 2021 23:54:51 +0100 Subject: [PATCH] feat: create api for rarity simulation - also improve readability of rarity simulation internals --- Cargo.lock | 1 + Cargo.toml | 1 + src/cli.rs | 74 +- src/engine/bug.rs | 117 +-- src/engine/mod.rs | 967 +----------------- .../rarity_simulation.rs} | 816 ++++++++------- src/engine/symbolic_execution.rs | 960 +++++++++++++++++ src/engine/symbolic_state.rs | 79 +- src/engine/system.rs | 28 + src/lib.rs | 79 +- src/main.rs | 65 +- tests/engine.rs | 52 +- tests/rarity.rs | 18 +- 13 files changed, 1683 insertions(+), 1574 deletions(-) rename src/{rarity.rs => engine/rarity_simulation.rs} (74%) create mode 100644 src/engine/symbolic_execution.rs create mode 100644 src/engine/system.rs diff --git a/Cargo.lock b/Cargo.lock index cdcf8e15..4fb0ab40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -504,6 +504,7 @@ dependencies = [ "divisors", "env_logger", "itertools 0.10.0", + "lazy_static", "log", "modinverse", "petgraph", diff --git a/Cargo.toml b/Cargo.toml index 3c7d19f4..fe63c8ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ anyhow = "~1.0.38" thiserror = "~1.0.24" divisors = "~0.2.1" const_format = "~0.2.13" +lazy_static = "~1.4.0" boolector = { version = "~0.4.2", features = ["vendor-lgl"], optional = true } z3 = { version = "~0.9.0", features = ["static-link-z3"], optional = true } diff --git a/src/cli.rs b/src/cli.rs index 888743c8..34a4e240 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,11 +1,14 @@ -use crate::{ - engine::{DEFAULT_MAX_EXECUTION_DEPTH, DEFAULT_MEMORY_SIZE}, - rarity::MetricType, - solver::SolverType, -}; use anyhow::{anyhow, Context, Result}; use clap::{crate_authors, crate_description, crate_version, App, AppSettings, Arg, ArgMatches}; use const_format::formatcp; +use lazy_static::lazy_static; +use monster::{ + engine::{ + rarity_simulation::{defaults as rarity_defaults, 
MeanType}, + symbolic_execution::defaults as symbolic_defaults, + }, + solver::SolverType, +}; use std::str::FromStr; use strum::{EnumString, EnumVariantNames, IntoStaticStr, VariantNames}; @@ -19,6 +22,10 @@ pub enum LogLevel { Error, } +lazy_static! { + static ref COPY_INIT_RATIO: String = format!("{}", rarity_defaults::COPY_INIT_RATIO); +} + pub fn args() -> App<'static, 'static> { App::new("Monster") .version(crate_version!()) @@ -99,8 +106,8 @@ pub fn args() -> App<'static, 'static> { .long("execution-depth") .takes_value(true) .value_name("NUMBER") - .default_value(formatcp!("{}", DEFAULT_MAX_EXECUTION_DEPTH)) - .validator(is_u64), + .default_value(formatcp!("{}", symbolic_defaults::MAX_EXECUTION_DEPTH)) + .validator(is::), ) .arg( Arg::with_name("memory") @@ -109,7 +116,7 @@ pub fn args() -> App<'static, 'static> { .long("memory") .takes_value(true) .value_name("NUMBER") - .default_value(formatcp!("{}", DEFAULT_MEMORY_SIZE.0 / bytesize::MB)) + .default_value(formatcp!("{}", symbolic_defaults::MEMORY_SIZE.0 / bytesize::MB)) .validator(is_valid_memory_size), ), ) @@ -130,28 +137,26 @@ pub fn args() -> App<'static, 'static> { .long("memory") .takes_value(true) .value_name("NUMBER") - .default_value("1") + .default_value(formatcp!("{}", rarity_defaults::MEMORY_SIZE.0 / bytesize::MB)) .validator(is_valid_memory_size), ) .arg( - Arg::with_name("cycles") + Arg::with_name("step-size") .help("Instructions to be executed for each round") - .short("c") - .long("cycles") + .long("step-size") .takes_value(true) .value_name("NUMBER") - .default_value("1000") - .validator(is_u64), + .default_value(formatcp!("{}", rarity_defaults::STEP_SIZE)) + .validator(is::), ) .arg( - Arg::with_name("runs") - .help("Number of distinct runs") - .short("r") - .long("runs") + Arg::with_name("states") + .help("Number of distinct states") + .long("states") .takes_value(true) .value_name("NUMBER") - .default_value("3000") - .validator(is_u64), + .default_value(formatcp!("{}", 
rarity_defaults::AMOUNT_OF_STATES)) + .validator(is::), ) .arg( Arg::with_name("selection") @@ -160,8 +165,8 @@ pub fn args() -> App<'static, 'static> { .long("selection") .takes_value(true) .value_name("NUMBER") - .default_value("50") - .validator(is_u64)) + .default_value(formatcp!("{}", rarity_defaults::SELECTION)) + .validator(is::)) .arg( Arg::with_name("iterations") .help("Iterations of rarity simulation to run") @@ -169,25 +174,25 @@ pub fn args() -> App<'static, 'static> { .long("iterations") .takes_value(true) .value_name("NUMBER") - .default_value("20") - .validator(is_u64)) + .default_value(formatcp!("{}", rarity_defaults::ITERATIONS)) + .validator(is::)) .arg( Arg::with_name("copy-init-ratio") .help("Determines how much new states are copied instead of started from the beginning") .long("copy-init-ratio") .takes_value(true) .value_name("RATIO") - .default_value("0.6") + .default_value(COPY_INIT_RATIO.as_str()) .validator(is_ratio) ) .arg( - Arg::with_name("metric") + Arg::with_name("mean") .help("The average to be used for the counts") - .long("metric") + .long("mean") .takes_value(true) - .value_name("METRIC") - .possible_values(&MetricType::VARIANTS) - .default_value(MetricType::Harmonic.into()) + .value_name("MEAN") + .possible_values(&MeanType::VARIANTS) + .default_value(rarity_defaults::MEAN_TYPE.into()) ) ) .setting(AppSettings::SubcommandRequiredElseHelp) @@ -205,12 +210,15 @@ where }) } -fn is_u64(v: String) -> Result<(), String> { - v.parse::().map(|_| ()).map_err(|e| e.to_string()) +fn is(v: String) -> Result<(), String> +where + ::Err: std::fmt::Display, +{ + v.parse::().map(|_| ()).map_err(|e| e.to_string()) } fn is_valid_memory_size(v: String) -> Result<(), String> { - is_u64(v.clone()).and_then(|_| { + is::(v.clone()).and_then(|_| { let memory_size = v.parse::().expect("have checked that already"); let valid_range = 1_u64..=1024_u64; diff --git a/src/engine/bug.rs b/src/engine/bug.rs index f517a4ca..99d8cae9 100644 --- a/src/engine/bug.rs 
+++ b/src/engine/bug.rs @@ -1,39 +1,40 @@ -use crate::{ - engine::{instruction_to_str, Value}, - solver::{BVOperator, BitVector}, -}; +use super::system::instruction_to_str; use riscu::Instruction; use std::fmt; +pub trait BugInfo: fmt::Display + fmt::Debug + Clone { + type Value: fmt::Display + fmt::Debug + Clone; +} + #[derive(Debug, Clone)] -pub enum Bug { +pub enum Bug { DivisionByZero { - info: T, + info: Info, }, AccessToUnitializedMemory { - info: T, + info: Info, instruction: Instruction, - operands: Vec, + operands: Vec, }, AccessToUnalignedAddress { - info: T, + info: Info, address: u64, }, AccessToOutOfRangeAddress { - info: T, + info: Info, }, ExitCodeGreaterZero { - info: T, + info: Info, }, } -impl fmt::Display for Bug +impl fmt::Display for Bug where - T: fmt::Display + fmt::Debug + Clone, + Info: BugInfo, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -63,93 +64,3 @@ where } } } - -#[derive(Debug, Clone)] -pub struct BasicInfo { - pub witness: Witness, - pub pc: u64, -} - -impl fmt::Display for BasicInfo { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "pc: {:#010x}\nwitness: {}", self.pc, self.witness) - } -} - -#[derive(Debug, Clone)] -pub(crate) enum Term { - Constant(u64), - Variable(String, u64), - Unary(BVOperator, usize, u64), - Binary(usize, BVOperator, usize, u64), -} - -#[derive(Debug, Clone)] -pub struct Witness { - assignments: Vec, -} - -impl Default for Witness { - fn default() -> Self { - Self { - assignments: Vec::new(), - } - } -} - -impl Witness { - pub fn new() -> Self { - Witness::default() - } - - pub fn add_constant(&mut self, value: BitVector) -> usize { - self.assignments.push(Term::Constant(value.0)); - - self.assignments.len() - 1 - } - - pub fn add_variable(&mut self, name: &str, result: BitVector) -> usize { - self.assignments - .push(Term::Variable(name.to_owned(), result.0)); - - self.assignments.len() - 1 - } - - pub fn add_unary(&mut self, op: BVOperator, v: usize, 
result: BitVector) -> usize { - self.assignments.push(Term::Unary(op, v, result.0)); - - self.assignments.len() - 1 - } - - pub fn add_binary( - &mut self, - lhs: usize, - op: BVOperator, - rhs: usize, - result: BitVector, - ) -> usize { - self.assignments.push(Term::Binary(lhs, op, rhs, result.0)); - - self.assignments.len() - 1 - } -} - -impl fmt::Display for Witness { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "[").and_then(|_| { - self.assignments - .clone() - .into_iter() - .enumerate() - .try_for_each(|(id, a)| match a { - Term::Constant(c) => writeln!(f, " x{} := {},", id, c), - Term::Variable(name, v) => writeln!(f, " x{} := {:?} ({}),", id, name, v), - Term::Unary(op, x, v) => writeln!(f, " x{} := {}x{} ({}),", id, op, x, v), - Term::Binary(lhs, op, rhs, v) => { - writeln!(f, " x{} := x{} {} x{} ({}),", id, lhs, op, rhs, v) - } - }) - .and_then(|_| writeln!(f, "]")) - }) - } -} diff --git a/src/engine/mod.rs b/src/engine/mod.rs index 7fbdf441..83a19d62 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -1,963 +1,18 @@ -#![allow(clippy::unnecessary_wraps)] - pub mod bug; +pub mod rarity_simulation; +pub mod symbolic_execution; pub mod symbolic_state; +pub mod system; -use self::{ - bug::{BasicInfo, Bug as GenericBug}, - symbolic_state::{Query, SymbolicState, SymbolicValue}, -}; -use crate::{ - path_exploration::ExplorationStrategy, - solver::{BVOperator, Solver, SolverError}, -}; -pub use bug::Witness; -use byteorder::{ByteOrder, LittleEndian}; -use bytesize::ByteSize; -use log::{debug, trace}; -use riscu::{ - decode, types::*, DecodingError, Instruction, Program, ProgramSegment, Register, - INSTRUCTION_SIZE as INSTR_SIZE, -}; -use std::{fmt, mem::size_of}; -use thiserror::Error; - -const INSTRUCTION_SIZE: u64 = INSTR_SIZE as u64; -pub const DEFAULT_MEMORY_SIZE: ByteSize = ByteSize(bytesize::MB); -pub const DEFAULT_MAX_EXECUTION_DEPTH: u64 = 1000; - -pub type Bug = GenericBug; - -pub enum SyscallId { - Exit = 93, - Read = 
63, - Write = 64, - Openat = 56, - Brk = 214, -} - -pub struct EngineOptions { - pub memory_size: ByteSize, - pub max_exection_depth: u64, -} - -impl Default for EngineOptions { - fn default() -> EngineOptions { - EngineOptions { - memory_size: DEFAULT_MEMORY_SIZE, - max_exection_depth: DEFAULT_MAX_EXECUTION_DEPTH, - } - } -} - -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum Value { - Concrete(u64), - Symbolic(SymbolicValue), - Uninitialized, -} - -impl fmt::Display for Value { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Value::Concrete(c) => write!(f, "{:#x}", c), - Value::Symbolic(i) => write!(f, "x{}", i.index()), - Value::Uninitialized => write!(f, "uninit"), - } - } -} - -#[derive(Debug, Error)] -pub enum EngineError { - #[error("failed to load binary {0:#}")] - IoError(anyhow::Error), - - #[error("engine does not support {0}")] - NotSupported(String), - - #[error("has reached the maximum execution depth of {0}")] - ExecutionDepthReached(u64), - - #[error("failed to decode instruction at PC: {0:#010x}")] - InvalidInstructionEncoding(u64, DecodingError), - - #[error("failed to compute satisfyability for formula")] - SatUnknown(SolverError), -} - -pub struct Engine<'a, E, S> -where - E: ExplorationStrategy, - S: Solver, -{ - symbolic_state: Box>, - program_break: u64, - pc: u64, - regs: [Value; 32], - memory: Vec, - strategy: &'a E, - execution_depth: u64, - max_exection_depth: u64, - is_running: bool, -} +pub use bug::*; +pub use rarity_simulation::*; +pub use symbolic_execution::*; -impl<'a, E, S> Engine<'a, E, S> +use riscu::Program; +pub trait BugFinder where - E: ExplorationStrategy, - S: Solver, + Info: BugInfo, + Error: std::error::Error, { - // creates a machine state with a specific memory size - pub fn new(program: &Program, options: &EngineOptions, strategy: &'a E, solver: &'a S) -> Self { - let mut regs = [Value::Uninitialized; 32]; - let memory_size = options.memory_size.as_u64(); - let mut memory = 
vec![Value::Uninitialized; memory_size as usize / 8]; - - let sp = memory_size - 8; - regs[Register::Sp as usize] = Value::Concrete(sp); - regs[Register::Zero as usize] = Value::Concrete(0); - - // TODO: Init main function arguments - let argc = 0; - memory[sp as usize / size_of::()] = Value::Concrete(argc); - - load_segment(&mut memory, &program.code); - load_segment(&mut memory, &program.data); - - let pc = program.code.address; - - let program_break = program.data.address + program.data.content.len() as u64; - - let symbolic_state = Box::new(SymbolicState::new(solver)); - - debug!( - "initializing new execution context with {} of main memory", - memory_size - ); - debug!( - "code segment: start={:#x} length={}", - program.code.address, - program.code.content.len(), - ); - debug!( - "data segment: start={:#x} length={}", - program.data.address, - program.data.content.len(), - ); - debug!( - "init state: pc={:#x} brk={:#x}, argc={}", - pc, program_break, argc - ); - - Self { - symbolic_state, - program_break, - pc, - regs, - memory, - strategy, - execution_depth: 0, - max_exection_depth: options.max_exection_depth, - is_running: false, - } - } - - fn decode(&self, raw: u32) -> Result { - decode(raw).map_err(|e| EngineError::InvalidInstructionEncoding(self.pc, e)) - } - - pub fn run(&mut self) -> Result, EngineError> { - self.is_running = true; - - loop { - if self.execution_depth >= self.max_exection_depth { - trace!("maximum execution depth reached => exiting this context"); - - self.is_running = false; - - return Err(EngineError::ExecutionDepthReached(self.execution_depth)); - } - - self.execution_depth += 1; - - let bug = self - .fetch() - .and_then(|raw| self.decode(raw)) - .and_then(|instr| self.execute(instr))?; - - if bug.is_some() || !self.is_running { - return Ok(bug); - } - } - } - - fn execute_query( - &mut self, - query: Query, - basic_info_to_bug: F, - ) -> Result, EngineError> - where - F: Fn(BasicInfo) -> Bug, - { - self.symbolic_state - 
.execute_query(query) - .map_err(EngineError::SatUnknown) - .map_or(Ok(None), |result| { - Ok(result.map(|witness| { - basic_info_to_bug(BasicInfo { - witness, - pc: self.pc, - }) - })) - }) - } - - fn check_for_uninitialized_memory( - &mut self, - instruction: Instruction, - v1: Value, - v2: Value, - ) -> Result, EngineError> { - trace!( - "{}: {}, {} => computing reachability", - instruction_to_str(instruction), - v1, - v2 - ); - - self.execute_query(Query::Reachable, |info| Bug::AccessToUnitializedMemory { - info, - instruction, - operands: vec![v1, v2], - }) - } - - fn is_in_vaddr_range(&self, vaddr: u64) -> bool { - vaddr as usize / size_of::() < self.memory.len() - } - - fn check_for_valid_memory_address( - &mut self, - instruction: &str, - address: u64, - ) -> Result, EngineError> { - let is_alignment_ok = address % size_of::() as u64 == 0; - - if !is_alignment_ok { - trace!( - "{}: address {:#x} is not double word aligned => computing reachability", - instruction, - address - ); - - self.is_running = false; - - self.execute_query(Query::Reachable, |info| Bug::AccessToUnalignedAddress { - info, - address, - }) - } else if !self.is_in_vaddr_range(address) { - trace!( - "{}: address {:#x} out of virtual address range (0x0 - {:#x}) => computing reachability", - instruction, - address, - self.memory.len() * 8, - ); - - self.is_running = false; - - self.execute_query(Query::Reachable, |info| Bug::AccessToOutOfRangeAddress { - info, - }) - } else { - Ok(None) - } - } - - #[allow(clippy::unnecessary_wraps)] - fn execute_lui(&mut self, utype: UType) -> Result, EngineError> { - let immediate = u64::from(utype.imm()) << 12; - - let result = Value::Concrete(immediate); - - trace!( - "[{:#010x}] {}: {:?} <- {}", - self.pc, - instruction_to_str(Instruction::Lui(utype)), - utype.rd(), - result, - ); - - self.assign_rd(utype.rd(), result); - - self.pc += INSTRUCTION_SIZE; - - Ok(None) - } - - fn execute_divu_remu( - &mut self, - instruction: Instruction, - rtype: RType, - 
op: Op, - ) -> Result, EngineError> - where - Op: FnOnce(u64, u64) -> u64, - { - let bug = match self.regs[rtype.rs2() as usize] { - Value::Symbolic(divisor) => { - trace!( - "{}: symbolic divisor -> find input for divisor == 0", - instruction_to_str(instruction) - ); - - self.execute_query(Query::Equals((divisor, 0)), |info| Bug::DivisionByZero { - info, - })? - } - Value::Concrete(divisor) if divisor == 0 => { - trace!( - "{}: divisor == 0 -> compute reachability", - instruction_to_str(instruction) - ); - - self.execute_query(Query::Reachable, |info| Bug::DivisionByZero { info })? - } - _ => None, - }; - - if bug.is_none() { - self.execute_rtype(instruction, rtype, op) - } else { - Ok(bug) - } - } - - fn execute_itype( - &mut self, - instruction: Instruction, - itype: IType, - op: Op, - ) -> Result, EngineError> - where - Op: FnOnce(u64, u64) -> u64, - { - let rs1_value = self.regs[itype.rs1() as usize]; - let imm_value = Value::Concrete(itype.imm() as u64); - - self.execute_binary_op(instruction, rs1_value, imm_value, itype.rd(), op) - } - - fn execute_rtype( - &mut self, - instruction: Instruction, - rtype: RType, - op: Op, - ) -> Result, EngineError> - where - Op: FnOnce(u64, u64) -> u64, - { - let rs1_value = self.regs[rtype.rs1() as usize]; - let rs2_value = self.regs[rtype.rs2() as usize]; - - self.execute_binary_op(instruction, rs1_value, rs2_value, rtype.rd(), op) - } - - fn execute_binary_op( - &mut self, - instruction: Instruction, - lhs: Value, - rhs: Value, - rd: Register, - op: Op, - ) -> Result, EngineError> - where - Op: FnOnce(u64, u64) -> u64, - { - let result = match (lhs, rhs) { - (Value::Concrete(v1), Value::Concrete(v2)) => Value::Concrete(op(v1, v2)), - (Value::Symbolic(v1), Value::Concrete(v2)) => { - let v2 = self.symbolic_state.create_const(v2); - Value::Symbolic(self.symbolic_state.create_instruction(instruction, v1, v2)) - } - (Value::Concrete(v1), Value::Symbolic(v2)) => { - let v1 = self.symbolic_state.create_const(v1); - 
Value::Symbolic(self.symbolic_state.create_instruction(instruction, v1, v2)) - } - (Value::Symbolic(v1), Value::Symbolic(v2)) => { - Value::Symbolic(self.symbolic_state.create_instruction(instruction, v1, v2)) - } - _ => { - let bug = self.check_for_uninitialized_memory(instruction, lhs, rhs)?; - - trace!("could not find input assignment => exeting this context"); - - self.is_running = false; - - return Ok(bug); - } - }; - - trace!( - "[{:#010x}] {}: {}, {} |- {:?} <- {}", - self.pc, - instruction_to_str(instruction), - lhs, - rhs, - rd, - result, - ); - - self.assign_rd(rd, result); - - self.pc += INSTRUCTION_SIZE; - - Ok(None) - } - - fn execute_brk(&mut self) -> Result, EngineError> { - if let Value::Concrete(new_program_break) = self.regs[Register::A0 as usize] { - let old_program_break = self.program_break; - - if new_program_break < self.program_break || !self.is_in_vaddr_range(new_program_break) - { - self.regs[Register::A0 as usize] = Value::Concrete(self.program_break); - } else { - self.program_break = new_program_break; - } - - trace!( - "brk: old={:#x} new={:#x}", - old_program_break, - new_program_break - ); - - Ok(None) - } else { - not_supported("can not handle symbolic or uninitialized program break") - } - } - - fn bytewise_combine( - &mut self, - old: Value, - n_lower_bytes: u32, - new_idx: SymbolicValue, - ) -> SymbolicValue { - let bits_in_a_byte = 8; - let low_shift_factor = 2_u64.pow(n_lower_bytes * bits_in_a_byte); - let high_shift_factor = - 2_u64.pow((size_of::() as u32 - n_lower_bytes) * bits_in_a_byte); - - assert!( - low_shift_factor != 0 && high_shift_factor != 0, - "no bytes to shift" - ); - - let old_idx = match old { - Value::Concrete(c) => { - let old_c = c / low_shift_factor * low_shift_factor; - - self.symbolic_state.create_const(old_c) - } - Value::Symbolic(old_idx) => { - let low_shift_factor_idx = self.symbolic_state.create_const(low_shift_factor); - - let old_idx = self.symbolic_state.create_operator( - BVOperator::Divu, - 
old_idx, - low_shift_factor_idx, - ); - - self.symbolic_state - .create_operator(BVOperator::Mul, old_idx, low_shift_factor_idx) - } - Value::Uninitialized => { - unreachable!("function should not be called for uninitialized values") - } - }; - - let high_shift_factor_idx = self.symbolic_state.create_const(high_shift_factor); - - let new_idx = - self.symbolic_state - .create_operator(BVOperator::Mul, new_idx, high_shift_factor_idx); - - let new_idx = - self.symbolic_state - .create_operator(BVOperator::Divu, new_idx, high_shift_factor_idx); - - self.symbolic_state - .create_operator(BVOperator::Add, old_idx, new_idx) - } - - fn execute_read(&mut self) -> Result, EngineError> { - if !matches!(self.regs[Register::A0 as usize], Value::Concrete(0)) { - return not_supported("can not handle other fd than stdin in read syscall"); - } - - let buffer = if let Value::Concrete(b) = self.regs[Register::A1 as usize] { - b - } else { - return not_supported( - "can not handle symbolic or uninitialized buffer address in read syscall", - ); - }; - - let size = if let Value::Concrete(s) = self.regs[Register::A2 as usize] { - s - } else { - return not_supported("can not handle symbolic or uinitialized size in read syscall"); - }; - - trace!("read: fd={} buffer={:#x} size={}", 0, buffer, size,); - - if !self.is_in_vaddr_range(buffer) || !self.is_in_vaddr_range(buffer + size) { - return not_supported("read syscall failed to"); - } - - let size_of_u64 = size_of::() as u64; - - let round_up = if size % size_of_u64 == 0 { - 0 - } else { - size_of_u64 - size % size_of_u64 - }; - - let mut bytes_to_read = size; - let words_to_read = (bytes_to_read + round_up) / size_of_u64; - - let start = buffer / size_of_u64; - - for word_count in 0..words_to_read { - let start_byte = word_count * size_of_u64; - let end_byte = start_byte - + if bytes_to_read < size_of_u64 { - bytes_to_read - } else { - 8 - }; - - let name = format!( - "read({}, {}, {})[{} - {}]", - 0, buffer, size, start_byte, end_byte, - 
); - - let input_idx = self.symbolic_state.create_input(&name); - - let result_idx = if bytes_to_read >= size_of_u64 { - bytes_to_read -= size_of_u64; - - input_idx - } else { - match self.memory[(start + word_count) as usize] { - Value::Uninitialized => { - // we do not partially overwrite words with concrete values - // if at least one byte in a word is uninitialized, the whole word is uninitialized - break; - } - v => self.bytewise_combine(v, bytes_to_read as u32, input_idx), - } - }; - - self.memory[(start + word_count) as usize] = Value::Symbolic(result_idx); - } - - self.regs[Register::A0 as usize] = Value::Concrete(size); - - Ok(None) - } - - fn execute_beq_branches( - &mut self, - true_branch: u64, - false_branch: u64, - lhs: SymbolicValue, - rhs: SymbolicValue, - ) -> Result, EngineError> { - let memory_snapshot = self.memory.clone(); - let regs_snapshot = self.regs; - let graph_snapshot = Box::new((*self.symbolic_state).clone()); - let brk_snapshot = self.program_break; - let execution_depth_snapshot = self.execution_depth; - - let next_pc = self.strategy.choose_path(true_branch, false_branch); - - let decision = next_pc == true_branch; - - self.symbolic_state - .create_beq_path_condition(decision, lhs, rhs); - - if let Ok(Some(_)) = self.symbolic_state.execute_query(Query::Reachable) { - trace!( - "[{:#010x}] beq: x{}, x{} |- assume {}, pc <- {:#x}", - self.pc, - lhs.index(), - rhs.index(), - next_pc == false_branch, - next_pc, - ); - - self.pc = next_pc; - - let result = self.run(); - - if !matches!( - result, - Err(EngineError::ExecutionDepthReached(_)) | Ok(None) - ) { - return result; - } - } else { - trace!( - "[{:#010x}] beq: x{}, x{} |- assume {}, not reachable", - self.pc, - lhs.index(), - rhs.index(), - next_pc == false_branch, - ); - } - - let next_pc = if decision { false_branch } else { true_branch }; - - self.is_running = true; - - self.memory = memory_snapshot; - self.regs = regs_snapshot; - self.symbolic_state = graph_snapshot; - 
self.program_break = brk_snapshot; - self.execution_depth = execution_depth_snapshot; - - self.symbolic_state - .create_beq_path_condition(!decision, lhs, rhs); - - if let Ok(Some(_)) = self.symbolic_state.execute_query(Query::Reachable) { - trace!( - "[{:#010x}] beq: x{}, x{} |- assume {}, pc <- {:#x}", - self.pc, - lhs.index(), - rhs.index(), - next_pc == false_branch, - next_pc, - ); - - self.pc = next_pc; - - Ok(None) - } else { - trace!( - "[{:#010x}] beq: x{}, x{} |- assume {}, not reachable", - self.pc, - lhs.index(), - rhs.index(), - next_pc == false_branch, - ); - - self.is_running = false; - - Ok(None) - } - } - - fn execute_beq(&mut self, btype: BType) -> Result, EngineError> { - let lhs = self.regs[btype.rs1() as usize]; - let rhs = self.regs[btype.rs2() as usize]; - - let true_branch = self.pc.wrapping_add(btype.imm() as u64); - let false_branch = self.pc.wrapping_add(4); - - match (lhs, rhs) { - (Value::Concrete(v1), Value::Concrete(v2)) => { - let old_pc = self.pc; - - self.pc = if v1 == v2 { true_branch } else { false_branch }; - - trace!( - "[{:#010x}] beq: {}, {} |- pc <- {:#x}", - old_pc, - lhs, - rhs, - self.pc - ); - - Ok(None) - } - (Value::Symbolic(v1), Value::Concrete(v2)) => { - let v2 = self.symbolic_state.create_const(v2); - self.execute_beq_branches(true_branch, false_branch, v1, v2) - } - (Value::Concrete(v1), Value::Symbolic(v2)) => { - let v1 = self.symbolic_state.create_const(v1); - self.execute_beq_branches(true_branch, false_branch, v1, v2) - } - (Value::Symbolic(v1), Value::Symbolic(v2)) => { - self.execute_beq_branches(true_branch, false_branch, v1, v2) - } - (v1, v2) => { - self.is_running = false; - - let result = self.check_for_uninitialized_memory(Instruction::Beq(btype), v1, v2); - - trace!("access to uninitialized memory => exeting this context"); - - result - } - } - } - - fn execute_exit(&mut self) -> Result, EngineError> { - self.is_running = false; - - match self.regs[Register::A0 as usize] { - 
Value::Symbolic(exit_code) => { - trace!("exit: symbolic code -> find input for exit_code != 0"); - - self.execute_query(Query::NotEquals((exit_code, 0)), |info| { - Bug::ExitCodeGreaterZero { info } - }) - } - Value::Concrete(exit_code) => { - if exit_code > 0 { - trace!( - "exit: with code {} -> find input to satisfy path condition", - exit_code - ); - - self.execute_query(Query::Reachable, |info| Bug::ExitCodeGreaterZero { info }) - } else { - trace!("exiting context with exit_code 0"); - - Ok(None) - } - } - _ => not_supported("exit only implemented for symbolic exit codes"), - } - } - - fn execute_ecall(&mut self) -> Result, EngineError> { - trace!("[{:#010x}] ecall", self.pc); - - let result = match self.regs[Register::A7 as usize] { - Value::Concrete(syscall_id) if syscall_id == (SyscallId::Brk as u64) => { - self.execute_brk() - } - Value::Concrete(syscall_id) if syscall_id == (SyscallId::Read as u64) => { - self.execute_read() - } - Value::Concrete(syscall_id) if syscall_id == (SyscallId::Exit as u64) => { - self.execute_exit() - } - id => Err(EngineError::NotSupported(format!( - "syscall with id ({}) is not supported", - id - ))), - }; - - self.pc += INSTRUCTION_SIZE; - - result - } - - fn execute_ld( - &mut self, - instruction: Instruction, - itype: IType, - ) -> Result, EngineError> { - if let Value::Concrete(base_address) = self.regs[itype.rs1() as usize] { - let immediate = itype.imm() as u64; - - let address = base_address.wrapping_add(immediate); - - let bug = - self.check_for_valid_memory_address(instruction_to_str(instruction), address)?; - - if bug.is_none() { - let value = self.memory[(address / 8) as usize]; - - trace!( - "[{:#010x}] {}: {:#x}, {} |- {:?} <- mem[{:#x}]={}", - self.pc, - instruction_to_str(instruction), - base_address, - immediate, - itype.rd(), - address, - value, - ); - - self.assign_rd(itype.rd(), value); - - self.pc += INSTRUCTION_SIZE; - } - - Ok(bug) - } else { - not_supported("can not handle symbolic addresses in LD") - } 
- } - - fn execute_sd( - &mut self, - instruction: Instruction, - stype: SType, - ) -> Result, EngineError> { - if let Value::Concrete(base_address) = self.regs[stype.rs1() as usize] { - let immediate = stype.imm(); - - let address = base_address.wrapping_add(immediate as u64); - - let bug = - self.check_for_valid_memory_address(instruction_to_str(instruction), address)?; - - if bug.is_none() { - let value = self.regs[stype.rs2() as usize]; - - trace!( - "[{:#010x}] {}: {:#x}, {}, {} |- mem[{:#x}] <- {}", - self.pc, - instruction_to_str(instruction), - base_address, - immediate, - self.regs[stype.rs2() as usize], - address, - value, - ); - - self.memory[(address / 8) as usize] = value; - - self.pc += INSTRUCTION_SIZE; - } - - Ok(bug) - } else { - not_supported("can not handle symbolic addresses in SD") - } - } - - #[allow(clippy::unnecessary_wraps)] - fn execute_jal(&mut self, jtype: JType) -> Result, EngineError> { - let link = self.pc + INSTRUCTION_SIZE; - - let new_pc = self.pc.wrapping_add(jtype.imm() as u64); - - trace!( - "[{:#010x}] jal: pc <- {:#x}, {:?} <- {:#x}", - self.pc, - new_pc, - jtype.rd(), - link, - ); - - self.pc = new_pc; - - self.assign_rd(jtype.rd(), Value::Concrete(link)); - - Ok(None) - } - - fn assign_rd(&mut self, rd: Register, v: Value) { - if rd != Register::Zero { - self.regs[rd as usize] = v; - } - } - - fn execute_jalr(&mut self, itype: IType) -> Result, EngineError> { - if let Value::Concrete(dest) = self.regs[itype.rs1() as usize] { - let link = self.pc + INSTRUCTION_SIZE; - - let new_pc = dest.wrapping_add(itype.imm() as u64); - - trace!( - "[{:#010x}] jalr: {:#x}, {} |- pc <- {:#x}, {:?} <- {:#x}", - self.pc, - dest, - itype.imm(), - new_pc, - itype.rd(), - link - ); - - self.assign_rd(itype.rd(), Value::Concrete(link)); - - self.pc = new_pc; - - Ok(None) - } else { - not_supported("can only handle concrete addresses in JALR") - } - } - - fn fetch(&self) -> Result { - if let Value::Concrete(dword) = self.memory[(self.pc as usize / 
size_of::()) as usize] - { - if self.pc % size_of::() as u64 == 0 { - Ok(dword as u32) - } else { - Ok((dword >> 32) as u32) - } - } else { - Err(EngineError::NotSupported(String::from( - "tried to fetch none concrete instruction", - ))) - } - } - - fn execute(&mut self, instruction: Instruction) -> Result, EngineError> { - match instruction { - Instruction::Ecall(_) => self.execute_ecall(), - Instruction::Lui(utype) => self.execute_lui(utype), - Instruction::Addi(itype) => self.execute_itype(instruction, itype, u64::wrapping_add), - Instruction::Add(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_add), - Instruction::Sub(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_sub), - Instruction::Mul(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_mul), - Instruction::Divu(rtype) => { - self.execute_divu_remu(instruction, rtype, u64::wrapping_div) - } - Instruction::Remu(rtype) => { - self.execute_divu_remu(instruction, rtype, u64::wrapping_rem) - } - Instruction::Sltu(rtype) => { - self.execute_rtype(instruction, rtype, |l, r| if l < r { 1 } else { 0 }) - } - Instruction::Ld(itype) => self.execute_ld(instruction, itype), - Instruction::Sd(stype) => self.execute_sd(instruction, stype), - Instruction::Jal(jtype) => self.execute_jal(jtype), - Instruction::Jalr(itype) => self.execute_jalr(itype), - Instruction::Beq(btype) => self.execute_beq(btype), - } - } -} - -fn load_segment(memory: &mut Vec, segment: &ProgramSegment) { - let start = segment.address as usize / size_of::(); - let end = start + segment.content.len() / size_of::(); - - segment - .content - .chunks(size_of::()) - .map(LittleEndian::read_u64) - .zip(start..end) - .for_each(|(x, i)| memory[i] = Value::Concrete(x)); -} - -fn not_supported(s: &str) -> Result, EngineError> { - Err(EngineError::NotSupported(s.to_owned())) -} - -pub const fn instruction_to_str(i: Instruction) -> &'static str { - match i { - Instruction::Lui(_) => "lui", - Instruction::Jal(_) => "jal", - 
Instruction::Jalr(_) => "jalr", - Instruction::Beq(_) => "beq", - Instruction::Ld(_) => "ld", - Instruction::Sd(_) => "sd", - Instruction::Addi(_) => "addi", - Instruction::Add(_) => "add", - Instruction::Sub(_) => "sub", - Instruction::Sltu(_) => "sltu", - Instruction::Mul(_) => "mul", - Instruction::Divu(_) => "divu", - Instruction::Remu(_) => "remu", - Instruction::Ecall(_) => "ecall", - } + fn search_for_bugs(&self, program: &Program) -> Result>, Error>; } diff --git a/src/rarity.rs b/src/engine/rarity_simulation.rs similarity index 74% rename from src/rarity.rs rename to src/engine/rarity_simulation.rs index 2d6f45a6..def66291 100644 --- a/src/rarity.rs +++ b/src/engine/rarity_simulation.rs @@ -1,7 +1,6 @@ //! Rarity Simulation //! -//! This module contains an implementation of rarity simulation, as descibed in -//! the paper ["Using Speculation for Sequential Equivalence Checking" +//! This module contains an implementation of rarity simulation, as descibed in the paper ["Using Speculation for Sequential Equivalence Checking" //! ](https://people.eecs.berkeley.edu/~alanmi/publications/2012/iwls12_sec.pdf) by //! Brayton et. al. //! @@ -27,52 +26,115 @@ //! The amount of iterations/cycles and the amount of states allows for a fine-grained control for //! finding bugs in depth or in breadth, respectively. 
-#![allow(clippy::unnecessary_wraps)] -#![allow(clippy::too_many_arguments)] - -use crate::engine::{bug::Bug as BugDef, instruction_to_str, SyscallId}; +use super::{ + system::{instruction_to_str, SyscallId}, + Bug as GenericBug, BugFinder, BugInfo, +}; use byteorder::{ByteOrder, LittleEndian}; use bytesize::ByteSize; use itertools::Itertools; use log::{debug, info, trace, warn}; use riscu::{ - decode, load_object_file, types::*, DecodingError, Instruction, Program, ProgramSegment, - Register, RiscuError, INSTRUCTION_SIZE as INSTR_SIZE, + decode, types::*, DecodingError, Instruction, Program, ProgramSegment, Register, + INSTRUCTION_SIZE as INSTR_SIZE, }; use std::{ cmp::{min, Ordering}, collections::HashMap, fmt, - fs::File, - io::Write, + iter::IntoIterator, mem::size_of, - path::Path, sync::Arc, }; use strum::{EnumString, EnumVariantNames, IntoStaticStr}; use thiserror::Error; -pub type Bug = BugDef; +pub type RaritySimulationBug = GenericBug; +type Bug = RaritySimulationBug; + +type ExecutorResult = Result, RaritySimulationError>; const INSTRUCTION_SIZE: u64 = INSTR_SIZE as u64; const BYTES_PER_WORD: u64 = size_of::() as u64; const NUMBER_OF_BYTE_VALUES: u64 = 256; -/// Strategy for metric calculation +pub mod defaults { + use super::*; + + pub const MEMORY_SIZE: ByteSize = ByteSize(bytesize::MB); + pub const AMOUNT_OF_STATES: usize = 3000; + pub const STEP_SIZE: u64 = 1000; + pub const SELECTION: usize = 50; + pub const ITERATIONS: u64 = 20; + pub const COPY_INIT_RATIO: f64 = 0.6; + pub const MEAN_TYPE: MeanType = MeanType::Harmonic; +} + +#[derive(Debug, Clone)] +pub struct RaritySimulationOptions { + /// The size of the machine's memory + pub memory_size: ByteSize, + /// The number of states to pursue + pub amount_of_states: usize, + /// The amount of instructions to execute for each state on each iteration + pub step_size: u64, + /// Amount of (rarest) states that shall be further considered at the end of each iteration. 
+ pub selection: usize, + /// The amount of rarity simulation iterations to perform + pub iterations: u64, + /// After discarding least rare and exited states, determines how much new states shall + /// be copied from the remaining (rare) states and, in inverse, how much shall be newly + /// created relative to the amount of missing states to archive `number_of_states`. + /// Must be between 0 and 1. + pub copy_init_ratio: f64, + /// The mean to use for determining state rarity + pub mean: MeanType, +} + +impl Default for RaritySimulationOptions { + fn default() -> RaritySimulationOptions { + RaritySimulationOptions { + memory_size: defaults::MEMORY_SIZE, + amount_of_states: defaults::AMOUNT_OF_STATES, + step_size: defaults::STEP_SIZE, + selection: defaults::SELECTION, + iterations: defaults::ITERATIONS, + copy_init_ratio: defaults::COPY_INIT_RATIO, + mean: defaults::MEAN_TYPE, + } + } +} + +#[derive(Debug, Clone, Error)] +pub enum RaritySimulationError { + #[error("failed to write State to file")] + IoError(Arc), + + #[error("engine does not support {0}")] + NotSupported(String), + + #[error("failed to decode instruction at PC: {0:#010x}")] + InvalidInstructionEncoding(u64, DecodingError), + + #[error("has reached the maximum execution depth of {0}")] + ExecutionDepthReached(u64), +} + +/// Strategy for mean calculation /// /// Based on the value counters, the rarity simulator calculates a score that is used to determine /// a state's rarity. This score is essential for the decision which states shall be further /// pursued and which shall be discarded. #[derive(Clone, Copy, Debug, EnumString, EnumVariantNames, IntoStaticStr)] #[strum(serialize_all = "kebab_case")] -pub enum MetricType { - /// Metric is calculated using the [arithmetic +pub enum MeanType { + /// Mean is calculated using the [arithmetic /// mean](https://en.wikipedia.org/wiki/Arithmetic_mean), i.e. the sum of all statistic /// counters divided by the amount of states. 
/// Lower scores are more rare Arithmetic, - /// Metric is calculated using the [harmonic mean](https://en.wikipedia.org/wiki/Harmonic_mean), + /// Mean is calculated using the [harmonic mean](https://en.wikipedia.org/wiki/Harmonic_mean), /// i.e. the amount of states divided by the sum of residues of statistic counters. /// Higher scores are more rare. Harmonic, @@ -91,353 +153,175 @@ pub struct State { /// Processor integer registers x0..x31 regs: [Value; 32], /// List of touched and untouched memory words - #[allow(dead_code)] memory: Vec, } type Address = u64; type Counter = u64; -/// Calculates all state scores with a given scoring predicate -/// -/// Using all states executed by the rarity simulation execution, this function constructs the -/// statistical counters and, based upon these, calculates the rarity score of each state. -/// -/// The counters work on byte-granularity basis. For each byte contained by a state, 256 counters -/// are created, one for each possible state a byte can be in. All bytes of all states are iterated -/// and a corresponding counter is incremented depending on the byte's value. -/// This is done to count the ocurrences of distinct values for each byte. The smaller a counter -/// value is, the rarer the value of this specific counter is for a specific byte. -/// -/// The function then determines the the counter values that are relevant for rarity calculation, -/// for each state, that is, for each byte it appends the value of the counter relevant to the byte -/// and the byte's value. -/// -/// The list of relevant counter values is passed to the scoring function in order to determine the -/// rarity score of each state. -/// -/// # Arguments -/// * states: A list of states. 
-/// * score: A function taking the amount of states and relevant statistical counter values and returning a score -fn compute_scores(states: &[&State], score: F) -> Vec -where - F: Fn(usize, &[Counter]) -> f64, -{ - let counter_addresses: Vec> = states - .iter() - .map(|s| compute_counter_addresses(&s)) - .collect(); - - // create global counters for all states - let mut overall_counts = HashMap::::new(); - - counter_addresses - .iter() - .flatten() - .for_each(|address| count_address(&mut overall_counts, *address)); - - // create counters per state based on overall counts - let n = states.len(); - - counter_addresses - .iter() - .map(|addresses| { - addresses - .iter() - .map(|address| { - *overall_counts - .get(address) - .expect("cound should be available") - }) - .collect_vec() - }) - .map(|addresses| score(n, &addresses[..])) - .collect() +#[derive(Debug, Clone)] +pub struct RaritySimulation { + options: RaritySimulationOptions, } -fn count_address(scores: &mut HashMap, addr: Address) { - if let Some(entry) = scores.get_mut(&addr) { - *entry += 1; - } else { - scores.insert(addr, 1); +impl RaritySimulation { + pub fn new(options: &RaritySimulationOptions) -> Self { + Self { + options: options.clone(), + } } } -/// Based on a state, generates an iterator that contains -/// matching counter `addresses` for each byte. -/// -/// The counter address is a combination of the byte's address and the value of that -/// address in the state. -/// -/// Each byte assumes one of 256 (2^8) values. -/// Thus, each distinct byte i of the state has has 256 different addresses: (i*256)..((i*256) + 255) -/// That is, each byte is `expanded` to 256 addresses. -/// -/// The first 8 bytes are represented by the program counter, in the CPU's native byte ordering -/// Next, 32 64-bit registers are represented. -/// Then, all touched memory regions follow. 
-/// -/// For each byte i, only one of its 256 different addresses may occur (because a byte can only -/// assume one state at a time) -fn compute_counter_addresses(state: &State) -> Vec
{ - fn offset_for_word(idx: u64) -> u64 { - idx * BYTES_PER_WORD * NUMBER_OF_BYTE_VALUES - } - - let mut addresses = Vec::new(); - - compute_counter_addresses_for_word(0, state.pc, &mut addresses); - - compute_counter_addresses_for_iter(offset_for_word(1), state.regs.iter(), &mut addresses); +impl BugFinder for RaritySimulation { + /// Performs rarity simulation on a given program + /// + /// If one state encountered a bug, execution is terminated and its description is returned. If no + /// bugs have been encountered after the configured limit has been met, [`None`] is returned. + /// + /// Please see the [module-level documentation](self) for a detailed description of rarity simulation. + #[allow(clippy::vec_box)] + fn search_for_bugs(&self, program: &Program) -> Result, RaritySimulationError> { + let mut executors: Vec> = Vec::new(); + + for iteration in 0..self.options.iterations { + info!("Running rarity simulation round {}...", iteration + 1); + + create_missing_executors( + &mut executors, + self.options.amount_of_states, + self.options.copy_init_ratio, + self.options.memory_size, + program, + ); - compute_counter_addresses_for_iter(offset_for_word(33), state.memory.iter(), &mut addresses); + let results = time_info!("Running engines", { + match run_all(&mut executors, self.options.step_size) { + Ok((Some(bug), _)) => return Ok(Some(bug)), + Ok((None, results)) => results, + Err(e) => return Err(e), + } + }); - addresses -} + let running = filter_successfully_exited(executors, results); -/// Appends relevant statistic counter addresses from an iterator -/// -/// Iterates over a collection of [`Value`]s and appends them to the relevant address list if they -/// contain a concrete value. 
-/// -/// # Arguments -/// * offset: The statistic counter address offset -/// * iter: The iterator -/// * addresses: The list where relevant addresses shall be appended to -/// -/// # See -/// * [`compute_counter_addresses_for_word`] -fn compute_counter_addresses_for_iter<'a, Iter>( - offset: u64, - iter: Iter, - addresses: &mut Vec
, -) where - Iter: Iterator, -{ - iter.enumerate() - .filter_map(|(idx, v)| match v { - Value::Concrete(w) => Some((idx, w)), - _ => None, - }) - .for_each(|(idx, word)| { - compute_counter_addresses_for_word( - offset + idx as u64 * NUMBER_OF_BYTE_VALUES, - *word, - addresses, + info!( + "Remove {} successfully exited states from selection", + self.options.amount_of_states - running.len() ); - }); -} -/// Appends to a counter address list -/// -/// Splits a 64-bit word into bytes (using the host machine's endianess) and appennds the relevant -/// counter addresses, depending on their respective values. -/// -/// # Arguments -/// * offset: -/// * word: The word that s -/// * addresses: The list where relevant addresses shall be appended to -fn compute_counter_addresses_for_word(offset: u64, word: u64, addresses: &mut Vec) { - u64::to_ne_bytes(word) - .iter() - .cloned() - .enumerate() - .for_each(|(byte_idx, byte_value)| { - let byte_address = BYTES_PER_WORD * byte_idx as u64; - let address = offset + byte_address * NUMBER_OF_BYTE_VALUES + byte_value as u64; - addresses.push(address); - }); -} + let (scores, ordering) = time_info!("Scoring states", { + let states: Vec<_> = running.iter().map(|e| e.state()).collect(); -/// Generates a random value limited by an upper bound -/// -/// Returns a random value between 0 inclusive and `len` exclusively by using the modulo operator -fn random_index(len: usize) -> usize { - rand::random::() % len -} + score_with_mean(&states[..], self.options.mean) + }); -impl State { - #[allow(dead_code)] - fn write_to_file

(&self, path: P) -> Result<(), EngineError> - where - P: AsRef, - { - File::create(path) - .and_then(|mut file| write!(file, "{}", self)) - .map_err(|e| EngineError::IoError(Arc::new(e))) - } -} + let selection = min(self.options.selection, running.len()); -impl fmt::Display for State { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "PC: 0x{:x}", self.pc)?; - writeln!(f, "Register:")?; - for (idx, val) in self.regs.iter().enumerate() { - writeln!(f, "{:x}: {:?}", idx, val)?; - } + executors = select_rarest(running, selection, scores, ordering); - /* - writeln!(f, "Memory:")?; - for (idx, val) in self.memory.iter().enumerate() { - writeln!(f, "{:#016x}: {:?}", idx, val)?; + info!("selecting rarest {} states", selection); } - */ - Ok(()) + + Ok(None) } } -/// Loads an object file and performs rarity simulation -/// -/// If one state encountered a bug, execution is terminated and its description is returned. If no -/// bugs have been encountered after the configured limit has been met, [`None`] is returned. -/// -/// Please see the [module-level documentation](self) for a detailed description of rarity simulation. -/// -/// # Arguments -/// * input: The path to the target object file -/// * memory_size: The size of the machine's memory -/// * number_of_states: The number of states to pursue -/// * selection: Amount of (rarest) states that shall be further considered at the end of each -/// iteration. -/// * cycles: The amount of instructions to execute for each state on each iteration -/// * iterations: The amount of rarity simulation iterations to perform -/// * copy_ratio: After discarding least rare and exited states, determines how much new states shall -/// be copied from the remaining (rare) states and, in inverse, how much shall be newly -/// created relative to the amount of missing states to archive `number_of_states`. -/// Must be between 0 and 1. -/// * metric: The metric to use for determining state rarity -pub fn execute

( - input: P, +#[allow(clippy::vec_box)] +fn create_missing_executors( + executors: &mut Vec>, + amount: usize, + copy_init_ratio: f64, memory_size: ByteSize, - number_of_states: u64, - selection: u64, - cycles: u64, - iterations: u64, - copy_ratio: f64, - metric: MetricType, -) -> Result, EngineError> -where - P: AsRef, -{ - let program = load_object_file(input).map_err(|e| EngineError::RiscuError(Arc::new(e)))?; - - create_and_run( - &program, - memory_size, - number_of_states, - selection, - cycles, - iterations, - copy_ratio, - metric, - ) -} - -/// Performs rarity simulation on a given program -/// -/// Please see the public-facing [`execute`] for more details. -fn create_and_run( program: &Program, - memory_size: ByteSize, - number_of_states: u64, - selection: u64, - cycles: u64, - iterations: u64, - copy_ratio: f64, - metric: MetricType, -) -> Result, EngineError> { - let mut engines: Vec = Vec::new(); - - for iteration in 0..iterations { - info!("Running rarity simulation round {}...", iteration + 1); - - let to_create = number_of_states as usize - engines.len(); - let to_copy = if engines.is_empty() { - 0 - } else { - f64::round(to_create as f64 * copy_ratio) as usize - }; - let to_init = to_create - to_copy; +) { + let missing = amount - executors.len(); - info!("Creating {} new states", to_create); - debug!( - " {} engines will be copied and {} engines will be created", - to_copy, to_init - ); - let initial_engines = engines.len(); - engines.extend( - (0..to_copy) - .map(|_| random_index(initial_engines)) - .map(|idx| engines[idx].clone()) - .collect::>(), - ); - engines.extend((0..to_init).map(|_| Engine::new(&program, memory_size))); - - let results = time_info!("Running engines", { - let results: Vec<_> = engines - .iter_mut() - .map(|engine| engine.run(cycles)) - .collect(); - - if let Some(error_or_bug) = results.clone().iter().find(|result| match result { - Err(EngineError::ExecutionDepthReached(_)) => false, - Err(_) | Ok(Some(_)) => true, - _ => 
false, - }) { - return error_or_bug.clone(); - } - - results - }); + let to_copy = if executors.is_empty() { + 0 + } else { + f64::round(missing as f64 * copy_init_ratio) as usize + }; + let to_create = missing - to_copy; - // remove successfully exited engines - engines = engines - .iter() - .cloned() - .zip(results) - .filter(|(_, r)| matches!(r, Err(EngineError::ExecutionDepthReached(_)))) - .map(|(e, _)| e) - .collect(); - - info!( - "Remove {} successfully exited states from selection", - number_of_states as usize - engines.len() - ); + info!( + "Creating {} new states ({} copied, {} new)", + missing, to_copy, to_create + ); - let (scores, ordering) = time_info!("Scoring states", { - let states: Vec<_> = engines.iter().map(|e| e.state()).collect(); + let copyable_engines = executors.len(); - score_with_metric(&states[..], metric) - }); + let copied = (0..to_copy) + .map(|_| Box::new((*executors[random_index(copyable_engines)]).clone())) + .collect_vec(); - info!(" scored states: {:?}", scores); + let created = (0..to_create).map(|_| Box::new(Executor::new(program, memory_size))); - let selection = min(selection as usize, engines.len()); + executors.extend(copied.into_iter().chain(created)); +} - engines = engines - .iter() - .zip(scores) - .sorted_by(|first, second| first.1.partial_cmp(&second.1).unwrap_or(ordering)) - .map(|x| (*x.0).clone()) - .take(selection) - .collect(); +fn run_all( + executors: &mut [Box], + step_size: u64, +) -> Result<(Option, Vec), RaritySimulationError> { + let results: Vec<_> = executors + .iter_mut() + .map(|engine| engine.run(step_size)) + .collect(); - info!(" selecting {} states", selection); + if let Some(Ok(Some(bug))) = results.iter().find(|r| matches!(r, Ok(Some(_)))) { + Ok((Some(bug.clone()), results)) + } else if let Some(Err(e)) = results.iter().find(|r| match **r { + Err(RaritySimulationError::ExecutionDepthReached(_)) => false, + Err(_) => true, + _ => false, + }) { + Err(e.clone()) + } else { + Ok((None, results)) } 
+} + +#[allow(clippy::vec_box)] +fn filter_successfully_exited( + executors: impl IntoIterator>, + results: impl IntoIterator, +) -> Vec> { + executors + .into_iter() + .zip(results) + .filter(|(_, r)| matches!(r, Err(RaritySimulationError::ExecutionDepthReached(_)))) + .map(|(e, _)| e) + .collect() +} + +#[allow(clippy::vec_box)] +fn select_rarest( + executors: impl IntoIterator>, + selection: usize, + scores: Vec, + ord: Ordering, +) -> Vec> { + let iter = executors.into_iter().zip(scores); + + let sorted = if ord == Ordering::Less { + iter.sorted_unstable_by(|l, r| l.1.partial_cmp(&r.1).expect("no NaN in scores")) + } else { + iter.sorted_unstable_by(|l, r| r.1.partial_cmp(&l.1).expect("no NaN in scores")) + }; - Ok(None) + sorted.map(|x| x.0).take(selection).collect() } -fn score_with_metric(states: &[&State], metric: MetricType) -> (Vec, Ordering) { - match metric { - MetricType::Harmonic => { +fn score_with_mean(states: &[&State], mean: MeanType) -> (Vec, Ordering) { + match mean { + MeanType::Harmonic => { let scores = compute_scores(states, |n, cs| { (n as f64) / cs.iter().map(|c| 1_f64 / (*c as f64)).sum::() }); (scores, Ordering::Greater) } - MetricType::Arithmetic => { + MeanType::Arithmetic => { let scores = compute_scores(states, |n, cs| cs.iter().sum::() as f64 / (n as f64)); (scores, Ordering::Less) @@ -461,7 +345,7 @@ impl fmt::Display for Value { } #[derive(Debug, Clone)] -pub struct Engine { +pub struct Executor { program_break: u64, state: State, execution_depth: u64, @@ -470,7 +354,7 @@ pub struct Engine { is_running: bool, } -impl Engine { +impl Executor { // creates a machine state with a specific memory size pub fn new(program: &Program, memory_size: ByteSize) -> Self { assert!( @@ -529,7 +413,10 @@ impl Engine { &self.state } - pub fn run(&mut self, number_of_instructions: u64) -> Result, EngineError> { + pub fn run( + &mut self, + number_of_instructions: u64, + ) -> Result, RaritySimulationError> { self.is_running = true; 
self.max_exection_depth += number_of_instructions; @@ -539,7 +426,9 @@ impl Engine { self.is_running = false; - return Err(EngineError::ExecutionDepthReached(self.execution_depth)); + return Err(RaritySimulationError::ExecutionDepthReached( + self.execution_depth, + )); } self.execution_depth += 1; @@ -555,7 +444,7 @@ impl Engine { } } - fn fetch(&self) -> Result { + fn fetch(&self) -> Result { if let Value::Concrete(dword) = self.state.memory[(self.state.pc as usize / size_of::()) as usize] { @@ -565,17 +454,17 @@ impl Engine { Ok((dword >> 32) as u32) } } else { - Err(EngineError::NotSupported(String::from( + Err(RaritySimulationError::NotSupported(String::from( "tried to fetch none concrete instruction", ))) } } - fn decode(&self, raw: u32) -> Result { - decode(raw).map_err(|e| EngineError::InvalidInstructionEncoding(self.state.pc, e)) + fn decode(&self, raw: u32) -> Result { + decode(raw).map_err(|e| RaritySimulationError::InvalidInstructionEncoding(self.state.pc, e)) } - fn execute(&mut self, instruction: Instruction) -> Result, EngineError> { + fn execute(&mut self, instruction: Instruction) -> Result, RaritySimulationError> { match instruction { Instruction::Ecall(_) => self.execute_ecall(), Instruction::Lui(utype) => self.execute_lui(utype), @@ -600,12 +489,12 @@ impl Engine { } } - fn check_for_uninitialized_memory( + fn access_to_uninitialized_memory( &mut self, instruction: Instruction, v1: Value, v2: Value, - ) -> Result, EngineError> { + ) -> Bug { trace!( "{}: {}, {} => computing reachability", instruction_to_str(instruction), @@ -613,26 +502,22 @@ impl Engine { v2 ); - Ok(Some(Bug::AccessToUnitializedMemory { + Bug::AccessToUnitializedMemory { info: RarityBugInfo { - witness: self.concrete_inputs.clone(), + inputs: self.concrete_inputs.clone(), pc: self.state.pc, }, instruction, // TODO: fix operands operands: vec![], - })) + } } fn is_in_vaddr_range(&self, vaddr: u64) -> bool { vaddr as usize / size_of::() < self.state.memory.len() } - fn 
check_for_valid_memory_address( - &mut self, - instruction: &str, - address: u64, - ) -> Result, EngineError> { + fn check_for_valid_memory_address(&mut self, instruction: &str, address: u64) -> Option { let is_alignment_ok = address % size_of::() as u64 == 0; if !is_alignment_ok { @@ -644,13 +529,13 @@ impl Engine { self.is_running = false; - Ok(Some(Bug::AccessToUnalignedAddress { + Some(Bug::AccessToUnalignedAddress { info: RarityBugInfo { - witness: self.concrete_inputs.clone(), + inputs: self.concrete_inputs.clone(), pc: self.state.pc, }, address, - })) + }) } else if !self.is_in_vaddr_range(address) { trace!( "{}: address {:#x} out of virtual address range (0x0 - {:#x}) => computing reachability", @@ -661,18 +546,19 @@ impl Engine { self.is_running = false; - Ok(Some(Bug::AccessToOutOfRangeAddress { + Some(Bug::AccessToOutOfRangeAddress { info: RarityBugInfo { - witness: self.concrete_inputs.clone(), + inputs: self.concrete_inputs.clone(), pc: self.state.pc, }, - })) + }) } else { - Ok(None) + None } } - fn execute_lui(&mut self, utype: UType) -> Result, EngineError> { + #[allow(clippy::unnecessary_wraps)] + fn execute_lui(&mut self, utype: UType) -> Result, RaritySimulationError> { let immediate = u64::from(utype.imm()) << 12; let result = Value::Concrete(immediate); @@ -697,7 +583,7 @@ impl Engine { instruction: Instruction, rtype: RType, op: Op, - ) -> Result, EngineError> + ) -> Result, RaritySimulationError> where Op: FnOnce(u64, u64) -> u64, { @@ -710,7 +596,7 @@ impl Engine { Ok(Some(Bug::DivisionByZero { info: RarityBugInfo { - witness: self.concrete_inputs.clone(), + inputs: self.concrete_inputs.clone(), pc: self.state.pc, }, })) @@ -724,7 +610,7 @@ impl Engine { instruction: Instruction, itype: IType, op: Op, - ) -> Result, EngineError> + ) -> Result, RaritySimulationError> where Op: FnOnce(u64, u64) -> u64, { @@ -734,12 +620,13 @@ impl Engine { self.execute_binary_op(instruction, rs1_value, imm_value, itype.rd(), op) } + 
#[allow(clippy::unnecessary_wraps)] fn execute_rtype( &mut self, instruction: Instruction, rtype: RType, op: Op, - ) -> Result, EngineError> + ) -> Result, RaritySimulationError> where Op: FnOnce(u64, u64) -> u64, { @@ -749,6 +636,7 @@ impl Engine { self.execute_binary_op(instruction, rs1_value, rs2_value, rtype.rd(), op) } + #[allow(clippy::unnecessary_wraps)] fn execute_binary_op( &mut self, instruction: Instruction, @@ -756,20 +644,20 @@ impl Engine { rhs: Value, rd: Register, op: Op, - ) -> Result, EngineError> + ) -> Result, RaritySimulationError> where Op: FnOnce(u64, u64) -> u64, { let result = match (lhs, rhs) { (Value::Concrete(v1), Value::Concrete(v2)) => Value::Concrete(op(v1, v2)), _ => { - let bug = self.check_for_uninitialized_memory(instruction, lhs, rhs)?; + let bug = self.access_to_uninitialized_memory(instruction, lhs, rhs); trace!("could not find input assignment => exeting this context"); self.is_running = false; - return Ok(bug); + return Ok(Some(bug)); } }; @@ -790,7 +678,7 @@ impl Engine { Ok(None) } - fn execute_brk(&mut self) -> Result, EngineError> { + fn execute_brk(&mut self) -> Result, RaritySimulationError> { if let Value::Concrete(new_program_break) = self.state.regs[Register::A0 as usize] { let old_program_break = self.program_break; @@ -813,7 +701,7 @@ impl Engine { } } - fn execute_read(&mut self) -> Result, EngineError> { + fn execute_read(&mut self) -> Result, RaritySimulationError> { if !matches!(self.state.regs[Register::A0 as usize], Value::Concrete(0)) { return not_supported("can not handle other fd than stdin in read syscall"); } @@ -894,7 +782,8 @@ impl Engine { Ok(None) } - fn execute_beq(&mut self, btype: BType) -> Result, EngineError> { + #[allow(clippy::unnecessary_wraps)] + fn execute_beq(&mut self, btype: BType) -> Result, RaritySimulationError> { let lhs = self.state.regs[btype.rs1() as usize]; let rhs = self.state.regs[btype.rs2() as usize]; @@ -920,16 +809,16 @@ impl Engine { (v1, v2) => { self.is_running = false; 
- let result = self.check_for_uninitialized_memory(Instruction::Beq(btype), v1, v2); + let result = self.access_to_uninitialized_memory(Instruction::Beq(btype), v1, v2); trace!("access to uninitialized memory => exeting this context"); - result + Ok(Some(result)) } } } - fn execute_exit(&mut self) -> Result, EngineError> { + fn execute_exit(&mut self) -> Result, RaritySimulationError> { self.is_running = false; match self.state.regs[Register::A0 as usize] { @@ -942,7 +831,7 @@ impl Engine { Ok(Some(Bug::ExitCodeGreaterZero { info: RarityBugInfo { - witness: self.concrete_inputs.clone(), + inputs: self.concrete_inputs.clone(), pc: self.state.pc, }, })) @@ -956,7 +845,7 @@ impl Engine { } } - fn execute_ecall(&mut self) -> Result, EngineError> { + fn execute_ecall(&mut self) -> Result, RaritySimulationError> { trace!("[{:#010x}] ecall", self.state.pc); let result = match self.state.regs[Register::A7 as usize] { @@ -969,7 +858,7 @@ impl Engine { Value::Concrete(syscall_id) if syscall_id == (SyscallId::Exit as u64) => { self.execute_exit() } - id => Err(EngineError::NotSupported(format!( + id => Err(RaritySimulationError::NotSupported(format!( "syscall with id ({}) is not supported", id ))), @@ -984,14 +873,13 @@ impl Engine { &mut self, instruction: Instruction, itype: IType, - ) -> Result, EngineError> { + ) -> Result, RaritySimulationError> { if let Value::Concrete(base_address) = self.state.regs[itype.rs1() as usize] { let immediate = itype.imm() as u64; let address = base_address.wrapping_add(immediate); - let bug = - self.check_for_valid_memory_address(instruction_to_str(instruction), address)?; + let bug = self.check_for_valid_memory_address(instruction_to_str(instruction), address); if bug.is_none() { let value = self.state.memory[(address / 8) as usize]; @@ -1022,14 +910,13 @@ impl Engine { &mut self, instruction: Instruction, stype: SType, - ) -> Result, EngineError> { + ) -> Result, RaritySimulationError> { if let Value::Concrete(base_address) = 
self.state.regs[stype.rs1() as usize] { let immediate = stype.imm(); let address = base_address.wrapping_add(immediate as u64); - let bug = - self.check_for_valid_memory_address(instruction_to_str(instruction), address)?; + let bug = self.check_for_valid_memory_address(instruction_to_str(instruction), address); if bug.is_none() { let value = self.state.regs[stype.rs2() as usize]; @@ -1056,7 +943,8 @@ impl Engine { } } - fn execute_jal(&mut self, jtype: JType) -> Result, EngineError> { + #[allow(clippy::unnecessary_wraps)] + fn execute_jal(&mut self, jtype: JType) -> Result, RaritySimulationError> { let link = self.state.pc + INSTRUCTION_SIZE; let new_pc = self.state.pc.wrapping_add(jtype.imm() as u64); @@ -1082,7 +970,7 @@ impl Engine { } } - fn execute_jalr(&mut self, itype: IType) -> Result, EngineError> { + fn execute_jalr(&mut self, itype: IType) -> Result, RaritySimulationError> { if let Value::Concrete(dest) = self.state.regs[itype.rs1() as usize] { let link = self.state.pc + INSTRUCTION_SIZE; @@ -1109,24 +997,6 @@ impl Engine { } } -#[derive(Debug, Clone, Error)] -pub enum EngineError { - #[error("failed to load RISC-U binary")] - RiscuError(Arc), - - #[error("failed to write State to file")] - IoError(Arc), - - #[error("engine does not support {0}")] - NotSupported(String), - - #[error("failed to decode instruction at PC: {0:#010x}")] - InvalidInstructionEncoding(u64, DecodingError), - - #[error("has reached the maximum execution depth of {0}")] - ExecutionDepthReached(u64), -} - fn load_segment(memory: &mut Vec, segment: &ProgramSegment) { let start = segment.address as usize / size_of::(); let end = start + segment.content.len() / size_of::(); @@ -1139,19 +1009,181 @@ fn load_segment(memory: &mut Vec, segment: &ProgramSegment) { .for_each(|(x, i)| memory[i] = Value::Concrete(x)); } -fn not_supported(s: &str) -> Result, EngineError> { - Err(EngineError::NotSupported(s.to_owned())) +fn not_supported(s: &str) -> Result, RaritySimulationError> { + 
Err(RaritySimulationError::NotSupported(s.to_owned())) } #[derive(Default, Debug, Clone)] pub struct RarityBugInfo { - witness: Vec, + inputs: Vec, pc: u64, } +impl BugInfo for RarityBugInfo { + type Value = u64; +} + impl fmt::Display for RarityBugInfo { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "concrete inputs read: {:?}", self.witness)?; + writeln!(f, "concrete inputs read: {:?}", self.inputs)?; writeln!(f, "pc: {:#x}", self.pc) } } + +/// Calculates all state scores with a given scoring predicate +/// +/// Using all states executed by the rarity simulation execution, this function constructs the +/// statistical counters and, based upon these, calculates the rarity score of each state. +/// +/// The counters work on byte-granularity basis. For each byte contained by a state, 256 counters +/// are created, one for each possible state a byte can be in. All bytes of all states are iterated +/// and a corresponding counter is incremented depending on the byte's value. +/// This is done to count the ocurrences of distinct values for each byte. The smaller a counter +/// value is, the rarer the value of this specific counter is for a specific byte. +/// +/// The function then determines the the counter values that are relevant for rarity calculation, +/// for each state, that is, for each byte it appends the value of the counter relevant to the byte +/// and the byte's value. +/// +/// The list of relevant counter values is passed to the scoring function in order to determine the +/// rarity score of each state. +/// +/// # Arguments +/// * states: A list of states. 
+/// * score: A function taking the amount of states and relevant statistical counter values and returning a score +fn compute_scores(states: &[&State], score: F) -> Vec +where + F: Fn(usize, &[Counter]) -> f64, +{ + let counter_addresses: Vec> = states + .iter() + .map(|s| compute_counter_addresses(&s)) + .collect(); + + // create global counters for all states + let mut overall_counts = HashMap::::new(); + + counter_addresses + .iter() + .flatten() + .for_each(|address| count_address(&mut overall_counts, *address)); + + // create counters per state based on overall counts + let n = states.len(); + + counter_addresses + .iter() + .map(|addresses| { + addresses + .iter() + .map(|address| { + *overall_counts + .get(address) + .expect("cound should be available") + }) + .collect_vec() + }) + .map(|addresses| score(n, &addresses[..])) + .collect() +} + +fn count_address(scores: &mut HashMap, addr: Address) { + if let Some(entry) = scores.get_mut(&addr) { + *entry += 1; + } else { + scores.insert(addr, 1); + } +} + +/// Based on a state, generates an iterator that contains +/// matching counter `addresses` for each byte. +/// +/// The counter address is a combination of the byte's address and the value of that +/// address in the state. +/// +/// Each byte assumes one of 256 (2^8) values. +/// Thus, each distinct byte i of the state has has 256 different addresses: (i*256)..((i*256) + 255) +/// That is, each byte is `expanded` to 256 addresses. +/// +/// The first 8 bytes are represented by the program counter, in the CPU's native byte ordering +/// Next, 32 64-bit registers are represented. +/// Then, all touched memory regions follow. +/// +/// For each byte i, only one of its 256 different addresses may occur (because a byte can only +/// assume one state at a time) +fn compute_counter_addresses(state: &State) -> Vec

{ + fn offset_for_word(idx: u64) -> u64 { + idx * BYTES_PER_WORD * NUMBER_OF_BYTE_VALUES + } + + let mut addresses = Vec::new(); + + compute_counter_addresses_for_word(0, state.pc, &mut addresses); + + compute_counter_addresses_for_iter(offset_for_word(1), state.regs.iter(), &mut addresses); + + compute_counter_addresses_for_iter(offset_for_word(33), state.memory.iter(), &mut addresses); + + addresses +} + +/// Appends relevant statistic counter addresses from an iterator +/// +/// Iterates over a collection of [`Value`]s and appends them to the relevant address list if they +/// contain a concrete value. +/// +/// # Arguments +/// * offset: The statistic counter address offset +/// * iter: The iterator +/// * addresses: The list where relevant addresses shall be appended to +/// +/// # See +/// * [`compute_counter_addresses_for_word`] +fn compute_counter_addresses_for_iter<'a, Iter>( + offset: u64, + iter: Iter, + addresses: &mut Vec
, +) where + Iter: Iterator, +{ + iter.enumerate() + .filter_map(|(idx, v)| match v { + Value::Concrete(w) => Some((idx, w)), + _ => None, + }) + .for_each(|(idx, word)| { + compute_counter_addresses_for_word( + offset + idx as u64 * NUMBER_OF_BYTE_VALUES, + *word, + addresses, + ); + }); +} + +/// Appends to a counter address list +/// +/// Splits a 64-bit word into bytes (using the host machine's endianness) and appends the relevant +/// counter addresses, depending on their respective values. +/// +/// # Arguments +/// * offset: The counter address offset of the word +/// * word: The word that is split into bytes +/// * addresses: The list where relevant addresses shall be appended to +fn compute_counter_addresses_for_word(offset: u64, word: u64, addresses: &mut Vec) { + u64::to_ne_bytes(word) + .iter() + .cloned() + .enumerate() + .for_each(|(byte_idx, byte_value)| { + let byte_address = BYTES_PER_WORD * byte_idx as u64; + let address = offset + byte_address * NUMBER_OF_BYTE_VALUES + byte_value as u64; + addresses.push(address); + }); +} + +/// Generates a random value limited by an upper bound +/// +/// Returns a random value between 0 inclusive and `len` exclusive by using the modulo operator +fn random_index(len: usize) -> usize { + rand::random::() % len +} diff --git a/src/engine/symbolic_execution.rs b/src/engine/symbolic_execution.rs new file mode 100644 index 00000000..00b721bd --- /dev/null +++ b/src/engine/symbolic_execution.rs @@ -0,0 +1,960 @@ +use super::{ + bug::{Bug as GenericBug, BugInfo}, + symbolic_state::{Query, SymbolicState, SymbolicValue, Witness}, + system::{instruction_to_str, SyscallId}, +}; +use crate::{ + path_exploration::ExplorationStrategy, + solver::{BVOperator, Solver, SolverError}, +}; +use byteorder::{ByteOrder, LittleEndian}; +use bytesize::ByteSize; +use log::{debug, trace}; +use riscu::{ + decode, types::*, DecodingError, Instruction, Program, ProgramSegment, Register, + INSTRUCTION_SIZE as INSTR_SIZE, +}; +use std::{fmt, mem::size_of, sync::Arc}; +use thiserror::Error; + +const 
INSTRUCTION_SIZE: u64 = INSTR_SIZE as u64; + +pub mod defaults { + use super::*; + + pub const MEMORY_SIZE: ByteSize = ByteSize(bytesize::MB); + pub const MAX_EXECUTION_DEPTH: u64 = 1000; +} + +pub type SymbolicExecutionBug = GenericBug; +type Bug = SymbolicExecutionBug; + +pub struct SymbolicExecutionOptions { + pub memory_size: ByteSize, + pub max_exection_depth: u64, +} + +impl Default for SymbolicExecutionOptions { + fn default() -> Self { + Self { + memory_size: defaults::MEMORY_SIZE, + max_exection_depth: defaults::MAX_EXECUTION_DEPTH, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum Value { + Concrete(u64), + Symbolic(SymbolicValue), + Uninitialized, +} + +impl fmt::Display for Value { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Value::Concrete(c) => write!(f, "{:#x}", c), + Value::Symbolic(i) => write!(f, "x{}", i.index()), + Value::Uninitialized => write!(f, "uninit"), + } + } +} + +#[derive(Debug, Clone, Error)] +pub enum SymbolicExecutionError { + #[error("failed to load binary {0:#}")] + IoError(Arc), + + #[error("engine does not support {0}")] + NotSupported(String), + + #[error("has reached the maximum execution depth of {0}")] + ExecutionDepthReached(u64), + + #[error("failed to decode instruction at PC: {0:#010x}")] + InvalidInstructionEncoding(u64, DecodingError), + + #[error("failed to compute satisfyability for formula")] + SatUnknown(SolverError), +} + +pub struct SymbolicExecutionEngine<'a, E, S> +where + E: ExplorationStrategy, + S: Solver, +{ + symbolic_state: Box>, + program_break: u64, + pc: u64, + regs: [Value; 32], + memory: Vec, + strategy: &'a E, + execution_depth: u64, + max_exection_depth: u64, + is_running: bool, +} + +impl<'a, E, S> SymbolicExecutionEngine<'a, E, S> +where + E: ExplorationStrategy, + S: Solver, +{ + // creates a machine state with a specific memory size + pub fn new( + program: &Program, + options: &SymbolicExecutionOptions, + strategy: &'a E, + solver: &'a S, 
+ ) -> Self { + let mut regs = [Value::Uninitialized; 32]; + let memory_size = options.memory_size.as_u64(); + let mut memory = vec![Value::Uninitialized; memory_size as usize / 8]; + + let sp = memory_size - 8; + regs[Register::Sp as usize] = Value::Concrete(sp); + regs[Register::Zero as usize] = Value::Concrete(0); + + // TODO: Init main function arguments + let argc = 0; + memory[sp as usize / size_of::()] = Value::Concrete(argc); + + load_segment(&mut memory, &program.code); + load_segment(&mut memory, &program.data); + + let pc = program.code.address; + + let program_break = program.data.address + program.data.content.len() as u64; + + let symbolic_state = Box::new(SymbolicState::new(solver)); + + debug!( + "initializing new execution context with {} of main memory", + memory_size + ); + debug!( + "code segment: start={:#x} length={}", + program.code.address, + program.code.content.len(), + ); + debug!( + "data segment: start={:#x} length={}", + program.data.address, + program.data.content.len(), + ); + debug!( + "init state: pc={:#x} brk={:#x}, argc={}", + pc, program_break, argc + ); + + Self { + symbolic_state, + program_break, + pc, + regs, + memory, + strategy, + execution_depth: 0, + max_exection_depth: options.max_exection_depth, + is_running: false, + } + } + + fn decode(&self, raw: u32) -> Result { + decode(raw).map_err(|e| SymbolicExecutionError::InvalidInstructionEncoding(self.pc, e)) + } + + pub fn run(&mut self) -> Result, SymbolicExecutionError> { + self.is_running = true; + + loop { + if self.execution_depth >= self.max_exection_depth { + trace!("maximum execution depth reached => exiting this context"); + + self.is_running = false; + + return Err(SymbolicExecutionError::ExecutionDepthReached( + self.execution_depth, + )); + } + + self.execution_depth += 1; + + let bug = self + .fetch() + .and_then(|raw| self.decode(raw)) + .and_then(|instr| self.execute(instr))?; + + if bug.is_some() || !self.is_running { + return Ok(bug); + } + } + } + + fn 
execute_query( + &mut self, + query: Query, + basic_info_to_bug: F, + ) -> Result, SymbolicExecutionError> + where + F: Fn(SymbolicExecutionErrorInfo) -> Bug, + { + self.symbolic_state + .execute_query(query) + .map_err(SymbolicExecutionError::SatUnknown) + .map_or(Ok(None), |result| { + Ok(result.map(|witness| { + basic_info_to_bug(SymbolicExecutionErrorInfo { + witness, + pc: self.pc, + }) + })) + }) + } + + fn check_for_uninitialized_memory( + &mut self, + instruction: Instruction, + v1: Value, + v2: Value, + ) -> Result, SymbolicExecutionError> { + trace!( + "{}: {}, {} => computing reachability", + instruction_to_str(instruction), + v1, + v2 + ); + + self.execute_query(Query::Reachable, |info| Bug::AccessToUnitializedMemory { + info, + instruction, + operands: vec![v1, v2], + }) + } + + fn is_in_vaddr_range(&self, vaddr: u64) -> bool { + vaddr as usize / size_of::() < self.memory.len() + } + + fn check_for_valid_memory_address( + &mut self, + instruction: &str, + address: u64, + ) -> Result, SymbolicExecutionError> { + let is_alignment_ok = address % size_of::() as u64 == 0; + + if !is_alignment_ok { + trace!( + "{}: address {:#x} is not double word aligned => computing reachability", + instruction, + address + ); + + self.is_running = false; + + self.execute_query(Query::Reachable, |info| Bug::AccessToUnalignedAddress { + info, + address, + }) + } else if !self.is_in_vaddr_range(address) { + trace!( + "{}: address {:#x} out of virtual address range (0x0 - {:#x}) => computing reachability", + instruction, + address, + self.memory.len() * 8, + ); + + self.is_running = false; + + self.execute_query(Query::Reachable, |info| Bug::AccessToOutOfRangeAddress { + info, + }) + } else { + Ok(None) + } + } + + #[allow(clippy::unnecessary_wraps)] + fn execute_lui(&mut self, utype: UType) -> Result, SymbolicExecutionError> { + let immediate = u64::from(utype.imm()) << 12; + + let result = Value::Concrete(immediate); + + trace!( + "[{:#010x}] {}: {:?} <- {}", + self.pc, + 
instruction_to_str(Instruction::Lui(utype)), + utype.rd(), + result, + ); + + self.assign_rd(utype.rd(), result); + + self.pc += INSTRUCTION_SIZE; + + Ok(None) + } + + fn execute_divu_remu( + &mut self, + instruction: Instruction, + rtype: RType, + op: Op, + ) -> Result, SymbolicExecutionError> + where + Op: FnOnce(u64, u64) -> u64, + { + let bug = match self.regs[rtype.rs2() as usize] { + Value::Symbolic(divisor) => { + trace!( + "{}: symbolic divisor -> find input for divisor == 0", + instruction_to_str(instruction) + ); + + self.execute_query(Query::Equals((divisor, 0)), |info| Bug::DivisionByZero { + info, + })? + } + Value::Concrete(divisor) if divisor == 0 => { + trace!( + "{}: divisor == 0 -> compute reachability", + instruction_to_str(instruction) + ); + + self.execute_query(Query::Reachable, |info| Bug::DivisionByZero { info })? + } + _ => None, + }; + + if bug.is_none() { + self.execute_rtype(instruction, rtype, op) + } else { + Ok(bug) + } + } + + fn execute_itype( + &mut self, + instruction: Instruction, + itype: IType, + op: Op, + ) -> Result, SymbolicExecutionError> + where + Op: FnOnce(u64, u64) -> u64, + { + let rs1_value = self.regs[itype.rs1() as usize]; + let imm_value = Value::Concrete(itype.imm() as u64); + + self.execute_binary_op(instruction, rs1_value, imm_value, itype.rd(), op) + } + + fn execute_rtype( + &mut self, + instruction: Instruction, + rtype: RType, + op: Op, + ) -> Result, SymbolicExecutionError> + where + Op: FnOnce(u64, u64) -> u64, + { + let rs1_value = self.regs[rtype.rs1() as usize]; + let rs2_value = self.regs[rtype.rs2() as usize]; + + self.execute_binary_op(instruction, rs1_value, rs2_value, rtype.rd(), op) + } + + fn execute_binary_op( + &mut self, + instruction: Instruction, + lhs: Value, + rhs: Value, + rd: Register, + op: Op, + ) -> Result, SymbolicExecutionError> + where + Op: FnOnce(u64, u64) -> u64, + { + let result = match (lhs, rhs) { + (Value::Concrete(v1), Value::Concrete(v2)) => Value::Concrete(op(v1, v2)), + 
(Value::Symbolic(v1), Value::Concrete(v2)) => { + let v2 = self.symbolic_state.create_const(v2); + Value::Symbolic(self.symbolic_state.create_instruction(instruction, v1, v2)) + } + (Value::Concrete(v1), Value::Symbolic(v2)) => { + let v1 = self.symbolic_state.create_const(v1); + Value::Symbolic(self.symbolic_state.create_instruction(instruction, v1, v2)) + } + (Value::Symbolic(v1), Value::Symbolic(v2)) => { + Value::Symbolic(self.symbolic_state.create_instruction(instruction, v1, v2)) + } + _ => { + let bug = self.check_for_uninitialized_memory(instruction, lhs, rhs)?; + + trace!("could not find input assignment => exeting this context"); + + self.is_running = false; + + return Ok(bug); + } + }; + + trace!( + "[{:#010x}] {}: {}, {} |- {:?} <- {}", + self.pc, + instruction_to_str(instruction), + lhs, + rhs, + rd, + result, + ); + + self.assign_rd(rd, result); + + self.pc += INSTRUCTION_SIZE; + + Ok(None) + } + + fn execute_brk(&mut self) -> Result, SymbolicExecutionError> { + if let Value::Concrete(new_program_break) = self.regs[Register::A0 as usize] { + let old_program_break = self.program_break; + + if new_program_break < self.program_break || !self.is_in_vaddr_range(new_program_break) + { + self.regs[Register::A0 as usize] = Value::Concrete(self.program_break); + } else { + self.program_break = new_program_break; + } + + trace!( + "brk: old={:#x} new={:#x}", + old_program_break, + new_program_break + ); + + Ok(None) + } else { + not_supported("can not handle symbolic or uninitialized program break") + } + } + + fn bytewise_combine( + &mut self, + old: Value, + n_lower_bytes: u32, + new_idx: SymbolicValue, + ) -> SymbolicValue { + let bits_in_a_byte = 8; + let low_shift_factor = 2_u64.pow(n_lower_bytes * bits_in_a_byte); + let high_shift_factor = + 2_u64.pow((size_of::() as u32 - n_lower_bytes) * bits_in_a_byte); + + assert!( + low_shift_factor != 0 && high_shift_factor != 0, + "no bytes to shift" + ); + + let old_idx = match old { + Value::Concrete(c) => { + 
let old_c = c / low_shift_factor * low_shift_factor; + + self.symbolic_state.create_const(old_c) + } + Value::Symbolic(old_idx) => { + let low_shift_factor_idx = self.symbolic_state.create_const(low_shift_factor); + + let old_idx = self.symbolic_state.create_operator( + BVOperator::Divu, + old_idx, + low_shift_factor_idx, + ); + + self.symbolic_state + .create_operator(BVOperator::Mul, old_idx, low_shift_factor_idx) + } + Value::Uninitialized => { + unreachable!("function should not be called for uninitialized values") + } + }; + + let high_shift_factor_idx = self.symbolic_state.create_const(high_shift_factor); + + let new_idx = + self.symbolic_state + .create_operator(BVOperator::Mul, new_idx, high_shift_factor_idx); + + let new_idx = + self.symbolic_state + .create_operator(BVOperator::Divu, new_idx, high_shift_factor_idx); + + self.symbolic_state + .create_operator(BVOperator::Add, old_idx, new_idx) + } + + fn execute_read(&mut self) -> Result, SymbolicExecutionError> { + if !matches!(self.regs[Register::A0 as usize], Value::Concrete(0)) { + return not_supported("can not handle other fd than stdin in read syscall"); + } + + let buffer = if let Value::Concrete(b) = self.regs[Register::A1 as usize] { + b + } else { + return not_supported( + "can not handle symbolic or uninitialized buffer address in read syscall", + ); + }; + + let size = if let Value::Concrete(s) = self.regs[Register::A2 as usize] { + s + } else { + return not_supported("can not handle symbolic or uinitialized size in read syscall"); + }; + + trace!("read: fd={} buffer={:#x} size={}", 0, buffer, size,); + + if !self.is_in_vaddr_range(buffer) || !self.is_in_vaddr_range(buffer + size) { + return not_supported("read syscall failed to"); + } + + let size_of_u64 = size_of::() as u64; + + let round_up = if size % size_of_u64 == 0 { + 0 + } else { + size_of_u64 - size % size_of_u64 + }; + + let mut bytes_to_read = size; + let words_to_read = (bytes_to_read + round_up) / size_of_u64; + + let start = 
buffer / size_of_u64; + + for word_count in 0..words_to_read { + let start_byte = word_count * size_of_u64; + let end_byte = start_byte + + if bytes_to_read < size_of_u64 { + bytes_to_read + } else { + 8 + }; + + let name = format!( + "read({}, {}, {})[{} - {}]", + 0, buffer, size, start_byte, end_byte, + ); + + let input_idx = self.symbolic_state.create_input(&name); + + let result_idx = if bytes_to_read >= size_of_u64 { + bytes_to_read -= size_of_u64; + + input_idx + } else { + match self.memory[(start + word_count) as usize] { + Value::Uninitialized => { + // we do not partially overwrite words with concrete values + // if at least one byte in a word is uninitialized, the whole word is uninitialized + break; + } + v => self.bytewise_combine(v, bytes_to_read as u32, input_idx), + } + }; + + self.memory[(start + word_count) as usize] = Value::Symbolic(result_idx); + } + + self.regs[Register::A0 as usize] = Value::Concrete(size); + + Ok(None) + } + + fn execute_beq_branches( + &mut self, + true_branch: u64, + false_branch: u64, + lhs: SymbolicValue, + rhs: SymbolicValue, + ) -> Result, SymbolicExecutionError> { + let memory_snapshot = self.memory.clone(); + let regs_snapshot = self.regs; + let graph_snapshot = Box::new((*self.symbolic_state).clone()); + let brk_snapshot = self.program_break; + let execution_depth_snapshot = self.execution_depth; + + let next_pc = self.strategy.choose_path(true_branch, false_branch); + + let decision = next_pc == true_branch; + + self.symbolic_state + .create_beq_path_condition(decision, lhs, rhs); + + if let Ok(Some(_)) = self.symbolic_state.execute_query(Query::Reachable) { + trace!( + "[{:#010x}] beq: x{}, x{} |- assume {}, pc <- {:#x}", + self.pc, + lhs.index(), + rhs.index(), + next_pc == false_branch, + next_pc, + ); + + self.pc = next_pc; + + let result = self.run(); + + if !matches!( + result, + Err(SymbolicExecutionError::ExecutionDepthReached(_)) | Ok(None) + ) { + return result; + } + } else { + trace!( + "[{:#010x}] beq: 
x{}, x{} |- assume {}, not reachable", + self.pc, + lhs.index(), + rhs.index(), + next_pc == false_branch, + ); + } + + let next_pc = if decision { false_branch } else { true_branch }; + + self.is_running = true; + + self.memory = memory_snapshot; + self.regs = regs_snapshot; + self.symbolic_state = graph_snapshot; + self.program_break = brk_snapshot; + self.execution_depth = execution_depth_snapshot; + + self.symbolic_state + .create_beq_path_condition(!decision, lhs, rhs); + + if let Ok(Some(_)) = self.symbolic_state.execute_query(Query::Reachable) { + trace!( + "[{:#010x}] beq: x{}, x{} |- assume {}, pc <- {:#x}", + self.pc, + lhs.index(), + rhs.index(), + next_pc == false_branch, + next_pc, + ); + + self.pc = next_pc; + + Ok(None) + } else { + trace!( + "[{:#010x}] beq: x{}, x{} |- assume {}, not reachable", + self.pc, + lhs.index(), + rhs.index(), + next_pc == false_branch, + ); + + self.is_running = false; + + Ok(None) + } + } + + fn execute_beq(&mut self, btype: BType) -> Result, SymbolicExecutionError> { + let lhs = self.regs[btype.rs1() as usize]; + let rhs = self.regs[btype.rs2() as usize]; + + let true_branch = self.pc.wrapping_add(btype.imm() as u64); + let false_branch = self.pc.wrapping_add(4); + + match (lhs, rhs) { + (Value::Concrete(v1), Value::Concrete(v2)) => { + let old_pc = self.pc; + + self.pc = if v1 == v2 { true_branch } else { false_branch }; + + trace!( + "[{:#010x}] beq: {}, {} |- pc <- {:#x}", + old_pc, + lhs, + rhs, + self.pc + ); + + Ok(None) + } + (Value::Symbolic(v1), Value::Concrete(v2)) => { + let v2 = self.symbolic_state.create_const(v2); + self.execute_beq_branches(true_branch, false_branch, v1, v2) + } + (Value::Concrete(v1), Value::Symbolic(v2)) => { + let v1 = self.symbolic_state.create_const(v1); + self.execute_beq_branches(true_branch, false_branch, v1, v2) + } + (Value::Symbolic(v1), Value::Symbolic(v2)) => { + self.execute_beq_branches(true_branch, false_branch, v1, v2) + } + (v1, v2) => { + self.is_running = false; + + 
let result = self.check_for_uninitialized_memory(Instruction::Beq(btype), v1, v2); + + trace!("access to uninitialized memory => exeting this context"); + + result + } + } + } + + fn execute_exit(&mut self) -> Result, SymbolicExecutionError> { + self.is_running = false; + + match self.regs[Register::A0 as usize] { + Value::Symbolic(exit_code) => { + trace!("exit: symbolic code -> find input for exit_code != 0"); + + self.execute_query(Query::NotEquals((exit_code, 0)), |info| { + Bug::ExitCodeGreaterZero { info } + }) + } + Value::Concrete(exit_code) => { + if exit_code > 0 { + trace!( + "exit: with code {} -> find input to satisfy path condition", + exit_code + ); + + self.execute_query(Query::Reachable, |info| Bug::ExitCodeGreaterZero { info }) + } else { + trace!("exiting context with exit_code 0"); + + Ok(None) + } + } + _ => not_supported("exit only implemented for symbolic exit codes"), + } + } + + fn execute_ecall(&mut self) -> Result, SymbolicExecutionError> { + trace!("[{:#010x}] ecall", self.pc); + + let result = match self.regs[Register::A7 as usize] { + Value::Concrete(syscall_id) if syscall_id == (SyscallId::Brk as u64) => { + self.execute_brk() + } + Value::Concrete(syscall_id) if syscall_id == (SyscallId::Read as u64) => { + self.execute_read() + } + Value::Concrete(syscall_id) if syscall_id == (SyscallId::Exit as u64) => { + self.execute_exit() + } + id => Err(SymbolicExecutionError::NotSupported(format!( + "syscall with id ({}) is not supported", + id + ))), + }; + + self.pc += INSTRUCTION_SIZE; + + result + } + + fn execute_ld( + &mut self, + instruction: Instruction, + itype: IType, + ) -> Result, SymbolicExecutionError> { + if let Value::Concrete(base_address) = self.regs[itype.rs1() as usize] { + let immediate = itype.imm() as u64; + + let address = base_address.wrapping_add(immediate); + + let bug = + self.check_for_valid_memory_address(instruction_to_str(instruction), address)?; + + if bug.is_none() { + let value = self.memory[(address / 8) as 
usize]; + + trace!( + "[{:#010x}] {}: {:#x}, {} |- {:?} <- mem[{:#x}]={}", + self.pc, + instruction_to_str(instruction), + base_address, + immediate, + itype.rd(), + address, + value, + ); + + self.assign_rd(itype.rd(), value); + + self.pc += INSTRUCTION_SIZE; + } + + Ok(bug) + } else { + not_supported("can not handle symbolic addresses in LD") + } + } + + fn execute_sd( + &mut self, + instruction: Instruction, + stype: SType, + ) -> Result, SymbolicExecutionError> { + if let Value::Concrete(base_address) = self.regs[stype.rs1() as usize] { + let immediate = stype.imm(); + + let address = base_address.wrapping_add(immediate as u64); + + let bug = + self.check_for_valid_memory_address(instruction_to_str(instruction), address)?; + + if bug.is_none() { + let value = self.regs[stype.rs2() as usize]; + + trace!( + "[{:#010x}] {}: {:#x}, {}, {} |- mem[{:#x}] <- {}", + self.pc, + instruction_to_str(instruction), + base_address, + immediate, + self.regs[stype.rs2() as usize], + address, + value, + ); + + self.memory[(address / 8) as usize] = value; + + self.pc += INSTRUCTION_SIZE; + } + + Ok(bug) + } else { + not_supported("can not handle symbolic addresses in SD") + } + } + + #[allow(clippy::unnecessary_wraps)] + fn execute_jal(&mut self, jtype: JType) -> Result, SymbolicExecutionError> { + let link = self.pc + INSTRUCTION_SIZE; + + let new_pc = self.pc.wrapping_add(jtype.imm() as u64); + + trace!( + "[{:#010x}] jal: pc <- {:#x}, {:?} <- {:#x}", + self.pc, + new_pc, + jtype.rd(), + link, + ); + + self.pc = new_pc; + + self.assign_rd(jtype.rd(), Value::Concrete(link)); + + Ok(None) + } + + fn assign_rd(&mut self, rd: Register, v: Value) { + if rd != Register::Zero { + self.regs[rd as usize] = v; + } + } + + fn execute_jalr(&mut self, itype: IType) -> Result, SymbolicExecutionError> { + if let Value::Concrete(dest) = self.regs[itype.rs1() as usize] { + let link = self.pc + INSTRUCTION_SIZE; + + let new_pc = dest.wrapping_add(itype.imm() as u64); + + trace!( + "[{:#010x}] 
jalr: {:#x}, {} |- pc <- {:#x}, {:?} <- {:#x}", + self.pc, + dest, + itype.imm(), + new_pc, + itype.rd(), + link + ); + + self.assign_rd(itype.rd(), Value::Concrete(link)); + + self.pc = new_pc; + + Ok(None) + } else { + not_supported("can only handle concrete addresses in JALR") + } + } + + fn fetch(&self) -> Result { + if let Value::Concrete(dword) = self.memory[(self.pc as usize / size_of::()) as usize] + { + if self.pc % size_of::() as u64 == 0 { + Ok(dword as u32) + } else { + Ok((dword >> 32) as u32) + } + } else { + Err(SymbolicExecutionError::NotSupported(String::from( + "tried to fetch none concrete instruction", + ))) + } + } + + fn execute(&mut self, instruction: Instruction) -> Result, SymbolicExecutionError> { + match instruction { + Instruction::Ecall(_) => self.execute_ecall(), + Instruction::Lui(utype) => self.execute_lui(utype), + Instruction::Addi(itype) => self.execute_itype(instruction, itype, u64::wrapping_add), + Instruction::Add(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_add), + Instruction::Sub(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_sub), + Instruction::Mul(rtype) => self.execute_rtype(instruction, rtype, u64::wrapping_mul), + Instruction::Divu(rtype) => { + self.execute_divu_remu(instruction, rtype, u64::wrapping_div) + } + Instruction::Remu(rtype) => { + self.execute_divu_remu(instruction, rtype, u64::wrapping_rem) + } + Instruction::Sltu(rtype) => { + self.execute_rtype(instruction, rtype, |l, r| if l < r { 1 } else { 0 }) + } + Instruction::Ld(itype) => self.execute_ld(instruction, itype), + Instruction::Sd(stype) => self.execute_sd(instruction, stype), + Instruction::Jal(jtype) => self.execute_jal(jtype), + Instruction::Jalr(itype) => self.execute_jalr(itype), + Instruction::Beq(btype) => self.execute_beq(btype), + } + } +} + +fn load_segment(memory: &mut Vec, segment: &ProgramSegment) { + let start = segment.address as usize / size_of::(); + let end = start + segment.content.len() / 
size_of::(); + + segment + .content + .chunks(size_of::()) + .map(LittleEndian::read_u64) + .zip(start..end) + .for_each(|(x, i)| memory[i] = Value::Concrete(x)); +} + +fn not_supported(s: &str) -> Result, SymbolicExecutionError> { + Err(SymbolicExecutionError::NotSupported(s.to_owned())) +} + +#[derive(Debug, Clone)] +pub struct SymbolicExecutionErrorInfo { + pub witness: Witness, + pub pc: u64, +} + +impl BugInfo for SymbolicExecutionErrorInfo { + type Value = Value; +} + +impl fmt::Display for SymbolicExecutionErrorInfo { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "pc: {:#010x}\nwitness: {}", self.pc, self.witness) + } +} diff --git a/src/engine/symbolic_state.rs b/src/engine/symbolic_state.rs index a9e470bc..e66f2570 100644 --- a/src/engine/symbolic_state.rs +++ b/src/engine/symbolic_state.rs @@ -1,4 +1,3 @@ -use super::bug::Witness; use crate::solver::{ BVOperator, BitVector, Formula, FormulaVisitor, OperandSide, Solver, SolverError, Symbol, SymbolId, @@ -527,3 +526,81 @@ impl<'a> FormulaVisitor for WitnessBuilder<'a> { ) } } + +#[derive(Debug, Clone)] +pub(crate) enum Term { + Constant(u64), + Variable(String, u64), + Unary(BVOperator, usize, u64), + Binary(usize, BVOperator, usize, u64), +} + +#[derive(Debug, Clone)] +pub struct Witness { + assignments: Vec, +} + +impl Default for Witness { + fn default() -> Self { + Self { + assignments: Vec::new(), + } + } +} + +impl Witness { + pub fn new() -> Self { + Witness::default() + } + + pub fn add_constant(&mut self, value: BitVector) -> usize { + self.assignments.push(Term::Constant(value.0)); + + self.assignments.len() - 1 + } + + pub fn add_variable(&mut self, name: &str, result: BitVector) -> usize { + self.assignments + .push(Term::Variable(name.to_owned(), result.0)); + + self.assignments.len() - 1 + } + + pub fn add_unary(&mut self, op: BVOperator, v: usize, result: BitVector) -> usize { + self.assignments.push(Term::Unary(op, v, result.0)); + + self.assignments.len() - 1 + } + + 
pub fn add_binary( + &mut self, + lhs: usize, + op: BVOperator, + rhs: usize, + result: BitVector, + ) -> usize { + self.assignments.push(Term::Binary(lhs, op, rhs, result.0)); + + self.assignments.len() - 1 + } +} + +impl fmt::Display for Witness { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "[").and_then(|_| { + self.assignments + .clone() + .into_iter() + .enumerate() + .try_for_each(|(id, a)| match a { + Term::Constant(c) => writeln!(f, " x{} := {},", id, c), + Term::Variable(name, v) => writeln!(f, " x{} := {:?} ({}),", id, name, v), + Term::Unary(op, x, v) => writeln!(f, " x{} := {}x{} ({}),", id, op, x, v), + Term::Binary(lhs, op, rhs, v) => { + writeln!(f, " x{} := x{} {} x{} ({}),", id, lhs, op, rhs, v) + } + }) + .and_then(|_| writeln!(f, "]")) + }) + } +} diff --git a/src/engine/system.rs b/src/engine/system.rs new file mode 100644 index 00000000..da6f85a2 --- /dev/null +++ b/src/engine/system.rs @@ -0,0 +1,28 @@ +use riscu::Instruction; + +pub enum SyscallId { + Exit = 93, + Read = 63, + Write = 64, + Openat = 56, + Brk = 214, +} + +pub const fn instruction_to_str(i: Instruction) -> &'static str { + match i { + Instruction::Lui(_) => "lui", + Instruction::Jal(_) => "jal", + Instruction::Jalr(_) => "jalr", + Instruction::Beq(_) => "beq", + Instruction::Ld(_) => "ld", + Instruction::Sd(_) => "sd", + Instruction::Addi(_) => "addi", + Instruction::Add(_) => "add", + Instruction::Sub(_) => "sub", + Instruction::Sltu(_) => "sltu", + Instruction::Mul(_) => "mul", + Instruction::Divu(_) => "divu", + Instruction::Remu(_) => "remu", + Instruction::Ecall(_) => "ecall", + } +} diff --git a/src/lib.rs b/src/lib.rs index 768e0282..933da3ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,10 +4,13 @@ pub mod util; pub mod disassemble; pub mod engine; pub mod path_exploration; -pub mod rarity; pub mod solver; -use engine::{Bug, Engine, EngineError, EngineOptions}; +pub use engine::{ + BugFinder, RaritySimulation, RaritySimulationBug, 
RaritySimulationError, + RaritySimulationOptions, SymbolicExecutionBug, SymbolicExecutionEngine, SymbolicExecutionError, + SymbolicExecutionOptions, +}; use riscu::{load_object_file, Program}; use std::path::Path; use thiserror::Error; @@ -20,8 +23,11 @@ pub enum MonsterError { #[error("preprocessing failed with error")] Preprocessing(anyhow::Error), - #[error("execution stopped with error")] - Execution(EngineError), + #[error("symbolic execution stopped with error")] + SymbolicExecution(SymbolicExecutionError), + + #[error("rarity simulation stopped with error")] + RaritySimulation(RaritySimulationError), } pub fn load_elf

(input: P) -> Result @@ -33,42 +39,46 @@ where }) } -pub fn execute(program: &Program) -> Result, MonsterError> { - let options = EngineOptions::default(); +pub fn symbolically_execute( + program: &Program, +) -> Result, MonsterError> { + let options = SymbolicExecutionOptions::default(); let solver = solver::MonsterSolver::default(); let strategy = path_exploration::ShortestPathStrategy::compute_for(program) .map_err(MonsterError::Preprocessing)?; - execute_with(program, &options, &strategy, &solver) + symbolically_execute_with(program, &options, &strategy, &solver) } -pub fn execute_elf>(input: P) -> Result, MonsterError> { +pub fn symbollically_execute_elf>( + input: P, +) -> Result, MonsterError> { let program = load_elf(input)?; - execute(&program) + symbolically_execute(&program) } -pub fn execute_with( +pub fn symbolically_execute_with( program: &Program, - options: &EngineOptions, + options: &SymbolicExecutionOptions, strategy: &Strategy, solver: &Solver, -) -> Result, MonsterError> +) -> Result, MonsterError> where Strategy: path_exploration::ExplorationStrategy, Solver: solver::Solver, { - let mut engine = Engine::new(&program, &options, strategy, solver); + let mut engine = SymbolicExecutionEngine::new(&program, &options, strategy, solver); - engine.run().map_err(MonsterError::Execution) + engine.run().map_err(MonsterError::SymbolicExecution) } -pub fn execute_elf_with( +pub fn symbolically_execute_elf_with( input: P, - options: &EngineOptions, + options: &SymbolicExecutionOptions, strategy: &Strategy, solver: &Solver, -) -> Result, MonsterError> +) -> Result, MonsterError> where P: AsRef, Strategy: path_exploration::ExplorationStrategy, @@ -76,5 +86,38 @@ where { let program = load_elf(input)?; - execute_with(&program, options, strategy, solver) + symbolically_execute_with(&program, options, strategy, solver) +} + +pub fn rarity_simulate(program: &Program) -> Result, MonsterError> { + rarity_simulate_with(program, &RaritySimulationOptions::default()) +} 
+ +pub fn rarity_simulate_elf>( + input: P, +) -> Result, MonsterError> { + let program = load_elf(input)?; + + rarity_simulate(&program) +} + +pub fn rarity_simulate_with( + program: &Program, + options: &RaritySimulationOptions, +) -> Result, MonsterError> { + RaritySimulation::new(&options) + .search_for_bugs(program) + .map_err(MonsterError::RaritySimulation) +} + +pub fn rarity_simulate_elf_with

( + input: P, + options: &RaritySimulationOptions, +) -> Result, MonsterError> +where + P: AsRef, +{ + let program = load_elf(input)?; + + rarity_simulate_with(&program, options) } diff --git a/src/main.rs b/src/main.rs index 393097f5..3bb8efe8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,11 +7,10 @@ use env_logger::{Env, TimestampPrecision}; use log::info; use monster::{ disassemble::disassemble, - engine::{self, EngineOptions}, - execute_elf_with, path_exploration::{ControlFlowGraph, ShortestPathStrategy}, - rarity::{self, MetricType}, + rarity_simulate_elf_with, solver::{self, SolverType}, + symbolically_execute_elf_with, RaritySimulationOptions, SymbolicExecutionOptions, }; use riscu::load_object_file; use std::{ @@ -58,12 +57,10 @@ fn main() -> Result<()> { ("execute", Some(args)) => { let input = expect_arg::(&args, "input-file")?; let solver = expect_arg::(&args, "solver")?; - let depth = expect_arg::(args, "max-execution-depth")?; - let megabytes = expect_arg::(args, "memory")?; - let options = EngineOptions { - max_exection_depth: depth, - memory_size: ByteSize::mb(megabytes), + let options = SymbolicExecutionOptions { + max_exection_depth: expect_arg(args, "max-execution-depth")?, + memory_size: ByteSize::mb(expect_arg(args, "memory")?), }; let program = load_object_file(&input)?; @@ -71,26 +68,32 @@ fn main() -> Result<()> { let strategy = ShortestPathStrategy::compute_for(&program)?; if let Some(bug) = match solver { - SolverType::Monster => execute_elf_with( + SolverType::Monster => symbolically_execute_elf_with( &input, &options, &strategy, &solver::MonsterSolver::default(), ), - SolverType::External => execute_elf_with( + SolverType::External => symbolically_execute_elf_with( &input, &options, &strategy, &solver::ExternalSolver::default(), ), #[cfg(feature = "boolector")] - SolverType::Boolector => { - execute_elf_with(&input, &options, &strategy, &solver::Boolector::default()) - } + SolverType::Boolector => symbolically_execute_elf_with( + 
&input, + &options, + &strategy, + &solver::Boolector::default(), + ), #[cfg(feature = "z3")] - SolverType::Z3 => { - execute_elf_with(&input, &options, &strategy, &solver::Z3::default()) - } + SolverType::Z3 => symbolically_execute_elf_with( + &input, + &options, + &strategy, + &solver::Z3::default(), + ), } .with_context(|| format!("Execution of {} failed", input.display()))? { @@ -103,24 +106,18 @@ fn main() -> Result<()> { } ("rarity", Some(args)) => { let input = expect_arg::(args, "input-file")?; - let megabytes = expect_arg::(args, "memory")?; - let cycles = expect_arg::(args, "cycles")?; - let iterations = expect_arg::(args, "iterations")?; - let runs = expect_arg::(args, "runs")?; - let selection = expect_arg::(args, "selection")?; - let copy_ratio = expect_arg::(args, "copy-init-ratio")?; - let metric = expect_arg::(args, "metric")?; - - if let Some(bug) = rarity::execute( - input, - ByteSize::mb(megabytes), - runs, - selection, - cycles, - iterations, - copy_ratio, - metric, - )? { + + let options = RaritySimulationOptions { + memory_size: ByteSize::mb(expect_arg(args, "memory")?), + amount_of_states: expect_arg(args, "states")?, + step_size: expect_arg(args, "step-size")?, + selection: expect_arg(args, "selection")?, + iterations: expect_arg(args, "iterations")?, + copy_init_ratio: expect_arg(args, "copy-init-ratio")?, + mean: expect_arg(args, "mean")?, + }; + + if let Some(bug) = rarity_simulate_elf_with(input, &options)? 
{ info!("bug found:\n{}", bug); } else { info!("no bug found in binary"); diff --git a/tests/engine.rs b/tests/engine.rs index 3148a5e7..0ca118bb 100644 --- a/tests/engine.rs +++ b/tests/engine.rs @@ -1,12 +1,10 @@ use bytesize::ByteSize; use log::trace; use monster::{ - self, - engine::{Bug, EngineOptions}, - execute_with, load_elf, + self, load_elf, path_exploration::ShortestPathStrategy, solver::{MonsterSolver, Solver}, - MonsterError, + symbolically_execute_with, MonsterError, SymbolicExecutionBug, SymbolicExecutionOptions, }; use rayon::prelude::*; use std::{ @@ -69,7 +67,7 @@ fn execute_with_different_memory_sizes() { with_temp_dir(|dir| { compile_riscu(dir, Some(&["recursive-fibonacci-1-35.c"])).for_each(|(source, object)| { [1, 64, 512, 1024].iter().for_each(move |size| { - let options = EngineOptions { + let options = SymbolicExecutionOptions { max_exection_depth: 200, memory_size: ByteSize::mb(*size), }; @@ -94,7 +92,7 @@ fn execute_engine_for_endless_loops() { with_temp_dir(|dir| { compile_riscu(dir, Some(&["endless-loop.c"])).for_each(|(_, object)| { - let options = EngineOptions { + let options = SymbolicExecutionOptions { max_exection_depth: 5, ..Default::default() }; @@ -109,8 +107,8 @@ fn execute_engine_for_endless_loops() { fn execute_default_with>( object: P, - options: &EngineOptions, -) -> Result, MonsterError> { + options: &SymbolicExecutionOptions, +) -> Result, MonsterError> { // need a big timeout because of the slow Github runners let solver = MonsterSolver::new(Duration::new(5, 0)); @@ -119,13 +117,13 @@ fn execute_default_with>( fn execute_default_with_solver, S: Solver>( object: P, - options: &EngineOptions, + options: &SymbolicExecutionOptions, solver: &S, -) -> Result, MonsterError> { +) -> Result, MonsterError> { let program = load_elf(object).unwrap(); let strategy = ShortestPathStrategy::compute_for(&program).unwrap(); - execute_with(&program, options, &strategy, solver) + symbolically_execute_with(&program, options, &strategy, 
solver) } fn execute_riscu_examples(names: &'static [&str], solver: &S) { @@ -140,7 +138,7 @@ fn execute_riscu_examples(names: &'static [&str], solver: &S) { fn execute_riscu(source: PathBuf, object: PathBuf, solver: &S) { let file_name = source.file_name().unwrap().to_str().unwrap(); - let options = EngineOptions { + let options = SymbolicExecutionOptions { max_exection_depth: match file_name { "two-level-nested-loop-1-35.c" => 230, "recursive-fibonacci-1-10.c" => 300, @@ -173,22 +171,22 @@ fn execute_riscu(source: PathBuf, object: PathBuf, solver: &S) { assert!( matches!( (file_name, bug.clone()), - ("arithmetic.c", Bug::ExitCodeGreaterZero { .. }) | - ("invalid-memory-access-2-35.c", Bug::AccessToOutOfRangeAddress { .. }) | - ("if-else.c", Bug::ExitCodeGreaterZero { .. }) | - ("division-by-zero-3-35.c", Bug::DivisionByZero { .. }) | - ("simple-assignment-1-35.c", Bug::ExitCodeGreaterZero { .. }) | - ("test-sltu.c", Bug::ExitCodeGreaterZero { .. }) | + ("arithmetic.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("invalid-memory-access-2-35.c", SymbolicExecutionBug::AccessToOutOfRangeAddress { .. }) | + ("if-else.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("division-by-zero-3-35.c", SymbolicExecutionBug::DivisionByZero { .. }) | + ("simple-assignment-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("test-sltu.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | //("memory-access-1-35.c", Bug:: - ("nested-if-else-reverse-1-35", Bug::ExitCodeGreaterZero { .. }) | - ("nested-recursion-1-35.c", Bug::ExitCodeGreaterZero { .. }) | - ("recursive-ackermann-1-35.c", Bug::ExitCodeGreaterZero { .. }) | - ("recursive-factorial-1-35.c", Bug::ExitCodeGreaterZero { .. }) | - ("recursive-fibonacci-1-10.c", Bug::ExitCodeGreaterZero { .. }) | - ("simple-if-else-1-35.c", Bug::ExitCodeGreaterZero { .. }) | - ("simple-increasing-loop-1-35.c", Bug::ExitCodeGreaterZero { .. }) | - ("two-level-nested-loop-1-35.c", Bug::ExitCodeGreaterZero { .. 
}) | - ("multiple-read.c", Bug::ExitCodeGreaterZero { .. }) + ("nested-if-else-reverse-1-35", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("nested-recursion-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("recursive-ackermann-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("recursive-factorial-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("recursive-fibonacci-1-10.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("simple-if-else-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("simple-increasing-loop-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("two-level-nested-loop-1-35.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) | + ("multiple-read.c", SymbolicExecutionBug::ExitCodeGreaterZero { .. }) ), "found right bug type (actual: {}) for {}", bug, diff --git a/tests/rarity.rs b/tests/rarity.rs index 1e0773e2..ed204c93 100644 --- a/tests/rarity.rs +++ b/tests/rarity.rs @@ -1,6 +1,5 @@ -use bytesize::ByteSize; use log::trace; -use monster::{self, rarity::*}; +use monster::{self, engine::rarity_simulation::*, rarity_simulate_elf_with}; use rayon::prelude::*; use utils::{compile_riscu, init, with_temp_dir}; @@ -11,15 +10,14 @@ fn test_rarity_simulation() { with_temp_dir(|dir| { compile_riscu(dir, Some(&["three-level-nested-loop-1-35.c"])).for_each( |(source, object)| { - let result = execute( + let result = rarity_simulate_elf_with( &object, - ByteSize::mb(1), - 1, - 1, - 1, - 1, - 0.6, - MetricType::Harmonic, + &RaritySimulationOptions { + amount_of_states: 1, + selection: 1, + iterations: 1, + ..Default::default() + }, ); trace!("execution finished: {:?}", result);