From 79df6a36901d50d8cc8d5b59e51078fe74ad1ac2 Mon Sep 17 00:00:00 2001 From: jfecher Date: Wed, 4 Sep 2024 06:30:51 -0500 Subject: [PATCH] chore: Add pass to normalize Ids in SSA (#5909) # Description ## Problem\* SSA can be difficult to debug manually, particularly for very large files. Even when comparing two sources side by side it can be hard to find exactly where they differ since one optimization difference earlier on can affect where ValueIds start in every function afterward. ## Summary\* This PR adds a pass to normalize ids in an SSA program - restarting from v0 after every SSA pass instead of continuing from the previous end. The goal of this is to be able to take two SSA programs and easily diff them to find out where they start differing. E.g. using this on two files containing the final SSA from https://github.com/noir-lang/noir/issues/5771 in both failing and passing versions, it is clear that they differ in exactly one ValueId in one instruction. ## Additional Context This new pass is only run when `--show-ssa` is specified, and is run before each printout. ## Documentation\* Check one: - [x] No documentation needed. - [ ] Documentation included in this PR. - [ ] **[For Experimental Features]** Documentation to be submitted in a separate PR. # PR Checklist\* - [x] I have tested the changes locally. - [x] I have formatted the changes with [Prettier](https://prettier.io/) and/or `cargo fmt` on default settings. --------- Co-authored-by: Tom French <15848336+TomAFrench@users.noreply.github.com> --- compiler/noirc_evaluator/src/ssa.rs | 4 +- .../noirc_evaluator/src/ssa/acir_gen/mod.rs | 10 +- .../check_for_underconstrained_values.rs | 2 +- .../noirc_evaluator/src/ssa/ir/function.rs | 7 + compiler/noirc_evaluator/src/ssa/ir/map.rs | 10 +- .../noirc_evaluator/src/ssa/opt/inlining.rs | 13 +- compiler/noirc_evaluator/src/ssa/opt/mod.rs | 1 + .../src/ssa/opt/normalize_value_ids.rs | 194 ++++++++++++++++++ 8 files changed, 227 insertions(+), 14 deletions(-) create mode 100644 compiler/noirc_evaluator/src/ssa/opt/normalize_value_ids.rs diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs index 57bd76d4f78..ad6645df228 100644 --- a/compiler/noirc_evaluator/src/ssa.rs +++ b/compiler/noirc_evaluator/src/ssa.rs @@ -82,6 +82,7 @@ pub(crate) fn optimize_into_acir( ) -> Result { let ssa_gen_span = span!(Level::TRACE, "ssa_generation"); let ssa_gen_span_guard = ssa_gen_span.enter(); + let mut ssa = SsaBuilder::new( program, options.enable_ssa_logging, @@ -418,8 +419,9 @@ impl SsaBuilder { Ok(self.print(msg)) } - fn print(self, msg: &str) -> Self { + fn print(mut self, msg: &str) -> Self { if self.print_ssa_passes { + self.ssa.normalize_ids(); println!("{msg}\n{}", self.ssa); } self diff --git a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs index a2b9e46a15a..0360b15d950 100644 --- a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs @@ -770,10 +770,12 @@ impl<'a> Context<'a> { .map(|result_id| dfg.type_of_value(*result_id).flattened_size()) .sum(); - let acir_function_id = ssa - .entry_point_to_generated_index - .get(id) - .expect("ICE: should have an associated final index"); + let Some(acir_function_id) = + ssa.entry_point_to_generated_index.get(id) + else { + unreachable!("Expected an associated final index for call to acir function {id} with args {arguments:?}"); + }; + let output_vars = self.acir_context.call_acir_function( AcirFunctionId(*acir_function_id), inputs, diff --git a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs index 26eab290d4b..aa5f4c8df95 100644 --- a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs +++ b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs @@ -244,7 +244,7 @@ impl Context { } }, Value::ForeignFunction(..) => { - panic!("Should not be able to reach foreign function from non-brillig functions"); + panic!("Should not be able to reach foreign function from non-brillig functions, {func_id} in function {}", function.name()); } Value::Array { .. } | Value::Instruction { .. } diff --git a/compiler/noirc_evaluator/src/ssa/ir/function.rs b/compiler/noirc_evaluator/src/ssa/ir/function.rs index bae9f82e4f1..65a616ef612 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/function.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/function.rs @@ -72,6 +72,13 @@ impl Function { Self { name: another.name.clone(), id, entry_block, dfg, runtime: another.runtime } } + /// Takes the signature (function name & runtime) from a function but does not copy the body. + pub(crate) fn clone_signature(id: FunctionId, another: &Function) -> Self { + let mut new_function = Function::new(another.name.clone(), id); + new_function.runtime = another.runtime; + new_function + } + /// The name of the function. /// Used exclusively for debugging purposes. pub(crate) fn name(&self) -> &str { diff --git a/compiler/noirc_evaluator/src/ssa/ir/map.rs b/compiler/noirc_evaluator/src/ssa/ir/map.rs index 769d52e6e65..23f5380f030 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/map.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/map.rs @@ -1,6 +1,7 @@ use fxhash::FxHashMap as HashMap; use serde::{Deserialize, Serialize}; use std::{ + collections::BTreeMap, hash::Hash, str::FromStr, sync::atomic::{AtomicUsize, Ordering}, @@ -240,7 +241,7 @@ impl std::ops::IndexMut> for DenseMap { /// call to .remove(). #[derive(Debug)] pub(crate) struct SparseMap { - storage: HashMap, T>, + storage: BTreeMap, T>, } impl SparseMap { @@ -271,11 +272,16 @@ impl SparseMap { pub(crate) fn remove(&mut self, id: Id) -> Option { self.storage.remove(&id) } + + /// Unwraps the inner storage of this map + pub(crate) fn into_btree(self) -> BTreeMap, T> { + self.storage + } } impl Default for SparseMap { fn default() -> Self { - Self { storage: HashMap::default() } + Self { storage: Default::default() } } } diff --git a/compiler/noirc_evaluator/src/ssa/opt/inlining.rs b/compiler/noirc_evaluator/src/ssa/opt/inlining.rs index 1ff593a1531..7843c55da65 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/inlining.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/inlining.rs @@ -2,7 +2,7 @@ //! The purpose of this pass is to inline the instructions of each function call //! within the function caller. If all function calls are known, there will only //! be a single function remaining when the pass finishes. -use std::collections::{BTreeSet, HashSet}; +use std::collections::{BTreeSet, HashSet, VecDeque}; use acvm::acir::AcirField; use iter_extended::{btree_map, vecmap}; @@ -372,14 +372,14 @@ impl<'function> PerFunctionContext<'function> { fn translate_block( &mut self, source_block: BasicBlockId, - block_queue: &mut Vec, + block_queue: &mut VecDeque, ) -> BasicBlockId { if let Some(block) = self.blocks.get(&source_block) { return *block; } // The block is not yet inlined, queue it - block_queue.push(source_block); + block_queue.push_back(source_block); // The block is not already present in the function being inlined into so we must create it. // The block's instructions are not copied over as they will be copied later in inlining. @@ -415,13 +415,14 @@ impl<'function> PerFunctionContext<'function> { /// Inline all reachable blocks within the source_function into the destination function. fn inline_blocks(&mut self, ssa: &Ssa) -> Vec { let mut seen_blocks = HashSet::new(); - let mut block_queue = vec![self.source_function.entry_block()]; + let mut block_queue = VecDeque::new(); + block_queue.push_back(self.source_function.entry_block()); // This Vec will contain each block with a Return instruction along with the // returned values of that block. let mut function_returns = vec![]; - while let Some(source_block_id) = block_queue.pop() { + while let Some(source_block_id) = block_queue.pop_front() { if seen_blocks.contains(&source_block_id) { continue; } @@ -609,7 +610,7 @@ impl<'function> PerFunctionContext<'function> { fn handle_terminator_instruction( &mut self, block_id: BasicBlockId, - block_queue: &mut Vec, + block_queue: &mut VecDeque, ) -> Option<(BasicBlockId, Vec)> { match self.source_function.dfg[block_id].unwrap_terminator() { TerminatorInstruction::Jmp { destination, arguments, call_stack } => { diff --git a/compiler/noirc_evaluator/src/ssa/opt/mod.rs b/compiler/noirc_evaluator/src/ssa/opt/mod.rs index 4e5fa262696..bd9d0baff97 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/mod.rs @@ -13,6 +13,7 @@ mod die; pub(crate) mod flatten_cfg; mod inlining; mod mem2reg; +mod normalize_value_ids; mod rc; mod remove_bit_shifts; mod remove_enable_side_effects; diff --git a/compiler/noirc_evaluator/src/ssa/opt/normalize_value_ids.rs b/compiler/noirc_evaluator/src/ssa/opt/normalize_value_ids.rs new file mode 100644 index 00000000000..f11b310494b --- /dev/null +++ b/compiler/noirc_evaluator/src/ssa/opt/normalize_value_ids.rs @@ -0,0 +1,194 @@ +use std::collections::BTreeMap; + +use crate::ssa::{ + ir::{ + basic_block::BasicBlockId, + function::{Function, FunctionId}, + map::SparseMap, + post_order::PostOrder, + value::{Value, ValueId}, + }, + ssa_gen::Ssa, +}; +use fxhash::FxHashMap as HashMap; +use iter_extended::vecmap; + +impl Ssa { + /// This is a debugging pass which re-inserts each instruction + /// and block in a fresh DFG context for each function so that ValueIds, + /// BasicBlockIds, and FunctionIds are always identical for the same SSA code. + /// + /// During normal compilation this is often not the case since prior passes + /// may increase the ID counter so that later passes start at different offsets, + /// even if they contain the same SSA code. + pub(crate) fn normalize_ids(&mut self) { + let mut context = Context::default(); + context.populate_functions(&self.functions); + for function in self.functions.values_mut() { + context.normalize_ids(function); + } + self.functions = context.functions.into_btree(); + } +} + +#[derive(Default)] +struct Context { + functions: SparseMap, + + new_ids: IdMaps, +} + +/// Maps from old ids to new ones. +/// Separate from the rest of Context so we can call mutable methods on it +/// while Context gives out mutable references to functions within. +#[derive(Default)] +struct IdMaps { + // Maps old function id -> new function id + function_ids: HashMap, + + // Maps old block id -> new block id + // Cleared in between each function. + blocks: HashMap, + + // Maps old value id -> new value id + // Cleared in between each function. + values: HashMap, +} + +impl Context { + fn populate_functions(&mut self, functions: &BTreeMap) { + for (id, function) in functions { + self.functions.insert_with_id(|new_id| { + self.new_ids.function_ids.insert(*id, new_id); + Function::clone_signature(new_id, function) + }); + } + } + + fn normalize_ids(&mut self, old_function: &mut Function) { + self.new_ids.blocks.clear(); + self.new_ids.values.clear(); + + let new_function_id = self.new_ids.function_ids[&old_function.id()]; + let new_function = &mut self.functions[new_function_id]; + + let mut reachable_blocks = PostOrder::with_function(old_function).into_vec(); + reachable_blocks.reverse(); + + self.new_ids.populate_blocks(&reachable_blocks, old_function, new_function); + + // Map each parameter, instruction, and terminator + for old_block_id in reachable_blocks { + let new_block_id = self.new_ids.blocks[&old_block_id]; + + let old_block = &mut old_function.dfg[old_block_id]; + for old_instruction_id in old_block.take_instructions() { + let instruction = old_function.dfg[old_instruction_id] + .map_values(|value| self.new_ids.map_value(new_function, old_function, value)); + + let call_stack = old_function.dfg.get_call_stack(old_instruction_id); + let old_results = old_function.dfg.instruction_results(old_instruction_id); + + let ctrl_typevars = instruction + .requires_ctrl_typevars() + .then(|| vecmap(old_results, |result| old_function.dfg.type_of_value(*result))); + + let new_results = new_function.dfg.insert_instruction_and_results( + instruction, + new_block_id, + ctrl_typevars, + call_stack, + ); + + assert_eq!(old_results.len(), new_results.len()); + for (old_result, new_result) in old_results.iter().zip(new_results.results().iter()) + { + let old_result = old_function.dfg.resolve(*old_result); + self.new_ids.values.insert(old_result, *new_result); + } + } + + let old_block = &mut old_function.dfg[old_block_id]; + let mut terminator = old_block + .take_terminator() + .map_values(|value| self.new_ids.map_value(new_function, old_function, value)); + terminator.mutate_blocks(|old_block| self.new_ids.blocks[&old_block]); + new_function.dfg.set_block_terminator(new_block_id, terminator); + } + } +} + +impl IdMaps { + fn populate_blocks( + &mut self, + reachable_blocks: &[BasicBlockId], + old_function: &mut Function, + new_function: &mut Function, + ) { + let old_entry = old_function.entry_block(); + self.blocks.insert(old_entry, new_function.entry_block()); + + for old_id in reachable_blocks { + if *old_id != old_entry { + let new_id = new_function.dfg.make_block(); + self.blocks.insert(*old_id, new_id); + } + + let new_id = self.blocks[old_id]; + let old_block = &mut old_function.dfg[*old_id]; + for old_parameter in old_block.take_parameters() { + let old_parameter = old_function.dfg.resolve(old_parameter); + let typ = old_function.dfg.type_of_value(old_parameter); + let new_parameter = new_function.dfg.add_block_parameter(new_id, typ); + self.values.insert(old_parameter, new_parameter); + } + } + } + + fn map_value( + &mut self, + new_function: &mut Function, + old_function: &Function, + old_value: ValueId, + ) -> ValueId { + let old_value = old_function.dfg.resolve(old_value); + match &old_function.dfg[old_value] { + value @ Value::Instruction { instruction, .. } => { + *self.values.get(&old_value).unwrap_or_else(|| { + let instruction = &old_function.dfg[*instruction]; + unreachable!("Unmapped value with id {old_value}: {value:?}\n from instruction: {instruction:?}, SSA: {old_function}") + }) + } + + value @ Value::Param { .. } => { + *self.values.get(&old_value).unwrap_or_else(|| { + unreachable!("Unmapped value with id {old_value}: {value:?}") + }) + } + + Value::Function(id) => { + let new_id = self.function_ids[id]; + new_function.dfg.import_function(new_id) + } + + Value::NumericConstant { constant, typ } => { + new_function.dfg.make_constant(*constant, typ.clone()) + } + Value::Array { array, typ } => { + if let Some(value) = self.values.get(&old_value) { + return *value; + } + + let array = array + .iter() + .map(|value| self.map_value(new_function, old_function, *value)) + .collect(); + let new_value = new_function.dfg.make_array(array, typ.clone()); + self.values.insert(old_value, new_value); + new_value + } + Value::Intrinsic(intrinsic) => new_function.dfg.import_intrinsic(*intrinsic), + Value::ForeignFunction(name) => new_function.dfg.import_foreign_function(name), + } + } +}