From 267bbb940641dd55bf14592b15f9a3517ca60f21 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Fri, 16 Aug 2024 01:56:50 -0500 Subject: [PATCH 1/7] Add loop scanner to tool-scanner --- .../tool-scanner/scanner-test.expected | 6 +- tests/script-based-pre/tool-scanner/test.rs | 27 ++++++++ tools/scanner/src/analysis.rs | 62 ++++++++++++++++--- 3 files changed, 84 insertions(+), 11 deletions(-) diff --git a/tests/script-based-pre/tool-scanner/scanner-test.expected b/tests/script-based-pre/tool-scanner/scanner-test.expected index c8f9af0ef1b7..f55a883434ee 100644 --- a/tests/script-based-pre/tool-scanner/scanner-test.expected +++ b/tests/script-based-pre/tool-scanner/scanner-test.expected @@ -1,6 +1,6 @@ -2 test_scan_fn_loops.csv -16 test_scan_functions.csv +5 test_scan_fn_loops.csv +19 test_scan_functions.csv 5 test_scan_input_tys.csv -14 test_scan_overall.csv +16 test_scan_overall.csv 3 test_scan_recursion.csv 5 test_scan_unsafe_ops.csv diff --git a/tests/script-based-pre/tool-scanner/test.rs b/tests/script-based-pre/tool-scanner/test.rs index 24b346e535b5..9eb3cb667bf3 100644 --- a/tests/script-based-pre/tool-scanner/test.rs +++ b/tests/script-based-pre/tool-scanner/test.rs @@ -36,6 +36,33 @@ pub fn with_iterator(input: &[usize]) -> usize { .unwrap_or_else(|| input.iter().fold(0, |acc, i| acc + 1)) } +pub fn with_for_loop(input: &[usize]) -> usize { + let mut res = 0; + for n in input { + res += 1; + } + res +} + +pub fn with_while_loop(input: &[usize]) -> usize { + let mut res = 0; + while res < input.len() { + res += 1; + } + return res; +} + +pub fn with_loop_loop(input: &[usize]) -> usize { + let mut res = 0; + loop { + if res == input.len() { + break; + } + res += 1; + } + res +} + static mut COUNTER: Option = Some(0); static OK: bool = true; diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index c376af9662f8..4d9849bd8d88 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -9,7 +9,7 @@ use serde::{ser::SerializeStruct, Serialize, Serializer}; use stable_mir::mir::mono::Instance; use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef}; use stable_mir::mir::{ - Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind, + BasicBlock, Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind, }; use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind}; use stable_mir::visitor::{Visitable, Visitor}; @@ -159,6 +159,7 @@ impl OverallStats { pub fn loops(&mut self, filename: PathBuf) { let all_items = stable_mir::all_local_items(); let (has_loops, no_loops) = all_items + .clone() .into_iter() .filter_map(|item| { let kind = item.ty().kind(); @@ -168,9 +169,37 @@ impl OverallStats { Some(FnLoops::new(item.name()).collect(&item.body())) }) .partition::, _>(|props| props.has_loops()); - self.counters - .extend_from_slice(&[("has_loops", has_loops.len()), ("no_loops", no_loops.len())]); - dump_csv(filename, &has_loops); + + let (has_iterators, no_iterators) = all_items + .clone() + .into_iter() + .filter_map(|item| { + let kind = item.ty().kind(); + if !kind.is_fn() { + return None; + }; + Some(FnLoops::new(item.name()).collect(&item.body())) + }) + .partition::, _>(|props| props.has_iterators()); + + let (has_either, _) = all_items + .into_iter() + .filter_map(|item| { + let kind = item.ty().kind(); + if !kind.is_fn() { + return None; + }; + Some(FnLoops::new(item.name()).collect(&item.body())) + }) + .partition::, _>(|props| props.has_iterators() || props.has_loops()); + + self.counters.extend_from_slice(&[ + ("has_loops", has_loops.len()), + ("no_loops", no_loops.len()), + ("has_iterators", has_iterators.len()), + ("no_iterators", no_iterators.len()), + ]); + dump_csv(filename, &has_either); } /// Create a callgraph for this crate and try to find recursive calls. @@ -436,21 +465,25 @@ impl<'a> MirVisitor for BodyVisitor<'a> { fn_props! { struct FnLoops { iterators, - nested_loops, - /// TODO: Collect loops. loops, + // TODO: Collect nested loops. + nested_loops, } } impl FnLoops { pub fn collect(self, body: &Body) -> FnLoops { - let mut visitor = IteratorVisitor { props: self, body }; + let mut visitor = IteratorVisitor { props: self, body, num_visited_blocks: 0 }; visitor.visit_body(body); visitor.props } pub fn has_loops(&self) -> bool { - (self.iterators + self.loops + self.nested_loops) > 0 + (self.loops + self.nested_loops) > 0 + } + + pub fn has_iterators(&self) -> bool { + (self.iterators) > 0 } } @@ -461,10 +494,23 @@ impl FnLoops { struct IteratorVisitor<'a> { props: FnLoops, body: &'a Body, + num_visited_blocks: usize, } impl<'a> MirVisitor for IteratorVisitor<'a> { + fn visit_basic_block(&mut self, bb: &BasicBlock) { + self.num_visited_blocks += 1; + self.super_basic_block(bb); + } + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + // A goto is identified as a loop latch if its target number is less than + // the number of visited basic block. + if let TerminatorKind::Goto { target } = &term.kind { + if self.num_visited_blocks > *target { + self.props.loops += 1; + } + } if let TerminatorKind::Call { func, .. } = &term.kind { let kind = func.ty(self.body.locals()).unwrap().kind(); if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { From 40e58a1f7cc61c4cffd3d9699787ad1d92c05f94 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Fri, 16 Aug 2024 20:22:25 -0500 Subject: [PATCH 2/7] Change the way of identifying loops --- tools/scanner/src/analysis.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index 4d9849bd8d88..1d7753ae88d6 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -473,7 +473,7 @@ fn_props! { impl FnLoops { pub fn collect(self, body: &Body) -> FnLoops { - let mut visitor = IteratorVisitor { props: self, body, num_visited_blocks: 0 }; + let mut visitor = IteratorVisitor { props: self, body, visited_blocks: HashSet::new() }; visitor.visit_body(body); visitor.props } @@ -494,20 +494,19 @@ impl FnLoops { struct IteratorVisitor<'a> { props: FnLoops, body: &'a Body, - num_visited_blocks: usize, + visited_blocks: HashSet, } impl<'a> MirVisitor for IteratorVisitor<'a> { fn visit_basic_block(&mut self, bb: &BasicBlock) { - self.num_visited_blocks += 1; + self.visited_blocks.insert(self.body.blocks.iter().position(|b| *b == *bb).unwrap()); self.super_basic_block(bb); } fn visit_terminator(&mut self, term: &Terminator, location: Location) { - // A goto is identified as a loop latch if its target number is less than - // the number of visited basic block. + // A goto is identified as a loop latch if its target has been visited. if let TerminatorKind::Goto { target } = &term.kind { - if self.num_visited_blocks > *target { + if self.visited_blocks.contains(target) { self.props.loops += 1; } } From 5aa08b7ceae860f7483c0f3aca9fff80222821d3 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Sat, 17 Aug 2024 01:53:57 -0500 Subject: [PATCH 3/7] Identify loops for terminator beyond goto --- tools/scanner/src/analysis.rs | 114 ++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 41 deletions(-) diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index 1d7753ae88d6..3f9d761a1aed 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -504,52 +504,84 @@ impl<'a> MirVisitor for IteratorVisitor<'a> { } fn visit_terminator(&mut self, term: &Terminator, location: Location) { - // A goto is identified as a loop latch if its target has been visited. - if let TerminatorKind::Goto { target } = &term.kind { - if self.visited_blocks.contains(target) { - self.props.loops += 1; + match &term.kind { + TerminatorKind::Goto { target } => { + // A goto is identified as a loop latch if its target has been visited. + if self.visited_blocks.contains(target) { + self.props.loops += 1; + } } - } - if let TerminatorKind::Call { func, .. } = &term.kind { - let kind = func.ty(self.body.locals()).unwrap().kind(); - if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { - let fullname = def.name(); - let names = fullname.split("::").collect::>(); - if let [.., s_last, last] = names.as_slice() { - if *s_last == "Iterator" - && [ - "for_each", - "collect", - "advance_by", - "all", - "any", - "partition", - "partition_in_place", - "fold", - "try_fold", - "spec_fold", - "spec_try_fold", - "try_for_each", - "for_each", - "try_reduce", - "reduce", - "find", - "find_map", - "try_find", - "position", - "rposition", - "nth", - "count", - "last", - "find", - ] - .contains(last) - { - self.props.iterators += 1; + TerminatorKind::SwitchInt { targets, .. } => { + let successors = targets.all_targets(); + // Check if any of the targets of SwitchInt has been visited. + if self.visited_blocks.contains(&successors[0]) + || self.visited_blocks.contains(&successors[1]) + { + self.props.loops += 1; + } + } + TerminatorKind::Drop { target, .. } => { + if self.visited_blocks.contains(target) { + self.props.loops += 1; + } + } + TerminatorKind::Call { func, .. } => { + let kind = func.ty(self.body.locals()).unwrap().kind(); + // Check if the target is a visited block. + + // Check if the call is an iterator function that contains loops. + if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { + let fullname = def.name(); + let names = fullname.split("::").collect::>(); + if let [.., s_last, last] = names.as_slice() { + if *s_last == "Iterator" + && [ + "for_each", + "collect", + "advance_by", + "all", + "any", + "partition", + "partition_in_place", + "fold", + "try_fold", + "spec_fold", + "spec_try_fold", + "try_for_each", + "for_each", + "try_reduce", + "reduce", + "find", + "find_map", + "try_find", + "position", + "rposition", + "nth", + "count", + "last", + "find", + ] + .contains(last) + { + self.props.iterators += 1; + } } } } + TerminatorKind::Assert { target, .. } => { + if self.visited_blocks.contains(target) { + self.props.loops += 1; + } + } + TerminatorKind::InlineAsm { destination, .. } => { + if let Some(target) = destination { + self.props.loops += self.visited_blocks.contains(target) as usize; + } + } + // No targets in other terminators. + _ => {} } + self.super_terminator(term, location) } } From b59205ab93c64cd40043fd6bc0fd978dd8488b5c Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Sat, 17 Aug 2024 13:07:14 -0500 Subject: [PATCH 4/7] Fix format --- tools/scanner/src/analysis.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index 3f9d761a1aed..0643c59a73e6 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -573,9 +573,9 @@ impl<'a> MirVisitor for IteratorVisitor<'a> { self.props.loops += 1; } } - TerminatorKind::InlineAsm { destination, .. } => { - if let Some(target) = destination { - self.props.loops += self.visited_blocks.contains(target) as usize; + TerminatorKind::InlineAsm { destination: Some(target), .. } => { + if self.visited_blocks.contains(target) { + self.props.loops += 1; } } // No targets in other terminators. From c329e5af2a37e132aeccb94ac3d43970b6ef8cb9 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Tue, 20 Aug 2024 12:40:42 -0500 Subject: [PATCH 5/7] Use successors to simplify match --- tools/scanner/src/analysis.rs | 120 +++++++++++++--------------------- 1 file changed, 47 insertions(+), 73 deletions(-) diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index 0643c59a73e6..b83a6657e924 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -504,82 +504,56 @@ impl<'a> MirVisitor for IteratorVisitor<'a> { } fn visit_terminator(&mut self, term: &Terminator, location: Location) { - match &term.kind { - TerminatorKind::Goto { target } => { - // A goto is identified as a loop latch if its target has been visited. - if self.visited_blocks.contains(target) { - self.props.loops += 1; - } - } - TerminatorKind::SwitchInt { targets, .. } => { - let successors = targets.all_targets(); - // Check if any of the targets of SwitchInt has been visited. - if self.visited_blocks.contains(&successors[0]) - || self.visited_blocks.contains(&successors[1]) - { - self.props.loops += 1; - } - } - TerminatorKind::Drop { target, .. } => { - if self.visited_blocks.contains(target) { - self.props.loops += 1; - } - } - TerminatorKind::Call { func, .. } => { - let kind = func.ty(self.body.locals()).unwrap().kind(); - // Check if the target is a visited block. - - // Check if the call is an iterator function that contains loops. - if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { - let fullname = def.name(); - let names = fullname.split("::").collect::>(); - if let [.., s_last, last] = names.as_slice() { - if *s_last == "Iterator" - && [ - "for_each", - "collect", - "advance_by", - "all", - "any", - "partition", - "partition_in_place", - "fold", - "try_fold", - "spec_fold", - "spec_try_fold", - "try_for_each", - "for_each", - "try_reduce", - "reduce", - "find", - "find_map", - "try_find", - "position", - "rposition", - "nth", - "count", - "last", - "find", - ] - .contains(last) - { - self.props.iterators += 1; - } + // Check if any successor has been visited---the terminator is a loop latch. + let successors = term.kind.successors(); + let mut contain_loop = false; + for target in successors { + contain_loop |= self.visited_blocks.contains(&target); + } + self.props.loops += contain_loop as usize; + + if let TerminatorKind::Call { func, .. } = &term.kind { + let kind = func.ty(self.body.locals()).unwrap().kind(); + // Check if the target is a visited block. + + // Check if the call is an iterator function that contains loops. + if let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind { + let fullname = def.name(); + let names = fullname.split("::").collect::>(); + if let [.., s_last, last] = names.as_slice() { + if *s_last == "Iterator" + && [ + "for_each", + "collect", + "advance_by", + "all", + "any", + "partition", + "partition_in_place", + "fold", + "try_fold", + "spec_fold", + "spec_try_fold", + "try_for_each", + "for_each", + "try_reduce", + "reduce", + "find", + "find_map", + "try_find", + "position", + "rposition", + "nth", + "count", + "last", + "find", + ] + .contains(last) + { + self.props.iterators += 1; } } } - TerminatorKind::Assert { target, .. } => { - if self.visited_blocks.contains(target) { - self.props.loops += 1; - } - } - TerminatorKind::InlineAsm { destination: Some(target), .. } => { - if self.visited_blocks.contains(target) { - self.props.loops += 1; - } - } - // No targets in other terminators. - _ => {} } self.super_terminator(term, location) From 0de96813500fb3c9cb03ba8c15bf5b4b3700f7b1 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Tue, 20 Aug 2024 15:11:34 -0500 Subject: [PATCH 6/7] Use graph algorithm to identify loops instead --- tests/script-based-pre/tool-scanner/test.rs | 4 ++-- tools/scanner/Cargo.toml | 2 ++ tools/scanner/src/analysis.rs | 24 +++++++++++++++------ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/script-based-pre/tool-scanner/test.rs b/tests/script-based-pre/tool-scanner/test.rs index 9eb3cb667bf3..f6a141f2a708 100644 --- a/tests/script-based-pre/tool-scanner/test.rs +++ b/tests/script-based-pre/tool-scanner/test.rs @@ -33,12 +33,12 @@ pub fn with_iterator(input: &[usize]) -> usize { .iter() .copied() .find(|e| *e == 0) - .unwrap_or_else(|| input.iter().fold(0, |acc, i| acc + 1)) + .unwrap_or_else(|| input.iter().fold(0, |acc, _| acc + 1)) } pub fn with_for_loop(input: &[usize]) -> usize { let mut res = 0; - for n in input { + for _ in input { res += 1; } res diff --git a/tools/scanner/Cargo.toml b/tools/scanner/Cargo.toml index edbd330bea47..f27e9e06c72c 100644 --- a/tools/scanner/Cargo.toml +++ b/tools/scanner/Cargo.toml @@ -15,6 +15,8 @@ csv = "1.3" serde = {version = "1", features = ["derive"]} strum = "0.26" strum_macros = "0.26" +petgraph = "0.6.5" +graph-cycles = "0.1.0" [package.metadata.rust-analyzer] # This crate uses rustc crates. diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index b83a6657e924..b32627939bf5 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -5,6 +5,8 @@ use crate::info; use csv::WriterBuilder; +use graph_cycles::Cycles; +use petgraph::graph::Graph; use serde::{ser::SerializeStruct, Serialize, Serializer}; use stable_mir::mir::mono::Instance; use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef}; @@ -473,7 +475,8 @@ fn_props! { impl FnLoops { pub fn collect(self, body: &Body) -> FnLoops { - let mut visitor = IteratorVisitor { props: self, body, visited_blocks: HashSet::new() }; + let mut visitor = + IteratorVisitor { props: self, body, graph: Vec::new(), current_bbidx: 0 }; visitor.visit_body(body); visitor.props } @@ -494,23 +497,30 @@ impl FnLoops { struct IteratorVisitor<'a> { props: FnLoops, body: &'a Body, - visited_blocks: HashSet, + graph: Vec<(u32, u32)>, + current_bbidx: u32, } impl<'a> MirVisitor for IteratorVisitor<'a> { + fn visit_body(&mut self, body: &Body) { + // First visit the body to build the control flow graph + self.super_body(body); + // Build the petgraph from the adj vec + let g = Graph::<(), ()>::from_edges(self.graph.clone()); + self.props.loops += g.cycles().len(); + } + fn visit_basic_block(&mut self, bb: &BasicBlock) { - self.visited_blocks.insert(self.body.blocks.iter().position(|b| *b == *bb).unwrap()); + self.current_bbidx = self.body.blocks.iter().position(|b| *b == *bb).unwrap() as u32; self.super_basic_block(bb); } fn visit_terminator(&mut self, term: &Terminator, location: Location) { - // Check if any successor has been visited---the terminator is a loop latch. + // Add edges between basic block into the adj table let successors = term.kind.successors(); - let mut contain_loop = false; for target in successors { - contain_loop |= self.visited_blocks.contains(&target); + self.graph.push((self.current_bbidx, target as u32)); } - self.props.loops += contain_loop as usize; if let TerminatorKind::Call { func, .. } = &term.kind { let kind = func.ty(self.body.locals()).unwrap().kind(); From 3415712c647f5028509c5b00cb77f2b8014269cc Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Tue, 20 Aug 2024 15:19:20 -0500 Subject: [PATCH 7/7] Cargo.lock --- Cargo.lock | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 1e8e743d5e56..6c968a7c4a4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,6 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "getrandom", "once_cell", "version_check", "zerocopy", @@ -349,6 +350,12 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "getopts" version = "0.2.21" @@ -375,6 +382,17 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "graph-cycles" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6ad932c6dd3cfaf16b66754a42f87bbeefd591530c4b6a8334270a7df3e853" +dependencies = [ + "ahash", + "petgraph", + "thiserror", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -723,6 +741,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -928,6 +956,8 @@ name = "scanner" version = "0.0.0" dependencies = [ "csv", + "graph-cycles", + "petgraph", "serde", "strum", "strum_macros",