From cc5fdb208094a035ee3f9ab9e2dd51b010a91033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Steven=20H=C3=A9=20=28S=C4=ABch=C3=A0ng=29?= Date: Sun, 24 Mar 2024 11:28:58 +0800 Subject: [PATCH] route object stats #138 --- route_verification/Cargo.lock | 17 +- route_verification/Cargo.toml | 7 +- route_verification/src/lib.rs | 2 +- .../stat_route_objects/Cargo.toml | 19 +++ .../stat_route_objects/src/lib.rs | 146 ++++++++++++++++++ .../stat_route_objects/src/main.rs | 14 ++ 6 files changed, 200 insertions(+), 5 deletions(-) create mode 100644 route_verification/stat_route_objects/Cargo.toml create mode 100644 route_verification/stat_route_objects/src/lib.rs create mode 100644 route_verification/stat_route_objects/src/main.rs diff --git a/route_verification/Cargo.lock b/route_verification/Cargo.lock index f347cce..6feacc7 100644 --- a/route_verification/Cargo.lock +++ b/route_verification/Cargo.lock @@ -549,6 +549,7 @@ dependencies = [ "ahash", "allocator-api2", "rayon", + "serde", ] [[package]] @@ -1321,7 +1322,7 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "route_verification" -version = "0.3.0" +version = "0.3.1" dependencies = [ "anyhow", "chardetng", @@ -1622,6 +1623,20 @@ dependencies = [ "log", ] +[[package]] +name = "stat_route_objects" +version = "0.1.0" +dependencies = [ + "anyhow", + "env_logger", + "hashbrown", + "log", + "rayon", + "route_verification", + "serde", + "serde_json", +] + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/route_verification/Cargo.toml b/route_verification/Cargo.toml index 09fdc1f..2b8849e 100644 --- a/route_verification/Cargo.toml +++ b/route_verification/Cargo.toml @@ -12,8 +12,9 @@ members = [ "shared_struct", # Extra library "graph", - # Binary + # Binaries "rib_stats", + "stat_route_objects", ] [workspace.dependencies] @@ -54,7 +55,7 @@ lex = { package = "route_verification_lex", path = "./lex", version = "0.2.0" } parse = { package = 
"route_verification_parse", path = "./parse", version = "0.2.0" } shared_struct = { package = "route_verification_shared_struct", path = "./shared_struct", version = "0.1.1" } -route_verification = { package = "route_verification", path = ".", version = "0.3.0" } +route_verification = { package = "route_verification", path = ".", version = "0.3.1" } [workspace.package] description = "Parse RPSL in the IRR to verify observed BGP routes" @@ -63,7 +64,7 @@ repository = "https://github.com/SichangHe/internet_route_verification" [package] name = "route_verification" -version = "0.3.0" +version = "0.3.1" edition = "2021" description.workspace = true diff --git a/route_verification/src/lib.rs b/route_verification/src/lib.rs index f5b84b5..5d8f5f2 100644 --- a/route_verification/src/lib.rs +++ b/route_verification/src/lib.rs @@ -3,7 +3,7 @@ use log::debug; pub use {as_rel, bgp, common_regex, io, ir, irr, lex, parse}; -mod fs; +pub mod fs; pub fn parse_one(args: Vec) -> Result<()> { if args.len() < 4 { diff --git a/route_verification/stat_route_objects/Cargo.toml b/route_verification/stat_route_objects/Cargo.toml new file mode 100644 index 0000000..9f84b18 --- /dev/null +++ b/route_verification/stat_route_objects/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "stat_route_objects" +version = "0.1.0" +edition = "2021" +description.workspace = true +license.workspace = true +repository.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow.workspace = true +env_logger.workspace = true +hashbrown = { workspace = true, features = ["serde"] } +log.workspace = true +rayon.workspace = true +route_verification.workspace = true +serde.workspace = true +serde_json.workspace = true diff --git a/route_verification/stat_route_objects/src/lib.rs b/route_verification/stat_route_objects/src/lib.rs new file mode 100644 index 0000000..02eb632 --- /dev/null +++ 
b/route_verification/stat_route_objects/src/lib.rs
@@ -0,0 +1,187 @@
+//! Collect `route` and `route6` objects from directories of RPSL database
+//! files and report the route names that are defined more than once.
+
+use std::{
+    fmt::Display,
+    fs::{read_dir, File},
+    io::{BufRead, BufWriter, Write},
+};
+
+use anyhow::{bail, Context, Result};
+use hashbrown::HashMap;
+use log::{debug, error, warn};
+use rayon::prelude::*;
+
+use route_verification::{
+    fs::open_file_w_correct_encoding,
+    irr::split_commas,
+    lex::{expressions, io_wrapper_lines, lines_continued, rpsl_objects, RpslExpr},
+};
+use serde::{Deserialize, Serialize};
+
+/// Scan every directory in `input_dirs` and report route names that are
+/// defined more than once.
+///
+/// The counts are logged at debug level, and the multiply-defined routes are
+/// written to `route_objects_defined_multiple_times.json` in the current
+/// working directory. Directories that fail to scan are logged and skipped.
+///
+/// # Errors
+///
+/// Fails if `input_dirs` is empty, or if the JSON file cannot be created or
+/// written.
+pub fn scan_dirs(input_dirs: &[String]) -> Result<()> {
+    if input_dirs.is_empty() {
+        bail!("No input directories specified.");
+    }
+
+    debug!("Starting to scan `{input_dirs:?}`.");
+    let all_scanned_routes: Vec<_> = input_dirs
+        .par_iter()
+        .rev()
+        .filter_map(|dir| match scan_dir(dir) {
+            Ok(routes) => Some(routes),
+            Err(why) => {
+                error!("Error scanning {dir}: {why:?}");
+                None
+            }
+        })
+        .flatten()
+        .collect();
+    debug!("Scanned `{input_dirs:?}`.");
+
+    // Group the route objects by name; the capacity is an upper bound.
+    let mut aggregated_routes: HashMap<String, Vec<Route>> =
+        HashMap::with_capacity(all_scanned_routes.len());
+    for route in all_scanned_routes {
+        aggregated_routes
+            .entry_ref(&route.name)
+            .or_default()
+            .push(route);
+    }
+
+    let total_n_route = aggregated_routes.len();
+    debug!("Aggregated {total_n_route} routes.");
+
+    let routes_defined_multiple_times: HashMap<_, _> = aggregated_routes
+        .iter()
+        .filter(|(_, routes)| routes.len() > 1)
+        .collect();
+    debug!(
+        "{} routes defined multiple times.",
+        routes_defined_multiple_times.len()
+    );
+
+    // NOTE(review): `mnt_by` lists are compared in order, so the same
+    // maintainers listed in a different order count as different; confirm
+    // that this is intended.
+    let route_defined_by_different_mntners: HashMap<_, _> = routes_defined_multiple_times
+        .iter()
+        .filter(|(_, routes)| {
+            let first_route = &routes[0];
+            routes[1..]
+                .iter()
+                .any(|route| route.mnt_by != first_route.mnt_by)
+        })
+        .collect();
+    debug!(
+        "{} routes defined by multiple maintainers.",
+        route_defined_by_different_mntners.len()
+    );
+
+    warn!("Dumping routes defined multiple times.");
+    // Buffer the output so the serializer's writes do not each hit the file.
+    let mut file = BufWriter::new(File::create("route_objects_defined_multiple_times.json")?);
+    serde_json::to_writer(&mut file, &routes_defined_multiple_times)?;
+    // Flush explicitly so that a write error is returned instead of being
+    // dropped silently when the buffer goes out of scope.
+    file.flush()?;
+
+    // The other two maps can also be dumped.
+    Ok(())
+}
+
+/// Scan every entry of `input_dir` in parallel and collect the route objects
+/// found in them.
+///
+/// Entries that cannot be read, opened or scanned are logged and skipped.
+///
+/// # Errors
+///
+/// Fails if `input_dir` itself cannot be read.
+pub fn scan_dir(input_dir: &str) -> Result<Vec<Route>> {
+    debug!("Starting to scan {input_dir}.");
+    let routes_in_dir = read_dir(input_dir)?
+        .par_bridge()
+        .map(|entry| {
+            let path = entry?.path();
+            let tag = path.to_string_lossy();
+            // Name the file in the error; the log message below only names the directory.
+            let reader = open_file_w_correct_encoding(&path)
+                .with_context(|| format!("opening {tag}"))?;
+            scan_db(tag, reader)
+        })
+        .filter_map(|maybe_routes| match maybe_routes {
+            Ok(routes) => Some(routes),
+            Err(why) => {
+                error!("Error scanning {input_dir}: {why:?}");
+                None
+            }
+        })
+        .flatten()
+        .collect();
+
+    debug!("Scanned {input_dir}.");
+    Ok(routes_in_dir)
+}
+
+/// The attributes of one `route` or `route6` object that this crate records.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct Route {
+    /// Name of the object, taken from the lexed object's `name`.
+    pub name: String,
+    /// Value of the last `origin` attribute, if any.
+    pub origin: Option<String>,
+    /// Entries of all `mnt-by` attributes, split with `split_commas`.
+    pub mnt_by: Vec<String>,
+    /// Value of the last `source` attribute, if any.
+    pub source: Option<String>,
+}
+
+/// Read the RPSL objects in `db` and collect the `route` and `route6` ones.
+///
+/// `tag` is only used to identify `db` in log messages.
+pub fn scan_db(tag: impl Display, db: impl BufRead) -> Result<Vec<Route>> {
+    debug!("Starting to scan {tag}.");
+    let mut routes = Vec::new();
+
+    for obj in rpsl_objects(io_wrapper_lines(db)) {
+        if !matches!(obj.class.as_str(), "route" | "route6") {
+            continue;
+        }
+
+        let mut origin = None;
+        let mut source = None;
+        let mut mnt_by = Vec::new();
+        for RpslExpr { key, expr } in expressions(lines_continued(obj.body.lines())) {
+            match key.as_str() {
+                // A repeated `origin` or `source` overwrites the earlier value.
+                "origin" => origin = Some(expr),
+                "mnt-by" => mnt_by.extend(split_commas(&expr).map(str::to_string)),
+                "source" => source = Some(expr),
+                _ => {}
+            }
+        }
+        mnt_by.shrink_to_fit();
+
+        routes.push(Route {
+            name: obj.name,
+            origin,
+            mnt_by,
+            source,
+        })
+    }
+    debug!("Scanned {tag}.");
+
+    Ok(routes)
+}
diff --git a/route_verification/stat_route_objects/src/main.rs
b/route_verification/stat_route_objects/src/main.rs
new file mode 100644
index 0000000..2965875
--- /dev/null
+++ b/route_verification/stat_route_objects/src/main.rs
@@ -0,0 +1,17 @@
+use std::env::args;
+
+use anyhow::{bail, Result};
+use stat_route_objects::scan_dirs;
+
+/// Initialize logging, then scan the directories named on the command line.
+///
+/// Every argument after the program name is treated as an input directory.
+fn main() -> Result<()> {
+    env_logger::init();
+    let argv: Vec<String> = args().collect();
+    // Skip the program name; at least one directory must follow it.
+    match argv.split_first() {
+        Some((_, input_dirs)) if !input_dirs.is_empty() => scan_dirs(input_dirs),
+        _ => bail!("Specify directories separated by spaces!"),
+    }
+}