From 23369f22be12fcbe2170e388b8d90816935a5acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Steven=20H=C3=A9=20=28S=C4=ABch=C3=A0ng=29?= Date: Sun, 31 Mar 2024 19:22:32 +0800 Subject: [PATCH] gzip all stats before writing to disk #141 --- route_verification/Cargo.lock | 20 +++++++++++++++++++ route_verification/Cargo.toml | 1 + route_verification/rib_stats/Cargo.toml | 1 + route_verification/rib_stats/src/main.rs | 25 +++++++++++++++--------- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/route_verification/Cargo.lock b/route_verification/Cargo.lock index da90822..3a97f44 100644 --- a/route_verification/Cargo.lock +++ b/route_verification/Cargo.lock @@ -332,6 +332,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -509,6 +518,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "foreign_vec" version = "0.1.0" @@ -1509,6 +1528,7 @@ dependencies = [ "anyhow", "dashmap", "env_logger", + "flate2", "human-duration", "log", "rayon", diff --git a/route_verification/Cargo.toml b/route_verification/Cargo.toml index 84af4ec..c9d2d20 100644 --- a/route_verification/Cargo.toml +++ b/route_verification/Cargo.toml @@ -27,6 +27,7 @@ dashmap = "5.5" encoding_rs = "0.8" encoding_rs_io = "0.1" env_logger = "0.11" +flate2 = "1" hashbrown = { version = "0.14", features = ["rayon"] } ipnet = { version = "2.9", features = ["serde"] } itertools = "0.12" diff --git a/route_verification/rib_stats/Cargo.toml b/route_verification/rib_stats/Cargo.toml index 6adf0cf..46829eb 100644 --- a/route_verification/rib_stats/Cargo.toml +++ b/route_verification/rib_stats/Cargo.toml @@ -12,6 +12,7 @@ repository.workspace = true anyhow.workspace = true dashmap.workspace = true env_logger.workspace = true +flate2.workspace = true human-duration = "0.1" log.workspace = true rayon.workspace = true diff --git a/route_verification/rib_stats/src/main.rs b/route_verification/rib_stats/src/main.rs index 86d885b..980eb77 100644 --- a/route_verification/rib_stats/src/main.rs +++ b/route_verification/rib_stats/src/main.rs @@ -10,6 +10,7 @@ use std::{ use anyhow::Result; use dashmap::DashMap; +use flate2::{write::GzEncoder, Compression}; use human_duration::human_duration; use log::{debug, error, info}; use rayon::prelude::*; @@ -83,10 +84,10 @@ fn process_rib_file(query: &QueryIr, db: &AsRelDb, rib_file: &Path) -> Result<() .next() .expect("First split always succeeds."); - let route_stats_filename = format!("{collector}--route_stats2.csv"); - let route_first_hop_stats_filename = format!("{collector}--route_first_hop_stats2.csv"); - let as_stats_filename = format!("{collector}--as_stats2.csv"); - let as_pair_stats_filename = format!("{collector}--as_pair_stats2.csv"); + let route_stats_filename = format!("{collector}--route_stats2.csv.gz"); + let route_first_hop_stats_filename = format!("{collector}--route_first_hop_stats2.csv.gz"); + let as_stats_filename = format!("{collector}--as_stats2.csv.gz"); + let as_pair_stats_filename = format!("{collector}--as_pair_stats2.csv.gz"); if [ &route_stats_filename, &route_first_hop_stats_filename, @@ -126,7 +127,7 @@ fn process_rib_file(query: &QueryIr, db: &AsRelDb, rib_file: &Path) -> Result<() let (route_stats_sender, route_stats_receiver) = channel::>(); let route_stats_writer = { - let mut route_stats_file = BufWriter::new(File::create(route_stats_filename)?); + let mut route_stats_file = gzip_file(route_stats_filename)?; route_stats_file.write_all(csv_header.trim_end_matches(',').as_bytes())?; route_stats_file.write_all(b"\n")?; @@ -143,8 +144,7 @@ fn process_rib_file(query: &QueryIr, db: &AsRelDb, rib_file: &Path) -> Result<() let (route_first_hop_stats_sender, route_first_hop_stats_receiver) = channel::>(); let route_first_hop_stats_writer = { - let mut route_first_hop_stats_file = - BufWriter::new(File::create(route_first_hop_stats_filename)?); + let mut route_first_hop_stats_file = gzip_file(route_first_hop_stats_filename)?; route_first_hop_stats_file.write_all(csv_header.trim_end_matches(',').as_bytes())?; route_first_hop_stats_file.write_all(b"\n")?; @@ -213,7 +213,7 @@ fn process_rib_file(query: &QueryIr, db: &AsRelDb, rib_file: &Path) -> Result<() { let start = Instant::now(); - let mut as_stats_file = BufWriter::new(File::create(as_stats_filename)?); + let mut as_stats_file = gzip_file(as_stats_filename)?; as_stats_file.write_all(b"aut_num,")?; as_stats_file.write_all(csv_header.trim_end_matches(',').as_bytes())?; as_stats_file.write_all(b"\n")?; @@ -233,7 +233,7 @@ fn process_rib_file(query: &QueryIr, db: &AsRelDb, rib_file: &Path) -> Result<() { let start = Instant::now(); - let mut as_pair_stats_file = BufWriter::new(File::create(as_pair_stats_filename)?); + let mut as_pair_stats_file = gzip_file(as_pair_stats_filename)?; as_pair_stats_file.write_all(b"from,to,")?; as_pair_stats_file.write_all(csv_header.as_bytes())?; as_pair_stats_file.write_all(b"relationship\n")?; @@ -266,3 +266,10 @@ fn process_rib_file(query: &QueryIr, db: &AsRelDb, rib_file: &Path) -> Result<() Ok(()) } + +fn gzip_file(path: impl AsRef) -> Result>> { + Ok(GzEncoder::new( + BufWriter::new(File::create(path)?), + Compression::default(), + )) +}