diff --git a/src/bench.rs b/src/bench.rs
index 3e99a4c..7132c4a 100644
--- a/src/bench.rs
+++ b/src/bench.rs
@@ -37,7 +37,12 @@
 //!
 //! ## Output Format
 //!
-//! **Throughput-only measurement (default case)**
+//! Currently, all output is in plain text. This makes it easy to process with shell scripts and
+//! tools such as gnuplot. If new fields are added to the output, they will be appended after the
+//! existing entries (but before the `cdf` data if it exists, see below) to make sure output from
+//! older versions can still be processed without changes.
+//!
+//! ### Throughput-only Output (default case)
 //!
 //! When measuring throughput, an output may look like the following:
 //! ```txt
@@ -64,7 +69,7 @@
 //! - `<total>`: the total key-value operations executed by all worker threads in the repeat/phase.
 //! - `<mops>`: followed by the throughput in million operations per second of the repeat/phase.
 //!
-//! **Throughput+Latency measurement (when `latency` is `true`)**
+//! ### Throughput + Latency Output (when `latency` is `true`)
 //!
 //! When latency measurement is enabled, the latency metrics shall be printed at the end of each
 //! benchmark. It is not shown after each repeat, because unlike throughput which is a singleton
@@ -76,23 +81,52 @@
 //! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00
 //! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00
 //! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00
-//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100
+//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100
 //! ```
 //!
 //! The extra output on the last line has a format of:
 //!
 //! ```txt
-//! min_ns <min> max_ns <max> p50_ns <p50> p95_ns <p95> p99_ns <p99> p999_ns <p999>
+//! min_ns <min> max_ns <max> avg_ns <avg> p50_ns <p50> p95_ns <p95> p99_ns <p99> p999_ns <p999>
 //! ```
 //!
 //! Where (all units are nanoseconds):
 //!
 //! - `<min>`: minimum latency
 //! - `<max>`: maximum latency
+//! - `<avg>`: mean latency
 //! - `<p50>`: median latency (50th percentile)
 //! - `<p95>`: P95 latency
 //! - `<p99>`: P99 latency
 //! - `<p999>`: P999 latency (99.9%)
+//!
+//! ### Throughput + Latency + Latency CDF Output (when both `latency` and `cdf` are `true`)
+//!
+//! When `cdf` is enabled, the latency CDF data will be printed at the end of the same line as the
+//! latency metrics above. In that case, the output will look like the following:
+//!
+//! ```txt
+//! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00
+//! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00
+//! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00
+//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100 cdf_ns ...
+//! ```
+//! Since the latency distribution varies between benchmarks and runs, the number of data points
+//! in the CDF also varies. Therefore, it is only printed at the very end of the line. It is
+//! printed as `<ns> <count>` pairs, where `<ns>` is a latency in nanoseconds and `<count>` is the
+//! accumulated number of operations up to `<ns>`. There can be an arbitrary number of pairs; the
+//! output ends when the maximum recorded latency is reached.
+//!
+//! An example of the CDF data looks like the following:
+//!
+//! ```txt
+//! cdf_ns 0 0 1 0 2 0 3 10 4 10 5 20 ...
+//! ```
+//!
+//! It means there are no data points at 0/1/2 nanoseconds. At 3 nanoseconds, there are 10 data
+//! points. At 4 nanoseconds, there are no new data points, so the accumulated count stays at 10.
+//! At 5 nanoseconds, there are another 10 data points, which brings the total to 20. Users can
+//! post-process the output to make a smooth CDF plot out of it.
 
 use crate::stores::{BenchKVMap, BenchKVMapOpt};
 use crate::thread::{JoinHandle, Thread};
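> Editor's note: since the format above is stable plain text, the `cdf_ns` pairs can be turned
> into percentiles with a few lines of code. The sketch below is editorial and not part of this
> patch; the input line is the sample "finish" line from the documentation above, and the parsing
> approach is just one possibility.

```rust
// Editorial sketch (not part of the patch): convert the `cdf_ns` pairs of a
// "finish" line into percentiles. The input is the sample line from the docs.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let line = "3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 \
                min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100 \
                cdf_ns 0 0 1 0 2 0 3 10 4 10 5 20";
    // everything after the `cdf_ns` marker is a flat list of `<ns> <count>` pairs
    let tail = line.split("cdf_ns").nth(1).ok_or("no cdf_ns section")?;
    let nums = tail
        .split_whitespace()
        .map(str::parse::<u64>)
        .collect::<Result<Vec<_>, _>>()?;
    // counts are already accumulated, so the last pair carries the total
    let total = *nums.last().ok_or("empty cdf")? as f64;
    for pair in nums.chunks_exact(2) {
        println!("{} ns -> {:.1}%", pair[0], 100.0 * pair[1] as f64 / total);
    }
    Ok(())
}
```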
@@ -114,7 +148,7 @@ use std::time::Duration;
 // {{{ benchmark
 
 /// Length determines when a benchmark should stop or how often the metrics should be collected.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 enum Length {
     /// Each worker thread syncs after a timeout (e.g., 0.1s).
     Timeout(Duration),
@@ -195,6 +229,12 @@ pub struct BenchmarkOpt {
     /// Default: false.
     pub latency: Option<bool>,
 
+    /// Whether or not to print out the latency CDF at the end of each benchmark. If this is set
+    /// to `true`, `latency` must also be set to `true`.
+    ///
+    /// Default: false.
+    pub cdf: Option<bool>,
+
     /// The definition of a workload.
     ///
     /// This section is embedded and flattened, so that you can directly use options in
@@ -207,6 +247,9 @@ impl BenchmarkOpt {
     /// Internal function called after all global options are applied and when all the options are
     /// set. This will test if the opt can be a valid benchmark. It does not check the workload's
     /// configuration, as it will be checked when a workload instance is created.
+    ///
+    /// Note: `timeout` and `ops` may be unset at this point, so they are not checked here. They
+    /// are converted to a `Length` when a new benchmark object is created.
     fn sanity(&self) {
         // these must be present, so `unwrap` won't panic.
         assert!(
@@ -221,6 +264,12 @@ impl BenchmarkOpt {
             "hidden" | "repeat" | "finish" | "all" => {}
             _ => panic!("report mode should be one of: hidden, repeat, finish, all"),
         }
+        if let Some(true) = self.cdf {
+            assert!(
+                *self.latency.as_ref().unwrap(),
+                "when cdf is true, latency must also be true"
+            );
+        }
         assert!(
             *self.qd.as_ref().unwrap() > 0,
             "queue depth should be positive if given"
         );
@@ -233,7 +282,7 @@ impl BenchmarkOpt {
     }
 }
 
 /// The configuration of a benchmark, parsed from user's input.
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub struct Benchmark {
     threads: usize,
     repeat: usize,
@@ -242,6 +291,7 @@ pub struct Benchmark {
     len: Length,
     report: ReportMode,
     latency: bool,
+    cdf: bool,
     wopt: WorkloadOpt,
 }
 
@@ -258,7 +308,10 @@ impl Benchmark {
         let batch = opt.batch.unwrap();
         // handle length in the following, now 3 modes
         let len = if let Some(t) = opt.timeout {
-            assert!(opt.ops.is_none());
+            assert!(
+                opt.ops.is_none(),
+                "timeout and ops cannot be provided at the same time"
+            );
             Length::Timeout(Duration::from_secs_f32(t))
         } else if let Some(c) = opt.ops {
             Length::Count(c)
@@ -273,6 +326,7 @@ impl Benchmark {
             _ => panic!("Invalid report mode provided"),
         };
         let latency = opt.latency.unwrap();
+        let cdf = opt.cdf.unwrap();
         let wopt = opt.workload.clone();
         Self {
             threads,
@@ -282,6 +336,7 @@ impl Benchmark {
             len,
             report,
             latency,
+            cdf,
             wopt,
         }
     }
@@ -303,6 +358,7 @@ pub struct GlobalOpt {
     pub batch: Option<usize>,
     pub report: Option<String>,
     pub latency: Option<bool>,
+    pub cdf: Option<bool>,
     pub klen: Option<usize>,
     pub vlen: Option<usize>,
     pub kmin: Option<usize>,
@@ -318,6 +374,7 @@ impl Default for GlobalOpt {
             batch: None,
             report: None,
             latency: None,
+            cdf: None,
             klen: None,
             vlen: None,
             kmin: None,
@@ -341,6 +398,10 @@ impl GlobalOpt {
             .latency
             .clone()
             .or_else(|| Some(self.latency.clone().unwrap_or(false)));
+        opt.cdf = opt
+            .cdf
+            .clone()
+            .or_else(|| Some(self.cdf.clone().unwrap_or(false)));
         // the workload options (must be specified)
         opt.workload.klen = opt
             .workload
@@ -639,15 +700,31 @@ fn bench_stat_final(
         assert_eq!(total, latency.hdr.len());
         let hdr = &latency.hdr;
         print!(
-            "min_ns {} max_ns {} p50_ns {} p95_ns {} p99_ns {} p999_ns {}",
+            "min_ns {} max_ns {} avg_ns {:.2} p50_ns {} p95_ns {} p99_ns {} p999_ns {}",
            hdr.min(),
             hdr.max(),
+            hdr.mean(),
             hdr.value_at_quantile(0.50),
             hdr.value_at_quantile(0.95),
             hdr.value_at_quantile(0.99),
             hdr.value_at_quantile(0.999),
         );
+        if benchmark.cdf {
+            print!(" cdf_ns ");
+            let mut cdf = 0;
+            for ns in latency.hdr.iter_all() {
+                let val = ns.value_iterated_to();
+                cdf += ns.count_at_value();
+                print!("{} {}", val, cdf);
+                if val == hdr.max() {
+                    break;
+                }
+                print!(" ");
+            }
+            assert_eq!(cdf, total);
+        }
     }
+    println!();
     seq.fetch_add(1, Ordering::Relaxed);
 }
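> Editor's note: for readers unfamiliar with how the `cdf_ns` loop above terminates, `iter_all`
> on an HDR histogram walks every bucket from zero upward, so the added code accumulates
> per-bucket counts and stops once the maximum recorded value has been printed. Below is a
> standalone sketch of the same idea (not part of the patch), assuming `latency.hdr` is a
> histogram from the `hdrhistogram` crate, as the method calls suggest; the sample values are
> made up.

```rust
use hdrhistogram::Histogram; // assumed dependency: hdrhistogram = "7"

fn main() {
    // record a handful of made-up latencies, in nanoseconds
    let mut hdr = Histogram::<u64>::new(3).expect("failed to create histogram");
    for ns in [3u64, 3, 3, 5, 5, 9] {
        hdr.record(ns).expect("value out of range");
    }
    // walk every bucket from 0 upward, accumulating counts so that each printed
    // pair is `<ns> <cumulative count>`, just like the `cdf_ns` output above
    let mut cdf = 0;
    for v in hdr.iter_all() {
        cdf += v.count_at_value();
        print!("{} {} ", v.value_iterated_to(), cdf);
        if v.value_iterated_to() == hdr.max() {
            break; // every bucket past the max is empty, so stop here
        }
    }
    println!();
    // the accumulated count equals the number of recorded samples
    assert_eq!(cdf, hdr.len());
}
```

> Running it prints `0 0 1 0 2 0 3 3 4 3 5 5 6 5 7 5 8 5 9 6`, the same cumulative shape as the
> documented example.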
@@ -1050,6 +1127,189 @@ pub fn bench_async(
 mod tests {
     use super::*;
 
+    #[test]
+    fn global_options_are_applied() {
+        let opt = r#"
+        [map]
+        name = "nullmap"
+
+        [global]
+        threads = 8
+        repeat = 10
+        qd = 10
+        batch = 15
+        report = "finish"
+        latency = true
+        cdf = true
+        klen = 8
+        vlen = 16
+        kmin = 100
+        kmax = 1000
+
+        [[benchmark]]
+        timeout = 10.0
+        set_perc = 100
+        get_perc = 0
+        del_perc = 0
+        dist = "incrementp"
+        "#;
+
+        let (_, bg) = init(opt);
+        assert_eq!(bg.len(), 1);
+
+        let wopt = WorkloadOpt {
+            set_perc: 100,
+            get_perc: 0,
+            del_perc: 0,
+            dist: "incrementp".to_string(),
+            klen: Some(8),
+            vlen: Some(16),
+            kmin: Some(100),
+            kmax: Some(1000),
+            zipf_theta: None,
+            zipf_hotspot: None,
+        };
+
+        let benchmark = Benchmark {
+            threads: 8,
+            repeat: 10,
+            qd: 10,
+            batch: 15,
+            report: ReportMode::Finish,
+            latency: true,
+            cdf: true,
+            len: Length::Timeout(Duration::from_secs_f32(10.0)),
+            wopt,
+        };
+
+        assert_eq!(*bg[0], benchmark)
+    }
+
+    #[test]
+    #[should_panic(expected = "should be positive")]
+    fn invalid_threads() {
+        let opt = r#"
+        [map]
+        name = "nullmap"
+
+        [global]
+        klen = 8
+        vlen = 16
+        kmin = 0
+        kmax = 1000
+
+        [[benchmark]]
+        threads = 0
+        timeout = 1.0
+        set_perc = 100
+        get_perc = 0
+        del_perc = 0
+        dist = "incrementp"
+        "#;
+
+        let (_, _) = init(opt);
+    }
+
+    #[test]
+    #[should_panic(expected = "should be positive")]
+    fn invalid_repeat() {
+        let opt = r#"
+        [map]
+        name = "nullmap"
+
+        [global]
+        klen = 8
+        vlen = 16
+        kmin = 0
+        kmax = 1000
+
+        [[benchmark]]
+        repeat = 0
+        timeout = 1.0
+        set_perc = 100
+        get_perc = 0
+        del_perc = 0
+        dist = "incrementp"
+        "#;
+
+        let (_, _) = init(opt);
+    }
+
+    #[test]
+    #[should_panic(expected = "report mode should be one of")]
+    fn invalid_report() {
+        let opt = r#"
+        [map]
+        name = "nullmap"
+
+        [global]
+        klen = 8
+        vlen = 16
+        kmin = 0
+        kmax = 1000
+
+        [[benchmark]]
+        timeout = 1.0
+        set_perc = 100
+        get_perc = 0
+        del_perc = 0
+        dist = "incrementp"
+        report = "alll"
+        "#;
+
+        let (_, _) = init(opt);
+    }
+
+    #[test]
+    #[should_panic(expected = "cannot be provided at the same time")]
+    fn invalid_length() {
+        let opt = r#"
+        [map]
+        name = "nullmap"
+
+        [global]
+        klen = 8
+        vlen = 16
+        kmin = 0
+        kmax = 1000
+
+        [[benchmark]]
+        timeout = 1.0
+        ops = 1000
+        set_perc = 100
+        get_perc = 0
+        del_perc = 0
+        dist = "incrementp"
+        "#;
+
+        let (_, _) = init(opt);
+    }
+
+    #[test]
+    #[should_panic(expected = "latency must also be true")]
+    fn invalid_latency() {
+        let opt = r#"
+        [map]
+        name = "nullmap"
+
+        [global]
+        klen = 8
+        vlen = 16
+        kmin = 0
+        kmax = 1000
+
+        [[benchmark]]
+        timeout = 1.0
+        cdf = true
+        set_perc = 100
+        get_perc = 0
+        del_perc = 0
+        dist = "incrementp"
+        "#;
+
+        let (_, _) = init(opt);
+    }
+
     const EXAMPLE_BENCH: &str = include_str!(concat!(
         env!("CARGO_MANIFEST_DIR"),
         "/presets/benchmarks/example.toml"
diff --git a/src/workload.rs b/src/workload.rs
index 69fe11a..faf3fc1 100644
--- a/src/workload.rs
+++ b/src/workload.rs
@@ -115,7 +115,7 @@ impl KeyGenerator {
 ///
 /// This struct is used for interacting with workload configuration files and also creating new
 /// [`Workload`] instances.
-#[derive(Deserialize, Clone, Debug)]
+#[derive(Deserialize, Clone, Debug, PartialEq)]
 pub struct WorkloadOpt {
     /// Percentage of `SET` operations.
     pub set_perc: u8,
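> Editor's note: the new `cdf` flag inherits between config sections exactly like `latency`, as
> exercised by `global_options_are_applied` above: the `impl GlobalOpt` hunk gives a
> per-benchmark value precedence, then the global value, then a default of `false`. A distilled
> sketch of that rule follows; `merge_flag` is a hypothetical helper for illustration only, not
> an API of this crate.

```rust
// Distilled from the `impl GlobalOpt` hunk in src/bench.rs above: a benchmark-level
// setting wins, otherwise the [global] setting applies, otherwise the default is false.
fn merge_flag(benchmark: Option<bool>, global: Option<bool>) -> Option<bool> {
    benchmark.or_else(|| Some(global.unwrap_or(false)))
}

fn main() {
    // `cdf = true` under [[benchmark]] overrides anything under [global]
    assert_eq!(merge_flag(Some(true), Some(false)), Some(true));
    // unset at the benchmark level falls back to the [global] value
    assert_eq!(merge_flag(None, Some(true)), Some(true));
    // unset everywhere: cdf (and latency) default to false
    assert_eq!(merge_flag(None, None), Some(false));
}
```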