Skip to content

Commit

Permalink
run bench on CI with generated CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
XiNiHa committed Jan 17, 2025
1 parent 66e293e commit 990c335
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 45 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/bench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Runs the Criterion benchmarks on every pull request and reports the results
# through github-action-benchmark.
name: Run Benchmarks
on: pull_request
# Write scopes requested for the benchmark action, which is configured below to
# push results (auto-push) and comment on the pull request (comment-always).
permissions:
  deployments: write
  contents: write
  pull-requests: write

jobs:
  benchmark:
    name: Benchmark
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@1ff72ee08e3cb84d84adba594e0a297990fc1ed3 # stable
      - uses: Swatinem/rust-cache@f0deed1e0edfc6a9be95417288c0e1099b1eeec3 # v2.7.7
      - run: cargo install cargo-criterion
      # `shell: bash` is set explicitly so the step runs with `-o pipefail`;
      # with the implicit default shell a failing `cargo criterion` would be
      # hidden behind the exit status of `tee`.
      - run: cargo criterion --output-format bencher 2>&1 | tee output.txt
        shell: bash
      - uses: benchmark-action/github-action-benchmark@d48d326b4ca9ba73ca0cd0d59f108f9e02a381c7 # v1.20.4
        with:
          name: Rust Benchmark
          # The bencher-style output written above is parsed with the "cargo" tool.
          tool: "cargo"
          output-file-path: output.txt
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-push: true
          alert-threshold: "120%"
          comment-always: true
          fail-on-alert: true
94 changes: 84 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ thiserror = { version = "2", default-features = false }
[dev-dependencies]
criterion = "0.5.1"
csv = "1.3.1"
memmap2 = "0.9.5"
rand = "0.8.5"

[[bench]]
name = "single_thread"
Expand Down
88 changes: 54 additions & 34 deletions benches/single_thread.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,42 @@
use std::{fs::File, hint::black_box, io::Cursor};
use std::{hint::black_box, io::Cursor};

use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion};
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
use lazycsv::{Csv, CsvIterItem};
use memchr::memchr_iter;
use memmap2::Mmap;

fn prepare(rows: usize) -> Vec<u8> {
let f = File::open(std::env::var("INPUT").unwrap()).unwrap();
let mmap = unsafe { Mmap::map(&f).unwrap() };
let mut lf_iter = memchr_iter(b'\n', &mmap);
let second_lf = lf_iter.nth(1).unwrap();
let ending_lf = lf_iter.nth(rows).unwrap();
let range = (second_lf + 1)..ending_lf;
let mut vec = Vec::with_capacity(range.len());
vec.extend_from_slice(&mmap[range]);
vec
use rand::{Rng, SeedableRng as _};

const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\",";
const ROWS: usize = 100_000;
const COLS: usize = 30;
const MIN_CHARS: usize = 3;
const MAX_CHARS: usize = 100;

/// Produces one random CSV field.
///
/// A length is drawn from `MIN_CHARS..MAX_CHARS` (upper bound exclusive), then
/// that many characters are picked from `CHARS` by random index. If the result
/// contains a comma or a double quote it is wrapped in double quotes, with
/// every embedded double quote doubled; otherwise it is returned unchanged.
fn gen_random_str<T: Rng>(rng: &mut T) -> String {
    // Draw the length first, then one index per character, in that order.
    let len = rng.gen_range(MIN_CHARS..MAX_CHARS);
    let mut field = String::with_capacity(len);
    for _ in 0..len {
        let idx = rng.gen_range(0..CHARS.len());
        field.push(char::from(CHARS[idx]));
    }

    let needs_quoting = field.chars().any(|c| matches!(c, ',' | '"'));
    if !needs_quoting {
        return field;
    }

    // Escape embedded quotes by doubling them before wrapping the field.
    let escaped = field.replace('"', "\"\"");
    format!("\"{}\"", escaped)
}

/// Builds the benchmark input as raw bytes: `ROWS` lines, each holding `COLS`
/// fields from `gen_random_str` separated by commas and terminated by `\n`.
///
/// The generator is seeded with a fixed value, so the produced buffer is the
/// same on every call.
fn prepare() -> Vec<u8> {
    // Capacity is only an estimate (midpoint field length times field count);
    // separators and quoting are not counted, so the buffer may still grow.
    let mut buf = Vec::with_capacity(ROWS * COLS * ((MAX_CHARS - MIN_CHARS) / 2 + MIN_CHARS));

    let mut rng = rand::rngs::StdRng::from_seed(*b"f3a90c67b3ca86afd62658c1b30f1f12");
    for _ in 0..ROWS {
        for col in 0..COLS {
            buf.extend_from_slice(gen_random_str(&mut rng).as_bytes());
            // Separate fields, but emit no trailing comma after the last column.
            // Derived from `COLS` so the row shape stays correct if it changes.
            if col + 1 < COLS {
                buf.push(b',');
            }
        }
        buf.push(b'\n');
    }

    buf
}

pub fn lazy_csv(b: &mut Bencher, slice: &[u8]) {
Expand All @@ -29,7 +51,7 @@ pub fn lazy_csv(b: &mut Bencher, slice: &[u8]) {

pub fn lazy_csv_into_rows(b: &mut Bencher, slice: &[u8]) {
b.iter(|| {
for row in Csv::new(slice).into_rows::<28>() {
for row in Csv::new(slice).into_rows::<COLS>() {
for cell in row.unwrap() {
black_box(cell.try_as_str().unwrap());
}
Expand All @@ -47,7 +69,7 @@ pub fn lazy_csv_raw(b: &mut Bencher, slice: &[u8]) {

pub fn lazy_csv_into_rows_raw(b: &mut Bencher, slice: &[u8]) {
b.iter(|| {
for row in Csv::new(slice).into_rows::<28>() {
for row in Csv::new(slice).into_rows::<COLS>() {
for cell in row.unwrap() {
black_box(cell);
}
Expand All @@ -72,23 +94,21 @@ pub fn csv(b: &mut Bencher, slice: &[u8]) {

/// Registers every parser benchmark in the "Parsers" Criterion group.
///
/// The input buffer is generated once by `prepare()` and lent to each
/// benchmark by reference, so all benchmarks parse the same bytes.
fn bench_parsers(c: &mut Criterion) {
    let mut group = c.benchmark_group("Parsers");

    group.sample_size(50);

    // Borrow the buffer directly: the benchmarks only read it, so cloning the
    // whole input for each registration is unnecessary.
    let buf = prepare();
    group.bench_with_input("lazy_csv", &buf, |b, buf| lazy_csv(b, buf));
    group.bench_with_input("lazy_csv (into_rows)", &buf, |b, buf| {
        lazy_csv_into_rows(b, buf)
    });
    group.bench_with_input("lazy_csv (raw)", &buf, |b, buf| lazy_csv_raw(b, buf));
    group.bench_with_input("lazy_csv (into_rows, raw)", &buf, |b, buf| {
        lazy_csv_into_rows_raw(b, buf)
    });
    group.bench_with_input("csv", &buf, |b, buf| csv(b, buf));
    group.finish();
}

Expand Down

0 comments on commit 990c335

Please sign in to comment.