-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implements reading/writing `FlexInt`, `FlexUInt`
#690
Changes from all commits
3d893d0
93c824a
9cce623
aafd9b2
0d0cabc
ad2a7f7
e0f24c7
783dd93
925d210
ba28563
1228ddf
948b539
7cdc9a1
26da664
436ac0a
ea70407
c572b6d
7a40aff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,10 +18,6 @@ jobs: | |
checks: write | ||
|
||
steps: | ||
- name: Remove MSys64 MingW64 Binaries | ||
if: runner.os == 'Windows' | ||
# remove this because there is a bad libclang.dll that confuses bindgen | ||
run: Remove-Item -LiteralPath "C:\msys64\mingw64\bin" -Force -Recurse | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This was necessary for |
||
- name: Install Dependencies | ||
if: runner.os == 'Windows' | ||
run: choco install llvm -y | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use rand::prelude::StdRng; | ||
use rand::{distributions::Uniform, Rng, SeedableRng}; | ||
use std::io; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This file is a suite of benchmarks comparing the time needed to read or write 10k random integers using |
||
|
||
use ion_rs::{FlexInt, FlexUInt, ImmutableBuffer, IonResult, VarInt, VarUInt}; | ||
|
||
// Fixed seed for the benchmark's random number generator. Rather than store a set of test | ||
// values, we hardcode a seed value and generate the same set on each run. | ||
const RNG_SEED: u64 = 1024; | ||
|
||
// The number of values (signed or unsigned) that will be read or written in each benchmark. | ||
// Both value generators in this file produce exactly this many elements. | ||
const NUM_VALUES: usize = 10_000; | ||
|
||
fn generate_unsigned_values(min: u64, max: u64) -> Vec<u64> { | ||
let mut rng = StdRng::seed_from_u64(RNG_SEED); | ||
let range = Uniform::new(min, max); | ||
|
||
(0..NUM_VALUES).map(|_| rng.sample(range)).collect() | ||
} | ||
|
||
fn generate_signed_values(min: i64, max: i64) -> Vec<i64> { | ||
let mut rng = StdRng::seed_from_u64(RNG_SEED); | ||
let range = Uniform::new(min, max); | ||
|
||
(0..NUM_VALUES).map(|_| rng.sample(range)).collect() | ||
} | ||
|
||
pub fn criterion_benchmark(c: &mut Criterion) { | ||
println!("# Values: {NUM_VALUES}"); | ||
|
||
// TODO: For now, these benchmarks only write values that can be serialized in 8 bytes or fewer. | ||
// This is because `VarUInt` has a bug[1] that causes it to encode very large u64s incorrectly. | ||
// [1]: https://github.com/amazon-ion/ion-rust/issues/689 | ||
let unsigned_values = generate_unsigned_values(u64::MIN, (2 << 49) - 1); | ||
let signed_values = generate_signed_values(-2 << 49, (2 << 49) - 1); | ||
|
||
// Roundtrip all of the values as 1.1 encoding primitives as a correctness/sanity check. | ||
// Save the encoded bytes of each value sequence; we'll check its length at the end of each | ||
// benchmark as another sanity check. VarUInt/FlexUint and VarInt/FlexInt are the same size. | ||
let encoded_var_uints = roundtrip_var_uint_test(&unsigned_values).unwrap(); | ||
let encoded_var_ints = roundtrip_var_int_test(&signed_values).unwrap(); | ||
let encoded_flex_uints = roundtrip_flex_uint_test(&unsigned_values).unwrap(); | ||
let encoded_flex_ints = roundtrip_flex_int_test(&signed_values).unwrap(); | ||
|
||
let mut binary_1_0_group = c.benchmark_group("binary 1.0"); | ||
binary_1_0_group.bench_function("write VarUInt", |b| { | ||
// `io::sink()` is an implementation of `io::Write` that simply discards the provided bytes | ||
// and declares success, analogous to `/dev/null`. This minimizes the I/O logic being | ||
// measured in each benchmark. | ||
let mut output = io::sink(); | ||
b.iter(|| { | ||
let mut encoded_length: usize = 0; | ||
for value in &unsigned_values { | ||
encoded_length += black_box(VarUInt::write_u64(&mut output, *value).unwrap()); | ||
} | ||
assert_eq!(encoded_length, encoded_flex_uints.len()); | ||
}) | ||
}); | ||
binary_1_0_group.bench_function("read VarUInt", |b| { | ||
b.iter(|| { | ||
let mut decoded_length: usize = 0; | ||
let mut input = ImmutableBuffer::new(encoded_var_uints.as_slice()); | ||
for _ in 0..unsigned_values.len() { | ||
let (var_uint, remaining) = input.read_var_uint().unwrap(); | ||
input = remaining; | ||
decoded_length += var_uint.size_in_bytes(); | ||
} | ||
assert_eq!(decoded_length, encoded_var_uints.len()); | ||
}) | ||
}); | ||
binary_1_0_group.bench_function("write VarInt", |b| { | ||
let mut output = io::sink(); | ||
b.iter(|| { | ||
let mut encoded_length: usize = 0; | ||
for value in &signed_values { | ||
encoded_length += black_box(VarInt::write_i64(&mut output, *value).unwrap()); | ||
} | ||
assert_eq!(encoded_length, encoded_flex_ints.len()); | ||
}) | ||
}); | ||
binary_1_0_group.bench_function("read VarInt", |b| { | ||
b.iter(|| { | ||
let mut decoded_length: usize = 0; | ||
let mut input = ImmutableBuffer::new(encoded_var_ints.as_slice()); | ||
for _ in 0..unsigned_values.len() { | ||
let (var_int, remaining) = input.read_var_int().unwrap(); | ||
input = remaining; | ||
decoded_length += var_int.size_in_bytes(); | ||
} | ||
assert_eq!(decoded_length, encoded_var_ints.len()); | ||
}) | ||
}); | ||
binary_1_0_group.finish(); | ||
|
||
let mut binary_1_1_group = c.benchmark_group("binary 1.1"); | ||
binary_1_1_group.bench_function("write FlexUInt", |b| { | ||
let mut output = io::sink(); | ||
b.iter(|| { | ||
let mut encoded_length: usize = 0; | ||
for value in &unsigned_values { | ||
encoded_length += black_box(FlexUInt::write_u64(&mut output, *value).unwrap()); | ||
} | ||
assert_eq!(encoded_length, encoded_flex_uints.len()); | ||
}) | ||
}); | ||
binary_1_1_group.bench_function("read FlexUInt", |b| { | ||
b.iter(|| { | ||
let mut decoded_length: usize = 0; | ||
let mut input = ImmutableBuffer::new(encoded_flex_uints.as_slice()); | ||
for _ in 0..unsigned_values.len() { | ||
let (flex_uint, remaining) = input.read_flex_uint().unwrap(); | ||
input = remaining; | ||
decoded_length += flex_uint.size_in_bytes(); | ||
} | ||
assert_eq!(decoded_length, encoded_flex_uints.len()); | ||
}) | ||
}); | ||
binary_1_1_group.bench_function("write FlexInt", |b| { | ||
let mut output = io::sink(); | ||
b.iter(|| { | ||
let mut encoded_length: usize = 0; | ||
for value in &signed_values { | ||
encoded_length += black_box(FlexInt::write_i64(&mut output, *value).unwrap()); | ||
} | ||
assert_eq!(encoded_length, encoded_flex_ints.len()); | ||
}) | ||
}); | ||
binary_1_1_group.bench_function("read FlexInt", |b| { | ||
b.iter(|| { | ||
let mut decoded_length: usize = 0; | ||
let mut input = ImmutableBuffer::new(encoded_flex_ints.as_slice()); | ||
for _ in 0..unsigned_values.len() { | ||
let (flex_int, remaining) = input.read_flex_int().unwrap(); | ||
input = remaining; | ||
decoded_length += flex_int.size_in_bytes(); | ||
} | ||
assert_eq!(decoded_length, encoded_flex_ints.len()); | ||
}) | ||
}); | ||
binary_1_1_group.finish(); | ||
} | ||
|
||
fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping unsigned values as VarUInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in unsigned_values { | ||
VarUInt::write_u64(&mut encoded_values_buffer, *value)?; | ||
} | ||
let mut decoded_values = Vec::new(); | ||
let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice()); | ||
for _ in 0..unsigned_values.len() { | ||
let (var_uint, remaining) = input.read_var_uint()?; | ||
input = remaining; | ||
decoded_values.push(var_uint.value() as u64); | ||
} | ||
assert_eq!(decoded_values.as_slice(), unsigned_values); | ||
Ok(encoded_values_buffer) | ||
} | ||
|
||
fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping signed values as VarInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in signed_values { | ||
VarInt::write_i64(&mut encoded_values_buffer, *value)?; | ||
} | ||
let mut decoded_values = Vec::new(); | ||
let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice()); | ||
for _ in 0..signed_values.len() { | ||
let (var_int, remaining) = input.read_var_int()?; | ||
input = remaining; | ||
decoded_values.push(var_int.value()); | ||
} | ||
assert_eq!(decoded_values.as_slice(), signed_values); | ||
Ok(encoded_values_buffer) | ||
} | ||
|
||
fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping unsigned values as FlexUInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in unsigned_values { | ||
FlexUInt::write_u64(&mut encoded_values_buffer, *value)?; | ||
} | ||
let mut decoded_values = Vec::new(); | ||
let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice()); | ||
for _ in 0..unsigned_values.len() { | ||
let (flex_uint, remaining) = input.read_flex_uint()?; | ||
input = remaining; | ||
decoded_values.push(flex_uint.value()); | ||
} | ||
assert_eq!(decoded_values.as_slice(), unsigned_values); | ||
Ok(encoded_values_buffer) | ||
} | ||
|
||
fn roundtrip_flex_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping signed values as FlexInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in signed_values { | ||
FlexInt::write_i64(&mut encoded_values_buffer, *value)?; | ||
} | ||
let mut decoded_values = Vec::new(); | ||
let mut input = ImmutableBuffer::new(encoded_values_buffer.as_slice()); | ||
for _ in 0..signed_values.len() { | ||
let (flex_int, remaining) = input.read_flex_int()?; | ||
input = remaining; | ||
decoded_values.push(flex_int.value()); | ||
} | ||
assert_eq!(decoded_values.as_slice(), signed_values); | ||
Ok(encoded_values_buffer) | ||
} | ||
|
||
// Registers `criterion_benchmark` in a benchmark group named `benches`, then hands that group to | ||
// `criterion_main!`. NOTE(review): per the Criterion docs, `criterion_main!` generates the | ||
// benchmark binary's `main` function -- confirm against the Criterion version in use. | ||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ use crate::IonType; | |
/// [Typed Value Formats](https://amazon-ion.github.io/ion-docs/docs/binary.html#typed-value-formats) | ||
/// section of the binary Ion spec. | ||
#[derive(Copy, Clone, Debug, PartialEq)] | ||
pub(crate) struct TypeDescriptor { | ||
pub struct TypeDescriptor { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ This PR makes a couple of types |
||
pub ion_type_code: IonTypeCode, | ||
pub ion_type: Option<IonType>, | ||
pub length_code: u8, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ Issue #353 was fixed in #688, which changed the CI task back to `windows-latest`.