Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial ZeroHashMap #2579

Merged
merged 39 commits into from
Feb 13, 2023
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
d455634
add core functionality of ZeroHashMap based on CHD algorithm
pdogr Sep 19, 2022
9c4bba2
remove keys from HashIndex,
pdogr Sep 19, 2022
4423552
rename variables, use vec! macro
pdogr Sep 19, 2022
3aa891e
remove #[macro_use] and directly import macro
pdogr Sep 19, 2022
9d01fd5
add benchmarks for zerohashmap
pdogr Sep 20, 2022
2ee9633
make functions inline
pdogr Sep 20, 2022
92700b4
add hashmap feature for ZeroHashMapStatic
pdogr Sep 21, 2022
5bd7ddb
remove unnecessary Iterator impl
pdogr Sep 21, 2022
027a3ce
Merge branch 'unicode-org:main' into hm
pdogr Sep 21, 2022
3a17393
Apply reverse permutation in HashIndex building using zvl_permute
pdogr Sep 21, 2022
9284bde
Derive serde for HashIndex, ZeroHashMapStatic
pdogr Sep 21, 2022
6ebde0e
modify generation algorithm using hashing only once
pdogr Sep 23, 2022
1d5f211
replace ahash with wyhash
pdogr Sep 23, 2022
305038f
Merge branch 'unicode-org:main' into hm
pdogr Sep 23, 2022
69a3355
Merge branch 'unicode-org:main' into hm
pdogr Dec 19, 2022
d9c52f5
Merge branch 'unicode-org:main' into hm
pdogr Dec 24, 2022
6781a16
Move zerohashmap into a separate module
pdogr Dec 24, 2022
12045d5
Merge branch 'main' into hm
pdogr Jan 3, 2023
40e89a6
move everything to mod.rs
pdogr Jan 8, 2023
553705b
Use t1ha hash function
pdogr Jan 8, 2023
7bf72b0
s/ZeroHashMapStatic/ZeroHashMap
pdogr Jan 8, 2023
061a38e
Add docs, fix zhm lookup bench, add zhm deserialize bench
pdogr Jan 9, 2023
b0d7b8b
add zhm deserialize benches
pdogr Jan 9, 2023
3d10558
remove hashindex and refactor code
pdogr Jan 12, 2023
35b8393
move common functions to algorithms module
pdogr Jan 12, 2023
52d9e79
impl FromIterator for zhm
pdogr Jan 12, 2023
1cebd79
Update utils/zerovec/src/hashmap/mod.rs
pdogr Jan 12, 2023
3e1a1d8
remove borrow, pub changes
pdogr Jan 12, 2023
51e577e
add Hash to make_ule
pdogr Jan 13, 2023
900c668
add contains_key, iter_keys, iter_values, iter
pdogr Jan 13, 2023
575a040
minor benchmark refactor
pdogr Jan 13, 2023
60f4d7f
pass m as usize
pdogr Jan 19, 2023
ffe15f6
fix maths
pdogr Jan 19, 2023
dbe8882
remove inlining
pdogr Jan 19, 2023
8e6dae9
custom serde for zerohashmap to bake in length validations
pdogr Feb 9, 2023
96fd1c2
revert back to u32 arithmetic
pdogr Feb 9, 2023
a2abdde
Merge branch 'main' into hm
pdogr Feb 9, 2023
715f667
fix clippy errors
pdogr Feb 9, 2023
7811713
add derive Hash to make_ule
pdogr Feb 10, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion utils/zerovec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ yoke = { version = "0.6.0", path = "../yoke", optional = true }
zerofrom = { version = "0.1.0", path = "../zerofrom" }
zerovec-derive = {version = "0.9.0", path = "./derive", optional = true}
databake = { version = "0.1.0", path = "../../utils/databake", features = ["derive"], optional = true }
t1ha = { version = "0.1", optional = true }

[dev-dependencies]
icu_benchmark_macros = { version = "0.7", path = "../../tools/benchmark/macros" }
Expand All @@ -51,6 +52,7 @@ zerofrom = { version = "0.1.0", path = "../zerofrom", features = ["derive"] }
bench = []
std = []
derive = ["zerovec-derive"]
hashmap = ["dep:t1ha"]

[[bench]]
name = "zerovec"
Expand All @@ -72,7 +74,7 @@ harness = false
[[bench]]
name = "zeromap"
harness = false
required-features = ["serde"]
required-features = ["serde", "hashmap"]

[[example]]
name = "zv_serde"
Expand Down
Binary file not shown.
151 changes: 142 additions & 9 deletions utils/zerovec/benches/zeromap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};

use zerovec::maps::ZeroMapKV;
use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec};
use zerovec::ZeroMap;
use zerovec::{ZeroHashMap, ZeroMap};

const DATA: [(&str, &str); 16] = [
("ar", "Arabic"),
Expand Down Expand Up @@ -56,10 +56,30 @@ const POSTCARD_HASHMAP: [u8; 176] = [
114, 97, 98, 105, 99,
];

// Postcard-encoded bytes that the small `ZeroHashMap` benchmarks deserialize
// with `postcard::from_bytes`.
// NOTE(review): presumably the output of `generate_zerohashmap()` for the
// 16-entry `DATA` table; regenerate it if the serialized layout changes.
const POSTCARD_ZEROHASHMAP: [u8; 412] = [
128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 11, 0,
0, 0, 13, 0, 0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0,
0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 101, 110, 102, 114, 106, 97, 101, 108, 99, 104, 114,
98, 110, 115, 114, 105, 117, 101, 111, 116, 114, 99, 99, 112, 122, 104, 114, 117, 101, 115,
116, 104, 97, 114, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 13, 0, 0, 0, 21, 0, 0, 0, 26,
0, 0, 0, 34, 0, 0, 0, 40, 0, 0, 0, 47, 0, 0, 0, 56, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 78, 0,
0, 0, 85, 0, 0, 0, 92, 0, 0, 0, 99, 0, 0, 0, 103, 0, 0, 0, 69, 110, 103, 108, 105, 115, 104,
70, 114, 101, 110, 99, 104, 74, 97, 112, 97, 110, 101, 115, 101, 71, 114, 101, 101, 107, 67,
104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 83, 101, 114, 98, 105, 97, 110,
73, 110, 117, 107, 116, 105, 116, 117, 116, 69, 115, 112, 101, 114, 97, 110, 116, 111, 84, 117,
114, 107, 105, 115, 104, 67, 104, 97, 107, 109, 97, 67, 104, 105, 110, 101, 115, 101, 82, 117,
115, 115, 105, 97, 110, 83, 112, 97, 110, 105, 115, 104, 84, 104, 97, 105, 65, 114, 97, 98,
105, 99,
];

/// Run this function to print new data to the console.
/// Requires the optional `serde` Cargo feature.
#[allow(dead_code)]
fn generate() {
fn generate_zeromap() {
let map = build_zeromap(false);
let buf = postcard::to_stdvec(&map).unwrap();
println!("{:?}", buf);
Expand All @@ -74,6 +94,15 @@ fn generate_hashmap() {
println!("{:?}", buf);
}

/// Prints the postcard encoding of the small `ZeroHashMap` to stdout, so the
/// embedded fixture bytes can be refreshed by hand.
/// Requires the optional `serde` Cargo feature.
#[allow(dead_code)]
fn generate_zerohashmap() {
    let encoded = postcard::to_stdvec(&build_zerohashmap(false)).unwrap();
    println!("{:?}", encoded);
}

#[cfg(feature = "generate")]
fn generate_test_data() {
let zeromap = build_zeromap(true);
Expand All @@ -83,21 +112,29 @@ fn generate_test_data() {
let hashmap = build_hashmap(true);
let hashmap_bytes = postcard::to_stdvec(&hashmap).unwrap();
fs::write("large_hashmap.postcard", &hashmap_bytes).unwrap();

let zerohashmap = build_zerohashmap(true);
let zerohashmap_bytes = postcard::to_stdvec(&zerohashmap).unwrap();
fs::write("large_zerohashmap.postcard", &zerohashmap_bytes).unwrap();
}

/// Criterion entry point: runs the ZeroMap, HashMap and ZeroHashMap benchmark
/// groups in that order.
fn overview_bench(c: &mut Criterion) {
bench_zeromap(c);
bench_hashmap(c);
bench_zerohashmap(c);

// Only compiled with the `generate` feature: calls `generate_test_data()`
// after the benchmark groups have run.
#[cfg(feature = "generate")]
generate_test_data();
}

fn bench_zeromap(c: &mut Criterion) {
// Uncomment the following line to re-generate the binary data.
// generate();
// generate_hashmap();

bench_deserialize(c);
bench_deserialize_large(c);
bench_lookup(c);
bench_lookup_large(c);

bench_hashmap(c);

#[cfg(feature = "generate")]
generate_test_data();
}

fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> {
Expand Down Expand Up @@ -243,6 +280,102 @@ fn read_large_hashmap_postcard_bytes() -> Vec<u8> {
fs::read(path).unwrap()
}

/// Runs every `ZeroHashMap` benchmark: small and large deserialization,
/// followed by small and large lookup.
fn bench_zerohashmap(c: &mut Criterion) {
// Uncomment the following line to re-generate the binary data.
// generate_zerohashmap();

bench_deserialize_zerohashmap(c);
bench_deserialize_large_zerohashmap(c);
bench_zerohashmap_lookup(c);
bench_zerohashmap_lookup_large(c);
}

// Builds the `ZeroHashMap` used by the benchmarks from the `DATA` table.
//
// With `large == false` the map has one entry per `DATA` row. With
// `large == true` every row is expanded into 8192 entries whose keys are the
// original key followed by `0..8192`, all sharing the row's value.
fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> {
// Reserve the exact number of entries that the loop below will push.
let mut kv = match large {
true => Vec::with_capacity(8192 * DATA.len()),
false => Vec::with_capacity(DATA.len()),
};

// Keys are stored as owned `String`s because the large variant formats new ones.
// NOTE(review): `indexify` is defined elsewhere; presumably it reinterprets a
// `&str` as an `&Index32Str` -- confirm.
for (key, value) in DATA.iter() {
if large {
for n in 0..8192 {
kv.push((format!("{}{}", key, n), indexify(value)));
}
} else {
kv.push((key.to_string(), indexify(value)));
}
}

// Borrow each owned key through `indexify` and hand the pairs to the map.
ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1)))
kv.iter().map(|kv| (indexify(&kv.0), kv.1)).collect()

}

/// Measures postcard deserialization of the small `ZeroHashMap` fixture and
/// sanity-checks one lookup on the decoded map in every iteration.
fn bench_deserialize_zerohashmap(c: &mut Criterion) {
    c.bench_function("zerohashmap/deserialize/small", |bencher| {
        bencher.iter(|| {
            let bytes = black_box(&POSTCARD_ZEROHASHMAP);
            let decoded: ZeroHashMap<Index32Str, Index32Str> =
                postcard::from_bytes(bytes).unwrap();
            let found = decoded.get(indexify("iu")).map(|entry| &entry.0);
            assert_eq!(found, Some("Inuktitut"));
        })
    });
}

/// Measures postcard deserialization of the large `ZeroHashMap` fixture, read
/// from disk once before timing starts, and checks one lookup per iteration.
fn bench_deserialize_large_zerohashmap(c: &mut Criterion) {
    let fixture = read_large_zerohashmap_postcard_bytes();
    c.bench_function("zerohashmap/deserialize/large", |bencher| {
        bencher.iter(|| {
            let bytes = black_box(&fixture);
            let decoded: ZeroHashMap<Index32Str, Index32Str> =
                postcard::from_bytes(bytes).unwrap();
            let found = decoded.get(indexify("iu3333")).map(|entry| &entry.0);
            assert_eq!(found, Some("Inuktitut"));
        })
    });
}

/// Measures lookups on the small `ZeroHashMap`: one key that is present
/// ("iu") and one that is absent ("zz"). The map is decoded once, outside
/// the timed closure.
fn bench_zerohashmap_lookup(c: &mut Criterion) {
    let map: ZeroHashMap<Index32Str, Index32Str> =
        postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();

    c.bench_function("zerohashmap/lookup/small", |bencher| {
        bencher.iter(|| {
            let hit = map.get(black_box(indexify("iu"))).map(|value| &value.0);
            assert_eq!(hit, Some("Inuktitut"));

            let miss = map.get(black_box(indexify("zz"))).map(|value| &value.0);
            assert_eq!(miss, None);
        });
    });
}

/// Measures lookups on the large `ZeroHashMap`: one key that is present
/// ("iu3333") and one that is absent ("zz"). The fixture is read and decoded
/// once, outside the timed closure.
fn bench_zerohashmap_lookup_large(c: &mut Criterion) {
    let fixture = read_large_zerohashmap_postcard_bytes();
    let map: ZeroHashMap<Index32Str, Index32Str> = postcard::from_bytes(&fixture).unwrap();

    c.bench_function("zerohashmap/lookup/large", |bencher| {
        bencher.iter(|| {
            let hit = map
                .get(black_box(indexify("iu3333")))
                .map(|value| &value.0);
            assert_eq!(hit, Some("Inuktitut"));

            let miss = map.get(black_box(indexify("zz"))).map(|value| &value.0);
            assert_eq!(miss, None);
        });
    });
}

/// Loads the large `ZeroHashMap` postcard fixture from the crate's
/// `benches/testdata` directory, panicking if the file cannot be read.
fn read_large_zerohashmap_postcard_bytes() -> Vec<u8> {
    // The path is resolved at compile time, relative to the crate manifest.
    const FIXTURE_PATH: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/benches/testdata/large_zerohashmap.postcard"
    );
    fs::read(FIXTURE_PATH).unwrap()
}

criterion_group!(benches, overview_bench);
criterion_main!(benches);

Expand All @@ -258,7 +391,7 @@ criterion_main!(benches);
#[zerovec::make_varule(Index32Str)]
#[zerovec::skip_derive(ZeroMapKV)]
#[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
#[zerovec::derive(Serialize, Deserialize)]
#[zerovec::derive(Serialize, Deserialize, Hash)]
pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str);

impl<'a> ZeroMapKV<'a> for Index32Str {
Expand Down
11 changes: 10 additions & 1 deletion utils/zerovec/derive/examples/make.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,23 @@ struct Struct {
c: Option<char>,
}

// Example input for `make_ule` that also opts into `Hash` through
// `zerovec::derive`; the generated ULE type is named `HashedStructULE`.
// NOTE(review): presumably `zerovec::derive(Debug, Hash)` adds `Debug` and
// `Hash` impls to the generated ULE type, not to this struct -- confirm.
#[make_ule(HashedStructULE)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[zerovec::derive(Debug, Hash)]
struct HashedStruct {
a: u64,
b: i16,
c: Option<char>,
}

// Example tuple struct passed to `make_ule`; the generated ULE type is named
// `TupleStructULE`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[make_ule(TupleStructULE)]
struct TupleStruct(u8, char);

#[make_ule(EnumULE)]
#[repr(u8)]
#[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Debug)]
#[zerovec::derive(Debug)]
#[zerovec::derive(Debug, Hash)]
enum Enum {
A = 0,
B = 1,
Expand Down
15 changes: 15 additions & 0 deletions utils/zerovec/derive/src/make_varule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,19 @@ pub fn make_varule_impl(attr: AttributeArgs, mut input: DeriveInput) -> TokenStr
quote!()
};

let maybe_hash = if attrs.hash {
quote!(
#[allow(clippy::derive_hash_xor_eq)]
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
impl core::hash::Hash for #ule_name {
fn hash<H>(&self, state: &mut H) where H: core::hash::Hasher {
state.write(<#ule_name as zerovec::ule::VarULE>::as_byte_slice(&self));
}
}
)
} else {
quote!()
};

quote!(
#input

Expand All @@ -241,6 +254,8 @@ pub fn make_varule_impl(attr: AttributeArgs, mut input: DeriveInput) -> TokenStr
#maybe_de

#maybe_debug

#maybe_hash
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
)
}

Expand Down
3 changes: 3 additions & 0 deletions utils/zerovec/derive/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ pub struct ZeroVecAttrs {
pub serialize: bool,
pub deserialize: bool,
pub debug: bool,
pub hash: bool,
}

/// Removes all known zerovec:: attributes from attrs and validates them
Expand Down Expand Up @@ -243,6 +244,8 @@ pub fn extract_attributes_common(
attrs.deserialize = true;
} else if ident == "Debug" {
attrs.debug = true;
} else if ident == "Hash" {
attrs.hash = true;
} else {
return Err(Error::new(
ident.span(),
Expand Down
Loading