Skip to content

Commit

Permalink
leb128-encode integers before hashing them in IchHasher.
Browse files Browse the repository at this point in the history
This significantly reduces the number of bytes hashed by IchHasher.
  • Loading branch information
nnethercote committed Nov 2, 2016
1 parent af0b27e commit d73c68c
Showing 1 changed file with 37 additions and 1 deletion.
38 changes: 37 additions & 1 deletion src/librustc_incremental/calculate_svh/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
// except according to those terms.

use std::mem;
use std::hash::Hasher;
use rustc_data_structures::blake2b::Blake2bHasher;
use rustc::ty::util::ArchIndependentHasher;
use ich::Fingerprint;
use rustc_serialize::leb128::write_unsigned_leb128;

#[derive(Debug)]
pub struct IchHasher {
state: ArchIndependentHasher<Blake2bHasher>,
leb128_helper: Vec<u8>,
bytes_hashed: u64,
}

Expand All @@ -24,6 +27,7 @@ impl IchHasher {
let hash_size = mem::size_of::<Fingerprint>();
IchHasher {
state: ArchIndependentHasher::new(Blake2bHasher::new(hash_size, &[])),
leb128_helper: vec![],
bytes_hashed: 0
}
}
Expand All @@ -37,9 +41,19 @@ impl IchHasher {
fingerprint.0.copy_from_slice(self.state.into_inner().finalize());
fingerprint
}

#[inline]
fn write_uleb128(&mut self, value: u64) {
let len = write_unsigned_leb128(&mut self.leb128_helper, 0, value);
self.state.write(&self.leb128_helper[0..len]);
self.bytes_hashed += len as u64;
}
}

impl ::std::hash::Hasher for IchHasher {
// For the non-u8 integer cases we leb128 encode them first. Because small
// integers dominate, this significantly and cheaply reduces the number of
// bytes hashed, which is good because blake2b is expensive.
impl Hasher for IchHasher {
fn finish(&self) -> u64 {
bug!("Use other finish() implementation to get the full 128-bit hash.");
}
Expand All @@ -49,4 +63,26 @@ impl ::std::hash::Hasher for IchHasher {
self.state.write(bytes);
self.bytes_hashed += bytes.len() as u64;
}

// There is no need to leb128-encode u8 values.

#[inline]
fn write_u16(&mut self, i: u16) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u32(&mut self, i: u32) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u64(&mut self, i: u64) {
self.write_uleb128(i);
}

#[inline]
fn write_usize(&mut self, i: usize) {
self.write_uleb128(i as u64);
}
}

0 comments on commit d73c68c

Please sign in to comment.