-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
imrove safe Decompression by 3-28% improve safe Compression by 0-16% Replace calls to memcpy with custom function
- Loading branch information
Showing
4 changed files
with
152 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
//! # FastCpy | ||
//! | ||
//! The Rust Compiler calls `memcpy` for slices of unknown length. | ||
//! This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). | ||
//! If you know most of you copy operations are not too big you can use `fastcpy` to speed up your program. | ||
//! | ||
//! `fastcpy` is designed to contain not too much assembly, so the overhead is low. | ||
//! | ||
//! As fall back the standard `memcpy` is called | ||
//! | ||
//! ## Double Copy Trick | ||
//! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). | ||
//! E.g. Slice of length 6 can be copied with two uncoditional copy operations. | ||
//! | ||
//! /// [1, 2, 3, 4, 5, 6] | ||
//! /// [1, 2, 3, 4] | ||
//! /// [3, 4, 5, 6] | ||
//! | ||
#[inline] | ||
pub fn slice_copy(src: &[u8], dst: &mut [u8]) { | ||
#[inline(never)] | ||
#[cold] | ||
#[track_caller] | ||
fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { | ||
panic!( | ||
"source slice length ({}) does not match destination slice length ({})", | ||
src_len, dst_len, | ||
); | ||
} | ||
|
||
if src.len() != dst.len() { | ||
len_mismatch_fail(src.len(), dst.len()); | ||
} | ||
let len = src.len(); | ||
|
||
if src.is_empty() { | ||
return; | ||
} | ||
|
||
if len < 4 { | ||
short_copy(src, dst); | ||
return; | ||
} | ||
|
||
if len < 8 { | ||
double_copy_trick::<4>(src, dst); | ||
return; | ||
} | ||
|
||
if len <= 16 { | ||
double_copy_trick::<8>(src, dst); | ||
return; | ||
} | ||
|
||
if len <= 32 { | ||
double_copy_trick::<16>(src, dst); | ||
return; | ||
} | ||
|
||
/// The code will use the vmovdqu instruction to copy 32 bytes at a time. | ||
#[cfg(target_feature = "avx")] | ||
{ | ||
if len <= 64 { | ||
double_copy_trick::<32>(src, dst); | ||
return; | ||
} | ||
} | ||
|
||
// For larger sizes we use the default, which calls memcpy | ||
// memcpy does some virtual memory tricks to copy large chunks of memory. | ||
// | ||
// The theory should be that the checks above don't cost much relative to the copy call for | ||
// larger copies. | ||
// The bounds checks in `copy_from_slice` are elided. | ||
dst.copy_from_slice(src); | ||
} | ||
|
||
#[inline(always)] | ||
fn short_copy(src: &[u8], dst: &mut [u8]) { | ||
let len = src.len(); | ||
|
||
// length 1-3 | ||
dst[0] = src[0]; | ||
if len >= 2 { | ||
double_copy_trick::<2>(src, dst); | ||
} | ||
} | ||
|
||
#[inline(always)] | ||
/// [1, 2, 3, 4, 5, 6] | ||
/// [1, 2, 3, 4] | ||
/// [3, 4, 5, 6] | ||
fn double_copy_trick<const SIZE: usize>(src: &[u8], dst: &mut [u8]) { | ||
dst[0..SIZE].copy_from_slice(&src[0..SIZE]); | ||
dst[src.len() - SIZE..].copy_from_slice(&src[src.len() - SIZE..]); | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::slice_copy; | ||
use proptest::prelude::*; | ||
|
||
proptest! { | ||
#[test] | ||
fn test_fast_short_slice_copy(left: Vec<u8>) { | ||
let mut right = vec![0u8; left.len()]; | ||
slice_copy(&left, &mut right); | ||
prop_assert_eq!(&left, &right); | ||
} | ||
} | ||
|
||
#[test] | ||
fn test_fast_short_slice_copy_edge_cases() { | ||
for len in 0..(512 * 2) { | ||
let left = (0..len).map(|i| i as u8).collect::<Vec<_>>(); | ||
let mut right = vec![0u8; len]; | ||
slice_copy(&left, &mut right); | ||
assert_eq!(left, right); | ||
} | ||
} | ||
|
||
#[test] | ||
fn test_fail2() { | ||
let left = vec![ | ||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | ||
24, 25, 26, 27, 28, 29, 30, 31, 32, | ||
]; | ||
let mut right = vec![0u8; left.len()]; | ||
slice_copy(&left, &mut right); | ||
assert_eq!(left, right); | ||
} | ||
|
||
#[test] | ||
fn test_fail() { | ||
let left = vec![ | ||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
]; | ||
let mut right = vec![0u8; left.len()]; | ||
slice_copy(&left, &mut right); | ||
assert_eq!(left, right); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters