diff --git a/src/simd_accel/teddy128.rs b/src/simd_accel/teddy128.rs index a2b34bc0ee..a15e4401a6 100644 --- a/src/simd_accel/teddy128.rs +++ b/src/simd_accel/teddy128.rs @@ -86,7 +86,7 @@ better. Namely: 1. Teddy's core algorithm scans the haystack in 16 byte chunks. 16 is significant because it corresponds to the number of bytes in a SIMD vector. If one used AVX2 instructions, then we could scan the haystack in 32 byte - chunks. Similarly, if one used AVX512 instructions, we could sca the + chunks. Similarly, if one used AVX512 instructions, we could scan the haystack in 64 byte chunks. Hyperscan implements SIMD + AVX2, we only implement SIMD for the moment. (The author doesn't have a CPU with AVX2 support... yet.) @@ -326,11 +326,10 @@ References // TODO: Make the inner loop do aligned loads. use std::cmp; -use std::mem::transmute; use std::ptr; use simd::u8x16; -use simd::x86::sse2::u64x2; +use simd::x86::sse2::Sse2Bool8ix16; use simd::x86::ssse3::Ssse3U8x16; use syntax; @@ -456,8 +455,9 @@ impl Teddy { // N.B. `res0` is our `C` in the module documentation. let res0 = self.masks.members1(h); // Only do expensive verification if there are any non-zero bits. - if res0.ne(zero).any() { - if let Some(m) = self.verify_128(haystack, pos, res0) { + let bitfield = res0.ne(zero).move_mask(); + if bitfield != 0 { + if let Some(m) = self.verify(haystack, pos, res0, bitfield) { return Some(m); } } @@ -510,9 +510,11 @@ impl Teddy { // `AND`'s our `C` values together. let res = res0prev0 & res1; prev0 = res0; - if res.ne(zero).any() { + + let bitfield = res.ne(zero).move_mask(); + if bitfield != 0 { let pos = pos.checked_sub(1).unwrap(); - if let Some(m) = self.verify_128(haystack, pos, res) { + if let Some(m) = self.verify(haystack, pos, res, bitfield) { return Some(m); } } @@ -568,9 +570,11 @@ impl Teddy { prev0 = res0; prev1 = res1; - if res.ne(zero).any() { + + let bitfield = res.ne(zero).move_mask(); + if bitfield != 0 { let pos = pos.checked_sub(2).unwrap(); - if let Some(m) = self.verify_128(haystack, pos, res) { + if let Some(m) = self.verify(haystack, pos, res, bitfield) { return Some(m); } } @@ -585,63 +589,40 @@ impl Teddy { /// Runs the verification procedure on `res` (i.e., `C` from the module /// documentation), where the haystack block starts at `pos` in - /// `haystack`. + /// `haystack`. `bitfield` has ones in the bit positions that `res` has + /// non-zero bytes. /// /// If a match exists, it returns the first one. #[inline(always)] - fn verify_128( + fn verify( &self, haystack: &[u8], pos: usize, res: u8x16, + mut bitfield: u32, ) -> Option { - // The verification procedure is more amenable to standard 64 bit - // values, so get those. - let res64: u64x2 = unsafe { transmute(res) }; - let reshi = res64.extract(0); - let reslo = res64.extract(1); - if let Some(m) = self.verify_64(haystack, pos, reshi, 0) { - return Some(m); - } - if let Some(m) = self.verify_64(haystack, pos, reslo, 8) { - return Some(m); - } - None - } - - /// Runs the verification procedure on half of `C`. - /// - /// If a match exists, it returns the first one. - /// - /// `offset` is an additional byte offset to add to the position before - /// substring match verification. - #[inline(always)] - fn verify_64( - &self, - haystack: &[u8], - pos: usize, - mut res: u64, - offset: usize, - ) -> Option { - // There's a possible match so long as there's at least one bit set. - while res != 0 { - // The next possible match is at the least significant bit. - let bit = res.trailing_zeros(); - // The position of the bit in its corresponding lane gives us the - // corresponding bucket. - let bucket = (bit % 8) as usize; - // The lane that the bit is in gives us its offset. - let bytei = (bit / 8) as usize; - // Compute the start of where a substring would start. - let start = pos + offset + bytei; - // Kill off this bit. If we couldn't match anything, we'll go to - // the next bit. - res &= !(1 << bit); - // Actual substring search verification. - if let Some(m) = self.verify_bucket(haystack, bucket, start) { - return Some(m); + while bitfield != 0 { + // The next offset, relative to pos, where some fingerprint matched. + let byte_pos = bitfield.trailing_zeros(); + bitfield &= !(1 << byte_pos); + + // Offset relative to the beginning of the haystack. + let start = pos + byte_pos as usize; + + // The bitfield telling us which patterns had fingerprints that match at this starting + // position. + let mut patterns = res.extract(byte_pos); + while patterns != 0 { + let bucket = patterns.trailing_zeros() as usize; + patterns &= !(1 << bucket); + + // Actual substring search verification. + if let Some(m) = self.verify_bucket(haystack, bucket, start) { + return Some(m); + } } } + None }