From b36c7e88f2056a664e094c077fe997d47ebf5d87 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Fri, 22 Dec 2017 13:54:54 +0100
Subject: [PATCH 1/3] Completes SSE and adds some MMX intrinsics

MMX:

- `_mm_cmpgt_pi{8,16,32}`
- `_mm_unpack{hi,lo}_pi{8,16,32}`

SSE (is now complete):

- `_mm_cvtp{i,u}{8,16}_ps`
- add test for `_m_pmulhuw`
---
 coresimd/src/x86/i586/sse.rs               |   3 +-
 coresimd/src/x86/i686/mmx.rs               | 134 +++++++++++---
 coresimd/src/x86/i686/sse.rs               |  69 +++++---
 coresimd/src/x86/i686/sse2.rs              |  23 ++-
 stdsimd-test/assert-instr-macro/src/lib.rs |   3 +-
 stdsimd-verify/build.rs                    |   4 +-
 stdsimd-verify/src/lib.rs                  | 197 ++++++++++-----------
 stdsimd-verify/tests/x86-intel.rs          | 148 +++++++++-------
 8 files changed, 351 insertions(+), 230 deletions(-)

diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs
index 2f5cc78c4a..d3325701c6 100644
--- a/coresimd/src/x86/i586/sse.rs
+++ b/coresimd/src/x86/i586/sse.rs
@@ -3299,7 +3299,8 @@ mod tests {
         use v64::*;
 
         let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
-        let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
+        let mut mem =
+            ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
         sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
         assert_eq!(a, *mem);
     }
diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs
index 08f9f46f9b..0de5154e9d 100644
--- a/coresimd/src/x86/i686/mmx.rs
+++ b/coresimd/src/x86/i686/mmx.rs
@@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
 
 /// Constructs a 64-bit integer vector initialized to zero.
 #[inline(always)]
-#[target_feature = "+mmx,+sse"]
+#[target_feature = "+mmx"]
 // FIXME: this produces a movl instead of xorps on x86
 // FIXME: this produces a xor intrinsic instead of xorps on x86_64
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
@@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
 /// Positive values greater than 0x7F are saturated to 0x7F. Negative values
 /// less than 0x80 are saturated to 0x80.
 #[inline(always)]
-#[target_feature = "+mmx,+sse"]
+#[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(packsswb))]
 pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
     packsswb(a, b)
@@ -42,17 +42,14 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
 /// Positive values greater than 0x7F are saturated to 0x7F. Negative values
 /// less than 0x80 are saturated to 0x80.
 #[inline(always)]
-#[target_feature = "+mmx,+sse"]
+#[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(packssdw))]
 pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
     packssdw(a, b)
 }
 
-/// Compares the 8-bit integer elements of two 64-bit integer vectors of
-/// [8 x i8] to determine if the element of the first vector is greater than
-/// the corresponding element of the second vector.
-///
-/// The comparison yields 0 for false, 0xFF for true.
+/// Compares whether each element of `a` is greater than the corresponding
+/// element of `b` returning `0` for `false` and `-1` for `true`.
 #[inline(always)]
 #[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(pcmpgtb))]
@@ -60,11 +57,8 @@ pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
     pcmpgtb(a, b)
 }
 
-/// Compares the 16-bit integer elements of two 64-bit integer vectors of
-/// [4 x i16] to determine if the element of the first vector is greater than
-/// the corresponding element of the second vector.
-///
-/// The comparison yields 0 for false, 0xFFFF for true.
+/// Compares whether each element of `a` is greater than the corresponding
+/// element of `b` returning `0` for `false` and `-1` for `true`.
 #[inline(always)]
 #[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(pcmpgtw))]
@@ -72,8 +66,17 @@ pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
     pcmpgtw(a, b)
 }
 
-/// Unpacks the upper 32 bits from two 64-bit integer vectors of
-/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
+/// Compares whether each element of `a` is greater than the corresponding
+/// element of `b` returning `0` for `false` and `-1` for `true`.
+#[inline(always)]
+#[target_feature = "+mmx"]
+#[cfg_attr(test, assert_instr(pcmpgtd))]
+pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
+    pcmpgtd(a, b)
+}
+
+/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
+/// them into the result: `[a.2, b.2, a.3, b.3]`.
 #[inline(always)]
 #[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
@@ -81,8 +84,17 @@ pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
     punpckhwd(a, b)
 }
 
-/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
-/// and interleaves them into a 64-bit integer vector of [8 x i8].
+/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
+/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
+#[inline(always)]
+#[target_feature = "+mmx"]
+#[cfg_attr(test, assert_instr(punpckhbw))]
+pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
+    punpckhbw(a, b)
+}
+
+/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
+/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
 #[inline(always)]
 #[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(punpcklbw))]
@@ -90,8 +102,8 @@ pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
     punpcklbw(a, b)
 }
 
-/// Unpacks the lower 32 bits from two 64-bit integer vectors of
-/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
+/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
+/// them into the result: `[a.0 b.0 a.1 b.1]`.
 #[inline(always)]
 #[target_feature = "+mmx"]
 #[cfg_attr(test, assert_instr(punpcklwd))]
@@ -99,6 +111,24 @@ pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
     punpcklwd(a, b)
 }
 
+/// Unpacks the upper element from two `i32x2` vectors and interleaves them
+/// into the result: `[a.1, b.1]`.
+#[inline(always)]
+#[target_feature = "+mmx"]
+#[cfg_attr(test, assert_instr(punpckhdq))]
+pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
+    punpckhdq(a, b)
+}
+
+/// Unpacks the lower element from two `i32x2` vectors and interleaves them
+/// into the result: `[a.0, b.0]`.
+#[inline(always)]
+#[target_feature = "+mmx"]
+#[cfg_attr(test, assert_instr(punpckldq))]
+pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
+    punpckldq(a, b)
+}
+
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.mmx.packsswb"]
@@ -109,12 +139,20 @@ extern "C" {
     fn pcmpgtb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.pcmpgt.w"]
     fn pcmpgtw(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.mmx.pcmpgt.d"]
+    fn pcmpgtd(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpckhwd"]
     fn punpckhwd(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpcklbw"]
-    fn punpcklbw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpcklwd"]
     fn punpcklwd(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.mmx.punpckhbw"]
+    fn punpckhbw(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.mmx.punpcklbw"]
+    fn punpcklbw(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.mmx.punpckhdq"]
+    fn punpckhdq(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.mmx.punpckldq"]
+    fn punpckldq(a: __m64, b: __m64) -> __m64;
 }
 
 #[cfg(test)]
@@ -123,13 +161,13 @@ mod tests {
     use x86::i686::mmx;
     use stdsimd_test::simd_test;
 
-    #[simd_test = "sse"] // FIXME: should be mmx
+    #[simd_test = "mmx"]
     unsafe fn _mm_setzero_si64() {
         let r: __m64 = ::std::mem::transmute(0_i64);
         assert_eq!(r, mmx::_mm_setzero_si64());
     }
 
-    #[simd_test = "sse"] // FIXME: should be mmx
+    #[simd_test = "mmx"]
     unsafe fn _mm_packs_pi16() {
         let a = i16x4::new(-1, 2, -3, 4);
         let b = i16x4::new(-5, 6, -7, 8);
@@ -137,7 +175,7 @@ mod tests {
         assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into())));
     }
 
-    #[simd_test = "sse"] // FIXME: should be mmx
+    #[simd_test = "mmx"]
     unsafe fn _mm_packs_pi32() {
         let a = i32x2::new(-1, 2);
         let b = i32x2::new(-5, 6);
@@ -162,11 +200,23 @@ mod tests {
     }
 
     #[simd_test = "mmx"]
-    unsafe fn _mm_unpackhi_pi16() {
-        let a = i16x4::new(0, 1, 2, 3);
-        let b = i16x4::new(4, 5, 6, 7);
-        let r = i16x4::new(2, 6, 3, 7);
-        assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
+    unsafe fn _mm_cmpgt_pi32() {
+        let a = i32x2::new(0, 3);
+        let b = i32x2::new(1, 2);
+        let r0 = i32x2::new(0, -1);
+        let r1 = i32x2::new(-1, 0);
+
+        assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into());
+        assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into());
+    }
+
+    #[simd_test = "mmx"]
+    unsafe fn _mm_unpackhi_pi8() {
+        let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
+        let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
+        let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);
+
+        assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into());
     }
 
     #[simd_test = "mmx"]
@@ -177,6 +227,14 @@ mod tests {
         assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
     }
 
+    #[simd_test = "mmx"]
+    unsafe fn _mm_unpackhi_pi16() {
+        let a = i16x4::new(0, 1, 2, 3);
+        let b = i16x4::new(4, 5, 6, 7);
+        let r = i16x4::new(2, 6, 3, 7);
+        assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
+    }
+
     #[simd_test = "mmx"]
     unsafe fn _mm_unpacklo_pi16() {
         let a = i16x4::new(0, 1, 2, 3);
@@ -184,4 +242,22 @@ mod tests {
         let r = i16x4::new(0, 4, 1, 5);
         assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into())));
     }
+
+    #[simd_test = "mmx"]
+    unsafe fn _mm_unpackhi_pi32() {
+        let a = i32x2::new(0, 3);
+        let b = i32x2::new(1, 2);
+        let r = i32x2::new(3, 2);
+
+        assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into());
+    }
+
+    #[simd_test = "mmx"]
+    unsafe fn _mm_unpacklo_pi32() {
+        let a = i32x2::new(0, 3);
+        let b = i32x2::new(1, 2);
+        let r = i32x2::new(0, 1);
+
+        assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into());
+    }
 }
diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs
index d87eadcfe7..e91db455c8 100644
--- a/coresimd/src/x86/i686/sse.rs
+++ b/coresimd/src/x86/i686/sse.rs
@@ -221,25 +221,46 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
     _mm_cvtpi32_ps(a, b)
 }
 
-/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
-/// float].
+/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
 #[inline(always)]
 #[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtpi2ps))]
+pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
+    let b = mmx::_mm_setzero_si64();
+    let b = mmx::_mm_cmpgt_pi8(b, a);
+    let b = mmx::_mm_unpacklo_pi8(a, b);
+    _mm_cvtpi16_ps(b)
+}
+
+/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtpi2ps))]
+pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
+    let b = mmx::_mm_setzero_si64();
+    let b = mmx::_mm_unpacklo_pi8(a, b);
+    _mm_cvtpi16_ps(b)
+}
+
+/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtpi2ps))]
 pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
     let b = mmx::_mm_setzero_si64();
-    let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a);
+    let b = mmx::_mm_cmpgt_pi16(b, a);
     let c = mmx::_mm_unpackhi_pi16(a, b);
     let r = i586::_mm_setzero_ps();
-    let r = cvtpi2ps(r, mem::transmute(c));
+    let r = cvtpi2ps(r, c);
     let r = i586::_mm_movelh_ps(r, r);
     let c = mmx::_mm_unpacklo_pi16(a, b);
-    cvtpi2ps(r, mem::transmute(c))
+    cvtpi2ps(r, c)
 }
 
-/// Converts a 64-bit vector of 16-bit unsigned integer values into a
-/// 128-bit vector of [4 x float].
+/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
 #[inline(always)]
 #[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtpi2ps))]
 pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
     let b = mmx::_mm_setzero_si64();
     let c = mmx::_mm_unpackhi_pi16(a, b);
@@ -250,27 +271,6 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
     cvtpi2ps(r, c)
 }
 
-/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
-/// into a 128-bit vector of [4 x float].
-#[inline(always)]
-#[target_feature = "+sse"]
-pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
-    let b = mmx::_mm_setzero_si64();
-    let b = mmx::_mm_cmpgt_pi8(b, a);
-    let b = mmx::_mm_unpacklo_pi8(a, b);
-    _mm_cvtpi16_ps(b)
-}
-
-/// Converts the lower four unsigned 8-bit integer values from a 64-bit
-/// vector of [8 x u8] into a 128-bit vector of [4 x float].
-#[inline(always)]
-#[target_feature = "+sse"]
-pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
-    let b = mmx::_mm_setzero_si64();
-    let b = mmx::_mm_unpacklo_pi8(a, b);
-    _mm_cvtpi16_ps(b)
-}
-
 /// Converts the two 32-bit signed integer values from each 64-bit vector
 /// operand of [2 x i32] into a 128-bit vector of [4 x float].
 #[inline(always)]
@@ -512,6 +512,13 @@ mod tests {
         assert_eq!(r, u16x4::splat(15));
     }
 
+    #[simd_test = "sse"]
+    unsafe fn _m_pmulhuw() {
+        let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
+        let r = sse::_m_pmulhuw(a.into(), b.into());
+        assert_eq!(r, u16x4::splat(15).into());
+    }
+
     #[simd_test = "sse"]
     unsafe fn _mm_avg_pu8() {
         let (a, b) = (u8x8::splat(3), u8x8::splat(9));
@@ -601,7 +608,11 @@ mod tests {
         let a = i8x8::splat(9);
         let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
         let mut r = i8x8::splat(0);
-        sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
+        sse::_mm_maskmove_si64(
+            a.into(),
+            mask.into(),
+            &mut r as *mut _ as *mut i8,
+        );
         assert_eq!(r, i8x8::splat(0).replace(2, 9));
 
         let mut r = i8x8::splat(0);
diff --git a/coresimd/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs
index 59dbf3ca9e..c9b5fd3fa0 100644
--- a/coresimd/src/x86/i686/sse2.rs
+++ b/coresimd/src/x86/i686/sse2.rs
@@ -76,7 +76,8 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
 /// integer.
 #[inline(always)]
 #[target_feature = "+sse2"]
-// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong instr?
+// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
+// instr?
 pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
     mem::transmute(a.extract(0))
 }
@@ -85,7 +86,8 @@ pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
 /// upper bits.
 #[inline(always)]
 #[target_feature = "+sse2"]
-// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong instr?
+// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
+// instr?
 pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
     i64x2::new(mem::transmute(a), 0)
 }
@@ -175,7 +177,8 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_set_epi64() {
-        let r = sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
+        let r =
+            sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
         assert_eq!(r, i64x2::new(2, 1));
     }
 
@@ -187,7 +190,8 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_setr_epi64() {
-        let r = sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
+        let r =
+            sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
         assert_eq!(r, i64x2::new(1, 2));
     }
 
@@ -199,7 +203,16 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_movpi64_epi64() {
-        let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(5, 0, 0, 0, 0, 0, 0, 0)));
+        let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(
+            5,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+        )));
         assert_eq!(r, i64x2::new(5, 0));
     }
 
diff --git a/stdsimd-test/assert-instr-macro/src/lib.rs b/stdsimd-test/assert-instr-macro/src/lib.rs
index 38a42c4ec9..4d347f89d9 100644
--- a/stdsimd-test/assert-instr-macro/src/lib.rs
+++ b/stdsimd-test/assert-instr-macro/src/lib.rs
@@ -42,8 +42,7 @@ pub fn assert_instr(
     let assert_name = syn::Ident::from(
         &format!("assert_{}_{}", name.as_ref(), instr.as_ref())[..],
     );
-    let shim_name =
-        syn::Ident::from(format!("{}_shim", name.as_ref()));
+    let shim_name = syn::Ident::from(format!("{}_shim", name.as_ref()));
     let (to_test, test_name) = if invoc.args.len() == 0 {
         (TokenStream::empty(), &func.ident)
     } else {
diff --git a/stdsimd-verify/build.rs b/stdsimd-verify/build.rs
index 3273777679..28554bac42 100644
--- a/stdsimd-verify/build.rs
+++ b/stdsimd-verify/build.rs
@@ -12,11 +12,11 @@ fn walk(root: &Path) {
         let file = file.unwrap();
         if file.file_type().unwrap().is_dir() {
             walk(&file.path());
-            continue
+            continue;
         }
         let path = file.path();
         if path.extension().and_then(|s| s.to_str()) != Some("rs") {
-            continue
+            continue;
         }
 
         println!("cargo:rerun-if-changed={}", path.display());
diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs
index 5660cd05e9..b00398a001 100644
--- a/stdsimd-verify/src/lib.rs
+++ b/stdsimd-verify/src/lib.rs
@@ -1,10 +1,10 @@
 #![feature(proc_macro)]
 
-extern crate proc_macro;
 extern crate proc_macro2;
-extern crate syn;
+extern crate proc_macro;
 #[macro_use]
 extern crate quote;
+extern crate syn;
 
 use std::path::Path;
 use std::fs::File;
@@ -42,21 +42,21 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
             _ => return false,
         }
         if f.unsafety.is_none() {
-            return false
+            return false;
         }
-        f.attrs.iter()
+        f.attrs
+            .iter()
             .filter_map(|a| a.meta_item())
-            .any(|a| {
-                match a {
-                    syn::MetaItem::NameValue(i) => i.ident == "target_feature",
-                    _ => false,
-                }
+            .any(|a| match a {
+                syn::MetaItem::NameValue(i) => i.ident == "target_feature",
+                _ => false,
             })
     });
 
     let input = proc_macro2::TokenStream::from(input);
 
-    let functions = functions.iter()
+    let functions = functions
+        .iter()
         .map(|f| {
             let name = f.ident;
             // println!("{}", name);
@@ -96,53 +96,51 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
 
 fn to_type(t: &syn::Type) -> Tokens {
     match *t {
-        syn::Type::Path(ref p) => {
-            match extract_path_ident(&p.path).as_ref() {
-                "__m128i" => my_quote! { &I8x16 },
-                "__m256i" => my_quote! { &I8x32 },
-                "__m64" => my_quote! { &I8x8 },
-                "bool" => my_quote! { &BOOL },
-                "f32" => my_quote! { &F32 },
-                "f32x4" => my_quote! { &F32x4 },
-                "f32x8" => my_quote! { &F32x8 },
-                "f64" => my_quote! { &F64 },
-                "f64x2" => my_quote! { &F64x2 },
-                "f64x4" => my_quote! { &F64x4 },
-                "i16" => my_quote! { &I16 },
-                "i16x16" => my_quote! { &I16x16 },
-                "i16x4" => my_quote! { &I16x4 },
-                "i16x8" => my_quote! { &I16x8 },
-                "i32" => my_quote! { &I32 },
-                "i32x2" => my_quote! { &I32x2 },
-                "i32x4" => my_quote! { &I32x4 },
-                "i32x8" => my_quote! { &I32x8 },
-                "i64" => my_quote! { &I64 },
-                "i64x2" => my_quote! { &I64x2 },
-                "i64x4" => my_quote! { &I64x4 },
-                "i8" => my_quote! { &I8 },
-                "i8x16" => my_quote! { &I8x16 },
-                "i8x32" => my_quote! { &I8x32 },
-                "i8x8" => my_quote! { &I8x8 },
-                "u16x4" => my_quote! { &U16x4 },
-                "u16x8" => my_quote! { &U16x8 },
-                "u32" => my_quote! { &U32 },
-                "u32x2" => my_quote! { &U32x2 },
-                "u32x4" => my_quote! { &U32x4 },
-                "u32x8" => my_quote! { &U32x8 },
-                "u64" => my_quote! { &U64 },
-                "u64x2" => my_quote! { &U64x2 },
-                "u64x4" => my_quote! { &U64x4 },
-                "u8" => my_quote! { &U8 },
-                "u16" => my_quote! { &U16 },
-                "u8x16" => my_quote! { &U8x16 },
-                "u8x32" => my_quote! { &U8x32 },
-                "u16x16" => my_quote! { &U16x16 },
-                "u8x8" => my_quote! { &U8x8 },
-                s => panic!("unspported type: {}", s),
-            }
-        }
-        syn::Type::Ptr(syn::TypePtr { ref elem, .. }) |
-        syn::Type::Reference(syn::TypeReference { ref elem, .. }) => {
+        syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
+            "__m128i" => my_quote! { &I8x16 },
+            "__m256i" => my_quote! { &I8x32 },
+            "__m64" => my_quote! { &I8x8 },
+            "bool" => my_quote! { &BOOL },
+            "f32" => my_quote! { &F32 },
+            "f32x4" => my_quote! { &F32x4 },
+            "f32x8" => my_quote! { &F32x8 },
+            "f64" => my_quote! { &F64 },
+            "f64x2" => my_quote! { &F64x2 },
+            "f64x4" => my_quote! { &F64x4 },
+            "i16" => my_quote! { &I16 },
+            "i16x16" => my_quote! { &I16x16 },
+            "i16x4" => my_quote! { &I16x4 },
+            "i16x8" => my_quote! { &I16x8 },
+            "i32" => my_quote! { &I32 },
+            "i32x2" => my_quote! { &I32x2 },
+            "i32x4" => my_quote! { &I32x4 },
+            "i32x8" => my_quote! { &I32x8 },
+            "i64" => my_quote! { &I64 },
+            "i64x2" => my_quote! { &I64x2 },
+            "i64x4" => my_quote! { &I64x4 },
+            "i8" => my_quote! { &I8 },
+            "i8x16" => my_quote! { &I8x16 },
+            "i8x32" => my_quote! { &I8x32 },
+            "i8x8" => my_quote! { &I8x8 },
+            "u16x4" => my_quote! { &U16x4 },
+            "u16x8" => my_quote! { &U16x8 },
+            "u32" => my_quote! { &U32 },
+            "u32x2" => my_quote! { &U32x2 },
+            "u32x4" => my_quote! { &U32x4 },
+            "u32x8" => my_quote! { &U32x8 },
+            "u64" => my_quote! { &U64 },
+            "u64x2" => my_quote! { &U64x2 },
+            "u64x4" => my_quote! { &U64x4 },
+            "u8" => my_quote! { &U8 },
+            "u16" => my_quote! { &U16 },
+            "u8x16" => my_quote! { &U8x16 },
+            "u8x32" => my_quote! { &U8x32 },
+            "u16x16" => my_quote! { &U16x16 },
+            "u8x8" => my_quote! { &U8x8 },
+            s => panic!("unspported type: {}", s),
+        },
+        syn::Type::Ptr(syn::TypePtr { ref elem, .. })
+        | syn::Type::Reference(syn::TypeReference { ref elem, .. }) => {
             let tokens = to_type(&elem);
             my_quote! { &Type::Ptr(#tokens) }
         }
@@ -162,7 +160,7 @@ fn extract_path_ident(path: &syn::Path) -> syn::Ident {
     }
     match path.segments.first().unwrap().item().arguments {
         syn::PathArguments::None => {}
-        _ => panic!("unsupported path that has path arguments")
+        _ => panic!("unsupported path that has path arguments"),
     }
     path.segments.first().unwrap().item().ident
 }
@@ -172,71 +170,72 @@ fn walk(root: &Path, files: &mut Vec<syn::File>) {
         let file = file.unwrap();
         if file.file_type().unwrap().is_dir() {
             walk(&file.path(), files);
-            continue
+            continue;
         }
         let path = file.path();
         if path.extension().and_then(|s| s.to_str()) != Some("rs") {
-            continue
+            continue;
         }
 
         let mut contents = String::new();
-        File::open(&path).unwrap().read_to_string(&mut contents).unwrap();
-
-        files.push(syn::parse_str::<syn::File>(&contents).expect("failed to parse"));
+        File::open(&path)
+            .unwrap()
+            .read_to_string(&mut contents)
+            .unwrap();
+
+        files.push(
+            syn::parse_str::<syn::File>(&contents).expect("failed to parse"),
+        );
     }
 }
 
 fn find_instrs(attrs: &[syn::Attribute]) -> Vec<syn::Ident> {
-    attrs.iter()
+    attrs
+        .iter()
         .filter_map(|a| a.meta_item())
-        .filter_map(|a| {
-            match a {
-                syn::MetaItem::List(i) => {
-                    if i.ident == "cfg_attr" {
-                        i.nested.into_iter().next()
-                    } else {
-                        None
-                    }
+        .filter_map(|a| match a {
+            syn::MetaItem::List(i) => {
+                if i.ident == "cfg_attr" {
+                    i.nested.into_iter().next()
+                } else {
+                    None
                 }
-                _ => None,
             }
+            _ => None,
         })
-        .filter_map(|nested| {
-            match nested {
-                syn::NestedMetaItem::MetaItem(syn::MetaItem::List(i)) => {
-                    if i.ident == "assert_instr" {
-                        i.nested.into_iter().next()
-                    } else {
-                        None
-                    }
+        .filter_map(|nested| match nested {
+            syn::NestedMetaItem::MetaItem(syn::MetaItem::List(i)) => {
+                if i.ident == "assert_instr" {
+                    i.nested.into_iter().next()
+                } else {
+                    None
                 }
-                _ => None,
             }
+            _ => None,
         })
-        .filter_map(|nested| {
-            match nested {
-                syn::NestedMetaItem::MetaItem(syn::MetaItem::Term(i)) => Some(i),
-                _ => None,
-            }
+        .filter_map(|nested| match nested {
+            syn::NestedMetaItem::MetaItem(syn::MetaItem::Term(i)) => Some(i),
+            _ => None,
         })
         .collect()
 }
 
-fn find_target_feature(name: syn::Ident, attrs: &[syn::Attribute]) -> syn::Lit {
-    attrs.iter()
+fn find_target_feature(
+    name: syn::Ident, attrs: &[syn::Attribute]
+) -> syn::Lit {
+    attrs
+        .iter()
         .filter_map(|a| a.meta_item())
-        .filter_map(|a| {
-            match a {
-                syn::MetaItem::NameValue(i) => {
-                    if i.ident == "target_feature" {
-                        Some(i.lit)
-                    } else {
-                        None
-                    }
+        .filter_map(|a| match a {
+            syn::MetaItem::NameValue(i) => {
+                if i.ident == "target_feature" {
+                    Some(i.lit)
+                } else {
+                    None
                 }
-                _ => None,
             }
+            _ => None,
         })
         .next()
-        .expect(&format!("failed to find target_feature for {}",name))
+        .expect(&format!("failed to find target_feature for {}", name))
 }
diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs
index b4a8a4b2b9..ce425bdd6a 100644
--- a/stdsimd-verify/tests/x86-intel.rs
+++ b/stdsimd-verify/tests/x86-intel.rs
@@ -42,7 +42,7 @@ static I8x32: Type = Type::Signed(8, 32);
 static I8x8: Type = Type::Signed(8, 8);
 static U16: Type = Type::PrimUnsigned(16);
 static U16x16: Type = Type::Unsigned(16, 16);
-static U16x4: Type = Type::Unsigned(16, 4);
+// static U16x4: Type = Type::Unsigned(16, 4);
 static U16x8: Type = Type::Unsigned(16, 8);
 static U32: Type = Type::PrimUnsigned(32);
 static U32x2: Type = Type::Unsigned(32, 2);
@@ -54,7 +54,7 @@ static U64x4: Type = Type::Unsigned(64, 4);
 static U8: Type = Type::PrimUnsigned(8);
 static U8x16: Type = Type::Unsigned(8, 16);
 static U8x32: Type = Type::Unsigned(8, 32);
-static U8x8: Type = Type::Unsigned(8, 8);
+// static U8x8: Type = Type::Unsigned(8, 8);
 
 #[derive(Debug)]
 enum Type {
@@ -72,8 +72,7 @@ x86_functions!(static FUNCTIONS);
 
 #[derive(Deserialize)]
 struct Data {
-    #[serde(rename = "intrinsic", default)]
-    intrinsics: Vec<Intrinsic>,
+    #[serde(rename = "intrinsic", default)] intrinsics: Vec<Intrinsic>,
 }
 
 #[derive(Deserialize)]
@@ -81,18 +80,14 @@ struct Intrinsic {
     rettype: String,
     name: String,
     tech: String,
-    #[serde(rename = "CPUID", default)]
-    cpuid: Vec<String>,
-    #[serde(rename = "parameter", default)]
-    parameters: Vec<Parameter>,
-    #[serde(default)]
-    instruction: Vec<Instruction>,
+    #[serde(rename = "CPUID", default)] cpuid: Vec<String>,
+    #[serde(rename = "parameter", default)] parameters: Vec<Parameter>,
+    #[serde(default)] instruction: Vec<Instruction>,
 }
 
 #[derive(Deserialize)]
 struct Parameter {
-    #[serde(rename = "type")]
-    type_: String,
+    #[serde(rename = "type")] type_: String,
 }
 
 #[derive(Deserialize)]
@@ -113,18 +108,20 @@ fn verify_all_signatures() {
     let xml = include_bytes!("../x86-intel.xml");
 
     let xml = &xml[..];
-    let data: Data = serde_xml_rs::deserialize(xml).expect("failed to deserialize xml");
+    let data: Data =
+        serde_xml_rs::deserialize(xml).expect("failed to deserialize xml");
     let mut map = HashMap::new();
     for intrinsic in data.intrinsics.iter() {
-        // This intrinsic has multiple definitions in the XML, so just ignore it.
+        // This intrinsic has multiple definitions in the XML, so just ignore
+        // it.
         if intrinsic.name == "_mm_prefetch" {
-            continue
+            continue;
         }
 
         // These'll need to get added eventually, but right now they have some
         // duplicate names in the XML which we're not dealing with yet
         if intrinsic.tech == "AVX-512" {
-            continue
+            continue;
         }
 
         assert!(map.insert(&intrinsic.name[..], intrinsic).is_none());
@@ -133,13 +130,14 @@ fn verify_all_signatures() {
     for rust in FUNCTIONS {
         // This was ignored above, we ignore it here as well.
         if rust.name == "_mm_prefetch" {
-            continue
+            continue;
         }
 
         // these are all AMD-specific intrinsics
-        if rust.target_feature.contains("sse4a") ||
-            rust.target_feature.contains("tbm") {
-            continue
+        if rust.target_feature.contains("sse4a")
+            || rust.target_feature.contains("tbm")
+        {
+            continue;
         }
 
         let intel = match map.get(rust.name) {
@@ -147,15 +145,15 @@ fn verify_all_signatures() {
             None => panic!("missing intel definition for {}", rust.name),
         };
 
-        // Verify that all `#[target_feature]` annotations are correct, ensuring
-        // that we've actually enabled the right instruction set for this
-        // intrinsic.
+        // Verify that all `#[target_feature]` annotations are correct,
+        // ensuring that we've actually enabled the right instruction
+        // set for this intrinsic.
         assert!(intel.cpuid.len() > 0, "missing cpuid for {}", rust.name);
         for cpuid in intel.cpuid.iter() {
             // this is needed by _xsave and probably some related intrinsics,
             // but let's just skip it for now.
             if *cpuid == "XSS" {
-                continue
+                continue;
             }
 
             let cpuid = cpuid
@@ -163,36 +161,48 @@ fn verify_all_signatures() {
                 .flat_map(|c| c.to_lowercase())
                 .collect::<String>();
 
-            // Normalize `bmi1` to `bmi` as apparently that's what we're calling
-            // it.
+            // Normalize `bmi1` to `bmi` as apparently that's what we're
+            // calling it.
             let cpuid = if cpuid == "bmi1" {
                 String::from("bmi")
             } else {
                 cpuid
             };
 
-            assert!(rust.target_feature.contains(&cpuid),
-                    "intel cpuid `{}` not in `{}` for {}",
-                    cpuid,
-                    rust.target_feature,
-                    rust.name);
+            assert!(
+                rust.target_feature.contains(&cpuid),
+                "intel cpuid `{}` not in `{}` for {}",
+                cpuid,
+                rust.target_feature,
+                rust.name
+            );
         }
 
         // TODO: we should test this, but it generates too many failures right
         // now
         if false {
             if rust.instrs.len() == 0 {
-                assert_eq!(intel.instruction.len(), 0,
-                           "instruction not listed for {}", rust.name);
+                assert_eq!(
+                    intel.instruction.len(),
+                    0,
+                    "instruction not listed for {}",
+                    rust.name
+                );
 
             // If intel doesn't list any instructions and we do then don't
             // bother trying to look for instructions in intel, we've just got
             // some extra assertions on our end.
             } else if intel.instruction.len() > 0 {
                 for instr in rust.instrs.iter() {
-                    assert!(intel.instruction.iter().any(|a| a.name.starts_with(instr)),
-                            "intel failed to list `{}` as an instruction for `{}`",
-                            instr, rust.name);
+                    assert!(
+                        intel
+                            .instruction
+                            .iter()
+                            .any(|a| a.name.starts_with(instr)),
+                        "intel failed to list `{}` as an instruction for `{}`",
+                        instr,
+                        rust.name
+                    );
                 }
             }
         }
@@ -201,9 +211,12 @@ fn verify_all_signatures() {
         match rust.ret {
             Some(t) => equate(t, &intel.rettype, &rust.name),
             None => {
-                assert!(intel.rettype == "" || intel.rettype == "void",
-                        "{} returns `{}` with intel, void in rust",
-                        rust.name, intel.rettype);
+                assert!(
+                    intel.rettype == "" || intel.rettype == "void",
+                    "{} returns `{}` with intel, void in rust",
+                    rust.name,
+                    intel.rettype
+                );
             }
         }
 
@@ -212,13 +225,17 @@ fn verify_all_signatures() {
         if rust.arguments.len() == 0 {
             if intel.parameters.len() == 1 {
                 assert_eq!(intel.parameters[0].type_, "void");
-                continue
+                continue;
             }
         }
 
         // Otherwise we want all parameters to be exactly the same
-        assert_eq!(rust.arguments.len(), intel.parameters.len(),
-                   "wrong number of arguments on {}", rust.name);
+        assert_eq!(
+            rust.arguments.len(),
+            intel.parameters.len(),
+            "wrong number of arguments on {}",
+            rust.name
+        );
         for (a, b) in intel.parameters.iter().zip(rust.arguments) {
             equate(b, &a.type_, &intel.name);
         }
@@ -255,20 +272,21 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
         (&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {}
         (&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {}
 
-        (&Type::Signed(a, b), "__m128i") |
-        (&Type::Unsigned(a, b), "__m128i") |
-        (&Type::Ptr(&Type::Signed(a, b)), "__m128i*") |
-        (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
+        (&Type::Signed(a, b), "__m128i")
+        | (&Type::Unsigned(a, b), "__m128i")
+        | (&Type::Ptr(&Type::Signed(a, b)), "__m128i*")
+        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
 
-        (&Type::Signed(a, b), "__m256i") |
-        (&Type::Unsigned(a, b), "__m256i") |
-        (&Type::Ptr(&Type::Signed(a, b)), "__m256i*") |
-        (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*") if (a as u32) * (b as u32) == 256 => {}
+        (&Type::Signed(a, b), "__m256i")
+        | (&Type::Unsigned(a, b), "__m256i")
+        | (&Type::Ptr(&Type::Signed(a, b)), "__m256i*")
+        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*")
+            if (a as u32) * (b as u32) == 256 => {}
 
-        (&Type::Signed(a, b), "__m64") |
-        (&Type::Unsigned(a, b), "__m64") |
-        (&Type::Ptr(&Type::Signed(a, b)), "__m64*") |
-        (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
+        (&Type::Signed(a, b), "__m64")
+        | (&Type::Unsigned(a, b), "__m64")
+        | (&Type::Ptr(&Type::Signed(a, b)), "__m64*")
+        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
 
         (&Type::Float(32, 4), "__m128") => {}
         (&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {}
@@ -291,20 +309,24 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
         // Intel says the argument is i32...
         (&Type::PrimSigned(8), "int") if intrinsic == "_mm_insert_epi8" => {}
 
-        // This is a macro (?) in C which seems to mutate its arguments, but that
-        // means that we're taking pointers to arguments in rust as we're not
-        // exposing it as a macro.
-        (&Type::Ptr(&Type::Float(32, 4)), "__m128") if intrinsic == "_MM_TRANSPOSE4_PS" => {}
+        // This is a macro (?) in C which seems to mutate its arguments, but
+        // that means that we're taking pointers to arguments in rust
+        // as we're not exposing it as a macro.
+        (&Type::Ptr(&Type::Float(32, 4)), "__m128")
+            if intrinsic == "_MM_TRANSPOSE4_PS" => {}
 
         // These intrinsics return an `int` in C but they're always either the
         // bit 1 or 0 so we switch it to returning `bool` in rust
         (&Type::Bool, "int")
-            if intrinsic.starts_with("_mm_comi") && intrinsic.ends_with("_sd")
-            => {}
+            if intrinsic.starts_with("_mm_comi")
+                && intrinsic.ends_with("_sd") => {}
         (&Type::Bool, "int")
-            if intrinsic.starts_with("_mm_ucomi") && intrinsic.ends_with("_sd")
-                => {}
+            if intrinsic.starts_with("_mm_ucomi")
+                && intrinsic.ends_with("_sd") => {}
 
-        _ => panic!("failed to equate: `{}` and {:?} for {}", intel, t, intrinsic),
+        _ => panic!(
+            "failed to equate: `{}` and {:?} for {}",
+            intel, t, intrinsic
+        ),
     }
 }

From 275071e6b5091dbc2ece87381a0262654515603d Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 4 Jan 2018 12:19:11 +0100
Subject: [PATCH 2/3] fmt and clippy

---
 coresimd/src/x86/i586/bswap.rs    |  4 +++
 coresimd/src/x86/i686/sse4a.rs    |  4 +--
 coresimd/src/x86/i686/ssse3.rs    |  6 ++++-
 coresimd/src/x86/macros.rs        | 10 ++++----
 coresimd/src/x86/x86_64/sse41.rs  |  2 ++
 stdsimd-test/src/lib.rs           | 38 ++++++++++++++++------------
 stdsimd-verify/tests/x86-intel.rs | 41 +++++++++++++++----------------
 7 files changed, 60 insertions(+), 45 deletions(-)

diff --git a/coresimd/src/x86/i586/bswap.rs b/coresimd/src/x86/i586/bswap.rs
index 15cd0b1c47..8bac167569 100644
--- a/coresimd/src/x86/i586/bswap.rs
+++ b/coresimd/src/x86/i586/bswap.rs
@@ -1,3 +1,7 @@
+//! Byte swap intrinsics.
+
+#![cfg_attr(feature = "cargo-clippy", allow(stutter))]
+
 #[cfg(test)]
 use stdsimd_test::assert_instr;
 
diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs
index 9c47617ff6..884097e871 100644
--- a/coresimd/src/x86/i686/sse4a.rs
+++ b/coresimd/src/x86/i686/sse4a.rs
@@ -1,4 +1,4 @@
-//! `i686`'s Streaming SIMD Extensions 4a (SSE4a)
+//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
 
 use core::mem;
 use v128::*;
@@ -52,7 +52,7 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
 #[target_feature = "+sse4a"]
 #[cfg_attr(test, assert_instr(insertq))]
 pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
-    insertq(x, mem::transmute(y))
+    insertq(x, y)
 }
 
 /// Non-temporal store of `a.0` into `p`.
diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs
index bf31cbb647..965973598e 100644
--- a/coresimd/src/x86/i686/ssse3.rs
+++ b/coresimd/src/x86/i686/ssse3.rs
@@ -256,7 +256,11 @@ mod tests {
     unsafe fn _mm_alignr_pi8() {
         let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32);
         let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32);
-        let r = ssse3::_mm_alignr_pi8(u8x8::from(a).into(), u8x8::from(b).into(), 4);
+        let r = ssse3::_mm_alignr_pi8(
+            u8x8::from(a).into(),
+            u8x8::from(b).into(),
+            4,
+        );
         assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
     }
 
diff --git a/coresimd/src/x86/macros.rs b/coresimd/src/x86/macros.rs
index 79109fd67c..ea72c062e8 100644
--- a/coresimd/src/x86/macros.rs
+++ b/coresimd/src/x86/macros.rs
@@ -3,7 +3,7 @@
 macro_rules! constify_imm8 {
     ($imm8:expr, $expand:ident) => {
         #[allow(overflowing_literals)]
-        match $imm8 & 0b1111_1111 {
+        match ($imm8) & 0b1111_1111 {
             0 => $expand!(0),
             1 => $expand!(1),
             2 => $expand!(2),
@@ -267,7 +267,7 @@ macro_rules! constify_imm8 {
 macro_rules! constify_imm6 {
     ($imm8:expr, $expand:ident) => {
         #[allow(overflowing_literals)]
-        match $imm8 & 0b1_1111 {
+        match ($imm8) & 0b1_1111 {
             0 => $expand!(0),
             1 => $expand!(1),
             2 => $expand!(2),
@@ -307,7 +307,7 @@ macro_rules! constify_imm6 {
 macro_rules! constify_imm4 {
     ($imm8:expr, $expand:ident) => {
         #[allow(overflowing_literals)]
-        match $imm8 & 0b1111 {
+        match ($imm8) & 0b1111 {
             0 => $expand!(0),
             1 => $expand!(1),
             2 => $expand!(2),
@@ -331,7 +331,7 @@ macro_rules! constify_imm4 {
 macro_rules! constify_imm3 {
     ($imm8:expr, $expand:ident) => {
         #[allow(overflowing_literals)]
-        match $imm8 & 0b111 {
+        match ($imm8) & 0b111 {
             0 => $expand!(0),
             1 => $expand!(1),
             2 => $expand!(2),
@@ -347,7 +347,7 @@ macro_rules! constify_imm3 {
 macro_rules! constify_imm2 {
     ($imm8:expr, $expand:ident) => {
         #[allow(overflowing_literals)]
-        match $imm8 & 0b11 {
+        match ($imm8) & 0b11 {
             0 => $expand!(0),
             1 => $expand!(1),
             2 => $expand!(2),
diff --git a/coresimd/src/x86/x86_64/sse41.rs b/coresimd/src/x86/x86_64/sse41.rs
index b2417a8e59..20fa606d5f 100644
--- a/coresimd/src/x86/x86_64/sse41.rs
+++ b/coresimd/src/x86/x86_64/sse41.rs
@@ -1,3 +1,5 @@
+//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
+
 use v128::*;
 
 #[cfg(test)]
diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs
index d41bec2564..1ec3d96920 100644
--- a/stdsimd-test/src/lib.rs
+++ b/stdsimd-test/src/lib.rs
@@ -304,19 +304,20 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
             None => continue,
         };
         if !part.contains("call") {
-            continue
+            continue;
         }
 
         // On 32-bit x86 position independent code will call itself and be
         // immediately followed by a `pop` to learn about the current address.
         // Let's not take that into account when considering whether a function
         // failed inlining something.
-        let followed_by_pop = function.instrs.get(i + 1)
+        let followed_by_pop = function
+            .instrs
+            .get(i + 1)
             .and_then(|i| i.parts.get(0))
-            .map(|s| s.contains("pop"))
-            .unwrap_or(false);
+            .map_or(false, |s| s.contains("pop"));
         if followed_by_pop && cfg!(target_arch = "x86") {
-            continue
+            continue;
         }
 
         inlining_failed = true;
@@ -324,14 +325,14 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
     }
 
     let instruction_limit = match expected {
-        // cpuid returns a pretty big aggregate structure so excempt it from the
-        // slightly more restrictive 20 instructions below
+        // cpuid returns a pretty big aggregate structure so excempt it from
+        // the slightly more restrictive 20 instructions below
         "cpuid" => 30,
 
-        // Apparently on Windows LLVM generates a bunch of saves/restores of xmm
-        // registers around these intstructions which blows the 20 limit
-        // below. As it seems dictates by Windows's abi (I guess?) we probably
-        // can't do much about it...
+        // Apparently on Windows LLVM generates a bunch of saves/restores of
+        // xmm registers around these intstructions which blows the 20
+        // limit below. As it seems dictates by Windows's abi (I
+        // guess?) we probably can't do much about it...
         "vzeroall" | "vzeroupper" if cfg!(windows) => 30,
 
         _ => 20,
@@ -363,12 +364,17 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
             expected
         );
     } else if !probably_only_one_instruction {
-        panic!("instruction found, but the disassembly contains too many \
-                instructions: #instructions = {} >= {} (limit)",
-               function.instrs.len(), instruction_limit);
+        panic!(
+            "instruction found, but the disassembly contains too many \
+             instructions: #instructions = {} >= {} (limit)",
+            function.instrs.len(),
+            instruction_limit
+        );
     } else if inlining_failed {
-        panic!("instruction found, but the disassembly contains `call` \
-                instructions, which hint that inlining failed");
+        panic!(
+            "instruction found, but the disassembly contains `call` \
+             instructions, which hint that inlining failed"
+        );
     }
 }
 
diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs
index ce425bdd6a..87c1a1fc87 100644
--- a/stdsimd-verify/tests/x86-intel.rs
+++ b/stdsimd-verify/tests/x86-intel.rs
@@ -1,5 +1,7 @@
 #![feature(proc_macro)]
 #![allow(bad_style)]
+#![cfg_attr(feature = "cargo-clippy",
+            allow(shadow_reuse, cast_lossless, match_same_arms))]
 
 #[macro_use]
 extern crate serde_derive;
@@ -111,7 +113,7 @@ fn verify_all_signatures() {
     let data: Data =
         serde_xml_rs::deserialize(xml).expect("failed to deserialize xml");
     let mut map = HashMap::new();
-    for intrinsic in data.intrinsics.iter() {
+    for intrinsic in &data.intrinsics {
         // This intrinsic has multiple definitions in the XML, so just ignore
         // it.
         if intrinsic.name == "_mm_prefetch" {
@@ -148,8 +150,8 @@ fn verify_all_signatures() {
         // Verify that all `#[target_feature]` annotations are correct,
         // ensuring that we've actually enabled the right instruction
         // set for this intrinsic.
-        assert!(intel.cpuid.len() > 0, "missing cpuid for {}", rust.name);
-        for cpuid in intel.cpuid.iter() {
+        assert!(!intel.cpuid.is_empty(), "missing cpuid for {}", rust.name);
+        for cpuid in &intel.cpuid {
             // this is needed by _xsave and probably some related intrinsics,
             // but let's just skip it for now.
             if *cpuid == "XSS" {
@@ -181,7 +183,7 @@ fn verify_all_signatures() {
         // TODO: we should test this, but it generates too many failures right
         // now
         if false {
-            if rust.instrs.len() == 0 {
+            if rust.instrs.is_empty() {
                 assert_eq!(
                     intel.instruction.len(),
                     0,
@@ -192,8 +194,8 @@ fn verify_all_signatures() {
             // If intel doesn't list any instructions and we do then don't
             // bother trying to look for instructions in intel, we've just got
             // some extra assertions on our end.
-            } else if intel.instruction.len() > 0 {
-                for instr in rust.instrs.iter() {
+            } else if !intel.instruction.is_empty() {
+                for instr in rust.instrs {
                     assert!(
                         intel
                             .instruction
@@ -208,25 +210,22 @@ fn verify_all_signatures() {
         }
 
         // Make sure we've got the right return type.
-        match rust.ret {
-            Some(t) => equate(t, &intel.rettype, &rust.name),
-            None => {
-                assert!(
-                    intel.rettype == "" || intel.rettype == "void",
-                    "{} returns `{}` with intel, void in rust",
-                    rust.name,
-                    intel.rettype
-                );
-            }
+        if let Some(t) = rust.ret {
+            equate(t, &intel.rettype, rust.name);
+        } else {
+            assert!(
+                intel.rettype == "" || intel.rettype == "void",
+                "{} returns `{}` with intel, void in rust",
+                rust.name,
+                intel.rettype
+            );
         }
 
         // If there's no arguments on Rust's side intel may list one "void"
         // argument, so handle that here.
-        if rust.arguments.len() == 0 {
-            if intel.parameters.len() == 1 {
-                assert_eq!(intel.parameters[0].type_, "void");
-                continue;
-            }
+        if rust.arguments.is_empty() && intel.parameters.len() == 1 {
+            assert_eq!(intel.parameters[0].type_, "void");
+            continue;
         }
 
         // Otherwise we want all parameters to be exactly the same

From 4b4ea61970bc84fa131e2b258a758c8a8163ff45 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 4 Jan 2018 16:44:27 +0100
Subject: [PATCH 3/3] add an exception for intrinsics using cvtpi2ps

---
 stdsimd-test/src/lib.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs
index 1ec3d96920..eec17111f6 100644
--- a/stdsimd-test/src/lib.rs
+++ b/stdsimd-test/src/lib.rs
@@ -335,6 +335,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
         // guess?) we probably can't do much about it...
         "vzeroall" | "vzeroupper" if cfg!(windows) => 30,
 
+        // Intrinsics using `cvtpi2ps` are typically "composites" and in some
+        // cases exceed the limit.
+        "cvtpi2ps" => 25,
+
         _ => 20,
     };
     let probably_only_one_instruction =