From b36c7e88f2056a664e094c077fe997d47ebf5d87 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 22 Dec 2017 13:54:54 +0100 Subject: [PATCH 1/3] Completes SSE and adds some MMX intrinsics MMX: - `_mm_cmpgt_pi{8,16,32}` - `_mm_unpack{hi,lo}_pi{8,16,32}` SSE (is now complete): - `_mm_cvtp{i,u}{8,16}_ps` - add test for `_m_pmulhuw` --- coresimd/src/x86/i586/sse.rs | 3 +- coresimd/src/x86/i686/mmx.rs | 134 +++++++++++--- coresimd/src/x86/i686/sse.rs | 69 +++++--- coresimd/src/x86/i686/sse2.rs | 23 ++- stdsimd-test/assert-instr-macro/src/lib.rs | 3 +- stdsimd-verify/build.rs | 4 +- stdsimd-verify/src/lib.rs | 197 ++++++++++----------- stdsimd-verify/tests/x86-intel.rs | 148 +++++++++------- 8 files changed, 351 insertions(+), 230 deletions(-) diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs index 2f5cc78c4a..d3325701c6 100644 --- a/coresimd/src/x86/i586/sse.rs +++ b/coresimd/src/x86/i586/sse.rs @@ -3299,7 +3299,8 @@ mod tests { use v64::*; let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7)); - let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1))); + let mut mem = + ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1))); sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a); assert_eq!(a, *mem); } diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs index 08f9f46f9b..0de5154e9d 100644 --- a/coresimd/src/x86/i686/mmx.rs +++ b/coresimd/src/x86/i686/mmx.rs @@ -16,7 +16,7 @@ use stdsimd_test::assert_instr; /// Constructs a 64-bit integer vector initialized to zero. #[inline(always)] -#[target_feature = "+mmx,+sse"] +#[target_feature = "+mmx"] // FIXME: this produces a movl instead of xorps on x86 // FIXME: this produces a xor intrinsic instead of xorps on x86_64 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))] @@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 { /// Positive values greater than 0x7F are saturated to 0x7F. Negative values /// less than 0x80 are saturated to 0x80. #[inline(always)] -#[target_feature = "+mmx,+sse"] +#[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(packsswb))] pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { packsswb(a, b) @@ -42,17 +42,14 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { /// Positive values greater than 0x7F are saturated to 0x7F. Negative values /// less than 0x80 are saturated to 0x80. #[inline(always)] -#[target_feature = "+mmx,+sse"] +#[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(packssdw))] pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 { packssdw(a, b) } -/// Compares the 8-bit integer elements of two 64-bit integer vectors of -/// [8 x i8] to determine if the element of the first vector is greater than -/// the corresponding element of the second vector. -/// -/// The comparison yields 0 for false, 0xFF for true. +/// Compares whether each element of `a` is greater than the corresponding +/// element of `b` returning `0` for `false` and `-1` for `true`. #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(pcmpgtb))] @@ -60,11 +57,8 @@ pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 { pcmpgtb(a, b) } -/// Compares the 16-bit integer elements of two 64-bit integer vectors of -/// [4 x i16] to determine if the element of the first vector is greater than -/// the corresponding element of the second vector. -/// -/// The comparison yields 0 for false, 0xFFFF for true. +/// Compares whether each element of `a` is greater than the corresponding +/// element of `b` returning `0` for `false` and `-1` for `true`. #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(pcmpgtw))] @@ -72,8 +66,17 @@ pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 { pcmpgtw(a, b) } -/// Unpacks the upper 32 bits from two 64-bit integer vectors of -/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. +/// Compares whether each element of `a` is greater than the corresponding +/// element of `b` returning `0` for `false` and `-1` for `true`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(pcmpgtd))] +pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 { + pcmpgtd(a, b) +} + +/// Unpacks the upper two elements from two `i16x4` vectors and interleaves +/// them into the result: `[a.2, b.2, a.3, b.3]`. #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected @@ -81,8 +84,17 @@ pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 { punpckhwd(a, b) } -/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] -/// and interleaves them into a 64-bit integer vector of [8 x i8]. +/// Unpacks the upper four elements from two `i8x8` vectors and interleaves +/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(punpckhbw))] +pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 { + punpckhbw(a, b) +} + +/// Unpacks the lower four elements from two `i8x8` vectors and interleaves +/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`. #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(punpcklbw))] @@ -90,8 +102,8 @@ pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 { punpcklbw(a, b) } -/// Unpacks the lower 32 bits from two 64-bit integer vectors of -/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. +/// Unpacks the lower two elements from two `i16x4` vectors and interleaves +/// them into the result: `[a.0 b.0 a.1 b.1]`. #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(punpcklwd))] @@ -99,6 +111,24 @@ pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 { punpcklwd(a, b) } +/// Unpacks the upper element from two `i32x2` vectors and interleaves them +/// into the result: `[a.1, b.1]`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(punpckhdq))] +pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 { + punpckhdq(a, b) +} + +/// Unpacks the lower element from two `i32x2` vectors and interleaves them +/// into the result: `[a.0, b.0]`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(punpckldq))] +pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 { + punpckldq(a, b) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.mmx.packsswb"] @@ -109,12 +139,20 @@ extern "C" { fn pcmpgtb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pcmpgt.w"] fn pcmpgtw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pcmpgt.d"] + fn pcmpgtd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpckhwd"] fn punpckhwd(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpcklbw"] - fn punpcklbw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpcklwd"] fn punpcklwd(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpckhbw"] + fn punpckhbw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpcklbw"] + fn punpcklbw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpckhdq"] + fn punpckhdq(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpckldq"] + fn punpckldq(a: __m64, b: __m64) -> __m64; } #[cfg(test)] @@ -123,13 +161,13 @@ mod tests { use x86::i686::mmx; use stdsimd_test::simd_test; - #[simd_test = "sse"] // FIXME: should be mmx + #[simd_test = "mmx"] unsafe fn _mm_setzero_si64() { let r: __m64 = ::std::mem::transmute(0_i64); assert_eq!(r, mmx::_mm_setzero_si64()); } - #[simd_test = "sse"] // FIXME: should be mmx + #[simd_test = "mmx"] unsafe fn _mm_packs_pi16() { let a = i16x4::new(-1, 2, -3, 4); let b = i16x4::new(-5, 6, -7, 8); @@ -137,7 +175,7 @@ mod tests { assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into()))); } - #[simd_test = "sse"] // FIXME: should be mmx + #[simd_test = "mmx"] unsafe fn _mm_packs_pi32() { let a = i32x2::new(-1, 2); let b = i32x2::new(-5, 6); @@ -162,11 +200,23 @@ mod tests { } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 5, 6, 7); - let r = i16x4::new(2, 6, 3, 7); - assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into()))); + unsafe fn _mm_cmpgt_pi32() { + let a = i32x2::new(0, 3); + let b = i32x2::new(1, 2); + let r0 = i32x2::new(0, -1); + let r1 = i32x2::new(-1, 0); + + assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into()); + assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into()); + } + + #[simd_test = "mmx"] + unsafe fn _mm_unpackhi_pi8() { + let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15); + let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14); + let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14); + + assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into()); } #[simd_test = "mmx"] @@ -177,6 +227,14 @@ mod tests { assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into()))); } + #[simd_test = "mmx"] + unsafe fn _mm_unpackhi_pi16() { + let a = i16x4::new(0, 1, 2, 3); + let b = i16x4::new(4, 5, 6, 7); + let r = i16x4::new(2, 6, 3, 7); + assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into()))); + } + #[simd_test = "mmx"] unsafe fn _mm_unpacklo_pi16() { let a = i16x4::new(0, 1, 2, 3); @@ -184,4 +242,22 @@ mod tests { let r = i16x4::new(0, 4, 1, 5); assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into()))); } + + #[simd_test = "mmx"] + unsafe fn _mm_unpackhi_pi32() { + let a = i32x2::new(0, 3); + let b = i32x2::new(1, 2); + let r = i32x2::new(3, 2); + + assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into()); + } + + #[simd_test = "mmx"] + unsafe fn _mm_unpacklo_pi32() { + let a = i32x2::new(0, 3); + let b = i32x2::new(1, 2); + let r = i32x2::new(0, 1); + + assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into()); + } } diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs index d87eadcfe7..e91db455c8 100644 --- a/coresimd/src/x86/i686/sse.rs +++ b/coresimd/src/x86/i686/sse.rs @@ -221,25 +221,46 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 { _mm_cvtpi32_ps(a, b) } -/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x -/// float]. +/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 { + let b = mmx::_mm_setzero_si64(); + let b = mmx::_mm_cmpgt_pi8(b, a); + let b = mmx::_mm_unpacklo_pi8(a, b); + _mm_cvtpi16_ps(b) +} + +/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. +#[inline(always)] +#[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(cvtpi2ps))] +pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 { + let b = mmx::_mm_setzero_si64(); + let b = mmx::_mm_unpacklo_pi8(a, b); + _mm_cvtpi16_ps(b) +} + +/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. +#[inline(always)] +#[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); - let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a); + let b = mmx::_mm_cmpgt_pi16(b, a); let c = mmx::_mm_unpackhi_pi16(a, b); let r = i586::_mm_setzero_ps(); - let r = cvtpi2ps(r, mem::transmute(c)); + let r = cvtpi2ps(r, c); let r = i586::_mm_movelh_ps(r, r); let c = mmx::_mm_unpacklo_pi16(a, b); - cvtpi2ps(r, mem::transmute(c)) + cvtpi2ps(r, c) } -/// Converts a 64-bit vector of 16-bit unsigned integer values into a -/// 128-bit vector of [4 x float]. +/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); let c = mmx::_mm_unpackhi_pi16(a, b); @@ -250,27 +271,6 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 { cvtpi2ps(r, c) } -/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] -/// into a 128-bit vector of [4 x float]. -#[inline(always)] -#[target_feature = "+sse"] -pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 { - let b = mmx::_mm_setzero_si64(); - let b = mmx::_mm_cmpgt_pi8(b, a); - let b = mmx::_mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(b) -} - -/// Converts the lower four unsigned 8-bit integer values from a 64-bit -/// vector of [8 x u8] into a 128-bit vector of [4 x float]. -#[inline(always)] -#[target_feature = "+sse"] -pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 { - let b = mmx::_mm_setzero_si64(); - let b = mmx::_mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(b) -} - /// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of [2 x i32] into a 128-bit vector of [4 x float]. #[inline(always)] @@ -512,6 +512,13 @@ mod tests { assert_eq!(r, u16x4::splat(15)); } + #[simd_test = "sse"] + unsafe fn _m_pmulhuw() { + let (a, b) = (u16x4::splat(1000), u16x4::splat(1001)); + let r = sse::_m_pmulhuw(a.into(), b.into()); + assert_eq!(r, u16x4::splat(15).into()); + } + #[simd_test = "sse"] unsafe fn _mm_avg_pu8() { let (a, b) = (u8x8::splat(3), u8x8::splat(9)); @@ -601,7 +608,11 @@ mod tests { let a = i8x8::splat(9); let mask = i8x8::splat(0).replace(2, 0x80u8 as i8); let mut r = i8x8::splat(0); - sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8); + sse::_mm_maskmove_si64( + a.into(), + mask.into(), + &mut r as *mut _ as *mut i8, + ); assert_eq!(r, i8x8::splat(0).replace(2, 9)); let mut r = i8x8::splat(0); diff --git a/coresimd/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs index 59dbf3ca9e..c9b5fd3fa0 100644 --- a/coresimd/src/x86/i686/sse2.rs +++ b/coresimd/src/x86/i686/sse2.rs @@ -76,7 +76,8 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 { /// integer. #[inline(always)] #[target_feature = "+sse2"] -// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong instr? +// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong +// instr? pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 { mem::transmute(a.extract(0)) } @@ -85,7 +86,8 @@ pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 { /// upper bits. #[inline(always)] #[target_feature = "+sse2"] -// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong instr? +// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong +// instr? pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 { i64x2::new(mem::transmute(a), 0) } @@ -175,7 +177,8 @@ mod tests { #[simd_test = "sse2"] unsafe fn _mm_set_epi64() { - let r = sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); + let r = + sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); assert_eq!(r, i64x2::new(2, 1)); } @@ -187,7 +190,8 @@ mod tests { #[simd_test = "sse2"] unsafe fn _mm_setr_epi64() { - let r = sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); + let r = + sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); assert_eq!(r, i64x2::new(1, 2)); } @@ -199,7 +203,16 @@ mod tests { #[simd_test = "sse2"] unsafe fn _mm_movpi64_epi64() { - let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(5, 0, 0, 0, 0, 0, 0, 0))); + let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new( + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ))); assert_eq!(r, i64x2::new(5, 0)); } diff --git a/stdsimd-test/assert-instr-macro/src/lib.rs b/stdsimd-test/assert-instr-macro/src/lib.rs index 38a42c4ec9..4d347f89d9 100644 --- a/stdsimd-test/assert-instr-macro/src/lib.rs +++ b/stdsimd-test/assert-instr-macro/src/lib.rs @@ -42,8 +42,7 @@ pub fn assert_instr( let assert_name = syn::Ident::from( &format!("assert_{}_{}", name.as_ref(), instr.as_ref())[..], ); - let shim_name = - syn::Ident::from(format!("{}_shim", name.as_ref())); + let shim_name = syn::Ident::from(format!("{}_shim", name.as_ref())); let (to_test, test_name) = if invoc.args.len() == 0 { (TokenStream::empty(), &func.ident) } else { diff --git a/stdsimd-verify/build.rs b/stdsimd-verify/build.rs index 3273777679..28554bac42 100644 --- a/stdsimd-verify/build.rs +++ b/stdsimd-verify/build.rs @@ -12,11 +12,11 @@ fn walk(root: &Path) { let file = file.unwrap(); if file.file_type().unwrap().is_dir() { walk(&file.path()); - continue + continue; } let path = file.path(); if path.extension().and_then(|s| s.to_str()) != Some("rs") { - continue + continue; } println!("cargo:rerun-if-changed={}", path.display()); diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs index 5660cd05e9..b00398a001 100644 --- a/stdsimd-verify/src/lib.rs +++ b/stdsimd-verify/src/lib.rs @@ -1,10 +1,10 @@ #![feature(proc_macro)] -extern crate proc_macro; extern crate proc_macro2; -extern crate syn; +extern crate proc_macro; #[macro_use] extern crate quote; +extern crate syn; use std::path::Path; use std::fs::File; @@ -42,21 +42,21 @@ pub fn x86_functions(input: TokenStream) -> TokenStream { _ => return false, } if f.unsafety.is_none() { - return false + return false; } - f.attrs.iter() + f.attrs + .iter() .filter_map(|a| a.meta_item()) - .any(|a| { - match a { - syn::MetaItem::NameValue(i) => i.ident == "target_feature", - _ => false, - } + .any(|a| match a { + syn::MetaItem::NameValue(i) => i.ident == "target_feature", + _ => false, }) }); let input = proc_macro2::TokenStream::from(input); - let functions = functions.iter() + let functions = functions + .iter() .map(|f| { let name = f.ident; // println!("{}", name); @@ -96,53 +96,51 @@ pub fn x86_functions(input: TokenStream) -> TokenStream { fn to_type(t: &syn::Type) -> Tokens { match *t { - syn::Type::Path(ref p) => { - match extract_path_ident(&p.path).as_ref() { - "__m128i" => my_quote! { &I8x16 }, - "__m256i" => my_quote! { &I8x32 }, - "__m64" => my_quote! { &I8x8 }, - "bool" => my_quote! { &BOOL }, - "f32" => my_quote! { &F32 }, - "f32x4" => my_quote! { &F32x4 }, - "f32x8" => my_quote! { &F32x8 }, - "f64" => my_quote! { &F64 }, - "f64x2" => my_quote! { &F64x2 }, - "f64x4" => my_quote! { &F64x4 }, - "i16" => my_quote! { &I16 }, - "i16x16" => my_quote! { &I16x16 }, - "i16x4" => my_quote! { &I16x4 }, - "i16x8" => my_quote! { &I16x8 }, - "i32" => my_quote! { &I32 }, - "i32x2" => my_quote! { &I32x2 }, - "i32x4" => my_quote! { &I32x4 }, - "i32x8" => my_quote! { &I32x8 }, - "i64" => my_quote! { &I64 }, - "i64x2" => my_quote! { &I64x2 }, - "i64x4" => my_quote! { &I64x4 }, - "i8" => my_quote! { &I8 }, - "i8x16" => my_quote! { &I8x16 }, - "i8x32" => my_quote! { &I8x32 }, - "i8x8" => my_quote! { &I8x8 }, - "u16x4" => my_quote! { &U16x4 }, - "u16x8" => my_quote! { &U16x8 }, - "u32" => my_quote! { &U32 }, - "u32x2" => my_quote! { &U32x2 }, - "u32x4" => my_quote! { &U32x4 }, - "u32x8" => my_quote! { &U32x8 }, - "u64" => my_quote! { &U64 }, - "u64x2" => my_quote! { &U64x2 }, - "u64x4" => my_quote! { &U64x4 }, - "u8" => my_quote! { &U8 }, - "u16" => my_quote! { &U16 }, - "u8x16" => my_quote! { &U8x16 }, - "u8x32" => my_quote! { &U8x32 }, - "u16x16" => my_quote! { &U16x16 }, - "u8x8" => my_quote! { &U8x8 }, - s => panic!("unspported type: {}", s), - } - } - syn::Type::Ptr(syn::TypePtr { ref elem, .. }) | - syn::Type::Reference(syn::TypeReference { ref elem, .. }) => { + syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() { + "__m128i" => my_quote! { &I8x16 }, + "__m256i" => my_quote! { &I8x32 }, + "__m64" => my_quote! { &I8x8 }, + "bool" => my_quote! { &BOOL }, + "f32" => my_quote! { &F32 }, + "f32x4" => my_quote! { &F32x4 }, + "f32x8" => my_quote! { &F32x8 }, + "f64" => my_quote! { &F64 }, + "f64x2" => my_quote! { &F64x2 }, + "f64x4" => my_quote! { &F64x4 }, + "i16" => my_quote! { &I16 }, + "i16x16" => my_quote! { &I16x16 }, + "i16x4" => my_quote! { &I16x4 }, + "i16x8" => my_quote! { &I16x8 }, + "i32" => my_quote! { &I32 }, + "i32x2" => my_quote! { &I32x2 }, + "i32x4" => my_quote! { &I32x4 }, + "i32x8" => my_quote! { &I32x8 }, + "i64" => my_quote! { &I64 }, + "i64x2" => my_quote! { &I64x2 }, + "i64x4" => my_quote! { &I64x4 }, + "i8" => my_quote! { &I8 }, + "i8x16" => my_quote! { &I8x16 }, + "i8x32" => my_quote! { &I8x32 }, + "i8x8" => my_quote! { &I8x8 }, + "u16x4" => my_quote! { &U16x4 }, + "u16x8" => my_quote! { &U16x8 }, + "u32" => my_quote! { &U32 }, + "u32x2" => my_quote! { &U32x2 }, + "u32x4" => my_quote! { &U32x4 }, + "u32x8" => my_quote! { &U32x8 }, + "u64" => my_quote! { &U64 }, + "u64x2" => my_quote! { &U64x2 }, + "u64x4" => my_quote! { &U64x4 }, + "u8" => my_quote! { &U8 }, + "u16" => my_quote! { &U16 }, + "u8x16" => my_quote! { &U8x16 }, + "u8x32" => my_quote! { &U8x32 }, + "u16x16" => my_quote! { &U16x16 }, + "u8x8" => my_quote! { &U8x8 }, + s => panic!("unspported type: {}", s), + }, + syn::Type::Ptr(syn::TypePtr { ref elem, .. }) + | syn::Type::Reference(syn::TypeReference { ref elem, .. }) => { let tokens = to_type(&elem); my_quote! { &Type::Ptr(#tokens) } } @@ -162,7 +160,7 @@ fn extract_path_ident(path: &syn::Path) -> syn::Ident { } match path.segments.first().unwrap().item().arguments { syn::PathArguments::None => {} - _ => panic!("unsupported path that has path arguments") + _ => panic!("unsupported path that has path arguments"), } path.segments.first().unwrap().item().ident } @@ -172,71 +170,72 @@ fn walk(root: &Path, files: &mut Vec) { let file = file.unwrap(); if file.file_type().unwrap().is_dir() { walk(&file.path(), files); - continue + continue; } let path = file.path(); if path.extension().and_then(|s| s.to_str()) != Some("rs") { - continue + continue; } let mut contents = String::new(); - File::open(&path).unwrap().read_to_string(&mut contents).unwrap(); - - files.push(syn::parse_str::(&contents).expect("failed to parse")); + File::open(&path) + .unwrap() + .read_to_string(&mut contents) + .unwrap(); + + files.push( + syn::parse_str::(&contents).expect("failed to parse"), + ); } } fn find_instrs(attrs: &[syn::Attribute]) -> Vec { - attrs.iter() + attrs + .iter() .filter_map(|a| a.meta_item()) - .filter_map(|a| { - match a { - syn::MetaItem::List(i) => { - if i.ident == "cfg_attr" { - i.nested.into_iter().next() - } else { - None - } + .filter_map(|a| match a { + syn::MetaItem::List(i) => { + if i.ident == "cfg_attr" { + i.nested.into_iter().next() + } else { + None } - _ => None, } + _ => None, }) - .filter_map(|nested| { - match nested { - syn::NestedMetaItem::MetaItem(syn::MetaItem::List(i)) => { - if i.ident == "assert_instr" { - i.nested.into_iter().next() - } else { - None - } + .filter_map(|nested| match nested { + syn::NestedMetaItem::MetaItem(syn::MetaItem::List(i)) => { + if i.ident == "assert_instr" { + i.nested.into_iter().next() + } else { + None } - _ => None, } + _ => None, }) - .filter_map(|nested| { - match nested { - syn::NestedMetaItem::MetaItem(syn::MetaItem::Term(i)) => Some(i), - _ => None, - } + .filter_map(|nested| match nested { + syn::NestedMetaItem::MetaItem(syn::MetaItem::Term(i)) => Some(i), + _ => None, }) .collect() } -fn find_target_feature(name: syn::Ident, attrs: &[syn::Attribute]) -> syn::Lit { - attrs.iter() +fn find_target_feature( + name: syn::Ident, attrs: &[syn::Attribute] +) -> syn::Lit { + attrs + .iter() .filter_map(|a| a.meta_item()) - .filter_map(|a| { - match a { - syn::MetaItem::NameValue(i) => { - if i.ident == "target_feature" { - Some(i.lit) - } else { - None - } + .filter_map(|a| match a { + syn::MetaItem::NameValue(i) => { + if i.ident == "target_feature" { + Some(i.lit) + } else { + None } - _ => None, } + _ => None, }) .next() - .expect(&format!("failed to find target_feature for {}",name)) + .expect(&format!("failed to find target_feature for {}", name)) } diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs index b4a8a4b2b9..ce425bdd6a 100644 --- a/stdsimd-verify/tests/x86-intel.rs +++ b/stdsimd-verify/tests/x86-intel.rs @@ -42,7 +42,7 @@ static I8x32: Type = Type::Signed(8, 32); static I8x8: Type = Type::Signed(8, 8); static U16: Type = Type::PrimUnsigned(16); static U16x16: Type = Type::Unsigned(16, 16); -static U16x4: Type = Type::Unsigned(16, 4); +// static U16x4: Type = Type::Unsigned(16, 4); static U16x8: Type = Type::Unsigned(16, 8); static U32: Type = Type::PrimUnsigned(32); static U32x2: Type = Type::Unsigned(32, 2); @@ -54,7 +54,7 @@ static U64x4: Type = Type::Unsigned(64, 4); static U8: Type = Type::PrimUnsigned(8); static U8x16: Type = Type::Unsigned(8, 16); static U8x32: Type = Type::Unsigned(8, 32); -static U8x8: Type = Type::Unsigned(8, 8); +// static U8x8: Type = Type::Unsigned(8, 8); #[derive(Debug)] enum Type { @@ -72,8 +72,7 @@ x86_functions!(static FUNCTIONS); #[derive(Deserialize)] struct Data { - #[serde(rename = "intrinsic", default)] - intrinsics: Vec, + #[serde(rename = "intrinsic", default)] intrinsics: Vec, } #[derive(Deserialize)] @@ -81,18 +80,14 @@ struct Intrinsic { rettype: String, name: String, tech: String, - #[serde(rename = "CPUID", default)] - cpuid: Vec, - #[serde(rename = "parameter", default)] - parameters: Vec, - #[serde(default)] - instruction: Vec, + #[serde(rename = "CPUID", default)] cpuid: Vec, + #[serde(rename = "parameter", default)] parameters: Vec, + #[serde(default)] instruction: Vec, } #[derive(Deserialize)] struct Parameter { - #[serde(rename = "type")] - type_: String, + #[serde(rename = "type")] type_: String, } #[derive(Deserialize)] @@ -113,18 +108,20 @@ fn verify_all_signatures() { let xml = include_bytes!("../x86-intel.xml"); let xml = &xml[..]; - let data: Data = serde_xml_rs::deserialize(xml).expect("failed to deserialize xml"); + let data: Data = + serde_xml_rs::deserialize(xml).expect("failed to deserialize xml"); let mut map = HashMap::new(); for intrinsic in data.intrinsics.iter() { - // This intrinsic has multiple definitions in the XML, so just ignore it. + // This intrinsic has multiple definitions in the XML, so just ignore + // it. if intrinsic.name == "_mm_prefetch" { - continue + continue; } // These'll need to get added eventually, but right now they have some // duplicate names in the XML which we're not dealing with yet if intrinsic.tech == "AVX-512" { - continue + continue; } assert!(map.insert(&intrinsic.name[..], intrinsic).is_none()); @@ -133,13 +130,14 @@ fn verify_all_signatures() { for rust in FUNCTIONS { // This was ignored above, we ignore it here as well. if rust.name == "_mm_prefetch" { - continue + continue; } // these are all AMD-specific intrinsics - if rust.target_feature.contains("sse4a") || - rust.target_feature.contains("tbm") { - continue + if rust.target_feature.contains("sse4a") + || rust.target_feature.contains("tbm") + { + continue; } let intel = match map.get(rust.name) { @@ -147,15 +145,15 @@ fn verify_all_signatures() { None => panic!("missing intel definition for {}", rust.name), }; - // Verify that all `#[target_feature]` annotations are correct, ensuring - // that we've actually enabled the right instruction set for this - // intrinsic. + // Verify that all `#[target_feature]` annotations are correct, + // ensuring that we've actually enabled the right instruction + // set for this intrinsic. assert!(intel.cpuid.len() > 0, "missing cpuid for {}", rust.name); for cpuid in intel.cpuid.iter() { // this is needed by _xsave and probably some related intrinsics, // but let's just skip it for now. if *cpuid == "XSS" { - continue + continue; } let cpuid = cpuid @@ -163,36 +161,48 @@ fn verify_all_signatures() { .flat_map(|c| c.to_lowercase()) .collect::(); - // Normalize `bmi1` to `bmi` as apparently that's what we're calling - // it. + // Normalize `bmi1` to `bmi` as apparently that's what we're + // calling it. let cpuid = if cpuid == "bmi1" { String::from("bmi") } else { cpuid }; - assert!(rust.target_feature.contains(&cpuid), - "intel cpuid `{}` not in `{}` for {}", - cpuid, - rust.target_feature, - rust.name); + assert!( + rust.target_feature.contains(&cpuid), + "intel cpuid `{}` not in `{}` for {}", + cpuid, + rust.target_feature, + rust.name + ); } // TODO: we should test this, but it generates too many failures right // now if false { if rust.instrs.len() == 0 { - assert_eq!(intel.instruction.len(), 0, - "instruction not listed for {}", rust.name); + assert_eq!( + intel.instruction.len(), + 0, + "instruction not listed for {}", + rust.name + ); // If intel doesn't list any instructions and we do then don't // bother trying to look for instructions in intel, we've just got // some extra assertions on our end. } else if intel.instruction.len() > 0 { for instr in rust.instrs.iter() { - assert!(intel.instruction.iter().any(|a| a.name.starts_with(instr)), - "intel failed to list `{}` as an instruction for `{}`", - instr, rust.name); + assert!( + intel + .instruction + .iter() + .any(|a| a.name.starts_with(instr)), + "intel failed to list `{}` as an instruction for `{}`", + instr, + rust.name + ); } } } @@ -201,9 +211,12 @@ fn verify_all_signatures() { match rust.ret { Some(t) => equate(t, &intel.rettype, &rust.name), None => { - assert!(intel.rettype == "" || intel.rettype == "void", - "{} returns `{}` with intel, void in rust", - rust.name, intel.rettype); + assert!( + intel.rettype == "" || intel.rettype == "void", + "{} returns `{}` with intel, void in rust", + rust.name, + intel.rettype + ); } } @@ -212,13 +225,17 @@ fn verify_all_signatures() { if rust.arguments.len() == 0 { if intel.parameters.len() == 1 { assert_eq!(intel.parameters[0].type_, "void"); - continue + continue; } } // Otherwise we want all parameters to be exactly the same - assert_eq!(rust.arguments.len(), intel.parameters.len(), - "wrong number of arguments on {}", rust.name); + assert_eq!( + rust.arguments.len(), + intel.parameters.len(), + "wrong number of arguments on {}", + rust.name + ); for (a, b) in intel.parameters.iter().zip(rust.arguments) { equate(b, &a.type_, &intel.name); } @@ -255,20 +272,21 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) { (&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {} (&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {} - (&Type::Signed(a, b), "__m128i") | - (&Type::Unsigned(a, b), "__m128i") | - (&Type::Ptr(&Type::Signed(a, b)), "__m128i*") | - (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {} + (&Type::Signed(a, b), "__m128i") + | (&Type::Unsigned(a, b), "__m128i") + | (&Type::Ptr(&Type::Signed(a, b)), "__m128i*") + | (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {} - (&Type::Signed(a, b), "__m256i") | - (&Type::Unsigned(a, b), "__m256i") | - (&Type::Ptr(&Type::Signed(a, b)), "__m256i*") | - (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*") if (a as u32) * (b as u32) == 256 => {} + (&Type::Signed(a, b), "__m256i") + | (&Type::Unsigned(a, b), "__m256i") + | (&Type::Ptr(&Type::Signed(a, b)), "__m256i*") + | (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*") + if (a as u32) * (b as u32) == 256 => {} - (&Type::Signed(a, b), "__m64") | - (&Type::Unsigned(a, b), "__m64") | - (&Type::Ptr(&Type::Signed(a, b)), "__m64*") | - (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {} + (&Type::Signed(a, b), "__m64") + | (&Type::Unsigned(a, b), "__m64") + | (&Type::Ptr(&Type::Signed(a, b)), "__m64*") + | (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {} (&Type::Float(32, 4), "__m128") => {} (&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {} @@ -291,20 +309,24 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) { // Intel says the argument is i32... (&Type::PrimSigned(8), "int") if intrinsic == "_mm_insert_epi8" => {} - // This is a macro (?) in C which seems to mutate its arguments, but that - // means that we're taking pointers to arguments in rust as we're not - // exposing it as a macro. - (&Type::Ptr(&Type::Float(32, 4)), "__m128") if intrinsic == "_MM_TRANSPOSE4_PS" => {} + // This is a macro (?) in C which seems to mutate its arguments, but + // that means that we're taking pointers to arguments in rust + // as we're not exposing it as a macro. + (&Type::Ptr(&Type::Float(32, 4)), "__m128") + if intrinsic == "_MM_TRANSPOSE4_PS" => {} // These intrinsics return an `int` in C but they're always either the // bit 1 or 0 so we switch it to returning `bool` in rust (&Type::Bool, "int") - if intrinsic.starts_with("_mm_comi") && intrinsic.ends_with("_sd") - => {} + if intrinsic.starts_with("_mm_comi") + && intrinsic.ends_with("_sd") => {} (&Type::Bool, "int") - if intrinsic.starts_with("_mm_ucomi") && intrinsic.ends_with("_sd") - => {} + if intrinsic.starts_with("_mm_ucomi") + && intrinsic.ends_with("_sd") => {} - _ => panic!("failed to equate: `{}` and {:?} for {}", intel, t, intrinsic), + _ => panic!( + "failed to equate: `{}` and {:?} for {}", + intel, t, intrinsic + ), } } From 275071e6b5091dbc2ece87381a0262654515603d Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 4 Jan 2018 12:19:11 +0100 Subject: [PATCH 2/3] fmt and clippy --- coresimd/src/x86/i586/bswap.rs | 4 +++ coresimd/src/x86/i686/sse4a.rs | 4 +-- coresimd/src/x86/i686/ssse3.rs | 6 ++++- coresimd/src/x86/macros.rs | 10 ++++---- coresimd/src/x86/x86_64/sse41.rs | 2 ++ stdsimd-test/src/lib.rs | 38 ++++++++++++++++------------ stdsimd-verify/tests/x86-intel.rs | 41 +++++++++++++++---------------- 7 files changed, 60 insertions(+), 45 deletions(-) diff --git a/coresimd/src/x86/i586/bswap.rs b/coresimd/src/x86/i586/bswap.rs index 15cd0b1c47..8bac167569 100644 --- a/coresimd/src/x86/i586/bswap.rs +++ b/coresimd/src/x86/i586/bswap.rs @@ -1,3 +1,7 @@ +//! Byte swap intrinsics. + +#![cfg_attr(feature = "cargo-clippy", allow(stutter))] + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs index 9c47617ff6..884097e871 100644 --- a/coresimd/src/x86/i686/sse4a.rs +++ b/coresimd/src/x86/i686/sse4a.rs @@ -1,4 +1,4 @@ -//! `i686`'s Streaming SIMD Extensions 4a (SSE4a) +//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`) use core::mem; use v128::*; @@ -52,7 +52,7 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 { #[target_feature = "+sse4a"] #[cfg_attr(test, assert_instr(insertq))] pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 { - insertq(x, mem::transmute(y)) + insertq(x, y) } /// Non-temporal store of `a.0` into `p`. diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs index bf31cbb647..965973598e 100644 --- a/coresimd/src/x86/i686/ssse3.rs +++ b/coresimd/src/x86/i686/ssse3.rs @@ -256,7 +256,11 @@ mod tests { unsafe fn _mm_alignr_pi8() { let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32); let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32); - let r = ssse3::_mm_alignr_pi8(u8x8::from(a).into(), u8x8::from(b).into(), 4); + let r = ssse3::_mm_alignr_pi8( + u8x8::from(a).into(), + u8x8::from(b).into(), + 4, + ); assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); } diff --git a/coresimd/src/x86/macros.rs b/coresimd/src/x86/macros.rs index 79109fd67c..ea72c062e8 100644 --- a/coresimd/src/x86/macros.rs +++ b/coresimd/src/x86/macros.rs @@ -3,7 +3,7 @@ macro_rules! constify_imm8 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] - match $imm8 & 0b1111_1111 { + match ($imm8) & 0b1111_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), @@ -267,7 +267,7 @@ macro_rules! constify_imm8 { macro_rules! constify_imm6 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] - match $imm8 & 0b1_1111 { + match ($imm8) & 0b1_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), @@ -307,7 +307,7 @@ macro_rules! constify_imm6 { macro_rules! constify_imm4 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] - match $imm8 & 0b1111 { + match ($imm8) & 0b1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), @@ -331,7 +331,7 @@ macro_rules! constify_imm4 { macro_rules! constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] - match $imm8 & 0b111 { + match ($imm8) & 0b111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), @@ -347,7 +347,7 @@ macro_rules! constify_imm3 { macro_rules! constify_imm2 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] - match $imm8 & 0b11 { + match ($imm8) & 0b11 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), diff --git a/coresimd/src/x86/x86_64/sse41.rs b/coresimd/src/x86/x86_64/sse41.rs index b2417a8e59..20fa606d5f 100644 --- a/coresimd/src/x86/x86_64/sse41.rs +++ b/coresimd/src/x86/x86_64/sse41.rs @@ -1,3 +1,5 @@ +//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1) + use v128::*; #[cfg(test)] diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs index d41bec2564..1ec3d96920 100644 --- a/stdsimd-test/src/lib.rs +++ b/stdsimd-test/src/lib.rs @@ -304,19 +304,20 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { None => continue, }; if !part.contains("call") { - continue + continue; } // On 32-bit x86 position independent code will call itself and be // immediately followed by a `pop` to learn about the current address. // Let's not take that into account when considering whether a function // failed inlining something. - let followed_by_pop = function.instrs.get(i + 1) + let followed_by_pop = function + .instrs + .get(i + 1) .and_then(|i| i.parts.get(0)) - .map(|s| s.contains("pop")) - .unwrap_or(false); + .map_or(false, |s| s.contains("pop")); if followed_by_pop && cfg!(target_arch = "x86") { - continue + continue; } inlining_failed = true; @@ -324,14 +325,14 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { } let instruction_limit = match expected { - // cpuid returns a pretty big aggregate structure so excempt it from the - // slightly more restrictive 20 instructions below + // cpuid returns a pretty big aggregate structure so excempt it from + // the slightly more restrictive 20 instructions below "cpuid" => 30, - // Apparently on Windows LLVM generates a bunch of saves/restores of xmm - // registers around these intstructions which blows the 20 limit - // below. As it seems dictates by Windows's abi (I guess?) we probably - // can't do much about it... + // Apparently on Windows LLVM generates a bunch of saves/restores of + // xmm registers around these intstructions which blows the 20 + // limit below. As it seems dictates by Windows's abi (I + // guess?) we probably can't do much about it... "vzeroall" | "vzeroupper" if cfg!(windows) => 30, _ => 20, @@ -363,12 +364,17 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { expected ); } else if !probably_only_one_instruction { - panic!("instruction found, but the disassembly contains too many \ - instructions: #instructions = {} >= {} (limit)", - function.instrs.len(), instruction_limit); + panic!( + "instruction found, but the disassembly contains too many \ + instructions: #instructions = {} >= {} (limit)", + function.instrs.len(), + instruction_limit + ); } else if inlining_failed { - panic!("instruction found, but the disassembly contains `call` \ - instructions, which hint that inlining failed"); + panic!( + "instruction found, but the disassembly contains `call` \ + instructions, which hint that inlining failed" + ); } } diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs index ce425bdd6a..87c1a1fc87 100644 --- a/stdsimd-verify/tests/x86-intel.rs +++ b/stdsimd-verify/tests/x86-intel.rs @@ -1,5 +1,7 @@ #![feature(proc_macro)] #![allow(bad_style)] +#![cfg_attr(feature = "cargo-clippy", + allow(shadow_reuse, cast_lossless, match_same_arms))] #[macro_use] extern crate serde_derive; @@ -111,7 +113,7 @@ fn verify_all_signatures() { let data: Data = serde_xml_rs::deserialize(xml).expect("failed to deserialize xml"); let mut map = HashMap::new(); - for intrinsic in data.intrinsics.iter() { + for intrinsic in &data.intrinsics { // This intrinsic has multiple definitions in the XML, so just ignore // it. if intrinsic.name == "_mm_prefetch" { @@ -148,8 +150,8 @@ fn verify_all_signatures() { // Verify that all `#[target_feature]` annotations are correct, // ensuring that we've actually enabled the right instruction // set for this intrinsic. - assert!(intel.cpuid.len() > 0, "missing cpuid for {}", rust.name); - for cpuid in intel.cpuid.iter() { + assert!(!intel.cpuid.is_empty(), "missing cpuid for {}", rust.name); + for cpuid in &intel.cpuid { // this is needed by _xsave and probably some related intrinsics, // but let's just skip it for now. if *cpuid == "XSS" { @@ -181,7 +183,7 @@ fn verify_all_signatures() { // TODO: we should test this, but it generates too many failures right // now if false { - if rust.instrs.len() == 0 { + if rust.instrs.is_empty() { assert_eq!( intel.instruction.len(), 0, @@ -192,8 +194,8 @@ fn verify_all_signatures() { // If intel doesn't list any instructions and we do then don't // bother trying to look for instructions in intel, we've just got // some extra assertions on our end. - } else if intel.instruction.len() > 0 { - for instr in rust.instrs.iter() { + } else if !intel.instruction.is_empty() { + for instr in rust.instrs { assert!( intel .instruction @@ -208,25 +210,22 @@ fn verify_all_signatures() { } // Make sure we've got the right return type. - match rust.ret { - Some(t) => equate(t, &intel.rettype, &rust.name), - None => { - assert!( - intel.rettype == "" || intel.rettype == "void", - "{} returns `{}` with intel, void in rust", - rust.name, - intel.rettype - ); - } + if let Some(t) = rust.ret { + equate(t, &intel.rettype, rust.name); + } else { + assert!( + intel.rettype == "" || intel.rettype == "void", + "{} returns `{}` with intel, void in rust", + rust.name, + intel.rettype + ); } // If there's no arguments on Rust's side intel may list one "void" // argument, so handle that here. - if rust.arguments.len() == 0 { - if intel.parameters.len() == 1 { - assert_eq!(intel.parameters[0].type_, "void"); - continue; - } + if rust.arguments.is_empty() && intel.parameters.len() == 1 { + assert_eq!(intel.parameters[0].type_, "void"); + continue; } // Otherwise we want all parameters to be exactly the same From 4b4ea61970bc84fa131e2b258a758c8a8163ff45 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 4 Jan 2018 16:44:27 +0100 Subject: [PATCH 3/3] add an exception for intrinsics using cvtpi2ps --- stdsimd-test/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs index 1ec3d96920..eec17111f6 100644 --- a/stdsimd-test/src/lib.rs +++ b/stdsimd-test/src/lib.rs @@ -335,6 +335,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // guess?) we probably can't do much about it... "vzeroall" | "vzeroupper" if cfg!(windows) => 30, + // Intrinsics using `cvtpi2ps` are typically "composites" and in some + // cases exceed the limit. + "cvtpi2ps" => 25, + _ => 20, }; let probably_only_one_instruction =