Last missing avx and avx2 intrinsics (#258)
* avx: _mm256_cvtss_f32, avx2: _mm256_cvtsd_f64, _mm256_cvtsi256_si32

* avx2: _mm256_slli_si256, _mm256_srli_si256

And aliases:
_mm256_bslli_epi128
_mm256_bsrli_epi128
gwenn authored and alexcrichton committed Jan 2, 2018
1 parent fedf60d commit dda7157
Showing 2 changed files with 117 additions and 5 deletions.
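
The subtle point in the new byte shifts is that they operate per 128-bit lane: each half of the 256-bit vector is shifted independently, so bytes never cross the lane boundary (see the `_mm256_srli_si256` test below). As a sketch for illustration only, a hypothetical scalar model of the left shift (the real implementation defers to LLVM's `psll.dq`, as shown in the diff):

fn slli_si256_model(a: [u8; 32], shift: usize) -> [u8; 32] {
    assert!(shift <= 16);
    let mut r = [0u8; 32];
    for lane in 0..2 {
        let base = lane * 16;
        // In little-endian byte order, a left shift moves each byte
        // toward the higher-indexed end of its own 16-byte lane;
        // zeros fill the vacated low bytes.
        for i in shift..16 {
            r[base + i] = a[base + i - shift];
        }
    }
    r
}

With shift = 3 this reproduces the expected values in the `_mm256_slli_si256` test below.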
coresimd/src/x86/i586/avx.rs (15 additions, 0 deletions)
@@ -2398,6 +2398,14 @@ pub unsafe fn _mm256_storeu2_m128i(
_mm_storeu_si128(hiaddr, hi);
}

/// Returns the first element of the input vector of [8 x float].
#[inline(always)]
#[target_feature = "+avx"]
//#[cfg_attr(test, assert_instr(movss))] FIXME
pub unsafe fn _mm256_cvtss_f32(a: f32x8) -> f32 {
a.extract(0)
}

/// LLVM intrinsics used in the above functions
#[allow(improper_ctypes)]
extern "C" {
@@ -4290,4 +4298,11 @@ mod tests {
assert_eq!(hi, e_hi);
assert_eq!(lo, e_lo);
}

#[simd_test = "avx"]
unsafe fn _mm256_cvtss_f32() {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let r = avx::_mm256_cvtss_f32(a);
assert_eq!(r, 1.);
}
}
coresimd/src/x86/i586/avx2.rs (102 additions, 5 deletions)
Expand Up @@ -474,9 +474,6 @@ pub unsafe fn _mm256_broadcastw_epi16(a: i16x8) -> i16x16 {
simd_shuffle16(a, i16x8::splat(0_i16), [0_u32; 16])
}

// TODO _mm256_bslli_epi128
// TODO _mm256_bsrli_epi128

/// Compare packed 64-bit integers in `a` and `b` for equality.
#[inline(always)]
#[target_feature = "+avx2"]
@@ -2050,7 +2047,26 @@ pub unsafe fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
pslliq(a, imm8)
}

// TODO _mm256_slli_si256 (__m256i a, const int imm8)
/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
vpslldq(a, $imm8)
}
}
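// The LLVM intrinsic (`llvm.x86.avx2.psll.dq`) takes its shift amount
// in bits, so the byte count is scaled by 8 before being constified.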
constify_imm8!(imm8 * 8, call)
}

/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
_mm256_slli_si256(a, imm8)
}

/// Shift packed 32-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
@@ -2146,6 +2162,27 @@ pub unsafe fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
psravd256(a, count)
}

/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
vpsrldq(a, $imm8)
}
}
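// As with the left shift, `llvm.x86.avx2.psrl.dq` counts bits, hence `imm8 * 8`.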
constify_imm8!(imm8 * 8, call)
}

/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
_mm256_srli_si256(a, imm8)
}

/// Shift packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
@@ -2698,6 +2735,22 @@ pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i64 {
a.extract_unchecked(imm8)
}

/// Returns the first element of the input vector of [4 x double].
#[inline(always)]
#[target_feature = "+avx2"]
//#[cfg_attr(test, assert_instr(movsd))] FIXME
pub unsafe fn _mm256_cvtsd_f64(a: f64x4) -> f64 {
a.extract(0)
}

/// Returns the first element of the input vector of [8 x i32].
#[inline(always)]
#[target_feature = "+avx2"]
//#[cfg_attr(test, assert_instr(movd))] FIXME
pub unsafe fn _mm256_cvtsi256_si32(a: i32x8) -> i32 {
a.extract(0)
}

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx2.pabs.b"]
@@ -2938,7 +2991,10 @@ extern "C" {
fn vpgatherqps(
src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8
) -> f32x4;

#[link_name = "llvm.x86.avx2.psll.dq"]
fn vpslldq(a: __m256i, b: i32) -> __m256i;
#[link_name = "llvm.x86.avx2.psrl.dq"]
fn vpsrldq(a: __m256i, b: i32) -> __m256i;
}

#[cfg(test)]
@@ -4075,6 +4131,13 @@ mod tests {
);
}

#[simd_test = "avx2"]
unsafe fn _mm256_slli_si256() {
let a = i64x4::splat(0xFFFFFFFF);
let r = avx2::_mm256_slli_si256(__m256i::from(a), 3);
assert_eq!(r, __m256i::from(i64x4::splat(0xFFFFFFFF000000)));
}

#[simd_test = "avx2"]
unsafe fn _mm_sllv_epi32() {
let a = i32x4::splat(2);
@@ -4161,6 +4224,26 @@ assert_eq!(r, e);
assert_eq!(r, e);
}

#[simd_test = "avx2"]
unsafe fn _mm256_srli_si256() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32,
);
let r = avx2::_mm256_srli_si256(__m256i::from(a), 3);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 0, 0, 0,
20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 0, 0, 0,
);
assert_eq!(r, __m256i::from(e));
}

#[simd_test = "avx2"]
unsafe fn _mm256_srl_epi16() {
let a = i16x16::splat(0xFF);
@@ -5005,4 +5088,18 @@ let r = avx2::_mm256_extract_epi64(a, 3);
let r = avx2::_mm256_extract_epi64(a, 3);
assert_eq!(r, 3);
}

#[simd_test = "avx2"]
unsafe fn _mm256_cvtsd_f64() {
let a = f64x4::new(1., 2., 3., 4.);
let r = avx2::_mm256_cvtsd_f64(a);
assert_eq!(r, 1.);
}

#[simd_test = "avx2"]
unsafe fn _mm256_cvtsi256_si32() {
let a = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r = avx2::_mm256_cvtsi256_si32(a);
assert_eq!(r, 1);
}
}
