Skip to content

Commit

Permalink
Completes SSE and adds some MMX intrinsics
Browse files Browse the repository at this point in the history
MMX:

- `_mm_cmpgt_pi{8,16,32}`
- `_mm_unpack{hi,lo}_pi{8,16,32}`

SSE (is now complete):

- `_mm_cvtp{i,u}{8,16}_ps`
- add test for `_m_pmulhuw`
  • Loading branch information
gnzlbg committed Jan 3, 2018
1 parent dda7157 commit a9d10f7
Show file tree
Hide file tree
Showing 9 changed files with 463 additions and 268 deletions.
3 changes: 2 additions & 1 deletion coresimd/src/x86/i586/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3304,7 +3304,8 @@ mod tests {
use v64::*;

let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
let mut mem =
::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
assert_eq!(a, *mem);
}
Expand Down
219 changes: 172 additions & 47 deletions coresimd/src/x86/i686/mmx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;

/// Constructs a 64-bit integer vector initialized to zero.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[target_feature = "+mmx"]
// FIXME: this produces a movl instead of xorps on x86
// FIXME: this produces an `xor` instruction instead of `xorps` on x86_64
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
Expand All @@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
Expand All @@ -42,63 +42,93 @@ pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
}

/// Compares the 8-bit integer elements of `a` and `b` lane by lane, testing
/// whether each element of `a` is greater than the corresponding element of
/// `b`.
///
/// Every lane of the result is `0` when the test is false and `-1` (all bits
/// set, i.e. `0xFF`) when it is true.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 {
    // Reinterpret the typed vectors as the raw 64-bit type the intrinsic
    // takes, then reinterpret the resulting mask back into lanes.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let mask = pcmpgtb(lhs, rhs);
    mem::transmute(mask)
}

/// Compares the 16-bit integer elements of `a` and `b` lane by lane, testing
/// whether each element of `a` is greater than the corresponding element of
/// `b`.
///
/// Every lane of the result is `0` when the test is false and `-1` (all bits
/// set, i.e. `0xFFFF`) when it is true.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 {
    // Reinterpret the typed vectors as the raw 64-bit type the intrinsic
    // takes, then reinterpret the resulting mask back into lanes.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let mask = pcmpgtw(lhs, rhs);
    mem::transmute(mask)
}

/// Unpacks the upper 32 bits from two 64-bit integer vectors of
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.2, b.2, a.3, b.3]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
#[cfg_attr(test, assert_instr(punpcklbw))] // TODO: check
pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(punpckhwd(mem::transmute(a), mem::transmute(b)))
mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b)))
}

/// Tests, lane by lane, whether each element of `a` is greater than the
/// corresponding element of `b`.
///
/// Every lane of the result is `0` for `false` and `-1` for `true`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmpgt_pi32(a: i32x2, b: i32x2) -> i32x2 {
    // Reinterpret the typed vectors as the raw 64-bit type the intrinsic
    // takes, then reinterpret the resulting mask back into lanes.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let mask = pcmpgtd(lhs, rhs);
    mem::transmute(mask)
}

/// Interleaves the upper four elements of two `i8x8` vectors.
///
/// The result alternates between `a` and `b`:
/// `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_pi8(a: i8x8, b: i8x8) -> i8x8 {
    // Reinterpret as the raw 64-bit type for the intrinsic call and back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpckhbw(lhs, rhs);
    mem::transmute(interleaved)
}

/// Interleaves the lower four elements (the lower 32 bits) of two `i8x8`
/// vectors.
///
/// The result alternates between `a` and `b`:
/// `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 {
    // Reinterpret as the raw 64-bit type for the intrinsic call and back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpcklbw(lhs, rhs);
    mem::transmute(interleaved)
}

/// Interleaves the lower two elements (the lower 32 bits) of two `i16x4`
/// vectors.
///
/// The result alternates between `a` and `b`: `[a.0, b.0, a.1, b.1]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: i16x4, b: i16x4) -> i16x4 {
    // Reinterpret as the raw 64-bit type for the intrinsic call and back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpcklwd(lhs, rhs);
    mem::transmute(interleaved)
}

/// Takes the upper element of each of two `i32x2` vectors and places them
/// side by side in the result: `[a.1, b.1]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhdq))]
pub unsafe fn _mm_unpackhi_pi32(a: i32x2, b: i32x2) -> i32x2 {
    // Reinterpret as the raw 64-bit type for the intrinsic call and back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpckhdq(lhs, rhs);
    mem::transmute(interleaved)
}

/// Takes the lower element of each of two `i32x2` vectors and places them
/// side by side in the result: `[a.0, b.0]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckldq))]
pub unsafe fn _mm_unpacklo_pi32(a: i32x2, b: i32x2) -> i32x2 {
    // Reinterpret as the raw 64-bit type for the intrinsic call and back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpckldq(lhs, rhs);
    mem::transmute(interleaved)
}

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.packsswb"]
Expand All @@ -109,12 +139,20 @@ extern "C" {
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhwd"]
fn punpckhwd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklbw"]
fn punpcklbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklwd"]
fn punpcklwd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhbw"]
fn punpckhbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklbw"]
fn punpcklbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhdq"]
fn punpckhdq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckldq"]
fn punpckldq(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
Expand All @@ -123,21 +161,21 @@ mod tests {
use x86::i686::mmx;
use stdsimd_test::simd_test;

// Checks that `_mm_setzero_si64` returns a vector equal to the bit pattern
// of a zero `i64`. Runs under the `mmx` feature only; the earlier `sse`
// gating was marked as a stop-gap that should be `mmx`.
#[simd_test = "mmx"]
unsafe fn _mm_setzero_si64() {
    let r: __m64 = ::std::mem::transmute(0_i64);
    assert_eq!(r, mmx::_mm_setzero_si64());
}

// Checks that `_mm_packs_pi16` narrows the four lanes of `a` followed by the
// four lanes of `b` into one `i8x8`. All inputs fit in an `i8`, so no
// saturation is exercised here. Runs under the `mmx` feature only; the
// earlier `sse` gating was marked as a stop-gap that should be `mmx`.
#[simd_test = "mmx"]
unsafe fn _mm_packs_pi16() {
    let a = i16x4::new(-1, 2, -3, 4);
    let b = i16x4::new(-5, 6, -7, 8);
    let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
    assert_eq!(r, mmx::_mm_packs_pi16(a, b));
}

#[simd_test = "sse"] // FIXME: should be mmx
#[simd_test = "mmx"]
unsafe fn _mm_packs_pi32() {
let a = i32x2::new(-1, 2);
let b = i32x2::new(-5, 6);
Expand All @@ -147,41 +185,128 @@ mod tests {

// Checks `_mm_cmpgt_pi8` on two input sets: an ascending/descending pair
// (including an equal lane, which must compare false) and an alternating
// pair compared in both operand orders.
#[simd_test = "mmx"]
unsafe fn _mm_cmpgt_pi8() {
    {
        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
        let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
        // Lane 4 holds 4 vs 4: equal is not "greater than", so it stays 0.
        let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
        assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b));
    }
    {
        let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
        let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
        // `a` wins in the odd lanes, `b` wins in the even lanes.
        let r0 = i8x8::new(0, -1, 0, -1, 0, -1, 0, -1);
        let r1 = i8x8::new(-1, 0, -1, 0, -1, 0, -1, 0);

        assert_eq!(r0, mmx::_mm_cmpgt_pi8(a, b));
        assert_eq!(r1, mmx::_mm_cmpgt_pi8(b, a));
    }
}

// Checks `_mm_cmpgt_pi16` on two input sets: an ascending/descending pair
// (including an equal lane, which must compare false) and an alternating
// pair compared in both operand orders.
#[simd_test = "mmx"]
unsafe fn _mm_cmpgt_pi16() {
    {
        let a = i16x4::new(0, 1, 2, 3);
        let b = i16x4::new(4, 3, 2, 1);
        // Lane 2 holds 2 vs 2: equal is not "greater than", so it stays 0.
        let r = i16x4::new(0, 0, 0, -1);
        assert_eq!(r, mmx::_mm_cmpgt_pi16(a, b));
    }
    {
        let a = i16x4::new(0, 3, 4, 7);
        let b = i16x4::new(1, 2, 5, 6);
        // `a` wins in the odd lanes, `b` wins in the even lanes.
        let r0 = i16x4::new(0, -1, 0, -1);
        let r1 = i16x4::new(-1, 0, -1, 0);

        assert_eq!(r0, mmx::_mm_cmpgt_pi16(a, b));
        assert_eq!(r1, mmx::_mm_cmpgt_pi16(b, a));
    }
}

#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi16() {
let a = i16x4::new(0, 1, 2, 3);
let b = i16x4::new(4, 5, 6, 7);
let r = i16x4::new(2, 6, 3, 7);
assert_eq!(r, mmx::_mm_unpackhi_pi16(a, b));
unsafe fn _mm_cmpgt_pi32() {
let a = i32x2::new(0, 3);
let b = i32x2::new(1, 2);
let r0 = i32x2::new(0, -1);
let r1 = i32x2::new(-1, 0);

assert_eq!(r0, mmx::_mm_cmpgt_pi32(a, b));
assert_eq!(r1, mmx::_mm_cmpgt_pi32(b, a));
}

// Checks that `_mm_unpackhi_pi8` interleaves lanes 4..=7 of both inputs.
#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi8() {
    let lhs = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
    let rhs = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
    // Expected order: a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7.
    let expected = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);

    let actual = mmx::_mm_unpackhi_pi8(lhs, rhs);
    assert_eq!(expected, actual);
}

// Checks that `_mm_unpacklo_pi8` interleaves lanes 0..=3 of both inputs,
// using a sequential input pair and an alternating one.
#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi8() {
    {
        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
        let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
        // Expected order: a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3.
        let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
    }
    {
        let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
        let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
        let r = i8x8::new(0, 1, 3, 2, 4, 5, 7, 6);
        assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
    }
}

// Checks that `_mm_unpackhi_pi16` interleaves lanes 2 and 3 of both inputs,
// using a sequential input pair and an alternating one.
#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi16() {
    // Sequential inputs: expected order is a.2, b.2, a.3, b.3.
    let seq_a = i16x4::new(0, 1, 2, 3);
    let seq_b = i16x4::new(4, 5, 6, 7);
    let seq_expected = i16x4::new(2, 6, 3, 7);
    assert_eq!(seq_expected, mmx::_mm_unpackhi_pi16(seq_a, seq_b));

    // Alternating inputs.
    let alt_a = i16x4::new(0, 3, 4, 7);
    let alt_b = i16x4::new(1, 2, 5, 6);
    let alt_expected = i16x4::new(4, 5, 7, 6);
    assert_eq!(alt_expected, mmx::_mm_unpackhi_pi16(alt_a, alt_b));
}

// Checks that `_mm_unpacklo_pi16` interleaves lanes 0 and 1 of both inputs,
// using a sequential input pair and an alternating one.
#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi16() {
    {
        let a = i16x4::new(0, 1, 2, 3);
        let b = i16x4::new(4, 5, 6, 7);
        // Expected order: a.0, b.0, a.1, b.1.
        let r = i16x4::new(0, 4, 1, 5);
        assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b));
    }
    {
        let a = i16x4::new(0, 3, 4, 7);
        let b = i16x4::new(1, 2, 5, 6);
        let r = i16x4::new(0, 1, 3, 2);

        assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b));
    }
}

// Checks that `_mm_unpackhi_pi32` returns `[a.1, b.1]`.
#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi32() {
    let lhs = i32x2::new(0, 3);
    let rhs = i32x2::new(1, 2);
    let expected = i32x2::new(3, 2);

    let actual = mmx::_mm_unpackhi_pi32(lhs, rhs);
    assert_eq!(expected, actual);
}

// Checks that `_mm_unpacklo_pi32` returns `[a.0, b.0]`.
#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi32() {
    let lhs = i32x2::new(0, 3);
    let rhs = i32x2::new(1, 2);
    let expected = i32x2::new(0, 1);

    let actual = mmx::_mm_unpacklo_pi32(lhs, rhs);
    assert_eq!(expected, actual);
}

}
Loading

0 comments on commit a9d10f7

Please sign in to comment.