Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Completes SSE and adds some MMX intrinsics #247

Merged
merged 3 commits into from
Jan 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions coresimd/src/x86/i586/bswap.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
//! Byte swap intrinsics.

#![cfg_attr(feature = "cargo-clippy", allow(stutter))]

#[cfg(test)]
use stdsimd_test::assert_instr;

Expand Down
3 changes: 2 additions & 1 deletion coresimd/src/x86/i586/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3299,7 +3299,8 @@ mod tests {
use v64::*;

let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
let mut mem =
::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
assert_eq!(a, *mem);
}
Expand Down
134 changes: 105 additions & 29 deletions coresimd/src/x86/i686/mmx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;

/// Constructs a 64-bit integer vector initialized to zero.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[target_feature = "+mmx"]
// FIXME: this produces a movl instead of xorps on x86
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
Expand All @@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
packsswb(a, b)
Expand All @@ -42,63 +42,93 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(packssdw))]
// `packssdw` packs the two i32 lanes of each operand into i16 lanes with
// signed saturation (out-of-range values clamp to the i16 range).
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
    packssdw(a, b)
}

/// Compares the 8-bit integer elements of two 64-bit integer vectors of
/// [8 x i8] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFF for true.
/// Compares the eight signed 8-bit lanes of `a` and `b` for `a > b`.
///
/// Each lane of the result is `-1` (all bits set) where the comparison is
/// true and `0` where it is false.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
    pcmpgtb(a, b)
}

/// Compares the 16-bit integer elements of two 64-bit integer vectors of
/// [4 x i16] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
/// The comparison yields 0 for false, 0xFFFF for true.
/// Compares the four signed 16-bit lanes of `a` and `b` for `a > b`.
///
/// Each lane of the result is `-1` (all bits set) where the comparison is
/// true and `0` where it is false.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
    pcmpgtw(a, b)
}

/// Unpacks the upper 32 bits from two 64-bit integer vectors of
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
/// Compares the two signed 32-bit lanes of `a` and `b` for `a > b`.
///
/// Each lane of the result is `-1` (all bits set) where the comparison is
/// true and `0` where it is false.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
    pcmpgtd(a, b)
}

/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.2, b.2, a.3, b.3]`.
#[inline(always)]
#[target_feature = "+mmx"]
// NOTE(review): the previous trailing "FIXME punpcklbw expected" comment
// looked stale — punpckhwd is the instruction for a 16-bit high unpack,
// and it is what the assertion below checks. Confirm before removing FIXMEs
// elsewhere.
#[cfg_attr(test, assert_instr(punpckhwd))]
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
    punpckhwd(a, b)
}

/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
/// and interleaves them into a 64-bit integer vector of [8 x i8].
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
///
/// The lower four elements of both inputs are discarded.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
    punpckhbw(a, b)
}

/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
///
/// The upper four elements of both inputs are discarded.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
    punpcklbw(a, b)
}

/// Unpacks the lower 32 bits from two 64-bit integer vectors of
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
    punpcklwd(a, b)
}

/// Unpacks the upper element from two `i32x2` vectors and interleaves them
/// into the result: `[a.1, b.1]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhdq))]
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
    punpckhdq(a, b)
}

/// Unpacks the lower element from two `i32x2` vectors and interleaves them
/// into the result: `[a.0, b.0]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckldq))]
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
    punpckldq(a, b)
}

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.packsswb"]
Expand All @@ -109,12 +139,20 @@ extern "C" {
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhwd"]
fn punpckhwd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklbw"]
fn punpcklbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklwd"]
fn punpcklwd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhbw"]
fn punpckhbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklbw"]
fn punpcklbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhdq"]
fn punpckhdq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckldq"]
fn punpckldq(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
Expand All @@ -123,21 +161,21 @@ mod tests {
use x86::i686::mmx;
use stdsimd_test::simd_test;

#[simd_test = "sse"] // FIXME: should be mmx
#[simd_test = "mmx"]
unsafe fn _mm_setzero_si64() {
    // An all-zero 64-bit vector is bitwise identical to the integer 0.
    let expected: __m64 = ::std::mem::transmute(0_i64);
    let got = mmx::_mm_setzero_si64();
    assert_eq!(expected, got);
}

#[simd_test = "sse"] // FIXME: should be mmx
#[simd_test = "mmx"]
unsafe fn _mm_packs_pi16() {
    // All inputs fit in i8, so packing is lossless here (no saturation).
    let lo = i16x4::new(-1, 2, -3, 4);
    let hi = i16x4::new(-5, 6, -7, 8);
    let packed = i8x8::from(mmx::_mm_packs_pi16(lo.into(), hi.into()));
    let expected = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
    assert_eq!(expected, packed);
}

#[simd_test = "sse"] // FIXME: should be mmx
#[simd_test = "mmx"]
unsafe fn _mm_packs_pi32() {
let a = i32x2::new(-1, 2);
let b = i32x2::new(-5, 6);
Expand All @@ -162,11 +200,23 @@ mod tests {
}

#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi16() {
let a = i16x4::new(0, 1, 2, 3);
let b = i16x4::new(4, 5, 6, 7);
let r = i16x4::new(2, 6, 3, 7);
assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
unsafe fn _mm_cmpgt_pi32() {
let a = i32x2::new(0, 3);
let b = i32x2::new(1, 2);
let r0 = i32x2::new(0, -1);
let r1 = i32x2::new(-1, 0);

assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into());
assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into());
}

#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi8() {
let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);

assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into());
}

#[simd_test = "mmx"]
Expand All @@ -177,11 +227,37 @@ mod tests {
assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
}

#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi16() {
    let left = i16x4::new(0, 1, 2, 3);
    let right = i16x4::new(4, 5, 6, 7);
    // High halves interleave as [left.2, right.2, left.3, right.3].
    let got = i16x4::from(mmx::_mm_unpackhi_pi16(left.into(), right.into()));
    assert_eq!(i16x4::new(2, 6, 3, 7), got);
}

#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi16() {
    let left = i16x4::new(0, 1, 2, 3);
    let right = i16x4::new(4, 5, 6, 7);
    // Low halves interleave as [left.0, right.0, left.1, right.1].
    let got = i16x4::from(mmx::_mm_unpacklo_pi16(left.into(), right.into()));
    assert_eq!(i16x4::new(0, 4, 1, 5), got);
}

#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi32() {
let a = i32x2::new(0, 3);
let b = i32x2::new(1, 2);
let r = i32x2::new(3, 2);

assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into());
}

#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi32() {
let a = i32x2::new(0, 3);
let b = i32x2::new(1, 2);
let r = i32x2::new(0, 1);

assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into());
}
}
69 changes: 40 additions & 29 deletions coresimd/src/x86/i686/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,25 +221,46 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
_mm_cvtpi32_ps(a, b)
}

/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
/// float].
/// Converts the lower four signed 8-bit values of `a` into a 128-bit vector
/// of four `f32`s.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
    // `_mm_cmpgt_pi8(0, a)` yields -1 (all bits set) in every lane where the
    // corresponding byte of `a` is negative — a per-byte sign mask.
    let b = mmx::_mm_setzero_si64();
    let b = mmx::_mm_cmpgt_pi8(b, a);
    // Interleaving each low byte with its sign byte sign-extends the lower
    // four bytes of `a` to 16-bit lanes, which are then converted to f32.
    let b = mmx::_mm_unpacklo_pi8(a, b);
    _mm_cvtpi16_ps(b)
}

/// Converts the lower four unsigned 8-bit values of `a` into a 128-bit
/// vector of four `f32`s.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
    // Interleaving with zero bytes zero-extends the lower four bytes of `a`
    // to 16-bit lanes, which are then converted to f32.
    let b = mmx::_mm_setzero_si64();
    let b = mmx::_mm_unpacklo_pi8(a, b);
    _mm_cvtpi16_ps(b)
}

/// Converts a 64-bit vector of four signed `i16`s into a 128-bit vector of
/// four `f32`s.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
    // `_mm_cmpgt_pi16(0, a)` produces a per-lane sign mask: -1 where the
    // lane of `a` is negative, 0 otherwise.
    let b = mmx::_mm_setzero_si64();
    let b = mmx::_mm_cmpgt_pi16(b, a);
    // Interleave the upper two lanes with their sign mask to get two
    // sign-extended i32s, convert them into the low half of `r` …
    let c = mmx::_mm_unpackhi_pi16(a, b);
    let r = i586::_mm_setzero_ps();
    let r = cvtpi2ps(r, c);
    // … then move that pair to the upper half of the result.
    let r = i586::_mm_movelh_ps(r, r);
    // Finally sign-extend and convert the lower two lanes into the low half.
    let c = mmx::_mm_unpacklo_pi16(a, b);
    cvtpi2ps(r, c)
}

/// Converts a 64-bit vector of 16-bit unsigned integer values into a
/// 128-bit vector of [4 x float].
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let c = mmx::_mm_unpackhi_pi16(a, b);
Expand All @@ -250,27 +271,6 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
cvtpi2ps(r, c)
}

/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
/// into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
    // `cmpgt(0, a)` builds a per-byte sign mask; interleaving it with `a`
    // sign-extends the lower four bytes to 16-bit lanes before conversion.
    let b = mmx::_mm_setzero_si64();
    let b = mmx::_mm_cmpgt_pi8(b, a);
    let b = mmx::_mm_unpacklo_pi8(a, b);
    _mm_cvtpi16_ps(b)
}

/// Converts the lower four unsigned 8-bit integer values from a 64-bit
/// vector of [8 x u8] into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
    // Interleaving with zero bytes zero-extends the lower four bytes of `a`
    // to 16-bit lanes before conversion.
    let b = mmx::_mm_setzero_si64();
    let b = mmx::_mm_unpacklo_pi8(a, b);
    _mm_cvtpi16_ps(b)
}

/// Converts the two 32-bit signed integer values from each 64-bit vector
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
#[inline(always)]
Expand Down Expand Up @@ -512,6 +512,13 @@ mod tests {
assert_eq!(r, u16x4::splat(15));
}

#[simd_test = "sse"]
unsafe fn _m_pmulhuw() {
    let a = u16x4::splat(1000);
    let b = u16x4::splat(1001);
    // 1000 * 1001 = 1_001_000; its high 16 bits are 15.
    let r = sse::_m_pmulhuw(a.into(), b.into());
    assert_eq!(r, u16x4::splat(15).into());
}

#[simd_test = "sse"]
unsafe fn _mm_avg_pu8() {
let (a, b) = (u8x8::splat(3), u8x8::splat(9));
Expand Down Expand Up @@ -601,7 +608,11 @@ mod tests {
let a = i8x8::splat(9);
let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
let mut r = i8x8::splat(0);
sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
sse::_mm_maskmove_si64(
a.into(),
mask.into(),
&mut r as *mut _ as *mut i8,
);
assert_eq!(r, i8x8::splat(0).replace(2, 9));

let mut r = i8x8::splat(0);
Expand Down
Loading