From 6396267009266aa619745693488b5d076298fae4 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 9 Aug 2018 14:41:22 +0200 Subject: [PATCH] test wasm32 simd128 instructions --- .travis.yml | 3 + Cargo.toml | 3 + coresimd/wasm32/mod.rs | 2 + coresimd/wasm32/simd128.rs | 152 ++++---- crates/wasm-test/.cargo/config | 2 + crates/wasm-test/Cargo.toml | 11 + crates/wasm-test/src/lib.rs | 1 + crates/wasm-test/tests/simd128.rs | 584 ++++++++++++++++++++++++++++++ 8 files changed, 677 insertions(+), 81 deletions(-) create mode 100644 crates/wasm-test/.cargo/config create mode 100644 crates/wasm-test/Cargo.toml create mode 100644 crates/wasm-test/src/lib.rs create mode 100644 crates/wasm-test/tests/simd128.rs diff --git a/.travis.yml b/.travis.yml index 0746da3949..f52246ddf3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,6 +42,9 @@ matrix: - cat wasm.wat - grep current_memory wasm.wat - grep grow_memory wasm.wat + - cd crates/wasm-test + - cargo test --target=$TARGET + - cargo test --target=$TARGET --release - env: TARGET=thumbv6m-none-eabi NOSTD=1 - env: TARGET=thumbv7m-none-eabi NOSTD=1 - env: TARGET=thumbv7em-none-eabi NOSTD=1 diff --git a/Cargo.toml b/Cargo.toml index d789fed9aa..2dbf903acb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,9 @@ members = [ "crates/stdsimd-verify", "crates/stdsimd", ] +exclude = [ + "crates/wasm-test" +] [profile.release] debug = true diff --git a/coresimd/wasm32/mod.rs b/coresimd/wasm32/mod.rs index 3e2e7aad88..f5c71cd58a 100644 --- a/coresimd/wasm32/mod.rs +++ b/coresimd/wasm32/mod.rs @@ -1,6 +1,8 @@ //! WASM32 intrinsics +#[macro_use] mod simd128; +pub use self::simd128::*; extern "C" { #[link_name = "llvm.wasm.grow.memory.i32"] diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs index dc0177e2ad..cc46e1fd25 100644 --- a/coresimd/wasm32/simd128.rs +++ b/coresimd/wasm32/simd128.rs @@ -6,48 +6,17 @@ #![allow(non_camel_case_types)] /// A single unconstrained byte (0-255). 
-#[derive(Copy, Clone, Debug)] -pub struct ImmByte(u8); -impl ImmByte { - /// Constructor - #[inline] - #[rustc_args_required_const(0)] - pub const fn new(value: u8) -> Self { - ImmByte(value) - } -} - -macro_rules! impl_laneidx { - ($id:ident($ty:ty): [$_from:expr, $_to:expr] | $(#[$doc:meta])*) => { - #[derive(Copy, Clone, Debug)] - pub struct $id($ty); - impl $id { - #[inline] - #[rustc_args_required_const(0)] - pub const fn new(x: $ty) -> Self { - // FIXME: not allowed in const fn: - // * if statements - // * unreachable_unchecked / panic / abort - // - // if x < $from || x > $to { - // unsafe { ::_core::hint::unreachable_unchecked() }; - // debug_assert!(...) - // } - $id(x) - } - } - }; -} -impl_laneidx!(LaneIdx2(u8): [0, 1] | /// A byte with values in the range 0–1 identifying a lane. -); -impl_laneidx!(LaneIdx4(u8): [0, 3] | /// A byte with values in the range 0–3 identifying a lane. -); -impl_laneidx!(LaneIdx8(u8): [0, 7] | /// A byte with values in the range 0–7 identifying a lane. -); -impl_laneidx!(LaneIdx16(u8): [0, 15] | /// A byte with values in the range 0–15 identifying a lane. -); -impl_laneidx!(LaneIdx32(u8): [0, 31] | /// A byte with values in the range 0–31 identifying a lane. -); +pub type ImmByte = u8; +/// A byte with values in the range 0–1 identifying a lane. +pub type LaneIdx2 = u8; +/// A byte with values in the range 0–3 identifying a lane. +pub type LaneIdx4 = u8; +/// A byte with values in the range 0–7 identifying a lane. +pub type LaneIdx8 = u8; +/// A byte with values in the range 0–15 identifying a lane. +pub type LaneIdx16 = u8; +/// A byte with values in the range 0–31 identifying a lane. +pub type LaneIdx32 = u8; types! 
{ /// WASM-specific 128-bit wide SIMD vector type @@ -102,6 +71,9 @@ mod sealed { fn sqrt_v4f32(x: f32x4) -> f32x4; #[link_name = "llvm.sqrt.v2f64"] fn sqrt_v2f64(x: f64x2) -> f64x2; + #[link_name = "shufflevector"] + pub fn shufflevector_v16i8(x: v8x16, y: v8x16, i: v8x16) -> v8x16; + } impl f32x4 { #[inline(always)] @@ -214,7 +186,7 @@ macro_rules! impl_extract_lane { a: v128 } // the vectors store a signed integer => extract into it - let v: $selem_ty = simd_extract(U { a }.vec, imm.0 as u32 /* zero-extends index */); + let v: $selem_ty = simd_extract(U { a }.vec, imm as u32 /* zero-extends index */); v as $x_ty } @@ -233,7 +205,7 @@ macro_rules! impl_extract_lane { a: v128 } // the vectors store a signed integer => extract into it - let v: $selem_ty = simd_extract(U { a }.vec, imm.0 as u32 /* zero-extends index */); + let v: $selem_ty = simd_extract(U { a }.vec, imm as u32 /* zero-extends index */); // re-interpret the signed integer as an unsigned one of the same size (no-op) let v: $uelem_ty= ::mem::transmute(v); // cast the internal unsigned integer to a larger signed integer (zero-extends) @@ -258,7 +230,7 @@ macro_rules! impl_extract_lane { a: v128 } // the vectors store a signed integer => extract into it - simd_extract(U { a }.vec, imm.0 as u32 /* zero-extends index */) + simd_extract(U { a }.vec, imm as u32 /* zero-extends index */) } } }; @@ -291,7 +263,7 @@ macro_rules! 
impl_replace_lane { // the vectors store a signed integer => extract into it ::mem::transmute( simd_insert(U { a }.vec, - imm.0 as u32 /* zero-extends index */, + imm as u32 /* zero-extends index */, x as $ielem_ty) ) } @@ -306,23 +278,44 @@ impl_replace_lane!(i64x2[v64x2:i64](LaneIdx2) <= i64); impl_replace_lane!(f32x4[f32x4:f32](LaneIdx4) <= f32); impl_replace_lane!(f64x2[f64x2:f64](LaneIdx2) <= f64); -impl v8x16 { - /// Shuffle lanes - /// - /// Create vector with lanes selected from the lanes of two input vectors - /// `a` and `b` by the indices specified in the immediate mode operand - /// `imm`. Each index selects an element of the result vector, where the - /// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and - /// the indices in range `[16, 31]` select the `i - 16`-th element of `b`. - #[inline] - // #[target_feature(enable = "simd128")] - // FIXME: #[cfg_attr(test, assert_instr(v8x16.shuffle))] - #[rustc_args_required_const(2)] - pub unsafe fn shuffle(a: v128, b: v128, imm: [LaneIdx32; 16]) -> v128 { - // FIXME: LLVM does not support v8x16.shuffle (use inline assembly?) - let result: v128; - asm!("v8x16.shuffle $0, $1, $2" : "=r"(result) : "r"(a), "r"(b), "r"(imm) : : ); - result +pub use ::coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle; +pub use self::sealed::v8x16 as __internal_v8x16; + +/// Shuffle lanes +/// +/// Create vector with lanes selected from the lanes of two input vectors +/// `a` and `b` by the indices specified in the immediate mode operand +/// `imm`. Each index selects an element of the result vector, where the +/// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and +/// the indices in range `[16, 31]` select the `i - 16`-th element of `b`. +#[macro_export] +macro_rules! 
v8x16_shuffle { + ($a:expr, $b:expr, [ + $imm0:expr, $imm1:expr, $imm2:expr, $imm3:expr, + $imm4:expr, $imm5:expr, $imm6:expr, $imm7:expr, + $imm8:expr, $imm9:expr, $imm10:expr, $imm11:expr, + $imm12:expr, $imm13:expr, $imm14:expr, $imm15:expr + ]) => { + #[allow(unused_unsafe)] + unsafe { + let a: $crate::arch::wasm32::v128 = $a; + let b: $crate::arch::wasm32::v128 = $b; + union U { + e: $crate::arch::wasm32::v128, + i: $crate::arch::wasm32::__internal_v8x16, + } + let a = U { e: a }.i; + let b = U { e: b }.i; + + let r: $crate::arch::wasm32::__internal_v8x16 = + $crate::arch::wasm32::__internal_v8x16_shuffle(a, b, [ + $imm0 as u32, $imm1 as u32, $imm2 as u32, $imm3 as u32, + $imm4 as u32, $imm5 as u32, $imm6 as u32, $imm7 as u32, + $imm8 as u32, $imm9 as u32, $imm10 as u32, $imm11 as u32, + $imm12 as u32, $imm13 as u32, $imm14 as u32, $imm15 as u32 + ]); + U { i: r }.e + } } } @@ -557,6 +550,9 @@ impl_boolean_reduction!(i64x2[v64x2]); macro_rules! impl_comparisons { ($id:ident[$ivec_ty:ident]) => { + impl_comparisons!($id[$ivec_ty=>$ivec_ty]); + }; + ($id:ident[$ivec_ty:ident=>$rvec_ty:ident]) => { impl $id { /// Equality #[inline] @@ -566,7 +562,7 @@ macro_rules! impl_comparisons { use coresimd::simd_llvm::simd_eq; let a: sealed::$ivec_ty = ::mem::transmute(a); let b: sealed::$ivec_ty = ::mem::transmute(b); - let c: sealed::$ivec_ty = simd_eq(a, b); + let c: sealed::$rvec_ty = simd_eq(a, b); ::mem::transmute(c) } /// Non-Equality @@ -577,7 +573,7 @@ macro_rules! impl_comparisons { use coresimd::simd_llvm::simd_ne; let a: sealed::$ivec_ty = ::mem::transmute(a); let b: sealed::$ivec_ty = ::mem::transmute(b); - let c: sealed::$ivec_ty = simd_ne(a, b); + let c: sealed::$rvec_ty = simd_ne(a, b); ::mem::transmute(c) } /// Less-than @@ -588,7 +584,7 @@ macro_rules! impl_comparisons { use coresimd::simd_llvm::simd_lt; let a: sealed::$ivec_ty = ::mem::transmute(a); let b: sealed::$ivec_ty = ::mem::transmute(b); - let c: sealed::$ivec_ty = simd_lt(a, b); + let c: sealed::$rvec_ty = simd_lt(a, b); ::mem::transmute(c) } @@ -600,7 +596,7 @@ macro_rules! 
impl_comparisons { use coresimd::simd_llvm::simd_le; let a: sealed::$ivec_ty = ::mem::transmute(a); let b: sealed::$ivec_ty = ::mem::transmute(b); - let c: sealed::$ivec_ty = simd_le(a, b); + let c: sealed::$rvec_ty = simd_le(a, b); ::mem::transmute(c) } @@ -612,7 +608,7 @@ macro_rules! impl_comparisons { use coresimd::simd_llvm::simd_gt; let a: sealed::$ivec_ty = ::mem::transmute(a); let b: sealed::$ivec_ty = ::mem::transmute(b); - let c: sealed::$ivec_ty = simd_gt(a, b); + let c: sealed::$rvec_ty = simd_gt(a, b); ::mem::transmute(c) } @@ -624,7 +620,7 @@ macro_rules! impl_comparisons { use coresimd::simd_llvm::simd_ge; let a: sealed::$ivec_ty = ::mem::transmute(a); let b: sealed::$ivec_ty = ::mem::transmute(b); - let c: sealed::$ivec_ty = simd_ge(a, b); + let c: sealed::$rvec_ty = simd_ge(a, b); ::mem::transmute(c) } } @@ -635,8 +631,8 @@ impl_comparisons!(i8x16[v8x16]); impl_comparisons!(i16x8[v16x8]); impl_comparisons!(i32x4[v32x4]); impl_comparisons!(i64x2[v64x2]); -impl_comparisons!(f32x4[f32x4]); -impl_comparisons!(f64x2[f64x2]); +impl_comparisons!(f32x4[f32x4=>v32x4]); +impl_comparisons!(f64x2[f64x2=>v64x2]); // Load and store impl v128 { @@ -696,10 +692,7 @@ macro_rules! impl_floating_point_ops { // #[target_feature(enable = "simd128")] // FIXME: #[cfg_attr(test, assert_instr($id.min))] pub unsafe fn min(a: v128, b: v128) -> v128 { - use coresimd::simd_llvm::simd_fmin; - let a: sealed::$id = ::mem::transmute(a); - let b: sealed::$id = ::mem::transmute(b); - ::mem::transmute(simd_fmin(a, b)) + v128::bitselect(a, b, v128::or($id::lt(a, b), $id::ne(a, a))) } /// NaN-propagating maximum @@ -709,10 +702,7 @@ macro_rules! 
impl_floating_point_ops { // #[target_feature(enable = "simd128")] // FIXME: #[cfg_attr(test, assert_instr($id.max))] pub unsafe fn max(a: v128, b: v128) -> v128 { - use coresimd::simd_llvm::simd_fmax; - let a: sealed::$id = ::mem::transmute(a); - let b: sealed::$id = ::mem::transmute(b); - ::mem::transmute(simd_fmax(a, b)) + v128::bitselect(a, b, v128::or($id::gt(a, b), $id::ne(a, a))) } /// Square-root @@ -794,9 +784,9 @@ macro_rules! impl_conversion { // Integer to floating point impl_conversion!(convert_s_i32x4["f32x4.convert_s/i32x4"]: v32x4 => f32x4 | f32x4); -impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: v32x4 => f32x4 | f32x4); +impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: u32x4 => f32x4 | f32x4); impl_conversion!(convert_s_i64x2["f64x2.convert_s/i64x2"]: v64x2 => f64x2 | f64x2); -impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: v64x2 => f64x2 | f64x2); +impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: u64x2 => f64x2 | f64x2); // Floating point to integer with saturation impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 | i32x4); diff --git a/crates/wasm-test/.cargo/config b/crates/wasm-test/.cargo/config new file mode 100644 index 0000000000..908f2d6dde --- /dev/null +++ b/crates/wasm-test/.cargo/config @@ -0,0 +1,2 @@ +[target.wasm32-unknown-unknown] +runner = 'wasm-bindgen-test-runner' \ No newline at end of file diff --git a/crates/wasm-test/Cargo.toml b/crates/wasm-test/Cargo.toml new file mode 100644 index 0000000000..7910113447 --- /dev/null +++ b/crates/wasm-test/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wasm-test" +version = "0.1.0" +authors = ["gnzlbg "] + +[target.'cfg(target_arch = "wasm32")'.dev-dependencies] +wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' } +coresimd = { path = "../coresimd" } + +[patch.crates-io] +wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' } diff --git a/crates/wasm-test/src/lib.rs b/crates/wasm-test/src/lib.rs new file mode 
100644 index 0000000000..0c9ac1ac8e --- /dev/null +++ b/crates/wasm-test/src/lib.rs @@ -0,0 +1 @@ +#![no_std] diff --git a/crates/wasm-test/tests/simd128.rs b/crates/wasm-test/tests/simd128.rs new file mode 100644 index 0000000000..9853d1a01a --- /dev/null +++ b/crates/wasm-test/tests/simd128.rs @@ -0,0 +1,584 @@ +#![feature(use_extern_macros, stdsimd, asm, stmt_expr_attributes)] + +#[macro_use] +extern crate coresimd; +extern crate wasm_bindgen_test; + +use coresimd::arch::wasm32::*; +use std::mem; +use wasm_bindgen_test::*; + +fn compare_bytes(a: v128, b: v128) { + let a: [u8; 16] = unsafe { mem::transmute(a) }; + let b: [u8; 16] = unsafe { mem::transmute(b) }; + assert_eq!(a, b); +} + +#[wasm_bindgen_test] +fn v128_const() { + const A: v128 = unsafe { + v128::const_([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + }; + compare_bytes(A, A); +} + +macro_rules! test_splat { + ($test_id:ident: $id:ident($val:expr) => $($vals:expr),*) => { + #[wasm_bindgen_test] + fn $test_id() { + const A: v128 = unsafe { + $id::splat($val) + }; + const B: v128 = unsafe { + v128::const_([$($vals),*]) + }; + compare_bytes(A, B); + } + } +} + +test_splat!(i8x16_splat: i8x16(42) => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42); +test_splat!(i16x8_splat: i16x8(42) => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0); +test_splat!(i32x4_splat: i32x4(42) => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0); +test_splat!(i64x2_splat: i64x2(42) => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0); +test_splat!(f32x4_splat: f32x4(42.) => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66); +test_splat!(f64x2_splat: f64x2(42.) => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); + +// tests extract and replace lanes +macro_rules! 
test_extract { + ($test_id:ident: $id:ident[$ety:ident] => $extract_fn:ident | [$val:expr; $count:expr] + | [$($vals:expr),*] => ($other:expr) + | $($ids:expr),*) => { + #[wasm_bindgen_test] + fn $test_id() { + unsafe { + // splat vector and check that all indices contain the same value + // splatted: + const A: v128 = unsafe { + $id::splat($val) + }; + $( + assert_eq!($id::$extract_fn(A, $ids) as $ety, $val); + )*; + + // create a vector from array and check that the indices contain + // the same values as in the array: + let arr: [$ety; $count] = [$($vals),*]; + let mut vec: v128 = mem::transmute(arr); + $( + assert_eq!($id::$extract_fn(vec, $ids) as $ety, arr[$ids]); + )*; + + // replace lane 0 with another value + vec = $id::replace_lane(vec, 0, $other); + assert_ne!($id::$extract_fn(vec, 0) as $ety, arr[0]); + assert_eq!($id::$extract_fn(vec, 0) as $ety, $other); + } + } + } +} + +test_extract!(i8x16_extract_u: i8x16[u8] => extract_lane_u | [255; 16] + | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] => (42) + | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +); +test_extract!(i8x16_extract_s: i8x16[i8] => extract_lane_s | [-122; 16] + | [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15] => (-42) + | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +); + +test_extract!(i16x8_extract_u: i16x8[u16] => extract_lane_u | [255; 8] + | [0, 1, 2, 3, 4, 5, 6, 7] => (42) | 0, 1, 2, 3, 4, 5, 6, 7 +); +test_extract!(i16x8_extract_s: i16x8[i16] => extract_lane_s | [-122; 8] + | [0, -1, 2, -3, 4, -5, 6, -7] => (-42) | 0, 1, 2, 3, 4, 5, 6, 7 +); +test_extract!(i32x4_extract: i32x4[i32] => extract_lane | [-122; 4] + | [0, -1, 2, -3] => (42) | 0, 1, 2, 3 +); +test_extract!(i64x2_extract: i64x2[i64] => extract_lane | [-122; 2] + | [0, -1] => (42) | 0, 1 +); +test_extract!(f32x4_extract: f32x4[f32] => extract_lane | [-122.; 4] + | [0., -1., 2., -3.] => (42.) 
| 0, 1, 2, 3 +); +test_extract!(f64x2_extract: f64x2[f64] => extract_lane | [-122.; 2] + | [0., -1.] => (42.) | 0, 1 +); + +#[wasm_bindgen_test] +fn v8x16_shuffle() { + unsafe { + let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let b = [ + 16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ]; + + let vec_a: v128 = mem::transmute(a); + let vec_b: v128 = mem::transmute(b); + + let vec_r = v8x16_shuffle!( + vec_a, + vec_b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + + let e = [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]; + let vec_e: v128 = mem::transmute(e); + compare_bytes(vec_r, vec_e); + } +} + +macro_rules! floating_point { + (f32) => { + true + }; + (f64) => { + true + }; + ($id:ident) => { + false + }; +} + +trait IsNan: Sized { + fn is_nan(self) -> bool { + false + } +} +impl IsNan for i8 {} +impl IsNan for i16 {} +impl IsNan for i32 {} +impl IsNan for i64 {} + +macro_rules! test_bop { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + test_bop!( + $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]: + ([$($in_a),*], [$($in_b),*]) => [$($out),*] + ); + + }; + ($id:ident[$ety:ident; $ecount:expr] => $oty:ident | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + #[wasm_bindgen_test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let b_input: [$ety; $ecount] = [$($in_b),*]; + let output: [$oty; $ecount] = [$($out),*]; + + let a_vec_in: v128 = mem::transmute(a_input); + let b_vec_in: v128 = mem::transmute(b_input); + let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in); + + let res: [$oty; $ecount] = mem::transmute(vec_res); + + if !floating_point!($ety) { + assert_eq!(res, output); + } else { + for i in 0..$ecount { + let r = res[i]; + let o = output[i]; + assert_eq!(r.is_nan(), 
o.is_nan()); + if !r.is_nan() { + assert_eq!(r, o); + } + } + } + } + } + } +} + +macro_rules! test_bops { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident]: + ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => { + #[wasm_bindgen_test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = mem::transmute(a_input); + let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b); + + let res: [$ety; $ecount] = mem::transmute(vec_res); + assert_eq!(res, output); + } + } + } +} + +macro_rules! test_uop { + ($id:ident[$ety:ident; $ecount:expr] | + $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => { + #[wasm_bindgen_test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = mem::transmute(a_input); + let vec_res: v128 = $id::$unary_op(a_vec_in); + + let res: [$ety; $ecount] = mem::transmute(vec_res); + assert_eq!(res, output); + } + } + } +} + +test_bop!(i8x16[i8; 16] | add[i8x16_add_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], + [8, i8::min_value(), 10, 11, 12, 13, 14, 1, 1, 1, 1, 1, 1, 1, 1, 1]) => + [8, i8::max_value(), 12, 14, 16, 18, 20, i8::min_value(), 2, 2, 2, 2, 2, 2, 2, 2]); +test_bop!(i8x16[i8; 16] | sub[i8x16_sub_test]: + ([0, -1, 2, 3, 4, 5, 6, -1, 1, 1, 1, 1, 1, 1, 1, 1], + [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) => + [-8, i8::max_value(), -8, -8, -8, -8, -8, i8::min_value(), 0, 0, 0, 0, 0, 0, 0, 0]); +test_bop!(i8x16[i8; 16] | mul[i8x16_mul_test]: + ([0, -2, 2, 3, 4, 5, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1], + [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) => + [0, 0, 20, 33, 48, 65, 84, -2, 1, 1, 1, 1, 1, 1, 1, 1]); +test_uop!(i8x16[i8; 16] | neg[i8x16_neg_test]: + [8, i8::min_value(), 10, 11, 12, 13, 14, 
i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1] => + [-8, i8::min_value(), -10, -11, -12, -13, -14, i8::min_value() + 1, -1, -1, -1, -1, -1, -1, -1, -1]); + +test_bop!(i16x8[i16; 8] | add[i16x8_add_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], + [8, i16::min_value(), 10, 11, 12, 13, 14, 1]) => + [8, i16::max_value(), 12, 14, 16, 18, 20, i16::min_value()]); +test_bop!(i16x8[i16; 8] | sub[i16x8_sub_test]: + ([0, -1, 2, 3, 4, 5, 6, -1], + [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) => + [-8, i16::max_value(), -8, -8, -8, -8, -8, i16::min_value()]); +test_bop!(i16x8[i16; 8] | mul[i16x8_mul_test]: + ([0, -2, 2, 3, 4, 5, 6, 2], + [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) => + [0, 0, 20, 33, 48, 65, 84, -2]); +test_uop!(i16x8[i16; 8] | neg[i16x8_neg_test]: + [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()] => + [-8, i16::min_value(), -10, -11, -12, -13, -14, i16::min_value() + 1]); + +test_bop!(i32x4[i32; 4] | add[i32x4_add_test]: + ([0, -1, 2, i32::max_value()], + [8, i32::min_value(), 10, 1]) => + [8, i32::max_value(), 12, i32::min_value()]); +test_bop!(i32x4[i32; 4] | sub[i32x4_sub_test]: + ([0, -1, 2, -1], + [8, i32::min_value(), 10, i32::max_value()]) => + [-8, i32::max_value(), -8, i32::min_value()]); +test_bop!(i32x4[i32; 4] | mul[i32x4_mul_test]: + ([0, -2, 2, 2], + [8, i32::min_value(), 10, i32::max_value()]) => + [0, 0, 20, -2]); +test_uop!(i32x4[i32; 4] | neg[i32x4_neg_test]: + [8, i32::min_value(), 10, i32::max_value()] => + [-8, i32::min_value(), -10, i32::min_value() + 1]); + +test_bop!(i64x2[i64; 2] | add[i64x2_add_test]: + ([-1, i64::max_value()], + [i64::min_value(), 1]) => + [i64::max_value(), i64::min_value()]); +test_bop!(i64x2[i64; 2] | sub[i64x2_sub_test]: + ([-1, -1], + [i64::min_value(), i64::max_value()]) => + [ i64::max_value(), i64::min_value()]); +// note: mul for i64x2 is not part of the spec +test_uop!(i64x2[i64; 2] | neg[i64x2_neg_test]: + [i64::min_value(), i64::max_value()] => + 
[i64::min_value(), i64::min_value() + 1]); + +test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]); +test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => + [0, -2, 4, 6, 8, 10, 12, -2]); +test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]: + ([0, -1, 2, 3], 1) => [0, -2, 4, 6]); +test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]: + ([0, -1], 1) => [0, -2]); + +test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); +test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => + [0, -1, 1, 1, 2, 2, 3, i16::max_value() / 2]); +test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]: + ([0, -1, 2, 3], 1) => [0, -1, 1, 1]); +test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]: + ([0, -1], 1) => [0, -1]); + +test_bops!(i8x16[i8; 16] | shr_u[i8x16_shr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, i8::max_value(), 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); +test_bops!(i16x8[i16; 8] | shr_u[i16x8_shr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => + [0, i16::max_value(), 1, 1, 2, 2, 3, i16::max_value() / 2]); +test_bops!(i32x4[i32; 4] | shr_u[i32x4_shr_u_test]: + ([0, -1, 2, 3], 1) => [0, i32::max_value(), 1, 1]); +test_bops!(i64x2[i64; 2] | shr_u[i64x2_shr_u_test]: + ([0, -1], 1) => [0, i64::max_value()]); + +#[wasm_bindgen_test] +fn v128_bitwise_logical_ops() { + unsafe { + let a: [u32; 4] = [u32::max_value(), 0, u32::max_value(), 0]; + let b: [u32; 4] = [u32::max_value(); 4]; + let c: [u32; 4] = [0; 4]; + + let vec_a: v128 = mem::transmute(a); + let vec_b: v128 = mem::transmute(b); + let vec_c: v128 = mem::transmute(c); + + let r: v128 = v128::and(vec_a, vec_a); + compare_bytes(r, vec_a); + let 
r: v128 = v128::and(vec_a, vec_b); + compare_bytes(r, vec_a); + let r: v128 = v128::or(vec_a, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128::not(vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128::xor(vec_a, vec_c); + compare_bytes(r, vec_a); + + let r: v128 = v128::bitselect(vec_b, vec_c, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128::bitselect(vec_b, vec_c, vec_c); + compare_bytes(r, vec_c); + let r: v128 = v128::bitselect(vec_b, vec_c, vec_a); + compare_bytes(r, vec_a); + } +} + +macro_rules! test_bool_red { + ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { + #[wasm_bindgen_test] + fn $test_id() { + unsafe { + let vec_a: v128 = mem::transmute([$($true),*]); // true + let vec_b: v128 = mem::transmute([$($false),*]); // false + let vec_c: v128 = mem::transmute([$($alt),*]); // alternating + + assert_eq!($id::any_true(vec_a), 1); + assert_eq!($id::any_true(vec_b), 0); + assert_eq!($id::any_true(vec_c), 1); + + assert_eq!($id::all_true(vec_a), 1); + assert_eq!($id::all_true(vec_b), 0); + assert_eq!($id::all_true(vec_c), 0); + } + } + } +} + +test_bool_red!( + i8x16[i8x16_boolean_reductions] + | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] +); +test_bool_red!( + i16x8[i16x8_boolean_reductions] + | [1_i16, 1, 1, 1, 1, 1, 1, 1] + | [0_i16, 0, 0, 0, 0, 0, 0, 0] + | [1_i16, 0, 1, 0, 1, 0, 1, 0] +); +test_bool_red!( + i32x4[i32x4_boolean_reductions] + | [1_i32, 1, 1, 1] + | [0_i32, 0, 0, 0] + | [1_i32, 0, 1, 0] +); +test_bool_red!( + i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0] +); + +test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); +test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]: + ([0, 
1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); +test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); +test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]); +test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); +test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + +test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); +test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); +test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); +test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]); +test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); +test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); + +test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); +test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); +test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); +test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) => [0, -1]); +test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); +test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); + +test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]: + ([0, 2, 2, 4, 4, 6, 
6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); +test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]: + ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); +test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]: + ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]); +test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]); +test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); +test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); + +test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); +test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); +test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); +test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]); +test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); +test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + +test_bop!(i8x16[i8; 16] | le[i8x16_le_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); +test_bop!(i16x8[i16; 8] | le[i16x8_le_test]: + ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); +test_bop!(i32x4[i32; 4] | le[i32x4_le_test]: + ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); +test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]); +test_bop!(f32x4[f32; 4] => i32 | 
le[f32x4_le_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, 0]); +test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); + +#[wasm_bindgen_test] +fn v128_bitwise_load_store() { + unsafe { + let mut arr: [i32; 4] = [0, 1, 2, 3]; + + let vec = v128::load(arr.as_ptr() as *const v128); + let vec = i32x4::add(vec, vec); + v128::store(arr.as_mut_ptr() as *mut v128, vec); + + assert_eq!(arr, [0, 2, 4, 6]); + } +} + +test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); +test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]); +test_bop!(f32x4[f32; 4] | min[f32x4_min_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); +test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [0., -3., -4., std::f32::NAN]); +test_bop!(f32x4[f32; 4] | max[f32x4_max_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); +test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [1., -1., 7., std::f32::NAN]); +test_bop!(f32x4[f32; 4] | add[f32x4_add_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); +test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); +test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); +test_bop!(f32x4[f32; 4] | div[f32x4_div_test]: + ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); + +test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); +test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] 
=> [ 0., 1.]); +test_bop!(f64x2[f64; 2] | min[f64x2_min_test]: + ([0., -1.], [1., -3.]) => [0., -3.]); +test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]: + ([7., 8.], [-4., std::f64::NAN]) + => [ -4., std::f64::NAN]); +test_bop!(f64x2[f64; 2] | max[f64x2_max_test]: + ([0., -1.], [1., -3.]) => [1., -1.]); +test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]: + ([7., 8.], [ -4., std::f64::NAN]) + => [7., std::f64::NAN]); +test_bop!(f64x2[f64; 2] | add[f64x2_add_test]: + ([0., -1.], [1., -3.]) => [1., -4.]); +test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]: + ([0., -1.], [1., -3.]) => [-1., 2.]); +test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]: + ([0., -1.], [1., -3.]) => [0., 3.]); +test_bop!(f64x2[f64; 2] | div[f64x2_div_test]: + ([0., -8.], [1., 4.]) => [0., -2.]); + +macro_rules! test_conv { + ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { + #[wasm_bindgen_test] + fn $test_id() { + unsafe { + let from: v128 = mem::transmute($from); + let to: v128 = mem::transmute($to); + + let r: v128 = $to_ty::$conv_id(from); + + compare_bytes(r, to); + } + } + }; +} + +test_conv!( + f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4], + [1_f32, 2., 3., 4.] +); +test_conv!( + f32x4_convert_u_i32x4 + | convert_u_i32x4 + | f32x4 + | [u32::max_value(), 2, 3, 4], + [u32::max_value() as f32, 2., 3., 4.] +); + +test_conv!( + f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2], + [1_f64, 2.] +); +test_conv!( + f64x2_convert_u_i64x2 | convert_u_i64x2 | f64x2 | [u64::max_value(), 2], + [u64::max_value() as f64, 2.] +); + +// FIXME: this fails, and produces -2147483648 instead of saturating at i32::max_value(): +// test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat | i32x4 | +// [1_f32, 2., (i32::max_value() as f32 + 1.), 4.], [1_i32, 2, i32::max_value(), 4]); +// FIXME: add other saturating tests