From 946718556c931dcc19135359e0178a413eaa0309 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 15 Aug 2018 18:20:33 +0200 Subject: [PATCH] Add wasm32 simd128 intrinsics (#549) * Add wasm32 simd128 intrinsics * test wasm32 simd128 instructions * Run wasm tests like all other tests * use modules instead of types to access wasm simd128 interpretations * generate docs for wasm32-unknown-unknown * fix typo * Enable #[assert_instr] on wasm32 * Shell out to Node's `execSync` to execute `wasm2wat` over our wasm file * Parse the wasm file line-by-line, looking for various function markers and such * Use the `elem` section to build a function pointer table, allowing us to map exactly from function pointer to a function * Avoid losing debug info (the names section) in release mode by stripping `--strip-debug` from `rust-lld`. * remove exclude list from Cargo.toml * fix assert_instr for non-wasm targets * re-format assert-instr changes * add crate that uses assert_instr * Fix instructions having extra quotes * Add assert_instr for wasm memory intrinsics * Remove hacks for git wasm-bindgen * add wasm_simd128 feature * make wasm32 build correctly * run simd128 tests on ci * remove wasm-assert-instr-tests --- .travis.yml | 12 - Cargo.toml | 3 + ci/docker/wasm32-unknown-unknown/Dockerfile | 37 + ci/dox.sh | 1 + ci/lld-shim.rs | 11 + ci/run-docker.sh | 6 +- ci/run.sh | 6 +- coresimd/simd_llvm.rs | 3 +- coresimd/{wasm32.rs => wasm32/mod.rs} | 19 + coresimd/wasm32/simd128.rs | 1424 +++++++++++++++++++ crates/assert-instr-macro/src/lib.rs | 28 +- crates/coresimd/Cargo.toml | 5 + crates/coresimd/src/lib.rs | 6 +- crates/stdsimd-test/Cargo.toml | 4 + crates/stdsimd-test/src/lib.rs | 174 ++- crates/stdsimd/Cargo.toml | 4 + crates/stdsimd/src/lib.rs | 1 + 17 files changed, 1684 insertions(+), 60 deletions(-) create mode 100644 ci/docker/wasm32-unknown-unknown/Dockerfile create mode 100644 ci/lld-shim.rs rename coresimd/{wasm32.rs => wasm32/mod.rs} (71%) create mode 100644 
coresimd/wasm32/simd128.rs diff --git a/.travis.yml b/.travis.yml index 0746da3949..6b21652456 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,18 +30,6 @@ matrix: env: TARGET=x86_64-apple-darwin NO_ADD=1 script: ci/run.sh - env: TARGET=wasm32-unknown-unknown - before_script: - - git clone --recursive https://github.com/WebAssembly/wabt - - (cd wabt && git reset --hard a0bdeb7 && make -j4) - - export PATH=$PATH:$PWD/wabt/bin - script: - - cargo build --target wasm32-unknown-unknown -p stdsimd - - cargo build --target wasm32-unknown-unknown -p stdsimd --release - - cargo rustc --target wasm32-unknown-unknown -p stdsimd --release --example wasm -- -C lto - - wasm2wat target/wasm32-unknown-unknown/release/examples/wasm.wasm -o wasm.wat - - cat wasm.wat - - grep current_memory wasm.wat - - grep grow_memory wasm.wat - env: TARGET=thumbv6m-none-eabi NOSTD=1 - env: TARGET=thumbv7m-none-eabi NOSTD=1 - env: TARGET=thumbv7em-none-eabi NOSTD=1 diff --git a/Cargo.toml b/Cargo.toml index d789fed9aa..4e96e5f494 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,9 @@ members = [ "crates/stdsimd-verify", "crates/stdsimd", ] +exclude = [ + "crates/wasm-assert-instr-tests" +] [profile.release] debug = true diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile new file mode 100644 index 0000000000..56eef71204 --- /dev/null +++ b/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -0,0 +1,37 @@ +FROM ubuntu:18.04 + +RUN apt-get update -y && apt-get install -y --no-install-recommends \ + ca-certificates \ + clang \ + cmake \ + curl \ + git \ + libc6-dev \ + make \ + python \ + xz-utils + +# Install `wasm2wat` +RUN git clone --recursive https://github.com/WebAssembly/wabt +RUN make -C wabt -j$(nproc) +ENV PATH=$PATH:/wabt/bin + +# Install `wasm-bindgen-test-runner` +RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.16/wasm-bindgen-0.2.16-x86_64-unknown-linux-musl.tar.gz \ + | tar xzf - +ENV 
PATH=$PATH:/wasm-bindgen-0.2.16-x86_64-unknown-linux-musl +ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner + +# Install `node` +RUN curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf - +ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin + +# We use a shim linker that removes `--strip-debug` when passed to LLD. While +# this typically results in invalid debug information in release mode it doesn't +# result in an invalid names section which is what we're interested in. +COPY lld-shim.rs / +ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim + +# Rustc isn't available until this container starts, so defer compilation of the +# shim. +ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@" diff --git a/ci/dox.sh b/ci/dox.sh index a604fb541d..fe7e04711d 100755 --- a/ci/dox.sh +++ b/ci/dox.sh @@ -44,6 +44,7 @@ dox aarch64 aarch64-unknown-linux-gnu dox powerpc64le powerpc64le-unknown-linux-gnu dox mips mips-unknown-linux-gnu dox mips64 mips64-unknown-linux-gnuabi64 +dox wasm32 wasm32-unknown-unknown # If we're on travis, not a PR, and on the right branch, publish! 
if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then diff --git a/ci/lld-shim.rs b/ci/lld-shim.rs new file mode 100644 index 0000000000..10263869e8 --- /dev/null +++ b/ci/lld-shim.rs @@ -0,0 +1,11 @@ +use std::os::unix::prelude::*; +use std::process::Command; +use std::env; + +fn main() { + let args = env::args() + .skip(1) + .filter(|s| s != "--strip-debug") + .collect::<Vec<_>>(); + panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec()); +} diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 0c560c825c..5226363410 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -13,8 +13,8 @@ run() { --user `id -u`:`id -g` \ --rm \ --init \ - --volume $HOME/.cargo:/cargo \ - --env CARGO_HOME=/cargo \ + --volume $HOME/.cargo:/cargo-h \ + --env CARGO_HOME=/cargo-h \ --volume `rustc --print sysroot`:/rust:ro \ --env TARGET=$target \ --env STDSIMD_TEST_EVERYTHING \ @@ -25,7 +25,7 @@ run() { --privileged \ stdsimd \ bash \ - -c 'PATH=$PATH:/rust/bin exec ci/run.sh' + -c 'PATH=/rust/bin:$PATH exec ci/run.sh' } if [ -z "$1" ]; then diff --git a/ci/run.sh b/ci/run.sh index d2350fc6c7..8bc915d38b 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -59,13 +59,17 @@ cargo_test() { cargo_test cargo_test "--release" -# Test x86 targets compiled with AVX. +# Test targets compiled with extra features. 
case ${TARGET} in x86*) RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" export STDSIMD_DISABLE_ASSERT_INSTR=1 cargo_test "--release" ;; + wasm32-unknown-unknown*) + # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128" + cargo_test "--release --features=wasm_simd128" + ;; *) ;; esac diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs index 2ba3944bd4..072a950b4c 100644 --- a/coresimd/simd_llvm.rs +++ b/coresimd/simd_llvm.rs @@ -51,8 +51,7 @@ extern "platform-intrinsic" { pub fn simd_select<M, T>(m: M, a: T, b: T) -> T; pub fn simd_fmin<T>(a: T, b: T) -> T; - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416 - // pub fn simd_fmax<T>(a: T, b: T) -> T; + pub fn simd_fmax<T>(a: T, b: T) -> T; pub fn simd_fsqrt<T>(a: T) -> T; pub fn simd_fma<T>(a: T, b: T, c: T) -> T; diff --git a/coresimd/wasm32.rs b/coresimd/wasm32/mod.rs similarity index 71% rename from coresimd/wasm32.rs rename to coresimd/wasm32/mod.rs index ac13458122..2862d296a9 100644 --- a/coresimd/wasm32.rs +++ b/coresimd/wasm32/mod.rs @@ -1,3 +1,20 @@ +//! 
WASM32 intrinsics + + +#[macro_use] +#[cfg(all(not(test), feature = "wasm_simd128"))] +mod simd128; + +#[cfg(all(test, feature = "wasm_simd128"))] +pub mod simd128; +#[cfg(all(test, feature = "wasm_simd128"))] +pub use self::simd128::*; + +#[cfg(test)] +use stdsimd_test::assert_instr; +#[cfg(test)] +use wasm_bindgen_test::wasm_bindgen_test; + extern "C" { #[link_name = "llvm.wasm.grow.memory.i32"] fn llvm_grow_memory(pages: i32) -> i32; @@ -12,6 +29,7 @@ extern "C" { /// /// [instr]: https://github.com/WebAssembly/design/blob/master/Semantics.md#resizing #[inline] +#[cfg_attr(test, assert_instr("memory.size"))] pub unsafe fn current_memory() -> i32 { llvm_current_memory() } @@ -25,6 +43,7 @@ pub unsafe fn current_memory() -> i32 { /// /// [instr]: https://github.com/WebAssembly/design/blob/master/Semantics.md#resizing #[inline] +#[cfg_attr(test, assert_instr("memory.grow"))] pub unsafe fn grow_memory(delta: i32) -> i32 { llvm_grow_memory(delta) } diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs new file mode 100644 index 0000000000..3c23189942 --- /dev/null +++ b/coresimd/wasm32/simd128.rs @@ -0,0 +1,1424 @@ +//! This module implements the [WebAssembly `SIMD128` ISA]. +//! +//! [WebAssembly `SIMD128` ISA]: +//! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md +// +// This file is structured as follows: +// * first the types are defined +// * then macros implementing the different APIs are provided +// * finally the API of each type is implemented +// +#![allow(non_camel_case_types)] + +#[cfg(test)] +use stdsimd_test::assert_instr; +#[cfg(test)] +use wasm_bindgen_test::wasm_bindgen_test; + +//////////////////////////////////////////////////////////////////////////////// +// Types + +/// A single unconstrained byte (0-255). +pub type ImmByte = u8; +/// A byte with values in the range 0–1 identifying a lane. +pub type LaneIdx2 = u8; +/// A byte with values in the range 0–3 identifying a lane. 
+pub type LaneIdx4 = u8; +/// A byte with values in the range 0–7 identifying a lane. +pub type LaneIdx8 = u8; +/// A byte with values in the range 0–15 identifying a lane. +pub type LaneIdx16 = u8; +/// A byte with values in the range 0–31 identifying a lane. +pub type LaneIdx32 = u8; + +types! { + /// WASM-specific 128-bit wide SIMD vector type + pub struct v128(i128); +} + +mod sealed { + types! { + /// 128-bit wide SIMD vector type with 16 8-bit wide signed lanes + pub struct v8x16( + pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, + pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, + ); + /// 128-bit wide SIMD vector type with 8 16-bit wide signed lanes + pub struct v16x8( + pub i16, pub i16, pub i16, pub i16, + pub i16, pub i16, pub i16, pub i16 + ); + /// 128-bit wide SIMD vector type with 4 32-bit wide signed lanes + pub struct v32x4(pub i32, pub i32, pub i32, pub i32); + /// 128-bit wide SIMD vector type with 2 64-bit wide signed lanes + pub struct v64x2(pub i64, pub i64); + + /// 128-bit wide SIMD vector type with 16 8-bit wide unsigned lanes + pub struct u8x16( + pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, + pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, + ); + /// 128-bit wide SIMD vector type with 8 16-bit wide unsigned lanes + pub struct u16x8( + pub u16, pub u16, pub u16, pub u16, + pub u16, pub u16, pub u16, pub u16 + ); + /// 128-bit wide SIMD vector type with 4 32-bit wide unsigned lanes + pub struct u32x4(pub u32, pub u32, pub u32, pub u32); + /// 128-bit wide SIMD vector type with 2 64-bit wide unsigned lanes + pub struct u64x2(pub u64, pub u64); + + /// 128-bit wide SIMD vector type with 4 32-bit wide floating-point lanes + pub struct f32x4(pub f32, pub f32, pub f32, pub f32); + /// 128-bit wide SIMD vector type with 2 64-bit wide floating-point lanes + pub struct f64x2(pub f64, pub f64); + } + + #[allow(improper_ctypes)] + extern "C" { + #[link_name = "llvm.fabs.v4f32"] + fn 
abs_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.fabs.v2f64"] + fn abs_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.sqrt.v4f32"] + fn sqrt_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.sqrt.v2f64"] + fn sqrt_v2f64(x: f64x2) -> f64x2; + #[link_name = "shufflevector"] + pub fn shufflevector_v16i8(x: v8x16, y: v8x16, i: v8x16) -> v8x16; + + } + impl f32x4 { + #[inline(always)] + pub unsafe fn abs(self) -> Self { + abs_v4f32(self) + } + #[inline(always)] + pub unsafe fn sqrt(self) -> Self { + sqrt_v4f32(self) + } + } + impl f64x2 { + #[inline(always)] + pub unsafe fn abs(self) -> Self { + abs_v2f64(self) + } + #[inline(always)] + pub unsafe fn sqrt(self) -> Self { + sqrt_v2f64(self) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Macros implementing the spec APIs: + +macro_rules! impl_splat { + ($id:ident[$ivec_ty:ident : $elem_ty:ident] <= $x_ty:ident | $($lane_id:ident),*) => { + /// Create vector with identical lanes + /// + /// Construct a vector with `x` replicated to all lanes. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($ident.splat))] + pub const unsafe fn splat(x: $x_ty) -> v128 { + union U { + vec: self::sealed::$ivec_ty, + res: v128 + } + U { vec: self::sealed::$ivec_ty($({ struct $lane_id; x as $elem_ty}),*) }.res + } + } +} + +macro_rules! impl_extract_lane { + ($id:ident[$ivec_ty:ident : $selem_ty:ident|$uelem_ty:ident]($lane_idx:ty) + => $x_ty:ident) => { + /// Extract lane as a scalar (sign-extend) + /// + /// Extract the scalar value of lane specified in the immediate + /// mode operand `imm` from `a` by sign-extending it. 
+ #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_s, imm = + // 0))] + #[rustc_args_required_const(1)] + pub unsafe fn extract_lane_s(a: v128, imm: $lane_idx) -> $x_ty { + use coresimd::simd_llvm::simd_extract; + union U { + vec: self::sealed::$ivec_ty, + a: v128, + } + // the vectors store a signed integer => extract into it + let v: $selem_ty = simd_extract( + U { a }.vec, + imm as u32, /* zero-extends index */ + ); + v as $x_ty + } + + /// Extract lane as a scalar (zero-extend) + /// + /// Extract the scalar value of lane specified in the immediate + /// mode operand `imm` from `a` by zero-extending it. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm = + // 0))] + #[rustc_args_required_const(1)] + pub unsafe fn extract_lane_u(a: v128, imm: $lane_idx) -> $x_ty { + use coresimd::simd_llvm::simd_extract; + union U { + vec: self::sealed::$ivec_ty, + a: v128, + } + // the vectors store a signed integer => extract into it + let v: $selem_ty = simd_extract( + U { a }.vec, + imm as u32, /* zero-extends index */ + ); + // re-interpret the signed integer as an unsigned one of the + // same size (no-op) + let v: $uelem_ty = ::mem::transmute(v); + // cast the internal unsigned integer to a larger signed + // integer (zero-extends) + v as $x_ty + } + }; + ($id:ident[$ivec_ty:ident]($lane_idx:ty) => $x_ty:ident) => { + /// Extract lane as a scalar + /// + /// Extract the scalar value of lane specified in the immediate + /// mode operand `imm` from `a`. 
+ #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane, imm = + // 0))] + #[rustc_args_required_const(1)] + pub unsafe fn extract_lane(a: v128, imm: $lane_idx) -> $x_ty { + use coresimd::simd_llvm::simd_extract; + union U { + vec: self::sealed::$ivec_ty, + a: v128, + } + // no sign/zero extension in this arm => return the extracted lane as-is + simd_extract(U { a }.vec, imm as u32 /* zero-extends index */) + } + }; +} + +macro_rules! impl_replace_lane { + ($id:ident[$ivec_ty:ident:$ielem_ty:ident]($lane_idx:ty) <= $x_ty:ident) => { + /// Replace lane value + /// + /// Return a new vector with lanes identical to `a`, except for + /// lane specified in the immediate mode argument `imm` which + /// has the value `x`. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.replace_lane))] + #[rustc_args_required_const(1)] + pub unsafe fn replace_lane(a: v128, imm: $lane_idx, x: $x_ty) -> v128 { + use coresimd::simd_llvm::simd_insert; + union U { + vec: self::sealed::$ivec_ty, + a: v128, + } + // the vector stores `$ielem_ty` lanes => cast `x` to it before inserting + ::mem::transmute(simd_insert( + U { a }.vec, + imm as u32, /* zero-extends index */ + x as $ielem_ty, + )) + } + }; +} + +macro_rules! 
impl_wrapping_add_sub_neg { + ($id:ident[$ivec_ty:ident]) => { + /// Lane-wise wrapping integer addition + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.add))] + pub unsafe fn add(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_add; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + ::mem::transmute(simd_add(a, b)) + } + + /// Lane-wise wrapping integer subtraction + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.sub))] + pub unsafe fn sub(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_sub; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + ::mem::transmute(simd_sub(a, b)) + } + + /// Lane-wise wrapping integer negation + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.neg))] + pub unsafe fn neg(a: v128) -> v128 { + use coresimd::simd_llvm::simd_mul; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute($id::splat(-1)); + ::mem::transmute(simd_mul(b, a)) + } + + // note: multiplication explicitly omitted because i64x2 does + // not implement it + }; +} + +// TODO: Saturating integer arithmetic +// need to add intrinsics to rustc + +// note: multiplication explicitly implemented separately because i64x2 does +// not implement it + +macro_rules! impl_wrapping_mul { + ($id:ident[$ivec_ty:ident]) => { + /// Lane-wise wrapping integer multiplication + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.mul))] + pub unsafe fn mul(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_mul; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + ::mem::transmute(simd_mul(a, b)) + } + }; +} + +macro_rules! 
impl_shl_scalar { + ($id:ident[$ivec_ty:ident : $t:ty]) => { + /// Left shift by scalar. + /// + /// Shift the bits in each lane to the left by the same amount. + /// Only the low bits of the shift amount are used. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.shl))] + pub unsafe fn shl(a: v128, y: i32) -> v128 { + use coresimd::simd_llvm::simd_shl; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute($id::splat(y as $t)); + ::mem::transmute(simd_shl(a, b)) + } + }; +} + +macro_rules! impl_shr_scalar { + ($id:ident[$svec_ty:ident : $uvec_ty:ident : $t:ty]) => { + /// Arithmetic right shift by scalar. + /// + /// Shift the bits in each lane to the right by the same amount. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.shr))] + pub unsafe fn shr_s(a: v128, y: i32) -> v128 { + use coresimd::simd_llvm::simd_shr; + let a: sealed::$svec_ty = ::mem::transmute(a); + let b: sealed::$svec_ty = ::mem::transmute($id::splat(y as $t)); + ::mem::transmute(simd_shr(a, b)) + } + + /// Logical right shift by scalar. + /// + /// Shift the bits in each lane to the right by the same amount. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.shr))] + pub unsafe fn shr_u(a: v128, y: i32) -> v128 { + use coresimd::simd_llvm::simd_shr; + let a: sealed::$uvec_ty = ::mem::transmute(a); + let b: sealed::$uvec_ty = ::mem::transmute($id::splat(y as $t)); + ::mem::transmute(simd_shr(a, b)) + } + }; +} + +macro_rules! impl_boolean_reduction { + ($id:ident[$ivec_ty:ident]) => { + /// Any lane true + /// + /// Returns `1` if any lane in `a` is non-zero, `0` otherwise. 
+ #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.any_true))] + pub unsafe fn any_true(a: v128) -> i32 { + use coresimd::simd_llvm::simd_reduce_any; + let a: sealed::$ivec_ty = ::mem::transmute(a); + if simd_reduce_any(a) { + 1 + } else { + 0 + } + } + + /// All lanes true + /// + /// Returns `1` if all lanes in `a` are non-zero, `0` otherwise. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.all_true))] + pub unsafe fn all_true(a: v128) -> i32 { + use coresimd::simd_llvm::simd_reduce_all; + let a: sealed::$ivec_ty = ::mem::transmute(a); + if simd_reduce_all(a) { + 1 + } else { + 0 + } + } + }; +} + +macro_rules! impl_comparisons { + ($id:ident[$ivec_ty:ident]) => { + impl_comparisons!($id[$ivec_ty=>$ivec_ty]); + }; + ($id:ident[$ivec_ty:ident=>$rvec_ty:ident]) => { + /// Equality + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.eq))] + pub unsafe fn eq(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_eq; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + let c: sealed::$rvec_ty = simd_eq(a, b); + ::mem::transmute(c) + } + /// Non-Equality + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.ne))] + pub unsafe fn ne(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_ne; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + let c: sealed::$rvec_ty = simd_ne(a, b); + ::mem::transmute(c) + } + /// Less-than + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.lt))] + pub unsafe fn lt(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_lt; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + let c: sealed::$rvec_ty = simd_lt(a, b); + ::mem::transmute(c) + 
} + /// Less-than or equal + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.le))] + pub unsafe fn le(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_le; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + let c: sealed::$rvec_ty = simd_le(a, b); + ::mem::transmute(c) + } + /// Greater-than + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.gt))] + pub unsafe fn gt(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_gt; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + let c: sealed::$rvec_ty = simd_gt(a, b); + ::mem::transmute(c) + } + /// Greater-than or equal + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.ge))] + pub unsafe fn ge(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_ge; + let a: sealed::$ivec_ty = ::mem::transmute(a); + let b: sealed::$ivec_ty = ::mem::transmute(b); + let c: sealed::$rvec_ty = simd_ge(a, b); + ::mem::transmute(c) + } + } +} + +// Floating-point operations +macro_rules! impl_floating_point_ops { + ($id:ident) => { + /// Negation + /// + /// Apply the IEEE `negate(x)` function to each lane. This simply + /// inverts the sign bit, preserving all other bits, even for `NaN` + /// inputs. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.neg))] + pub unsafe fn neg(a: v128) -> v128 { + use coresimd::simd_llvm::simd_mul; + let a: sealed::$id = ::mem::transmute(a); + let b: sealed::$id = ::mem::transmute($id::splat(-1.)); + ::mem::transmute(simd_mul(b, a)) + } + /// Absolute value + /// + /// Apply the IEEE `abs(x)` function to each lane. This simply + /// clears the sign bit, preserving all other bits, even for `NaN` + /// inputs. 
+ #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.abs))] + pub unsafe fn abs(a: v128) -> v128 { + let a: sealed::$id = ::mem::transmute(a); + ::mem::transmute(a.abs()) + } + /// NaN-propagating minimum + /// + /// Lane-wise minimum value, propagating `NaN`s. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.min))] + pub unsafe fn min(a: v128, b: v128) -> v128 { + v128::bitselect(a, b, $id::lt(a, b)) + } + /// NaN-propagating maximum + /// + /// Lane-wise maximum value, propagating `NaN`s. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.max))] + pub unsafe fn max(a: v128, b: v128) -> v128 { + v128::bitselect(a, b, $id::gt(a, b)) + } + /// Square-root + /// + /// Lane-wise square-root. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.sqrt))] + pub unsafe fn sqrt(a: v128) -> v128 { + let a: sealed::$id = ::mem::transmute(a); + ::mem::transmute(a.sqrt()) + } + /// Lane-wise addition + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.add))] + pub unsafe fn add(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_add; + let a: sealed::$id = ::mem::transmute(a); + let b: sealed::$id = ::mem::transmute(b); + ::mem::transmute(simd_add(a, b)) + } + /// Lane-wise subtraction + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.sub))] + pub unsafe fn sub(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_sub; + let a: sealed::$id = ::mem::transmute(a); + let b: sealed::$id = ::mem::transmute(b); + ::mem::transmute(simd_sub(a, b)) + } + /// Lane-wise multiplication + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.mul))] + pub unsafe fn mul(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_mul; + let a: 
sealed::$id = ::mem::transmute(a); + let b: sealed::$id = ::mem::transmute(b); + ::mem::transmute(simd_mul(a, b)) + } + /// Lane-wise division + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.div))] + pub unsafe fn div(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_div; + let a: sealed::$id = ::mem::transmute(a); + let b: sealed::$id = ::mem::transmute(b); + ::mem::transmute(simd_div(a, b)) + } + }; +} + +macro_rules! impl_conversion { + ($conversion:ident[$instr:expr]: $from_ty:ident => $to_ty:ident | $id:ident) => { + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $conversion(a: v128) -> v128 { + use coresimd::simd_llvm::simd_cast; + let a: sealed::$from_ty = ::mem::transmute(a); + let b: sealed::$to_ty = simd_cast(a); + ::mem::transmute(b) + } + }; +} + +//////////////////////////////////////////////////////////////////////////////// +// Implementations: + +// v128 +impl v128 { + /////////////////////////////////////////////////////////////////////////// + // Const constructor: + + /// Materialize a constant SIMD value from the immediate operands. + /// + /// The `v128.const` instruction is encoded with 16 immediate bytes + /// `imm` which provide the bits of the vector directly. 
+ #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr(v128.const, imm = + // [ImmByte::new(42); 16]))] + #[rustc_args_required_const(0)] + pub const unsafe fn const_(imm: [ImmByte; 16]) -> v128 { + union U { + imm: [ImmByte; 16], + vec: v128, + } + U { imm }.vec + } + + /////////////////////////////////////////////////////////////////////////// + // Bitwise logical operations: + + /// Bitwise logical and + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.and))] + pub unsafe fn and(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_and; + simd_and(a, b) + } + + /// Bitwise logical or + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.or))] + pub unsafe fn or(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_or; + simd_or(a, b) + } + + /// Bitwise logical xor + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.xor))] + pub unsafe fn xor(a: v128, b: v128) -> v128 { + use coresimd::simd_llvm::simd_xor; + simd_xor(a, b) + } + + /// Bitwise logical not + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.not))] + pub unsafe fn not(a: v128) -> v128 { + union U { + v: u128, + c: [ImmByte; 16], + } + // FIXME: https://github.com/rust-lang/rust/issues/53193 + const C: [ImmByte; 16] = unsafe { + U { + v: ::_core::u128::MAX, + }.c + }; + Self::xor(v128::const_(C), a) + } + + /// Bitwise select + /// + /// Use the bits in the control mask `c` to select the corresponding bit + /// from `v1` when `1` and `v2` when `0`. 
+ #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.bitselect))] + pub unsafe fn bitselect(v1: v128, v2: v128, c: v128) -> v128 { + // FIXME: use llvm.select instead - we need to add a `simd_bitselect` + // intrinsic to rustc that converts a v128 vector into a i1x128. The + // `simd_select` intrinsic converts e.g. a i8x16 into a i1x16 which is + // not what we want here: + Self::or(Self::and(v1, c), Self::and(v2, Self::not(c))) + } + + /////////////////////////////////////////////////////////////////////////// + // Memory load/stores: + + /// Load a `v128` vector from the given heap address. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.load))] + pub unsafe fn load(m: *const v128) -> v128 { + ::_core::ptr::read(m) + } + + /// Store a `v128` vector to the given heap address. + #[inline] + // #[target_feature(enable = "simd128")] + // FIXME: #[cfg_attr(test, assert_instr($id.store))] + pub unsafe fn store(m: *mut v128, a: v128) { + ::_core::ptr::write(m, a) + } +} + +pub use self::sealed::v8x16 as __internal_v8x16; +pub use coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle; +/// Shuffle lanes +/// +/// Create vector with lanes selected from the lanes of two input vectors +/// `a` and `b` by the indices specified in the immediate mode operand +/// `imm`. Each index selects an element of the result vector, where the +/// indices `i` in range `[0, 15]` select the `i`-th element of `a`, and +/// the indices in range `[16, 31]` select the `i - 16`-th element of `b`. +#[macro_export] +macro_rules! 
v8x16_shuffle { + ($a:expr, $b:expr, [ + $imm0:expr, $imm1:expr, $imm2:expr, $imm3:expr, + $imm4:expr, $imm5:expr, $imm6:expr, $imm7:expr, + $imm8:expr, $imm9:expr, $imm10:expr, $imm11:expr, + $imm12:expr, $imm13:expr, $imm14:expr, $imm15:expr + ]) => { + #[allow(unused_unsafe)] + unsafe { + let a: $crate::arch::wasm32::v128 = $a; + let b: $crate::arch::wasm32::v128 = $b; + union U { + e: $crate::arch::wasm32::v128, + i: $crate::arch::wasm32::__internal_v8x16, + } + let a = U { e: a }.i; + let b = U { e: b }.i; + + let r: $crate::arch::wasm32::__internal_v8x16 = + $crate::arch::wasm32::__internal_v8x16_shuffle( + a, + b, + [ + $imm0 as u32, + $imm1, + $imm2, + $imm3, + $imm4, + $imm5, + $imm6, + $imm7, + $imm8, + $imm9, + $imm10, + $imm11, + $imm12, + $imm13, + $imm14, + $imm15, + ], + ); + U { i: r }.e + } + }; +} + +/// WASM-specific v8x16 instructions with modulo-arithmetic semantics +pub mod i8x16 { + use super::*; + impl_splat!( + i8x16[v8x16: i8] <= i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 + ); + impl_extract_lane!(i8x16[v8x16:i8|u8](LaneIdx16) => i32); + impl_replace_lane!(i8x16[v8x16: i8](LaneIdx16) <= i32); + impl_wrapping_add_sub_neg!(i8x16[v8x16]); + impl_wrapping_mul!(i8x16[v8x16]); + impl_shl_scalar!(i8x16[v8x16: i32]); + impl_shr_scalar!(i8x16[v8x16: u8x16: i32]); + impl_boolean_reduction!(i8x16[v8x16]); + impl_comparisons!(i8x16[v8x16]); +} + +/// WASM-specific v16x8 instructions with modulo-arithmetic semantics +pub mod i16x8 { + use super::*; + impl_splat!(i16x8[v16x8: i16] <= i32 | x0, x1, x2, x3, x4, x5, x6, x7); + impl_extract_lane!(i16x8[v16x8:i16|u16](LaneIdx8) => i32); + impl_replace_lane!(i16x8[v16x8: i16](LaneIdx8) <= i32); + impl_wrapping_add_sub_neg!(i16x8[v16x8]); + impl_wrapping_mul!(i16x8[v16x8]); + impl_shl_scalar!(i16x8[v16x8: i32]); + impl_shr_scalar!(i16x8[v16x8: u16x8: i32]); + impl_boolean_reduction!(i16x8[v16x8]); + impl_comparisons!(i16x8[v16x8]); +} + +/// WASM-specific v32x4 
instructions with modulo-arithmetic semantics +pub mod i32x4 { + use super::*; + impl_splat!(i32x4[v32x4: i32] <= i32 | x0, x1, x2, x3); + impl_extract_lane!(i32x4[v32x4](LaneIdx4) => i32); + impl_replace_lane!(i32x4[v32x4: i32](LaneIdx4) <= i32); + impl_wrapping_add_sub_neg!(i32x4[v32x4]); + impl_wrapping_mul!(i32x4[v32x4]); + impl_shl_scalar!(i32x4[v32x4: i32]); + impl_shr_scalar!(i32x4[v32x4: u32x4: i32]); + impl_boolean_reduction!(i32x4[v32x4]); + impl_comparisons!(i32x4[v32x4]); + impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 | i32x4); + impl_conversion!(trunc_u_f32x4_sat["i32x4.trunc_u/f32x4:sat"]: f32x4 => u32x4 | i32x4); +} + +/// WASM-specific v64x2 instructions with modulo-arithmetic semantics +pub mod i64x2 { + use super::*; + impl_splat!(i64x2[v64x2: i64] <= i64 | x0, x1); + impl_extract_lane!(i64x2[v64x2](LaneIdx2) => i64); + impl_replace_lane!(i64x2[v64x2: i64](LaneIdx2) <= i64); + impl_wrapping_add_sub_neg!(i64x2[v64x2]); + // note: wrapping multiplication for i64x2 is not part of the spec + impl_shl_scalar!(i64x2[v64x2: i64]); + impl_shr_scalar!(i64x2[v64x2: u64x2: i64]); + impl_boolean_reduction!(i64x2[v64x2]); + impl_comparisons!(i64x2[v64x2]); + impl_conversion!(trunc_s_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => v64x2 | i64x2); + impl_conversion!(trunc_u_f64x2_sat["i64x2.trunc_u/f64x2:sat"]: f64x2 => u64x2 | i64x2); +} + +/// WASM-specific v32x4 floating-point instructions +pub mod f32x4 { + use super::*; + impl_splat!(f32x4[f32x4: f32] <= f32 | x0, x1, x2, x3); + impl_extract_lane!(f32x4[f32x4](LaneIdx4) => f32); + impl_replace_lane!(f32x4[f32x4: f32](LaneIdx4) <= f32); + impl_comparisons!(f32x4[f32x4=>v32x4]); + impl_floating_point_ops!(f32x4); + impl_conversion!(convert_s_i32x4["f32x4.convert_s/i32x4"]: v32x4 => f32x4 | f32x4); + impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: u32x4 => f32x4 | f32x4); + +} + +/// WASM-specific v64x2 floating-point instructions +pub mod f64x2 { + use super::*; + 
impl_splat!(f64x2[f64x2: f64] <= f64 | x0, x1); + impl_extract_lane!(f64x2[f64x2](LaneIdx2) => f64); + impl_replace_lane!(f64x2[f64x2: f64](LaneIdx2) <= f64); + impl_comparisons!(f64x2[f64x2=>v64x2]); + impl_floating_point_ops!(f64x2); + impl_conversion!(convert_s_i64x2["f64x2.convert_s/i64x2"]: v64x2 => f64x2 | f64x2); + impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: u64x2 => f64x2 | f64x2); +} + +#[cfg(test)] +pub mod tests { + use super::*; + use std; + use std::mem; + use std::prelude::v1::*; + use wasm_bindgen_test::*; + + fn compare_bytes(a: v128, b: v128) { + let a: [u8; 16] = unsafe { mem::transmute(a) }; + let b: [u8; 16] = unsafe { mem::transmute(b) }; + assert_eq!(a, b); + } + + #[wasm_bindgen_test] + fn v128_const() { + const A: v128 = unsafe { + v128::const_([ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + ]) + }; + compare_bytes(A, A); + } + + macro_rules! test_splat { + ($test_id:ident: $id:ident($val:expr) => $($vals:expr),*) => { + #[wasm_bindgen_test] + fn $test_id() { + const A: v128 = unsafe { + $id::splat($val) + }; + const B: v128 = unsafe { + v128::const_([$($vals),*]) + }; + compare_bytes(A, B); + } + } + } + + test_splat!(i8x16_splat: i8x16(42) => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42); + test_splat!(i16x8_splat: i16x8(42) => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0); + test_splat!(i32x4_splat: i32x4(42) => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0); + test_splat!(i64x2_splat: i64x2(42) => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0); + test_splat!(f32x4_splat: f32x4(42.) => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66); + test_splat!(f64x2_splat: f64x2(42.) => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); + + // tests extract and replace lanes + macro_rules! 
test_extract { + ($test_id:ident: $id:ident[$ety:ident] => $extract_fn:ident | [$val:expr; $count:expr] + | [$($vals:expr),*] => ($other:expr) + | $($ids:expr),*) => { + #[wasm_bindgen_test] + fn $test_id() { + unsafe { + // splat vector and check that all indices contain the same value + // splatted: + const A: v128 = unsafe { + $id::splat($val) + }; + $( + assert_eq!($id::$extract_fn(A, $ids) as $ety, $val); + )*; + + // create a vector from array and check that the indices contain + // the same values as in the array: + let arr: [$ety; $count] = [$($vals),*]; + let mut vec: v128 = mem::transmute(arr); + $( + assert_eq!($id::$extract_fn(vec, $ids) as $ety, arr[$ids]); + )*; + + // replace lane 0 with another value + vec = $id::replace_lane(vec, 0, $other); + assert_ne!($id::$extract_fn(vec, 0) as $ety, arr[0]); + assert_eq!($id::$extract_fn(vec, 0) as $ety, $other); + } + } + } + } + + test_extract!(i8x16_extract_u: i8x16[u8] => extract_lane_u | [255; 16] + | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] => (42) + | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ); + test_extract!(i8x16_extract_s: i8x16[i8] => extract_lane_s | [-122; 16] + | [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15] => (-42) + | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ); + + test_extract!(i16x8_extract_u: i16x8[u16] => extract_lane_u | [255; 8] + | [0, 1, 2, 3, 4, 5, 6, 7] => (42) | 0, 1, 2, 3, 4, 5, 6, 7 + ); + test_extract!(i16x8_extract_s: i16x8[i16] => extract_lane_s | [-122; 8] + | [0, -1, 2, -3, 4, -5, 6, -7] => (-42) | 0, 1, 2, 3, 4, 5, 6, 7 + ); + test_extract!(i32x4_extract: i32x4[i32] => extract_lane | [-122; 4] + | [0, -1, 2, -3] => (42) | 0, 1, 2, 3 + ); + test_extract!(i64x2_extract: i64x2[i64] => extract_lane | [-122; 2] + | [0, -1] => (42) | 0, 1 + ); + test_extract!(f32x4_extract: f32x4[f32] => extract_lane | [-122.; 4] + | [0., -1., 2., -3.] => (42.) 
| 0, 1, 2, 3 + ); + test_extract!(f64x2_extract: f64x2[f64] => extract_lane | [-122.; 2] + | [0., -1.] => (42.) | 0, 1 + ); + + #[wasm_bindgen_test] + fn v8x16_shuffle() { + unsafe { + let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let b = [ + 16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, + ]; + + let vec_a: v128 = mem::transmute(a); + let vec_b: v128 = mem::transmute(b); + + let vec_r = v8x16_shuffle!( + vec_a, + vec_b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + + let e = + [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]; + let vec_e: v128 = mem::transmute(e); + compare_bytes(vec_r, vec_e); + } + } + + macro_rules! floating_point { + (f32) => { + true + }; + (f64) => { + true + }; + ($id:ident) => { + false + }; + } + + trait IsNan: Sized { + fn is_nan(self) -> bool { + false + } + } + impl IsNan for i8 {} + impl IsNan for i16 {} + impl IsNan for i32 {} + impl IsNan for i64 {} + + macro_rules! test_bop { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + test_bop!( + $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]: + ([$($in_a),*], [$($in_b),*]) => [$($out),*] + ); + + }; + ($id:ident[$ety:ident; $ecount:expr] => $oty:ident | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + #[wasm_bindgen_test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let b_input: [$ety; $ecount] = [$($in_b),*]; + let output: [$oty; $ecount] = [$($out),*]; + + let a_vec_in: v128 = mem::transmute(a_input); + let b_vec_in: v128 = mem::transmute(b_input); + let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in); + + let res: [$oty; $ecount] = mem::transmute(vec_res); + + if !floating_point!($ety) { + assert_eq!(res, output); + } else { + for i in 0..$ecount { + let r = res[i]; + let o = output[i]; + 
assert_eq!(r.is_nan(), o.is_nan()); + if !r.is_nan() { + assert_eq!(r, o); + } + } + } + } + } + } + } + + macro_rules! test_bops { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident]: + ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => { + #[wasm_bindgen_test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = mem::transmute(a_input); + let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b); + + let res: [$ety; $ecount] = mem::transmute(vec_res); + assert_eq!(res, output); + } + } + } + } + + macro_rules! test_uop { + ($id:ident[$ety:ident; $ecount:expr] | + $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => { + #[wasm_bindgen_test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = mem::transmute(a_input); + let vec_res: v128 = $id::$unary_op(a_vec_in); + + let res: [$ety; $ecount] = mem::transmute(vec_res); + assert_eq!(res, output); + } + } + } + } + + test_bop!(i8x16[i8; 16] | add[i8x16_add_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], + [8, i8::min_value(), 10, 11, 12, 13, 14, 1, 1, 1, 1, 1, 1, 1, 1, 1]) => + [8, i8::max_value(), 12, 14, 16, 18, 20, i8::min_value(), 2, 2, 2, 2, 2, 2, 2, 2]); + test_bop!(i8x16[i8; 16] | sub[i8x16_sub_test]: + ([0, -1, 2, 3, 4, 5, 6, -1, 1, 1, 1, 1, 1, 1, 1, 1], + [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) => + [-8, i8::max_value(), -8, -8, -8, -8, -8, i8::min_value(), 0, 0, 0, 0, 0, 0, 0, 0]); + test_bop!(i8x16[i8; 16] | mul[i8x16_mul_test]: + ([0, -2, 2, 3, 4, 5, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1], + [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) => + [0, 0, 20, 33, 48, 65, 84, -2, 1, 1, 1, 1, 1, 1, 1, 1]); + test_uop!(i8x16[i8; 16] | neg[i8x16_neg_test]: + [8, 
i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1] => + [-8, i8::min_value(), -10, -11, -12, -13, -14, i8::min_value() + 1, -1, -1, -1, -1, -1, -1, -1, -1]); + + test_bop!(i16x8[i16; 8] | add[i16x8_add_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], + [8, i16::min_value(), 10, 11, 12, 13, 14, 1]) => + [8, i16::max_value(), 12, 14, 16, 18, 20, i16::min_value()]); + test_bop!(i16x8[i16; 8] | sub[i16x8_sub_test]: + ([0, -1, 2, 3, 4, 5, 6, -1], + [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) => + [-8, i16::max_value(), -8, -8, -8, -8, -8, i16::min_value()]); + test_bop!(i16x8[i16; 8] | mul[i16x8_mul_test]: + ([0, -2, 2, 3, 4, 5, 6, 2], + [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) => + [0, 0, 20, 33, 48, 65, 84, -2]); + test_uop!(i16x8[i16; 8] | neg[i16x8_neg_test]: + [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()] => + [-8, i16::min_value(), -10, -11, -12, -13, -14, i16::min_value() + 1]); + + test_bop!(i32x4[i32; 4] | add[i32x4_add_test]: + ([0, -1, 2, i32::max_value()], + [8, i32::min_value(), 10, 1]) => + [8, i32::max_value(), 12, i32::min_value()]); + test_bop!(i32x4[i32; 4] | sub[i32x4_sub_test]: + ([0, -1, 2, -1], + [8, i32::min_value(), 10, i32::max_value()]) => + [-8, i32::max_value(), -8, i32::min_value()]); + test_bop!(i32x4[i32; 4] | mul[i32x4_mul_test]: + ([0, -2, 2, 2], + [8, i32::min_value(), 10, i32::max_value()]) => + [0, 0, 20, -2]); + test_uop!(i32x4[i32; 4] | neg[i32x4_neg_test]: + [8, i32::min_value(), 10, i32::max_value()] => + [-8, i32::min_value(), -10, i32::min_value() + 1]); + + test_bop!(i64x2[i64; 2] | add[i64x2_add_test]: + ([-1, i64::max_value()], + [i64::min_value(), 1]) => + [i64::max_value(), i64::min_value()]); + test_bop!(i64x2[i64; 2] | sub[i64x2_sub_test]: + ([-1, -1], + [i64::min_value(), i64::max_value()]) => + [ i64::max_value(), i64::min_value()]); + // note: mul for i64x2 is not part of the spec + test_uop!(i64x2[i64; 2] | neg[i64x2_neg_test]: + 
[i64::min_value(), i64::max_value()] => + [i64::min_value(), i64::min_value() + 1]); + + test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]); + test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => + [0, -2, 4, 6, 8, 10, 12, -2]); + test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]: + ([0, -1, 2, 3], 1) => [0, -2, 4, 6]); + test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]: + ([0, -1], 1) => [0, -2]); + + test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); + test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => + [0, -1, 1, 1, 2, 2, 3, i16::max_value() / 2]); + test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]: + ([0, -1, 2, 3], 1) => [0, -1, 1, 1]); + test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]: + ([0, -1], 1) => [0, -1]); + + test_bops!(i8x16[i8; 16] | shr_u[i8x16_uhr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, i8::max_value(), 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); + test_bops!(i16x8[i16; 8] | shr_u[i16x8_uhr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => + [0, i16::max_value(), 1, 1, 2, 2, 3, i16::max_value() / 2]); + test_bops!(i32x4[i32; 4] | shr_u[i32x4_uhr_u_test]: + ([0, -1, 2, 3], 1) => [0, i32::max_value(), 1, 1]); + test_bops!(i64x2[i64; 2] | shr_u[i64x2_uhr_u_test]: + ([0, -1], 1) => [0, i64::max_value()]); + + #[wasm_bindgen_test] + fn v128_bitwise_logical_ops() { + unsafe { + let a: [u32; 4] = [u32::max_value(), 0, u32::max_value(), 0]; + let b: [u32; 4] = [u32::max_value(); 4]; + let c: [u32; 4] = [0; 4]; + + let vec_a: v128 = mem::transmute(a); + let vec_b: v128 = mem::transmute(b); + let vec_c: v128 = mem::transmute(c); + + let r: v128 = 
v128::and(vec_a, vec_a); + compare_bytes(r, vec_a); + let r: v128 = v128::and(vec_a, vec_b); + compare_bytes(r, vec_a); + let r: v128 = v128::or(vec_a, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128::not(vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128::xor(vec_a, vec_c); + compare_bytes(r, vec_a); + + let r: v128 = v128::bitselect(vec_b, vec_c, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128::bitselect(vec_b, vec_c, vec_c); + compare_bytes(r, vec_c); + let r: v128 = v128::bitselect(vec_b, vec_c, vec_a); + compare_bytes(r, vec_a); + } + } + + macro_rules! test_bool_red { + ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { + #[wasm_bindgen_test] + fn $test_id() { + unsafe { + let vec_a: v128 = mem::transmute([$($true),*]); // true + let vec_b: v128 = mem::transmute([$($false),*]); // false + let vec_c: v128 = mem::transmute([$($alt),*]); // alternating + + assert_eq!($id::any_true(vec_a), 1); + assert_eq!($id::any_true(vec_b), 0); + assert_eq!($id::any_true(vec_c), 1); + + assert_eq!($id::all_true(vec_a), 1); + assert_eq!($id::all_true(vec_b), 0); + assert_eq!($id::all_true(vec_c), 0); + } + } + } + } + + test_bool_red!( + i8x16[i8x16_boolean_reductions] + | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + i16x8[i16x8_boolean_reductions] + | [1_i16, 1, 1, 1, 1, 1, 1, 1] + | [0_i16, 0, 0, 0, 0, 0, 0, 0] + | [1_i16, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + i32x4[i32x4_boolean_reductions] + | [1_i32, 1, 1, 1] + | [0_i32, 0, 0, 0] + | [1_i32, 0, 1, 0] + ); + test_bool_red!( + i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0] + ); + + test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 
,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 
2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]: + ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]: + ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | le[i8x16_le_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | le[i16x8_le_test]: + ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | le[i32x4_le_test]: + ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); + 
test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | le[f32x4_le_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); + test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); + + #[wasm_bindgen_test] + fn v128_bitwise_load_store() { + unsafe { + let mut arr: [i32; 4] = [0, 1, 2, 3]; + + let vec = v128::load(arr.as_ptr() as *const v128); + let vec = i32x4::add(vec, vec); + v128::store(arr.as_mut_ptr() as *mut v128, vec); + + assert_eq!(arr, [0, 2, 4, 6]); + } + } + + test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); + test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]); + test_bop!(f32x4[f32; 4] | min[f32x4_min_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); + test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [0., -3., -4., std::f32::NAN]); + test_bop!(f32x4[f32; 4] | max[f32x4_max_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); + test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [1., -1., 7., std::f32::NAN]); + test_bop!(f32x4[f32; 4] | add[f32x4_add_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); + test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); + test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); + test_bop!(f32x4[f32; 4] | div[f32x4_div_test]: + ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); + + test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); + test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] 
=> [ 0., 1.]); + test_bop!(f64x2[f64; 2] | min[f64x2_min_test]: + ([0., -1.], [1., -3.]) => [0., -3.]); + test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]: + ([7., 8.], [-4., std::f64::NAN]) + => [ -4., std::f64::NAN]); + test_bop!(f64x2[f64; 2] | max[f64x2_max_test]: + ([0., -1.], [1., -3.]) => [1., -1.]); + test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]: + ([7., 8.], [ -4., std::f64::NAN]) + => [7., std::f64::NAN]); + test_bop!(f64x2[f64; 2] | add[f64x2_add_test]: + ([0., -1.], [1., -3.]) => [1., -4.]); + test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]: + ([0., -1.], [1., -3.]) => [-1., 2.]); + test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]: + ([0., -1.], [1., -3.]) => [0., 3.]); + test_bop!(f64x2[f64; 2] | div[f64x2_div_test]: + ([0., -8.], [1., 4.]) => [0., -2.]); + + macro_rules! test_conv { + ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { + #[wasm_bindgen_test] + fn $test_id() { + unsafe { + let from: v128 = mem::transmute($from); + let to: v128 = mem::transmute($to); + + let r: v128 = $to_ty::$conv_id(from); + + compare_bytes(r, to); + } + } + }; + } + + test_conv!( + f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4], + [1_f32, 2., 3., 4.] + ); + test_conv!( + f32x4_convert_u_i32x4 + | convert_u_i32x4 + | f32x4 + | [u32::max_value(), 2, 3, 4], + [u32::max_value() as f32, 2., 3., 4.] + ); + test_conv!( + f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2], + [1_f64, 2.] + ); + test_conv!( + f64x2_convert_u_i64x2 + | convert_u_i64x2 + | f64x2 + | [u64::max_value(), 2], + [18446744073709552000.0, 2.] 
+ ); + + // FIXME: this fails, and produces -2147483648 instead of saturating at + // i32::max_value() test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat + // | i32x4 | [1_f32, 2., (i32::max_value() as f32 + 1.), 4.], + // [1_i32, 2, i32::max_value(), 4]); FIXME: add other saturating tests +} diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs index 25b5572ad8..e5575e85a3 100644 --- a/crates/assert-instr-macro/src/lib.rs +++ b/crates/assert-instr-macro/src/lib.rs @@ -38,17 +38,9 @@ pub fn assert_instr( // testing for. let disable_assert_instr = std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok(); - let maybe_ignore = if cfg!(optimized) && !disable_assert_instr { - TokenStream::new() - } else { - (quote! { #[ignore] }).into() - }; use quote::ToTokens; let instr_str = instr - .clone() - .into_token_stream() - .to_string() .replace('.', "_") .replace(|c: char| c.is_whitespace(), ""); let assert_name = syn::Ident::new( @@ -124,16 +116,22 @@ pub fn assert_instr( } }; + // If instruction tests are disabled avoid emitting this shim at all, just + // return the original item without our attribute. + if !cfg!(optimized) || disable_assert_instr { + return (quote! { #item }).into(); + } + let tts: TokenStream = quote! { - #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), test)] #[allow(non_snake_case)] - #maybe_ignore fn #assert_name() { #to_test ::stdsimd_test::assert(#shim_name as usize, stringify!(#shim_name), - stringify!(#instr)); + #instr); } }.into(); // why? necessary now to get tests to work? 
@@ -148,13 +146,17 @@ pub fn assert_instr( } struct Invoc { - instr: syn::Expr, + instr: String, args: Vec<(syn::Ident, syn::Expr)>, } impl syn::synom::Synom for Invoc { named!(parse -> Self, do_parse!( - instr: syn!(syn::Expr) >> + instr: alt!( + map!(syn!(syn::Ident), |s| s.to_string()) + | + map!(syn!(syn::LitStr), |s| s.value()) + ) >> args: many0!(do_parse!( syn!(syn::token::Comma) >> name: syn!(syn::Ident) >> diff --git a/crates/coresimd/Cargo.toml b/crates/coresimd/Cargo.toml index 5bc2e5d7ef..dac4d916da 100644 --- a/crates/coresimd/Cargo.toml +++ b/crates/coresimd/Cargo.toml @@ -22,9 +22,14 @@ maintenance = { status = "experimental" } stdsimd-test = { version = "0.*", path = "../stdsimd-test" } stdsimd = { version = "0.0.3", path = "../stdsimd" } +[target.wasm32-unknown-unknown.dev-dependencies] +wasm-bindgen-test = "0.2.16" + [features] # Internal-usage only: denies all warnings. strict = [] # Internal-usage only: enables only those intrinsics supported by Intel's # Software Development Environment (SDE). 
intel_sde = [] +# Enables wasm simd128 intrinsics +wasm_simd128 = [] diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs index 1c5f185a8a..8d892e3b47 100644 --- a/crates/coresimd/src/lib.rs +++ b/crates/coresimd/src/lib.rs @@ -11,6 +11,7 @@ #![allow(unused_features)] #![feature( const_fn, + const_fn_union, link_llvm_intrinsics, platform_intrinsics, repr_simd, @@ -34,7 +35,7 @@ arm_target_feature, aarch64_target_feature, mips_target_feature, - powerpc_target_feature + powerpc_target_feature, )] #![cfg_attr( test, @@ -81,6 +82,9 @@ extern crate stdsimd_test; #[cfg(test)] extern crate test; +#[cfg(all(test, target_arch = "wasm32"))] +extern crate wasm_bindgen_test; + #[path = "../../../coresimd/mod.rs"] mod coresimd; diff --git a/crates/stdsimd-test/Cargo.toml b/crates/stdsimd-test/Cargo.toml index e2fc6e30d3..cf459b6e04 100644 --- a/crates/stdsimd-test/Cargo.toml +++ b/crates/stdsimd-test/Cargo.toml @@ -10,3 +10,7 @@ backtrace = "0.3" cc = "1.0" lazy_static = "1.0" rustc-demangle = "0.1.8" +wasm-bindgen = "0.2.16" + +[features] +default = [] diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs index 06d1db5136..9f56363835 100644 --- a/crates/stdsimd-test/src/lib.rs +++ b/crates/stdsimd-test/src/lib.rs @@ -17,12 +17,16 @@ extern crate cc; extern crate lazy_static; extern crate rustc_demangle; extern crate simd_test_macro; +extern crate wasm_bindgen; use std::collections::HashMap; use std::env; +use std::path::Path; use std::process::Command; use std::str; +use wasm_bindgen::prelude::*; + pub use assert_instr_macro::*; pub use simd_test_macro::*; @@ -32,6 +36,7 @@ lazy_static! 
{ } struct Function { + addr: Option<usize>, instrs: Vec<Instruction>, } @@ -40,6 +45,10 @@ struct Instruction { } fn disassemble_myself() -> HashMap<String, Vec<Function>> { + if cfg!(target_arch = "wasm32") { + return parse_wasm2wat(); + } + let me = env::current_exe().expect("failed to get current exe"); if cfg!(target_arch = "x86_64") @@ -145,6 +154,7 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> { ret.entry(normalize(symbol)) .or_insert_with(Vec::new) .push(Function { + addr: None, instrs: instructions, }); } @@ -189,6 +199,7 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> { ret.entry(normalize(symbol)) .or_insert_with(Vec::new) .push(Function { + addr: None, instrs: instructions, }); } @@ -239,6 +250,7 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> { ret.entry(normalize(symbol)) .or_insert_with(Vec::new) .push(Function { + addr: None, instrs: instructions, }); } @@ -246,6 +258,100 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> { ret } + +#[wasm_bindgen(module = "child_process")] +extern "C" { + #[wasm_bindgen(js_name = execSync)] + fn exec_sync(cmd: &str) -> Buffer; +} + +#[wasm_bindgen(module = "buffer")] +extern "C" { + type Buffer; + #[wasm_bindgen(method, js_name = toString)] + fn to_string(this: &Buffer) -> String; +} + +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_namespace = require)] + fn resolve(module: &str) -> String; + #[wasm_bindgen(js_namespace = console, js_name = log)] + fn js_console_log(s: &str); +} + +// println! doesn't work on wasm32 right now, so shadow the compiler's println! +// macro with our own shim that redirects to `console.log`. +#[cfg(target_arch = "wasm32")] +macro_rules! println { + ($($args:tt)*) => (js_console_log(&format!($($args)*))) +} + +fn parse_wasm2wat() -> HashMap<String, Vec<Function>> { + // Our wasm module in the wasm-bindgen test harness is called + // "wasm-bindgen-test_bg". When running in node this is actually a shim JS + // file. Ask node where that JS file is, and then we use that with a wasm + // extension to find the wasm file itself. 
+ let js_shim = resolve("wasm-bindgen-test_bg"); + let js_shim = Path::new(&js_shim).with_extension("wasm"); + + // Execute `wasm2wat` synchronously, waiting for and capturing all of its + // output. + let output = + exec_sync(&format!("wasm2wat {}", js_shim.display())).to_string(); + + let mut ret: HashMap<String, Vec<Function>> = HashMap::new(); + let mut lines = output.lines().map(|s| s.trim()); + while let Some(line) = lines.next() { + // If we found the table of function pointers, fill in the known + // address for all our `Function` instances + if line.starts_with("(elem") { + for (i, name) in line.split_whitespace().skip(3).enumerate() { + let name = name.trim_right_matches(")"); + for f in ret.get_mut(name).expect("ret.get_mut(name) failed") { + f.addr = Some(i + 1); + } + } + continue; + } + + // If this isn't a function, we don't care about it. + if !line.starts_with("(func ") { + continue; + } + + let mut function = Function { + instrs: Vec::new(), + addr: None, + }; + + // Empty functions will end in `))` so there's nothing to do, otherwise + // we'll have a bunch of following lines which are instructions. + // + // Lines that have an imbalanced `)` mark the end of a function. + if !line.ends_with("))") { + while let Some(line) = lines.next() { + function.instrs.push(Instruction { + parts: line + .split_whitespace() + .map(|s| s.to_string()) + .collect(), + }); + if !line.starts_with("(") && line.ends_with(")") { + break; + } + } + } + + // The second element here split on whitespace should be the name of + // the function, skipping the type/params/results + ret.entry(line.split_whitespace().nth(1).unwrap().to_string()) + .or_insert(Vec::new()) + .push(function); + } + return ret; +} + fn normalize(symbol: &str) -> String { let symbol = rustc_demangle::demangle(symbol).to_string(); match symbol.rfind("::h") { @@ -259,27 +365,8 @@ fn normalize(symbol: &str) -> String { /// This asserts that the function at `fnptr` contains the instruction /// `expected` provided. 
pub fn assert(fnptr: usize, fnname: &str, expected: &str) { - // Translate this function pointer to a symbolic name that we'd have found - // in the disassembly. - let mut sym = None; - backtrace::resolve(fnptr as *mut _, |name| { - sym = name.name().and_then(|s| s.as_str()).map(normalize); - }); - - let functions = - if let Some(s) = sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { - s - } else { - if let Some(sym) = sym { - println!("assumed symbol name: `{}`", sym); - } - println!("maybe related functions"); - for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) { - println!("\t- {}", f); - } - panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname); - }; - + let mut fnname = fnname.to_string(); + let functions = get_functions(fnptr, &mut fnname); assert_eq!(functions.len(), 1); let function = &functions[0]; @@ -362,16 +449,14 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Help debug by printing out the found disassembly, and then panic as we // didn't find the instruction. - println!( - "disassembly for {}: ", - sym.as_ref().expect("symbol not found") - ); + println!("disassembly for {}: ", fnname,); for (i, instr) in instrs.iter().enumerate() { - print!("\t{:2}: ", i); + let mut s = format!("\t{:2}: ", i); for part in &instr.parts { - print!("{} ", part); + s.push_str(part); + s.push_str(" "); } - println!(); + println!("{}", s); } if !found { @@ -394,6 +479,39 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { } } +fn get_functions(fnptr: usize, fnname: &mut String) -> &'static [Function] { + // Translate this function pointer to a symbolic name that we'd have found + // in the disassembly. 
+ let mut sym = None; + backtrace::resolve(fnptr as *mut _, |name| { + sym = name.name().and_then(|s| s.as_str()).map(normalize); + }); + + if let Some(sym) = &sym { + if let Some(s) = DISASSEMBLY.get(sym) { + *fnname = sym.to_string(); + return s; + } + } + + let exact_match = DISASSEMBLY + .iter() + .find(|(_, list)| list.iter().any(|f| f.addr == Some(fnptr))); + if let Some((name, list)) = exact_match { + *fnname = name.to_string(); + return list; + } + + if let Some(sym) = sym { + println!("assumed symbol name: `{}`", sym); + } + println!("maybe related functions"); + for f in DISASSEMBLY.keys().filter(|k| k.contains(&**fnname)) { + println!("\t- {}", f); + } + panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname); +} + pub fn assert_skip_test_ok(name: &str) { if env::var("STDSIMD_TEST_EVERYTHING").is_err() { return; diff --git a/crates/stdsimd/Cargo.toml b/crates/stdsimd/Cargo.toml index 4ab553db48..3db3ed1187 100644 --- a/crates/stdsimd/Cargo.toml +++ b/crates/stdsimd/Cargo.toml @@ -37,3 +37,7 @@ path = "../../examples/hex.rs" name = "wasm" crate-type = ["cdylib"] path = "../../examples/wasm.rs" + +[features] +default = [] +wasm_simd128 = ["coresimd/wasm_simd128"] \ No newline at end of file diff --git a/crates/stdsimd/src/lib.rs b/crates/stdsimd/src/lib.rs index 65871cc5eb..021dc06ae3 100644 --- a/crates/stdsimd/src/lib.rs +++ b/crates/stdsimd/src/lib.rs @@ -17,6 +17,7 @@ extern crate libc; extern crate std as __do_not_use_this_import; #[cfg(test)] +#[allow(unused_imports)] #[macro_use(println, print)] extern crate std;