From de933054355afc6c185826f81720df69b60d136e Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Wed, 8 Aug 2018 15:36:12 +0200
Subject: [PATCH 01/18] Add wasm32 simd128 intrinsics

---
 coresimd/simd_llvm.rs                 |   3 +-
 coresimd/{wasm32.rs => wasm32/mod.rs} |   4 +
 coresimd/wasm32/simd128.rs            | 805 ++++++++++++++++++++++++++
 3 files changed, 810 insertions(+), 2 deletions(-)
 rename coresimd/{wasm32.rs => wasm32/mod.rs} (96%)
 create mode 100644 coresimd/wasm32/simd128.rs
diff --git a/coresimd/simd_llvm.rs b/coresimd/simd_llvm.rs
index 2ba3944bd4..072a950b4c 100644
--- a/coresimd/simd_llvm.rs
+++ b/coresimd/simd_llvm.rs
@@ -51,8 +51,7 @@ extern "platform-intrinsic" {
     pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
 
     pub fn simd_fmin<T>(a: T, b: T) -> T;
-    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
-    // pub fn simd_fmax<T>(a: T, b: T) -> T;
+    pub fn simd_fmax<T>(a: T, b: T) -> T;
 
     pub fn simd_fsqrt<T>(a: T) -> T;
     pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
diff --git a/coresimd/wasm32.rs b/coresimd/wasm32/mod.rs
similarity index 96%
rename from coresimd/wasm32.rs
rename to coresimd/wasm32/mod.rs
index ac13458122..3e2e7aad88 100644
--- a/coresimd/wasm32.rs
+++ b/coresimd/wasm32/mod.rs
@@ -1,3 +1,7 @@
+//! WASM32 intrinsics
+
+mod simd128;
+
 extern "C" {
     #[link_name = "llvm.wasm.grow.memory.i32"]
     fn llvm_grow_memory(pages: i32) -> i32;
diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs
new file mode 100644
index 0000000000..dc0177e2ad
--- /dev/null
+++ b/coresimd/wasm32/simd128.rs
@@ -0,0 +1,805 @@
+//! This module implements the [WebAssembly `SIMD128` ISA].
+//!
+//! [WebAssembly `SIMD128` ISA]:
+//! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md
+
+#![allow(non_camel_case_types)]
+
+/// A single unconstrained byte (0-255).
+#[derive(Copy, Clone, Debug)]
+pub struct ImmByte(u8);
+impl ImmByte {
+    /// Constructor
+    #[inline]
+    #[rustc_args_required_const(0)]
+    pub const fn new(value: u8) -> Self {
+        ImmByte(value)
+    }
+}
+
+macro_rules! impl_laneidx {
+    ($id:ident($ty:ty): [$_from:expr, $_to:expr] | $(#[$doc:meta])*) => {
+        #[derive(Copy, Clone, Debug)]
+        pub struct $id($ty);
+        impl $id {
+            #[inline]
+            #[rustc_args_required_const(0)]
+            pub const fn new(x: $ty) -> Self {
+                // FIXME: not allowed in const fn:
+                // * if statements
+                // * unreachable_unchecked / panic / abort
+                //
+                // if x < $from || x > $to {
+                //     unsafe { ::_core::hint::unreachable_unchecked() };
+                //     debug_assert!(...)
+                // }
+                $id(x)
+            }
+        }
+    };
+}
+impl_laneidx!(LaneIdx2(u8): [0, 1] | /// A byte with values in the range 0–1 identifying a lane.
+);
+impl_laneidx!(LaneIdx4(u8): [0, 3] | /// A byte with values in the range 0–3 identifying a lane.
+);
+impl_laneidx!(LaneIdx8(u8): [0, 7] | /// A byte with values in the range 0–7 identifying a lane.
+);
+impl_laneidx!(LaneIdx16(u8): [0, 15] | /// A byte with values in the range 0–15 identifying a lane.
+);
+impl_laneidx!(LaneIdx32(u8): [0, 31] | /// A byte with values in the range 0–31 identifying a lane.
+);
+
+types! {
+    /// WASM-specific 128-bit wide SIMD vector type
+    pub struct v128(i128);
+}
+
+mod sealed {
+    types! {
+        /// 128-bit wide SIMD vector type with 16 8-bit wide signed lanes
+        pub struct v8x16(
+            pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8,
+            pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8, pub i8,
+        );
+        /// 128-bit wide SIMD vector type with 8 16-bit wide signed lanes
+        pub struct v16x8(
+            pub i16, pub i16, pub i16, pub i16,
+            pub i16, pub i16, pub i16, pub i16
+        );
+        /// 128-bit wide SIMD vector type with 4 32-bit wide signed lanes
+        pub struct v32x4(pub i32, pub i32, pub i32, pub i32);
+        /// 128-bit wide SIMD vector type with 2 64-bit wide signed lanes
+        pub struct v64x2(pub i64, pub i64);
+
+        /// 128-bit wide SIMD vector type with 16 8-bit wide unsigned lanes
+        pub struct u8x16(
+            pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8,
+            pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8, pub u8,
+        );
+        /// 128-bit wide SIMD vector type with 8 16-bit wide unsigned lanes
+        pub struct u16x8(
+            pub u16, pub u16, pub u16, pub u16,
+            pub u16, pub u16, pub u16, pub u16
+        );
+        /// 128-bit wide SIMD vector type with 4 32-bit wide unsigned lanes
+        pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
+        /// 128-bit wide SIMD vector type with 2 64-bit wide unsigned lanes
+        pub struct u64x2(pub u64, pub u64);
+
+        /// 128-bit wide SIMD vector type with 4 32-bit wide floating-point lanes
+        pub struct f32x4(pub f32, pub f32, pub f32, pub f32);
+        /// 128-bit wide SIMD vector type with 2 64-bit wide floating-point lanes
+        pub struct f64x2(pub f64, pub f64);
+    }
+
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[link_name = "llvm.fabs.v4f32"]
+        fn abs_v4f32(x: f32x4) -> f32x4;
+        #[link_name = "llvm.fabs.v2f64"]
+        fn abs_v2f64(x: f64x2) -> f64x2;
+        #[link_name = "llvm.sqrt.v4f32"]
+        fn sqrt_v4f32(x: f32x4) -> f32x4;
+        #[link_name = "llvm.sqrt.v2f64"]
+        fn sqrt_v2f64(x: f64x2) -> f64x2;
+    }
+    impl f32x4 {
+        #[inline(always)]
+        pub unsafe fn abs(self) -> Self {
+            abs_v4f32(self)
+        }
+        #[inline(always)]
+        pub unsafe fn sqrt(self) -> Self {
+            sqrt_v4f32(self)
+        }
+    }
+    impl f64x2 {
+        #[inline(always)]
+        pub unsafe fn abs(self) -> Self {
+            abs_v2f64(self)
+        }
+        #[inline(always)]
+        pub unsafe fn sqrt(self) -> Self {
+            sqrt_v2f64(self)
+        }
+    }
+}
+
+/// WASM-specific v8x16 instructions
+pub struct v8x16;
+/// WASM-specific v16x8 instructions
+pub struct v16x8;
+/// WASM-specific v32x4 instructions
+pub struct v32x4;
+/// WASM-specific v64x2 instructions
+pub struct v64x2;
+
+/// WASM-specific v8x16 instructions with modulo-arithmetic semantics
+pub struct i8x16;
+/// WASM-specific v16x8 instructions with modulo-arithmetic semantics
+pub struct i16x8;
+/// WASM-specific v32x4 instructions with modulo-arithmetic semantics
+pub struct i32x4;
+/// WASM-specific v64x2 instructions with modulo-arithmetic semantics
+pub struct i64x2;
+
+/// WASM-specific v32x4 floating-point instructions
+pub struct f32x4;
+/// WASM-specific v64x2 floating-point instructions
+pub struct f64x2;
+
+impl v128 {
+    /// Materialize a constant SIMD value from the immediate operands.
+    ///
+    /// The `v128.const` instruction is encoded with 16 immediate bytes
+    /// `imm` which provide the bits of the vector directly.
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr(v128.const, imm = [ImmByte::new(42); 16]))]
+    #[rustc_args_required_const(0)]
+    pub const unsafe fn const_(imm: [ImmByte; 16]) -> v128 {
+        union U {
+            imm: [ImmByte; 16],
+            vec: v128,
+        }
+        U { imm }.vec
+    }
+}
+
+macro_rules! impl_splat {
+    ($id:ident[$ivec_ty:ident : $elem_ty:ident] <= $x_ty:ident | $($lane_id:ident),*) => {
+        impl $id {
+            /// Create vector with identical lanes
+            ///
+            /// Construct a vector with `x` replicated to all lanes.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($ident.splat))]
+            pub const unsafe fn splat(x: $x_ty) -> v128 {
+                union U {
+                    vec: self::sealed::$ivec_ty,
+                    res: v128
+                }
+                U { vec: self::sealed::$ivec_ty($({ struct $lane_id; x as $elem_ty}),*) }.res
+            }
+        }
+    }
+}
+impl_splat!(i8x16[v8x16:i8] <= i32 |
+            x0, x1, x2, x3, x4, x5, x6, x7,
+            x8, x9, x10, x11, x12, x13, x14, x15
+);
+impl_splat!(i16x8[v16x8:i16] <= i32 | x0, x1, x2, x3, x4, x5, x6, x7);
+impl_splat!(i32x4[v32x4:i32] <= i32 | x0, x1, x2, x3);
+impl_splat!(i64x2[v64x2:i64] <= i64 | x0, x1);
+impl_splat!(f32x4[f32x4:f32] <= f32 | x0, x1, x2, x3);
+impl_splat!(f64x2[f64x2:f64] <= f64 | x0, x1);
+
+macro_rules! impl_extract_lane {
+    ($id:ident[$ivec_ty:ident : $selem_ty:ident|$uelem_ty:ident]($lane_idx:ty)
+     => $x_ty:ident) => {
+        impl $id {
+            /// Extract lane as a scalar (sign-extend)
+            ///
+            /// Extract the scalar value of the lane selected by the immediate
+            /// operand `imm` from `a`, sign-extending it to the result type.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_s, imm = 0))]
+            #[rustc_args_required_const(1)]
+            pub unsafe fn extract_lane_s(a: v128, imm: $lane_idx) -> $x_ty {
+                use coresimd::simd_llvm::simd_extract;
+                union U {
+                    vec: self::sealed::$ivec_ty,
+                    a: v128
+                }
+                // the sealed vector stores signed lanes => extract as the signed lane type
+                let v: $selem_ty = simd_extract(U { a }.vec, imm.0 as u32 /* zero-extends index */);
+                v as $x_ty
+            }
+
+            /// Extract lane as a scalar (zero-extend)
+            ///
+            /// Extract the scalar value of the lane selected by the immediate
+            /// operand `imm` from `a`, zero-extending it to the result type.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm = 0))]
+            #[rustc_args_required_const(1)]
+            pub unsafe fn extract_lane_u(a: v128, imm: $lane_idx) -> $x_ty {
+                use coresimd::simd_llvm::simd_extract;
+                union U {
+                    vec: self::sealed::$ivec_ty,
+                    a: v128
+                }
+                // the sealed vector stores signed lanes => extract as the signed lane type
+                let v: $selem_ty = simd_extract(U { a }.vec, imm.0 as u32  /* zero-extends index */);
+                // re-interpret the signed integer as an unsigned one of the same size (no-op)
+                let v: $uelem_ty= ::mem::transmute(v);
+                // cast the unsigned lane value to the (wider) result type, which zero-extends
+                v as $x_ty
+            }
+        }
+    };
+    ($id:ident[$ivec_ty:ident]($lane_idx:ty) => $x_ty:ident) => {
+        impl $id {
+            /// Extract lane as a scalar
+            ///
+            /// Extract the scalar value of the lane selected by the immediate
+            /// operand `imm` from `a`.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane, imm = 0))]
+            #[rustc_args_required_const(1)]
+            pub unsafe fn extract_lane(a: v128, imm: $lane_idx) -> $x_ty {
+                use coresimd::simd_llvm::simd_extract;
+                union U {
+                    vec: self::sealed::$ivec_ty,
+                    a: v128
+                }
+                // the lane type is already the result type => no extension step is needed
+                simd_extract(U { a }.vec, imm.0 as u32  /* zero-extends index */)
+            }
+        }
+    };
+}
+impl_extract_lane!(i8x16[v8x16:i8|u8](LaneIdx16) => i32);
+impl_extract_lane!(i16x8[v16x8:i16|u16](LaneIdx8) => i32);
+impl_extract_lane!(i32x4[v32x4](LaneIdx4) => i32);
+impl_extract_lane!(i64x2[v64x2](LaneIdx2) => i64);
+impl_extract_lane!(f32x4[f32x4](LaneIdx4) => f32);
+impl_extract_lane!(f64x2[f64x2](LaneIdx2) => f64);
+
+macro_rules! impl_replace_lane {
+    ($id:ident[$ivec_ty:ident:$ielem_ty:ident]($lane_idx:ty) <= $x_ty:ident) => {
+        impl $id {
+            /// Replace lane value
+            ///
+            /// Return a new vector with lanes identical to `a`, except for the
+            /// lane selected by the immediate operand `imm`, which has the
+            /// value `x` (cast to the lane type).
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.replace_lane))]
+            #[rustc_args_required_const(1)]
+            pub unsafe fn replace_lane(a: v128, imm: $lane_idx, x: $x_ty) -> v128 {
+                use coresimd::simd_llvm::simd_insert;
+                union U {
+                    vec: self::sealed::$ivec_ty,
+                    a: v128
+                }
+                // view `a` as the typed vector, insert `x` cast to the lane type, convert back
+                ::mem::transmute(
+                    simd_insert(U { a }.vec,
+                                imm.0 as u32  /* zero-extends index */,
+                                x as $ielem_ty)
+                )
+            }
+        }
+    };
+}
+
+impl_replace_lane!(i8x16[v8x16:i8](LaneIdx16) <= i32);
+impl_replace_lane!(i16x8[v16x8:i16](LaneIdx8) <= i32);
+impl_replace_lane!(i32x4[v32x4:i32](LaneIdx4) <= i32);
+impl_replace_lane!(i64x2[v64x2:i64](LaneIdx2) <= i64);
+impl_replace_lane!(f32x4[f32x4:f32](LaneIdx4) <= f32);
+impl_replace_lane!(f64x2[f64x2:f64](LaneIdx2) <= f64);
+
+impl v8x16 {
+    /// Shuffle lanes
+    ///
+    /// Create vector with lanes selected from the lanes of two input vectors
+    /// `a` and `b` by the indices specified in the immediate mode operand
+    /// `imm`. Each index selects an element of the result vector, where the
+    /// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and
+    /// the indices in range `[16, 31]` select the `i - 16`-th element of `b`.
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr(v8x16.shuffle))]
+    #[rustc_args_required_const(2)]
+    pub unsafe fn shuffle(a: v128, b: v128, imm: [LaneIdx32; 16]) -> v128 {
+        // FIXME: LLVM does not support v8x16.shuffle (use inline assembly?)
+        let result: v128;
+        asm!("v8x16.shuffle $0, $1, $2" : "=r"(result) : "r"(a), "r"(b), "r"(imm) : : );
+        result
+    }
+}
+
+macro_rules! impl_wrapping_add_sub_neg {
+    ($id:ident[$ivec_ty:ident]) => {
+        impl $id {
+            /// Lane-wise wrapping integer addition
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.add))]
+            pub unsafe fn add(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_add;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                ::mem::transmute(simd_add(a, b))
+            }
+
+            /// Lane-wise wrapping integer subtraction
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.sub))]
+            pub unsafe fn sub(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_sub;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                ::mem::transmute(simd_sub(a, b))
+            }
+
+            /// Lane-wise wrapping integer negation
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.neg))]
+            pub unsafe fn neg(a: v128) -> v128 {
+                use coresimd::simd_llvm::simd_mul;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute($id::splat(-1));
+                ::mem::transmute(simd_mul(b, a))
+            }
+
+            // note: multiplication explicitly omitted (see below)
+        }
+    }
+}
+
+impl_wrapping_add_sub_neg!(i8x16[v8x16]);
+impl_wrapping_add_sub_neg!(i16x8[v16x8]);
+impl_wrapping_add_sub_neg!(i32x4[v32x4]);
+impl_wrapping_add_sub_neg!(i64x2[v64x2]);
+
+macro_rules! impl_wrapping_mul {
+    ($id:ident[$ivec_ty:ident]) => {
+        impl $id {
+            /// Lane-wise wrapping integer multiplication
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.mul))]
+            pub unsafe fn mul(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_mul;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                ::mem::transmute(simd_mul(a, b))
+            }
+        }
+    };
+}
+
+impl_wrapping_mul!(i8x16[v8x16]);
+impl_wrapping_mul!(i16x8[v16x8]);
+impl_wrapping_mul!(i32x4[v32x4]);
+// note: wrapping multiplication for i64x2 is not part of the spec
+
+// TODO: Saturating integer arithmetic
+// need to add intrinsics to rustc
+
+macro_rules! impl_shl_scalar {
+    ($id:ident[$ivec_ty:ident : $t:ty; $bits:expr]) => {
+        impl $id {
+            /// Left shift by scalar.
+            ///
+            /// Shift the bits in each lane to the left by the same amount `y`.
+            /// Only the low bits of `y` are used: it is reduced modulo the lane width.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.shl))]
+            pub unsafe fn shl(a: v128, y: i32) -> v128 {
+                use coresimd::simd_llvm::simd_shl;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute($id::splat((y & ($bits - 1)) as $t));
+                ::mem::transmute(simd_shl(a, b))
+            }
+        }
+    }
+}
+
+impl_shl_scalar!(i8x16[v8x16:i32; 8]);
+impl_shl_scalar!(i16x8[v16x8:i32; 16]);
+impl_shl_scalar!(i32x4[v32x4:i32; 32]);
+impl_shl_scalar!(i64x2[v64x2:i64; 64]);
+
+macro_rules! impl_shr_scalar {
+    ($id:ident[$svec_ty:ident : $uvec_ty:ident : $t:ty; $bits:expr]) => {
+        impl $id {
+            /// Arithmetic right shift by scalar (operates on the signed lane type).
+            ///
+            /// Shift the bits in each lane to the right by `y` modulo the lane width.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.shr_s))]
+            pub unsafe fn shr_s(a: v128, y: i32) -> v128 {
+                use coresimd::simd_llvm::simd_shr;
+                let a: sealed::$svec_ty = ::mem::transmute(a);
+                let b: sealed::$svec_ty = ::mem::transmute($id::splat((y & ($bits - 1)) as $t));
+                ::mem::transmute(simd_shr(a, b))
+            }
+
+            /// Logical right shift by scalar (operates on the unsigned lane type).
+            ///
+            /// Shift the bits in each lane to the right by `y` modulo the lane width.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.shr_u))]
+            pub unsafe fn shr_u(a: v128, y: i32) -> v128 {
+                use coresimd::simd_llvm::simd_shr;
+                let a: sealed::$uvec_ty = ::mem::transmute(a);
+                let b: sealed::$uvec_ty = ::mem::transmute($id::splat((y & ($bits - 1)) as $t));
+                ::mem::transmute(simd_shr(a, b))
+            }
+
+        }
+    }
+}
+
+impl_shr_scalar!(i8x16[v8x16:u8x16:i32; 8]);
+impl_shr_scalar!(i16x8[v16x8:u16x8:i32; 16]);
+impl_shr_scalar!(i32x4[v32x4:u32x4:i32; 32]);
+impl_shr_scalar!(i64x2[v64x2:u64x2:i64; 64]);
+
+
+// Bitwise logical operations
+impl v128 {
+    /// Bitwise logical and
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr($id.and))]
+    pub unsafe fn and(a: v128, b: v128) -> v128 {
+        use coresimd::simd_llvm::simd_and;
+        simd_and(a, b)
+    }
+
+    /// Bitwise logical or
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr($id.or))]
+    pub unsafe fn or(a: v128, b: v128) -> v128 {
+        use coresimd::simd_llvm::simd_or;
+        simd_or(a, b)
+    }
+
+    /// Bitwise logical xor
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr($id.xor))]
+    pub unsafe fn xor(a: v128, b: v128) -> v128 {
+        use coresimd::simd_llvm::simd_xor;
+        simd_xor(a, b)
+    }
+
+    /// Bitwise logical not
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr($id.not))]
+    pub unsafe fn not(a: v128) -> v128 {
+        union U {
+            v: u128,
+            c: [ImmByte; 16]
+        }
+        // FIXME: https://github.com/rust-lang/rust/issues/53193
+        const C: [ImmByte; 16] = unsafe { U { v: ::_core::u128::MAX }.c };
+        Self::xor(v128::const_(C), a)
+    }
+
+    /// Bitwise select
+    ///
+    /// Use the bits in the control mask `c` to select the corresponding bit
+    /// from `v1` when `1` and `v2` when `0`.
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr(v128.bitselect))]
+    pub unsafe fn bitselect(v1: v128, v2: v128, c: v128) -> v128 {
+        // FIXME: use llvm.select instead - we need to add a `simd_bitselect`
+        // intrinsic to rustc that converts a v128 vector into a i1x128. The
+        // `simd_select` intrinsic converts e.g. a i8x16 into a i1x16 which is not
+        // what we want here:
+        Self::or(Self::and(v1, c), Self::and(v2, Self::not(c)))
+    }
+}
+
+macro_rules! impl_boolean_reduction {
+    ($id:ident[$ivec_ty:ident]) => {
+        impl $id {
+            /// Any lane true
+            ///
+            /// Returns `1` if any lane in `a` is non-zero, `0` otherwise.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.any_true))]
+            pub unsafe fn any_true(a: v128) -> i32 {
+                use coresimd::simd_llvm::simd_reduce_any;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                if simd_reduce_any(a) { 1 } else { 0 }
+            }
+
+            /// All lanes true
+            ///
+            /// Returns `1` if all lanes in `a` are non-zero, `0` otherwise.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.all_true))]
+            pub unsafe fn all_true(a: v128) -> i32 {
+                use coresimd::simd_llvm::simd_reduce_all;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                if simd_reduce_all(a) { 1 } else { 0 }
+            }
+        }
+    }
+}
+
+impl_boolean_reduction!(i8x16[v8x16]);
+impl_boolean_reduction!(i16x8[v16x8]);
+impl_boolean_reduction!(i32x4[v32x4]);
+impl_boolean_reduction!(i64x2[v64x2]);
+
+macro_rules! impl_comparisons {
+    ($id:ident[$ivec_ty:ident]) => {
+        impl $id {
+            /// Equality
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.eq))]
+            pub unsafe fn eq(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_eq;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                let c: sealed::$ivec_ty = simd_eq(a, b);
+                ::mem::transmute(c)
+            }
+            /// Non-Equality
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.ne))]
+            pub unsafe fn ne(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_ne;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                let c: sealed::$ivec_ty = simd_ne(a, b);
+                ::mem::transmute(c)
+            }
+            /// Less-than
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.lt))]
+            pub unsafe fn lt(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_lt;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                let c: sealed::$ivec_ty = simd_lt(a, b);
+                ::mem::transmute(c)
+            }
+
+            /// Less-than or equal
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.le))]
+            pub unsafe fn le(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_le;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                let c: sealed::$ivec_ty = simd_le(a, b);
+                ::mem::transmute(c)
+            }
+
+            /// Greater-than
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.gt))]
+            pub unsafe fn gt(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_gt;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                let c: sealed::$ivec_ty = simd_gt(a, b);
+                ::mem::transmute(c)
+            }
+
+            /// Greater-than or equal
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.ge))]
+            pub unsafe fn ge(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_ge;
+                let a: sealed::$ivec_ty = ::mem::transmute(a);
+                let b: sealed::$ivec_ty = ::mem::transmute(b);
+                let c: sealed::$ivec_ty = simd_ge(a, b);
+                ::mem::transmute(c)
+            }
+        }
+    }
+}
+
+impl_comparisons!(i8x16[v8x16]);
+impl_comparisons!(i16x8[v16x8]);
+impl_comparisons!(i32x4[v32x4]);
+impl_comparisons!(i64x2[v64x2]);
+impl_comparisons!(f32x4[f32x4]);
+impl_comparisons!(f64x2[f64x2]);
+
+// Load and store
+impl v128 {
+    /// Load a `v128` vector from the given heap address.
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr($id.load))]
+    pub unsafe fn load(m: *const v128) -> v128 {
+        ::_core::ptr::read(m)
+
+    }
+
+    /// Store a `v128` vector to the given heap address.
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr($id.store))]
+    pub unsafe fn store(m: *mut v128, a: v128) {
+        ::_core::ptr::write(m, a)
+    }
+}
+
+// Floating-point operations
+macro_rules! impl_floating_point_ops {
+    ($id:ident) => {
+        impl $id {
+            /// Negation
+            ///
+            /// Apply the IEEE `negate(x)` function to each lane. This simply
+            /// inverts the sign bit, preserving all other bits, even for `NaN`
+            /// inputs.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.neg))]
+            pub unsafe fn neg(a: v128) -> v128 {
+                use coresimd::simd_llvm::simd_mul;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute($id::splat(-1.));
+                ::mem::transmute(simd_mul(b, a))
+            }
+            /// Absolute value
+            ///
+            /// Apply the IEEE `abs(x)` function to each lane. This simply
+            /// clears the sign bit, preserving all other bits, even for `NaN`
+            /// inputs.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.abs))]
+            pub unsafe fn abs(a: v128) -> v128 {
+                let a: sealed::$id = ::mem::transmute(a);
+                ::mem::transmute(a.abs())
+            }
+
+            /// NaN-propagating minimum
+            ///
+            /// Lane-wise minimum value, propagating `NaN`s.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.min))]
+            pub unsafe fn min(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_fmin;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute(b);
+                ::mem::transmute(simd_fmin(a, b))
+            }
+
+            /// NaN-propagating maximum
+            ///
+            /// Lane-wise maximum value, propagating `NaN`s.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.max))]
+            pub unsafe fn max(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_fmax;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute(b);
+                ::mem::transmute(simd_fmax(a, b))
+            }
+
+            /// Square-root
+            ///
+            /// Lane-wise square-root.
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.sqrt))]
+            pub unsafe fn sqrt(a: v128) -> v128 {
+                let a: sealed::$id = ::mem::transmute(a);
+                ::mem::transmute(a.sqrt())
+            }
+
+            /// Lane-wise addition
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.add))]
+            pub unsafe fn add(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_add;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute(b);
+                ::mem::transmute(simd_add(a, b))
+            }
+
+            /// Lane-wise subtraction
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.sub))]
+            pub unsafe fn sub(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_sub;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute(b);
+                ::mem::transmute(simd_sub(a, b))
+            }
+
+            /// Lane-wise multiplication
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.mul))]
+            pub unsafe fn mul(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_mul;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute(b);
+                ::mem::transmute(simd_mul(a, b))
+            }
+
+            /// Lane-wise division
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($id.div))]
+            pub unsafe fn div(a: v128, b: v128) -> v128 {
+                use coresimd::simd_llvm::simd_div;
+                let a: sealed::$id = ::mem::transmute(a);
+                let b: sealed::$id = ::mem::transmute(b);
+                ::mem::transmute(simd_div(a, b))
+            }
+        }
+    };
+}
+
+impl_floating_point_ops!(f32x4);
+impl_floating_point_ops!(f64x2);
+
+macro_rules! impl_conversion {
+    ($conversion:ident[$instr:expr]: $from_ty:ident => $to_ty:ident | $id:ident) => {
+        impl $id {
+            #[inline]
+            // #[target_feature(enable = "simd128")]
+            // FIXME: #[cfg_attr(test, assert_instr($instr))]
+            pub unsafe fn $conversion(a: v128) -> v128 {
+                use coresimd::simd_llvm::simd_cast;
+                let a: sealed::$from_ty = ::mem::transmute(a);
+                let b: sealed::$to_ty = simd_cast(a);
+                ::mem::transmute(b)
+            }
+        }
+    }
+}
+
+// Integer to floating point: the source lane signedness selects signed vs unsigned conversion
+impl_conversion!(convert_s_i32x4["f32x4.convert_s/i32x4"]: v32x4 => f32x4 | f32x4);
+impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: u32x4 => f32x4 | f32x4);
+impl_conversion!(convert_s_i64x2["f64x2.convert_s/i64x2"]: v64x2 => f64x2 | f64x2);
+impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: u64x2 => f64x2 | f64x2);
+
+// Floating point to integer (NOTE(review): confirm `simd_cast` saturates as the spec requires)
+impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 | i32x4);
+impl_conversion!(trunc_u_f32x4_sat["i32x4.trunc_u/f32x4:sat"]: f32x4 => u32x4 | i32x4);
+impl_conversion!(trunc_s_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => v64x2 | i64x2);
+impl_conversion!(trunc_u_f64x2_sat["i64x2.trunc_u/f64x2:sat"]: f64x2 => u64x2 | i64x2);

From 410eb8d1fe5fa21eaf7399f01e16b3e9a94184ae Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 9 Aug 2018 14:41:22 +0200
Subject: [PATCH 02/18] test wasm32 simd128 instructions

---
 .travis.yml                       |   5 +
 Cargo.toml                        |   3 +
 coresimd/wasm32/mod.rs            |   2 +
 coresimd/wasm32/simd128.rs        | 152 ++++----
 crates/wasm-test/.cargo/config    |   2 +
 crates/wasm-test/Cargo.toml       |  11 +
 crates/wasm-test/src/lib.rs       |   1 +
 crates/wasm-test/tests/simd128.rs | 583 ++++++++++++++++++++++++++++++
 8 files changed, 678 insertions(+), 81 deletions(-)
 create mode 100644 crates/wasm-test/.cargo/config
 create mode 100644 crates/wasm-test/Cargo.toml
 create mode 100644 crates/wasm-test/src/lib.rs
 create mode 100644 crates/wasm-test/tests/simd128.rs

diff --git a/.travis.yml b/.travis.yml
index 0746da3949..dd9f6a33c9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -34,6 +34,8 @@ matrix:
         - git clone --recursive https://github.com/WebAssembly/wabt
         - (cd wabt && git reset --hard a0bdeb7 && make -j4)
         - export PATH=$PATH:$PWD/wabt/bin
+        - git clone https://github.com/rustwasm/wasm-bindgen
+        - (cd wasm-bindgen && cargo install --path crates/cli)
       script:
         - cargo build --target wasm32-unknown-unknown -p stdsimd
         - cargo build --target wasm32-unknown-unknown -p stdsimd --release
@@ -42,6 +44,9 @@ matrix:
         - cat wasm.wat
         - grep current_memory wasm.wat
         - grep grow_memory wasm.wat
+        - cd crates/wasm-test
+        - cargo test --target=$TARGET
+        - cargo test --target=$TARGET --release
     - env: TARGET=thumbv6m-none-eabi NOSTD=1
     - env: TARGET=thumbv7m-none-eabi NOSTD=1
     - env: TARGET=thumbv7em-none-eabi NOSTD=1
diff --git a/Cargo.toml b/Cargo.toml
index d789fed9aa..2dbf903acb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,9 @@ members = [
   "crates/stdsimd-verify",
   "crates/stdsimd",
 ]
+exclude = [
+  "crates/wasm-test"
+]
 
 [profile.release]
 debug = true
diff --git a/coresimd/wasm32/mod.rs b/coresimd/wasm32/mod.rs
index 3e2e7aad88..f5c71cd58a 100644
--- a/coresimd/wasm32/mod.rs
+++ b/coresimd/wasm32/mod.rs
@@ -1,6 +1,8 @@
 //! WASM32 intrinsics
 
+#[macro_use]
 mod simd128;
+pub use self::simd128::*;
 
 extern "C" {
     #[link_name = "llvm.wasm.grow.memory.i32"]
diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs
index dc0177e2ad..cc46e1fd25 100644
--- a/coresimd/wasm32/simd128.rs
+++ b/coresimd/wasm32/simd128.rs
@@ -6,48 +6,17 @@
 #![allow(non_camel_case_types)]
 
 /// A single unconstrained byte (0-255).
-#[derive(Copy, Clone, Debug)]
-pub struct ImmByte(u8);
-impl ImmByte {
-    /// Constructor
-    #[inline]
-    #[rustc_args_required_const(0)]
-    pub const fn new(value: u8) -> Self {
-        ImmByte(value)
-    }
-}
-
-macro_rules! impl_laneidx {
-    ($id:ident($ty:ty): [$_from:expr, $_to:expr] | $(#[$doc:meta])*) => {
-        #[derive(Copy, Clone, Debug)]
-        pub struct $id($ty);
-        impl $id {
-            #[inline]
-            #[rustc_args_required_const(0)]
-            pub const fn new(x: $ty) -> Self {
-                // FIXME: not allowed in const fn:
-                // * if statements
-                // * unreachable_unchecked / panic / abort
-                //
-                // if x < $from || x > $to {
-                //     unsafe { ::_core::hint::unreachable_unchecked() };
-                //     debug_assert!(...)
-                // }
-                $id(x)
-            }
-        }
-    };
-}
-impl_laneidx!(LaneIdx2(u8): [0, 1] | /// A byte with values in the range 0–1 identifying a lane.
-);
-impl_laneidx!(LaneIdx4(u8): [0, 3] | /// A byte with values in the range 0–3 identifying a lane.
-);
-impl_laneidx!(LaneIdx8(u8): [0, 7] | /// A byte with values in the range 0–7 identifying a lane.
-);
-impl_laneidx!(LaneIdx16(u8): [0, 15] | /// A byte with values in the range 0–15 identifying a lane.
-);
-impl_laneidx!(LaneIdx32(u8): [0, 31] | /// A byte with values in the range 0–31 identifying a lane.
-);
+pub type ImmByte = u8;
+/// A byte with values in the range 0–1 identifying a lane.
+pub type LaneIdx2 = u8;
+/// A byte with values in the range 0–3 identifying a lane.
+pub type LaneIdx4 = u8;
+/// A byte with values in the range 0–7 identifying a lane.
+pub type LaneIdx8 = u8;
+/// A byte with values in the range 0–15 identifying a lane.
+pub type LaneIdx16 = u8;
+/// A byte with values in the range 0–31 identifying a lane.
+pub type LaneIdx32 = u8;
 
 types! {
     /// WASM-specific 128-bit wide SIMD vector type
@@ -102,6 +71,9 @@ mod sealed {
         fn sqrt_v4f32(x: f32x4) -> f32x4;
         #[link_name = "llvm.sqrt.v2f64"]
         fn sqrt_v2f64(x: f64x2) -> f64x2;
+        #[link_name = "shufflevector"]
+        pub fn shufflevector_v16i8(x: v8x16, y: v8x16, i: v8x16) -> v8x16;
+
     }
     impl f32x4 {
         #[inline(always)]
@@ -214,7 +186,7 @@ macro_rules! impl_extract_lane {
                     a: v128
                 }
                 // the vectors store a signed integer => extract into it
-                let v: $selem_ty = simd_extract(U { a }.vec, imm.0 as u32 /* zero-extends index */);
+                let v: $selem_ty = simd_extract(U { a }.vec, imm as u32 /* zero-extends index */);
                 v as $x_ty
             }
 
@@ -233,7 +205,7 @@ macro_rules! impl_extract_lane {
                     a: v128
                 }
                 // the vectors store a signed integer => extract into it
-                let v: $selem_ty = simd_extract(U { a }.vec, imm.0 as u32  /* zero-extends index */);
+                let v: $selem_ty = simd_extract(U { a }.vec, imm as u32  /* zero-extends index */);
                 // re-interpret the signed integer as an unsigned one of the same size (no-op)
                 let v: $uelem_ty= ::mem::transmute(v);
                 // cast the internal unsigned integer to a larger signed integer (zero-extends)
@@ -258,7 +230,7 @@ macro_rules! impl_extract_lane {
                     a: v128
                 }
                 // the vectors store a signed integer => extract into it
-                simd_extract(U { a }.vec, imm.0 as u32  /* zero-extends index */)
+                simd_extract(U { a }.vec, imm as u32  /* zero-extends index */)
             }
         }
     };
@@ -291,7 +263,7 @@ macro_rules! impl_replace_lane {
                 // the vectors store a signed integer => extract into it
                 ::mem::transmute(
                     simd_insert(U { a }.vec,
-                                imm.0 as u32  /* zero-extends index */,
+                                imm as u32  /* zero-extends index */,
                                 x as $ielem_ty)
                 )
             }
@@ -306,23 +278,44 @@ impl_replace_lane!(i64x2[v64x2:i64](LaneIdx2) <= i64);
 impl_replace_lane!(f32x4[f32x4:f32](LaneIdx4) <= f32);
 impl_replace_lane!(f64x2[f64x2:f64](LaneIdx2) <= f64);
 
-impl v8x16 {
-    /// Shuffle lanes
-    ///
-    /// Create vector with lanes selected from the lanes of two input vectors
-    /// `a` and `b` by the indices specified in the immediate mode operand
-    /// `imm`. Each index selects an element of the result vector, where the
-    /// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and
-    /// the indices in range `[16, 31]` select the `i - 16`-th element of `b`.
-    #[inline]
-    // #[target_feature(enable = "simd128")]
-    // FIXME: #[cfg_attr(test, assert_instr(v8x16.shuffle))]
-    #[rustc_args_required_const(2)]
-    pub unsafe fn shuffle(a: v128, b: v128, imm: [LaneIdx32; 16]) -> v128 {
-        // FIXME: LLVM does not support v8x16.shuffle (use inline assembly?)
-        let result: v128;
-        asm!("v8x16.shuffle $0, $1, $2" : "=r"(result) : "r"(a), "r"(b), "r"(imm) : : );
-        result
+pub use ::coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle;
+pub use self::sealed::v8x16 as __internal_v8x16;
+
+/// Shuffle lanes
+///
+/// Create vector with lanes selected from the lanes of two input vectors
+/// `a` and `b` by the indices specified in the immediate mode operand
+/// `imm`. Each index selects an element of the result vector, where the
+/// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and
+/// the indices in range `[16, 31]` select the `i - 16`-th element of `b`.
+#[macro_export]
+macro_rules! v8x16_shuffle {
+    ($a:expr, $b:expr, [
+        $imm0:expr, $imm1:expr, $imm2:expr, $imm3:expr,
+        $imm4:expr, $imm5:expr, $imm6:expr, $imm7:expr,
+        $imm8:expr, $imm9:expr, $imm10:expr, $imm11:expr,
+        $imm12:expr, $imm13:expr, $imm14:expr, $imm15:expr
+    ]) => {
+        #[allow(unused_unsafe)]
+        unsafe {
+            let a: $crate::arch::wasm32::v128 = $a;
+            let b: $crate::arch::wasm32::v128 = $b;
+            union U {
+                e: $crate::arch::wasm32::v128, // qualified: a bare `v128` resolves at the call site
+                i: $crate::arch::wasm32::__internal_v8x16,
+            }
+            let a = U { e: a }.i;
+            let b = U { e: b }.i;
+
+            let r: $crate::arch::wasm32::__internal_v8x16 =
+                $crate::arch::wasm32::__internal_v8x16_shuffle(a, b, [
+                    $imm0 as u32, $imm1 as u32, $imm2 as u32, $imm3 as u32,
+                    $imm4 as u32, $imm5 as u32, $imm6 as u32, $imm7 as u32,
+                    $imm8 as u32, $imm9 as u32, $imm10 as u32, $imm11 as u32,
+                    $imm12 as u32, $imm13 as u32, $imm14 as u32, $imm15 as u32
+                ]); // every index is cast, so `u8` lane constants are accepted too
+            U { i: r }.e
+        }
     }
 }
 
@@ -557,6 +550,9 @@ impl_boolean_reduction!(i64x2[v64x2]);
 
 macro_rules! impl_comparisons {
     ($id:ident[$ivec_ty:ident]) => {
+        impl_comparisons!($id[$ivec_ty=>$ivec_ty]);
+    };
+    ($id:ident[$ivec_ty:ident=>$rvec_ty:ident]) => {
         impl $id {
             /// Equality
             #[inline]
@@ -566,7 +562,7 @@ macro_rules! impl_comparisons {
                 use coresimd::simd_llvm::simd_eq;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
                 let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$ivec_ty = simd_eq(a, b);
+                let c: sealed::$rvec_ty = simd_eq(a, b);
                 ::mem::transmute(c)
             }
             /// Non-Equality
@@ -577,7 +573,7 @@ macro_rules! impl_comparisons {
                 use coresimd::simd_llvm::simd_ne;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
                 let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$ivec_ty = simd_ne(a, b);
+                let c: sealed::$rvec_ty = simd_ne(a, b);
                 ::mem::transmute(c)
             }
             /// Less-than
@@ -588,7 +584,7 @@ macro_rules! impl_comparisons {
                 use coresimd::simd_llvm::simd_lt;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
                 let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$ivec_ty = simd_lt(a, b);
+                let c: sealed::$rvec_ty = simd_lt(a, b);
                 ::mem::transmute(c)
             }
 
@@ -600,7 +596,7 @@ macro_rules! impl_comparisons {
                 use coresimd::simd_llvm::simd_le;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
                 let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$ivec_ty = simd_le(a, b);
+                let c: sealed::$rvec_ty = simd_le(a, b);
                 ::mem::transmute(c)
             }
 
@@ -612,7 +608,7 @@ macro_rules! impl_comparisons {
                 use coresimd::simd_llvm::simd_gt;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
                 let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$ivec_ty = simd_gt(a, b);
+                let c: sealed::$rvec_ty = simd_gt(a, b);
                 ::mem::transmute(c)
             }
 
@@ -624,7 +620,7 @@ macro_rules! impl_comparisons {
                 use coresimd::simd_llvm::simd_ge;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
                 let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$ivec_ty = simd_ge(a, b);
+                let c: sealed::$rvec_ty = simd_ge(a, b);
                 ::mem::transmute(c)
             }
         }
@@ -635,8 +631,8 @@ impl_comparisons!(i8x16[v8x16]);
 impl_comparisons!(i16x8[v16x8]);
 impl_comparisons!(i32x4[v32x4]);
 impl_comparisons!(i64x2[v64x2]);
-impl_comparisons!(f32x4[f32x4]);
-impl_comparisons!(f64x2[f64x2]);
+impl_comparisons!(f32x4[f32x4=>v32x4]);
+impl_comparisons!(f64x2[f64x2=>v64x2]);
 
 // Load and store
 impl v128 {
@@ -696,10 +692,7 @@ macro_rules! impl_floating_point_ops {
             // #[target_feature(enable = "simd128")]
             // FIXME: #[cfg_attr(test, assert_instr($id.min))]
             pub unsafe fn min(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_fmin;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute(b);
-                ::mem::transmute(simd_fmin(a, b))
+                v128::bitselect(a, b, $id::lt(a, b))
             }
 
             /// NaN-propagating maximum
@@ -709,10 +702,7 @@ macro_rules! impl_floating_point_ops {
             // #[target_feature(enable = "simd128")]
             // FIXME: #[cfg_attr(test, assert_instr($id.max))]
             pub unsafe fn max(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_fmax;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute(b);
-                ::mem::transmute(simd_fmax(a, b))
+                v128::bitselect(a, b, $id::gt(a, b))
             }
 
             /// Square-root
@@ -794,9 +784,9 @@ macro_rules! impl_conversion {
 
 // Integer to floating point
 impl_conversion!(convert_s_i32x4["f32x4.convert_s/i32x4"]: v32x4 => f32x4 | f32x4);
-impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: v32x4 => f32x4 | f32x4);
+impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: u32x4 => f32x4 | f32x4);
 impl_conversion!(convert_s_i64x2["f64x2.convert_s/i64x2"]: v64x2 => f64x2 | f64x2);
-impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: v64x2 => f64x2 | f64x2);
+impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: u64x2 => f64x2 | f64x2);
 
 // Floating point to integer with saturation
 impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 | i32x4);
diff --git a/crates/wasm-test/.cargo/config b/crates/wasm-test/.cargo/config
new file mode 100644
index 0000000000..908f2d6dde
--- /dev/null
+++ b/crates/wasm-test/.cargo/config
@@ -0,0 +1,2 @@
+[target.wasm32-unknown-unknown]
+runner = 'wasm-bindgen-test-runner'
\ No newline at end of file
diff --git a/crates/wasm-test/Cargo.toml b/crates/wasm-test/Cargo.toml
new file mode 100644
index 0000000000..7910113447
--- /dev/null
+++ b/crates/wasm-test/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "wasm-test"
+version = "0.1.0"
+authors = ["gnzlbg <gonzalobg88@gmail.com>"]
+
+[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
+wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
+coresimd = { path = "../coresimd" }
+
+[patch.crates-io]
+wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' }
diff --git a/crates/wasm-test/src/lib.rs b/crates/wasm-test/src/lib.rs
new file mode 100644
index 0000000000..0c9ac1ac8e
--- /dev/null
+++ b/crates/wasm-test/src/lib.rs
@@ -0,0 +1 @@
+#![no_std]
diff --git a/crates/wasm-test/tests/simd128.rs b/crates/wasm-test/tests/simd128.rs
new file mode 100644
index 0000000000..111049f239
--- /dev/null
+++ b/crates/wasm-test/tests/simd128.rs
@@ -0,0 +1,583 @@
+#![feature(use_extern_macros, stdsimd, asm, stmt_expr_attributes)]
+
+#[macro_use]
+extern crate coresimd;
+extern crate wasm_bindgen_test;
+
+use coresimd::arch::wasm32::*;
+use std::mem;
+use wasm_bindgen_test::*;
+
+fn compare_bytes(a: v128, b: v128) {
+    let a: [u8; 16] = unsafe { mem::transmute(a) };
+    let b: [u8; 16] = unsafe { mem::transmute(b) };
+    assert_eq!(a, b);
+}
+
+#[wasm_bindgen_test]
+fn v128_const() {
+    const A: v128 = unsafe {
+        v128::const_([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+    };
+    compare_bytes(A, A);
+}
+
+macro_rules! test_splat {
+    ($test_id:ident: $id:ident($val:expr) => $($vals:expr),*) => {
+        #[wasm_bindgen_test]
+        fn $test_id() {
+            const A: v128 = unsafe {
+                $id::splat($val)
+            };
+            const B: v128 = unsafe {
+                v128::const_([$($vals),*])
+            };
+            compare_bytes(A, B);
+        }
+    }
+}
+
+test_splat!(i8x16_splat: i8x16(42) => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42);
+test_splat!(i16x8_splat: i16x8(42) => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0);
+test_splat!(i32x4_splat: i32x4(42) => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0);
+test_splat!(i64x2_splat: i64x2(42) => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0);
+test_splat!(f32x4_splat: f32x4(42.) => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66);
+test_splat!(f64x2_splat: f64x2(42.) => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64);
+
+// tests extract and replace lanes
+macro_rules! test_extract {
+    ($test_id:ident: $id:ident[$ety:ident] => $extract_fn:ident | [$val:expr; $count:expr]
+     | [$($vals:expr),*] => ($other:expr)
+     | $($ids:expr),*) => {
+        #[wasm_bindgen_test]
+        fn $test_id() {
+            unsafe {
+                // splat vector and check that all indices contain the same value
+                // splatted:
+                const A: v128 = unsafe {
+                    $id::splat($val)
+                };
+                $(
+                    assert_eq!($id::$extract_fn(A, $ids) as $ety, $val);
+                )*;
+
+                // create a vector from array and check that the indices contain
+                // the same values as in the array:
+                let arr: [$ety; $count] = [$($vals),*];
+                let mut vec: v128 = mem::transmute(arr);
+                $(
+                    assert_eq!($id::$extract_fn(vec, $ids) as $ety, arr[$ids]);
+                )*;
+
+                // replace lane 0 with another value
+                vec = $id::replace_lane(vec, 0, $other);
+                assert_ne!($id::$extract_fn(vec, 0) as $ety, arr[0]);
+                assert_eq!($id::$extract_fn(vec, 0) as $ety, $other);
+            }
+        }
+    }
+}
+
+test_extract!(i8x16_extract_u: i8x16[u8] => extract_lane_u | [255; 16]
+              | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] => (42)
+              | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+);
+test_extract!(i8x16_extract_s: i8x16[i8] => extract_lane_s | [-122; 16]
+              | [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15] => (-42)
+              | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+);
+
+test_extract!(i16x8_extract_u: i16x8[u16] => extract_lane_u | [255; 8]
+              | [0, 1, 2, 3, 4, 5, 6, 7]  => (42) | 0, 1, 2, 3, 4, 5, 6, 7
+);
+test_extract!(i16x8_extract_s: i16x8[i16] => extract_lane_s | [-122; 8]
+              | [0, -1, 2, -3, 4, -5, 6, -7]  => (-42) | 0, 1, 2, 3, 4, 5, 6, 7
+);
+test_extract!(i32x4_extract: i32x4[i32] => extract_lane | [-122; 4]
+              | [0, -1, 2, -3]  => (42) | 0, 1, 2, 3
+);
+test_extract!(i64x2_extract: i64x2[i64] => extract_lane | [-122; 2]
+              | [0, -1]  => (42) | 0, 1
+);
+test_extract!(f32x4_extract: f32x4[f32] => extract_lane | [-122.; 4]
+              | [0., -1., 2., -3.]  => (42.) | 0, 1, 2, 3
+);
+test_extract!(f64x2_extract: f64x2[f64] => extract_lane | [-122.; 2]
+              | [0., -1.]  => (42.) | 0, 1
+);
+
+#[wasm_bindgen_test]
+fn v8x16_shuffle() {
+    unsafe {
+        let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
+        let b = [
+            16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        ];
+
+        let vec_a: v128 = mem::transmute(a);
+        let vec_b: v128 = mem::transmute(b);
+
+        let vec_r = v8x16_shuffle!(
+            vec_a,
+            vec_b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+
+        let e = [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30];
+        let vec_e: v128 = mem::transmute(e);
+        compare_bytes(vec_r, vec_e);
+    }
+}
+
+macro_rules! floating_point {
+    (f32) => {
+        true
+    };
+    (f64) => {
+        true
+    };
+    ($id:ident) => {
+        false
+    };
+}
+
+trait IsNan: Sized {
+    fn is_nan(self) -> bool {
+        false
+    }
+}
+impl IsNan for i8 {}
+impl IsNan for i16 {}
+impl IsNan for i32 {}
+impl IsNan for i64 {}
+
+macro_rules! test_bop {
+    ($id:ident[$ety:ident; $ecount:expr] |
+     $binary_op:ident [$op_test_id:ident] :
+     ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
+        test_bop!(
+            $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]:
+            ([$($in_a),*], [$($in_b),*]) => [$($out),*]
+        );
+
+    };
+    ($id:ident[$ety:ident; $ecount:expr] => $oty:ident |
+     $binary_op:ident [$op_test_id:ident] :
+     ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
+        #[wasm_bindgen_test]
+        fn $op_test_id() {
+            unsafe {
+                let a_input: [$ety; $ecount] = [$($in_a),*];
+                let b_input: [$ety; $ecount] = [$($in_b),*];
+                let output: [$oty; $ecount] = [$($out),*];
+
+                let a_vec_in: v128 = mem::transmute(a_input);
+                let b_vec_in: v128 = mem::transmute(b_input);
+                let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in);
+
+                let res: [$oty; $ecount] = mem::transmute(vec_res);
+
+                if !floating_point!($ety) {
+                    assert_eq!(res, output);
+                } else {
+                    for i in 0..$ecount {
+                        let r = res[i];
+                        let o = output[i];
+                        assert_eq!(r.is_nan(), o.is_nan());
+                        if !r.is_nan() {
+                            assert_eq!(r, o);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+macro_rules! test_bops {
+    ($id:ident[$ety:ident; $ecount:expr] |
+     $binary_op:ident [$op_test_id:ident]:
+     ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => {
+        #[wasm_bindgen_test]
+        fn $op_test_id() {
+            unsafe {
+                let a_input: [$ety; $ecount] = [$($in_a),*];
+                let output: [$ety; $ecount] = [$($out),*];
+
+                let a_vec_in: v128 = mem::transmute(a_input);
+                let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b);
+
+                let res: [$ety; $ecount] = mem::transmute(vec_res);
+                assert_eq!(res, output);
+            }
+        }
+    }
+}
+
+macro_rules! test_uop {
+    ($id:ident[$ety:ident; $ecount:expr] |
+     $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => {
+        #[wasm_bindgen_test]
+        fn $op_test_id() {
+            unsafe {
+                let a_input: [$ety; $ecount] = [$($in_a),*];
+                let output: [$ety; $ecount] = [$($out),*];
+
+                let a_vec_in: v128 = mem::transmute(a_input);
+                let vec_res: v128 = $id::$unary_op(a_vec_in);
+
+                let res: [$ety; $ecount] = mem::transmute(vec_res);
+                assert_eq!(res, output);
+            }
+        }
+    }
+}
+
+test_bop!(i8x16[i8; 16] | add[i8x16_add_test]:
+          ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1],
+           [8, i8::min_value(), 10, 11, 12, 13, 14, 1, 1, 1, 1, 1, 1, 1, 1, 1]) =>
+          [8, i8::max_value(), 12, 14, 16, 18, 20, i8::min_value(), 2, 2, 2, 2, 2, 2, 2, 2]);
+test_bop!(i8x16[i8; 16] | sub[i8x16_sub_test]:
+          ([0, -1, 2, 3, 4, 5, 6, -1, 1, 1, 1, 1, 1, 1, 1, 1],
+           [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) =>
+          [-8, i8::max_value(), -8, -8, -8, -8, -8, i8::min_value(), 0, 0, 0, 0, 0, 0, 0, 0]);
+test_bop!(i8x16[i8; 16] | mul[i8x16_mul_test]:
+          ([0, -2, 2, 3, 4, 5, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1],
+           [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) =>
+          [0, 0, 20, 33, 48, 65, 84, -2, 1, 1, 1, 1, 1, 1, 1, 1]);
+test_uop!(i8x16[i8; 16] | neg[i8x16_neg_test]:
+          [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1] =>
+          [-8, i8::min_value(), -10, -11, -12, -13, -14, i8::min_value() + 1, -1, -1, -1, -1, -1, -1, -1, -1]);
+
+test_bop!(i16x8[i16; 8] | add[i16x8_add_test]:
+          ([0, -1, 2, 3, 4, 5, 6, i16::max_value()],
+           [8, i16::min_value(), 10, 11, 12, 13, 14, 1]) =>
+          [8, i16::max_value(), 12, 14, 16, 18, 20, i16::min_value()]);
+test_bop!(i16x8[i16; 8] | sub[i16x8_sub_test]:
+          ([0, -1, 2, 3, 4, 5, 6, -1],
+           [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) =>
+          [-8, i16::max_value(), -8, -8, -8, -8, -8, i16::min_value()]);
+test_bop!(i16x8[i16; 8] | mul[i16x8_mul_test]:
+          ([0, -2, 2, 3, 4, 5, 6, 2],
+           [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) =>
+          [0, 0, 20, 33, 48, 65, 84, -2]);
+test_uop!(i16x8[i16; 8] | neg[i16x8_neg_test]:
+          [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()] =>
+          [-8, i16::min_value(), -10, -11, -12, -13, -14, i16::min_value() + 1]);
+
+test_bop!(i32x4[i32; 4] | add[i32x4_add_test]:
+          ([0, -1, 2, i32::max_value()],
+           [8, i32::min_value(), 10, 1]) =>
+          [8, i32::max_value(), 12, i32::min_value()]);
+test_bop!(i32x4[i32; 4] | sub[i32x4_sub_test]:
+          ([0, -1, 2, -1],
+           [8, i32::min_value(), 10, i32::max_value()]) =>
+          [-8, i32::max_value(), -8, i32::min_value()]);
+test_bop!(i32x4[i32; 4] | mul[i32x4_mul_test]:
+          ([0, -2, 2, 2],
+           [8, i32::min_value(), 10, i32::max_value()]) =>
+          [0, 0, 20, -2]);
+test_uop!(i32x4[i32; 4] | neg[i32x4_neg_test]:
+          [8, i32::min_value(), 10, i32::max_value()] =>
+          [-8, i32::min_value(), -10, i32::min_value() + 1]);
+
+test_bop!(i64x2[i64; 2] | add[i64x2_add_test]:
+          ([-1, i64::max_value()],
+           [i64::min_value(), 1]) =>
+          [i64::max_value(), i64::min_value()]);
+test_bop!(i64x2[i64; 2] | sub[i64x2_sub_test]:
+          ([-1, -1],
+           [i64::min_value(), i64::max_value()]) =>
+          [ i64::max_value(), i64::min_value()]);
+// note: mul for i64x2 is not part of the spec
+test_uop!(i64x2[i64; 2] | neg[i64x2_neg_test]:
+          [i64::min_value(), i64::max_value()] =>
+          [i64::min_value(), i64::min_value() + 1]);
+
+test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]:
+          ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+           [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]);
+test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]:
+          ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
+           [0, -2, 4, 6, 8, 10, 12, -2]);
+test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]:
+           ([0, -1, 2, 3], 1) => [0, -2, 4, 6]);
+test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]:
+           ([0, -1], 1) => [0, -2]);
+
+test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]:
+           ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+           [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
+test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]:
+           ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
+           [0, -1, 1, 1, 2, 2, 3, i16::max_value() / 2]);
+test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]:
+           ([0, -1, 2, 3], 1) => [0, -1, 1, 1]);
+test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]:
+           ([0, -1], 1) => [0, -1]);
+
+test_bops!(i8x16[i8; 16] | shr_u[i8x16_shr_u_test]:
+           ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+           [0, i8::max_value(), 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
+test_bops!(i16x8[i16; 8] | shr_u[i16x8_shr_u_test]:
+           ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
+           [0, i16::max_value(), 1, 1, 2, 2, 3, i16::max_value() / 2]);
+test_bops!(i32x4[i32; 4] | shr_u[i32x4_shr_u_test]:
+           ([0, -1, 2, 3], 1) => [0, i32::max_value(), 1, 1]);
+test_bops!(i64x2[i64; 2] | shr_u[i64x2_shr_u_test]:
+           ([0, -1], 1) => [0, i64::max_value()]);
+
+#[wasm_bindgen_test]
+fn v128_bitwise_logical_ops() {
+    unsafe {
+        let a: [u32; 4] = [u32::max_value(), 0, u32::max_value(), 0];
+        let b: [u32; 4] = [u32::max_value(); 4];
+        let c: [u32; 4] = [0; 4];
+
+        let vec_a: v128 = mem::transmute(a);
+        let vec_b: v128 = mem::transmute(b);
+        let vec_c: v128 = mem::transmute(c);
+
+        let r: v128 = v128::and(vec_a, vec_a);
+        compare_bytes(r, vec_a);
+        let r: v128 = v128::and(vec_a, vec_b);
+        compare_bytes(r, vec_a);
+        let r: v128 = v128::or(vec_a, vec_b);
+        compare_bytes(r, vec_b);
+        let r: v128 = v128::not(vec_b);
+        compare_bytes(r, vec_c);
+        let r: v128 = v128::xor(vec_a, vec_c);
+        compare_bytes(r, vec_a);
+
+        let r: v128 = v128::bitselect(vec_b, vec_c, vec_b);
+        compare_bytes(r, vec_b);
+        let r: v128 = v128::bitselect(vec_b, vec_c, vec_c);
+        compare_bytes(r, vec_c);
+        let r: v128 = v128::bitselect(vec_b, vec_c, vec_a);
+        compare_bytes(r, vec_a);
+    }
+}
+
+macro_rules! test_bool_red {
+    ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => {
+        #[wasm_bindgen_test]
+        fn $test_id() {
+            unsafe {
+                let vec_a: v128 = mem::transmute([$($true),*]); // true
+                let vec_b: v128 = mem::transmute([$($false),*]); // false
+                let vec_c: v128 = mem::transmute([$($alt),*]); // alternating
+
+                assert_eq!($id::any_true(vec_a), 1);
+                assert_eq!($id::any_true(vec_b), 0);
+                assert_eq!($id::any_true(vec_c), 1);
+
+                assert_eq!($id::all_true(vec_a), 1);
+                assert_eq!($id::all_true(vec_b), 0);
+                assert_eq!($id::all_true(vec_c), 0);
+            }
+        }
+    }
+}
+
+test_bool_red!(
+    i8x16[i8x16_boolean_reductions]
+        | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+        | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
+);
+test_bool_red!(
+    i16x8[i16x8_boolean_reductions]
+        | [1_i16, 1, 1, 1, 1, 1, 1, 1]
+        | [0_i16, 0, 0, 0, 0, 0, 0, 0]
+        | [1_i16, 0, 1, 0, 1, 0, 1, 0]
+);
+test_bool_red!(
+    i32x4[i32x4_boolean_reductions]
+        | [1_i32, 1, 1, 1]
+        | [0_i32, 0, 0, 0]
+        | [1_i32, 0, 1, 0]
+);
+test_bool_red!(
+    i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0]
+);
+
+test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+          [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
+test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+          [-1, 0, -1, 0 ,-1, 0, -1, -1]);
+test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]:
+          ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
+test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]);
+test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]:
+          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
+test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
+
+test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+          [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
+test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+          [0, -1, 0, -1 ,0, -1, 0, 0]);
+test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]:
+          ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
+test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]);
+test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]:
+          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
+test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]);
+
+test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+          [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
+test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+          [0, -1, 0, -1 ,0, -1, 0, 0]);
+test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]:
+          ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
+test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) => [0, -1]);
+test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]:
+          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
+test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]);
+
+test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]:
+          ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
+           [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) =>
+          [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
+test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]:
+          ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
+          [0, -1, 0, -1 ,0, -1, 0, 0]);
+test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]:
+          ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]);
+test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]);
+test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]:
+          ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]);
+test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]);
+
+test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+          [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
+test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]:
+          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+          [-1, 0, -1, 0 ,-1, 0, -1, -1]);
+test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]:
+          ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
+test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]);
+test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]:
+          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
+test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
+
+test_bop!(i8x16[i8; 16] | le[i8x16_le_test]:
+          ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
+           [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+           ) =>
+          [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
+test_bop!(i16x8[i16; 8] | le[i16x8_le_test]:
+          ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
+          [-1, 0, -1, 0 ,-1, 0, -1, -1]);
+test_bop!(i32x4[i32; 4] | le[i32x4_le_test]:
+          ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]);
+test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]);
+test_bop!(f32x4[f32; 4] => i32 | le[f32x4_le_test]:
+          ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, 0]);
+test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]);
+
+#[wasm_bindgen_test]
+fn v128_bitwise_load_store() {
+    unsafe {
+        let mut arr: [i32; 4] = [0, 1, 2, 3];
+
+        let vec = v128::load(arr.as_ptr() as *const v128);
+        let vec = i32x4::add(vec, vec);
+        v128::store(arr.as_mut_ptr() as *mut v128, vec);
+
+        assert_eq!(arr, [0, 2, 4, 6]);
+    }
+}
+
+test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]);
+test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]);
+test_bop!(f32x4[f32; 4] | min[f32x4_min_test]:
+          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]);
+test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]:
+          ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
+          => [0., -3., -4., std::f32::NAN]);
+test_bop!(f32x4[f32; 4] | max[f32x4_max_test]:
+          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]);
+test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]:
+          ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
+          => [1., -1., 7., std::f32::NAN]);
+test_bop!(f32x4[f32; 4] | add[f32x4_add_test]:
+          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]);
+test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]:
+          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]);
+test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]:
+          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]);
+test_bop!(f32x4[f32; 4] | div[f32x4_div_test]:
+          ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]);
+
+test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]);
+test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] => [ 0., 1.]);
+test_bop!(f64x2[f64; 2] | min[f64x2_min_test]:
+          ([0., -1.], [1., -3.]) => [0., -3.]);
+test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]:
+          ([7., 8.], [-4., std::f64::NAN])
+          => [ -4., std::f64::NAN]);
+test_bop!(f64x2[f64; 2] | max[f64x2_max_test]:
+          ([0., -1.], [1., -3.]) => [1., -1.]);
+test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]:
+          ([7., 8.], [ -4., std::f64::NAN])
+          => [7., std::f64::NAN]);
+test_bop!(f64x2[f64; 2] | add[f64x2_add_test]:
+          ([0., -1.], [1., -3.]) => [1., -4.]);
+test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]:
+          ([0., -1.], [1., -3.]) => [-1., 2.]);
+test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]:
+          ([0., -1.], [1., -3.]) => [0., 3.]);
+test_bop!(f64x2[f64; 2] | div[f64x2_div_test]:
+          ([0., -8.], [1., 4.]) => [0., -2.]);
+
+macro_rules! test_conv {
+    ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr,  $to:expr) => {
+        #[wasm_bindgen_test]
+        fn $test_id() {
+            unsafe {
+                let from: v128 = mem::transmute($from);
+                let to: v128 = mem::transmute($to);
+
+                let r: v128 = $to_ty::$conv_id(from);
+
+                compare_bytes(r, to);
+            }
+        }
+    };
+}
+
+test_conv!(
+    f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4],
+    [1_f32, 2., 3., 4.]
+);
+test_conv!(
+    f32x4_convert_u_i32x4
+        | convert_u_i32x4
+        | f32x4
+        | [u32::max_value(), 2, 3, 4],
+    [u32::max_value() as f32, 2., 3., 4.]
+);
+test_conv!(
+    f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2],
+    [1_f64, 2.]
+);
+test_conv!(
+    f64x2_convert_u_i64x2 | convert_u_i64x2 | f64x2 | [u64::max_value(), 2],
+    [18446744073709552000.0, 2.]
+);
+
+// FIXME: this test fails (yields -2147483648 instead of saturating at i32::max_value()):
+// test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat | i32x4
+//     | [1_f32, 2., (i32::max_value() as f32 + 1.), 4.], [1_i32, 2, i32::max_value(), 4]);
+// FIXME: add other saturating tests

From ea0e593e93abe3f49b2e7be1700d845bab008b66 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 9 Aug 2018 17:24:24 +0200
Subject: [PATCH 03/18] Run wasm tests like all other tests

---
 .travis.yml                                 |  17 -
 ci/docker/wasm32-unknown-unknown/Dockerfile |  27 +
 ci/run.sh                                   |   4 +
 coresimd/wasm32/mod.rs                      |   3 +
 coresimd/wasm32/simd128.rs                  | 805 ++++++++++++++++++--
 crates/coresimd/Cargo.toml                  |   3 +
 crates/coresimd/src/lib.rs                  |   3 +
 crates/wasm-test/.cargo/config              |   2 -
 crates/wasm-test/Cargo.toml                 |  11 -
 crates/wasm-test/src/lib.rs                 |   1 -
 crates/wasm-test/tests/simd128.rs           | 583 --------------
 11 files changed, 767 insertions(+), 692 deletions(-)
 create mode 100644 ci/docker/wasm32-unknown-unknown/Dockerfile
 delete mode 100644 crates/wasm-test/.cargo/config
 delete mode 100644 crates/wasm-test/Cargo.toml
 delete mode 100644 crates/wasm-test/src/lib.rs
 delete mode 100644 crates/wasm-test/tests/simd128.rs

diff --git a/.travis.yml b/.travis.yml
index dd9f6a33c9..6b21652456 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,23 +30,6 @@ matrix:
       env: TARGET=x86_64-apple-darwin NO_ADD=1
       script: ci/run.sh
     - env: TARGET=wasm32-unknown-unknown
-      before_script:
-        - git clone --recursive https://github.com/WebAssembly/wabt
-        - (cd wabt && git reset --hard a0bdeb7 && make -j4)
-        - export PATH=$PATH:$PWD/wabt/bin
-        - git clone https://github.com/rustwasm/wasm-bindgen
-        - (cd wasm-bindgen && cargo install --path crates/cli)
-      script:
-        - cargo build --target wasm32-unknown-unknown -p stdsimd
-        - cargo build --target wasm32-unknown-unknown -p stdsimd --release
-        - cargo rustc --target wasm32-unknown-unknown -p stdsimd --release --example wasm -- -C lto
-        - wasm2wat target/wasm32-unknown-unknown/release/examples/wasm.wasm -o wasm.wat
-        - cat wasm.wat
-        - grep current_memory wasm.wat
-        - grep grow_memory wasm.wat
-        - cd crates/wasm-test
-        - cargo test --target=$TARGET
-        - cargo test --target=$TARGET --release
     - env: TARGET=thumbv6m-none-eabi NOSTD=1
     - env: TARGET=thumbv7m-none-eabi NOSTD=1
     - env: TARGET=thumbv7em-none-eabi NOSTD=1
diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile
new file mode 100644
index 0000000000..35183554e3
--- /dev/null
+++ b/ci/docker/wasm32-unknown-unknown/Dockerfile
@@ -0,0 +1,27 @@
+FROM ubuntu:18.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends \
+  ca-certificates \
+  clang \
+  cmake \
+  curl \
+  git \
+  libc6-dev \
+  make \
+  python \
+  xz-utils
+
+# Install `wasm2wat`
+RUN git clone --recursive https://github.com/WebAssembly/wabt
+RUN make -C wabt -j$(nproc)
+ENV PATH=$PATH:/wabt/bin
+
+# Install `wasm-bindgen-test-runner`
+RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.15/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl.tar.gz \
+  | tar xzf -
+ENV PATH=$PATH:/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl
+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
+
+# Install `node`
+RUN curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf -
+ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin
diff --git a/ci/run.sh b/ci/run.sh
index d2350fc6c7..875a206e84 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -39,6 +39,10 @@ case ${TARGET} in
     *android*)
         export STDSIMD_DISABLE_ASSERT_INSTR=1
         ;;
+    wasm32*)
+        # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128"
+        ;;
+
     *)
         ;;
 esac
diff --git a/coresimd/wasm32/mod.rs b/coresimd/wasm32/mod.rs
index f5c71cd58a..1b0c82bb4e 100644
--- a/coresimd/wasm32/mod.rs
+++ b/coresimd/wasm32/mod.rs
@@ -1,7 +1,10 @@
 //! WASM32 intrinsics
 
 #[macro_use]
+#[cfg(not(test))]
 mod simd128;
+#[cfg(test)]
+pub mod simd128;
 pub use self::simd128::*;
 
 extern "C" {
diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs
index cc46e1fd25..2c72dc0ebc 100644
--- a/coresimd/wasm32/simd128.rs
+++ b/coresimd/wasm32/simd128.rs
@@ -127,7 +127,8 @@ impl v128 {
     /// `imm` which provide the bits of the vector directly.
     #[inline]
     // #[target_feature(enable = "simd128")]
-    // FIXME: #[cfg_attr(test, assert_instr(v128.const, imm = [ImmByte::new(42); 16]))]
+    // FIXME: #[cfg_attr(test, assert_instr(v128.const, imm =
+    // [ImmByte::new(42); 16]))]
     #[rustc_args_required_const(0)]
     pub const unsafe fn const_(imm: [ImmByte; 16]) -> v128 {
         union U {
@@ -157,15 +158,29 @@ macro_rules! impl_splat {
         }
     }
 }
-impl_splat!(i8x16[v8x16:i8] <= i32 |
-            x0, x1, x2, x3, x4, x5, x6, x7,
-            x8, x9, x10, x11, x12, x13, x14, x15
+impl_splat!(
+    i8x16[v8x16: i8] <= i32 | x0,
+    x1,
+    x2,
+    x3,
+    x4,
+    x5,
+    x6,
+    x7,
+    x8,
+    x9,
+    x10,
+    x11,
+    x12,
+    x13,
+    x14,
+    x15
 );
-impl_splat!(i16x8[v16x8:i16] <= i32 | x0, x1, x2, x3, x4, x5, x6, x7);
-impl_splat!(i32x4[v32x4:i32] <= i32 | x0, x1, x2, x3);
-impl_splat!(i64x2[v64x2:i64] <= i64 | x0, x1);
-impl_splat!(f32x4[f32x4:f32] <= f32 | x0, x1, x2, x3);
-impl_splat!(f64x2[f64x2:f64] <= f64 | x0, x1);
+impl_splat!(i16x8[v16x8: i16] <= i32 | x0, x1, x2, x3, x4, x5, x6, x7);
+impl_splat!(i32x4[v32x4: i32] <= i32 | x0, x1, x2, x3);
+impl_splat!(i64x2[v64x2: i64] <= i64 | x0, x1);
+impl_splat!(f32x4[f32x4: f32] <= f32 | x0, x1, x2, x3);
+impl_splat!(f64x2[f64x2: f64] <= f64 | x0, x1);
 
 macro_rules! impl_extract_lane {
     ($id:ident[$ivec_ty:ident : $selem_ty:ident|$uelem_ty:ident]($lane_idx:ty)
@@ -173,42 +188,52 @@ macro_rules! impl_extract_lane {
         impl $id {
             /// Extract lane as a scalar (sign-extend)
             ///
-            /// Extract the scalar value of lane specified in the immediate mode
-            /// operand `imm` from `a` by sign-extending it.
+            /// Extract the scalar value of lane specified in the immediate
+            /// mode operand `imm` from `a` by sign-extending it.
             #[inline]
             // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_s, imm = 0))]
+            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_s, imm =
+            // 0))]
             #[rustc_args_required_const(1)]
             pub unsafe fn extract_lane_s(a: v128, imm: $lane_idx) -> $x_ty {
                 use coresimd::simd_llvm::simd_extract;
                 union U {
                     vec: self::sealed::$ivec_ty,
-                    a: v128
+                    a: v128,
                 }
                 // the vectors store a signed integer => extract into it
-                let v: $selem_ty = simd_extract(U { a }.vec, imm as u32 /* zero-extends index */);
+                let v: $selem_ty = simd_extract(
+                    U { a }.vec,
+                    imm as u32, /* zero-extends index */
+                );
                 v as $x_ty
             }
 
             /// Extract lane as a scalar (zero-extend)
             ///
-            /// Extract the scalar value of lane specified in the immediate mode
-            /// operand `imm` from `a` by zero-extending it.
+            /// Extract the scalar value of lane specified in the immediate
+            /// mode operand `imm` from `a` by zero-extending it.
             #[inline]
             // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm = 0))]
+            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm =
+            // 0))]
             #[rustc_args_required_const(1)]
             pub unsafe fn extract_lane_u(a: v128, imm: $lane_idx) -> $x_ty {
                 use coresimd::simd_llvm::simd_extract;
                 union U {
                     vec: self::sealed::$ivec_ty,
-                    a: v128
+                    a: v128,
                 }
                 // the vectors store a signed integer => extract into it
-                let v: $selem_ty = simd_extract(U { a }.vec, imm as u32  /* zero-extends index */);
-                // re-interpret the signed integer as an unsigned one of the same size (no-op)
-                let v: $uelem_ty= ::mem::transmute(v);
-                // cast the internal unsigned integer to a larger signed integer (zero-extends)
+                let v: $selem_ty = simd_extract(
+                    U { a }.vec,
+                    imm as u32, /* zero-extends index */
+                );
+                // re-interpret the signed integer as an unsigned one of the
+                // same size (no-op)
+                let v: $uelem_ty = ::mem::transmute(v);
+                // cast the internal unsigned integer to a larger signed
+                // integer (zero-extends)
                 v as $x_ty
             }
         }
@@ -217,20 +242,24 @@ macro_rules! impl_extract_lane {
         impl $id {
             /// Extract lane as a scalar
             ///
-            /// Extract the scalar value of lane specified in the immediate mode
-            /// operand `imm` from `a`.
+            /// Extract the scalar value of lane specified in the immediate
+            /// mode operand `imm` from `a`.
             #[inline]
             // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm = 0))]
+            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm =
+            // 0))]
             #[rustc_args_required_const(1)]
             pub unsafe fn extract_lane(a: v128, imm: $lane_idx) -> $x_ty {
                 use coresimd::simd_llvm::simd_extract;
                 union U {
                     vec: self::sealed::$ivec_ty,
-                    a: v128
+                    a: v128,
                 }
                 // the vectors store a signed integer => extract into it
-                simd_extract(U { a }.vec, imm as u32  /* zero-extends index */)
+                simd_extract(
+                    U { a }.vec,
+                    imm as u32, /* zero-extends index */
+                )
             }
         }
     };
@@ -247,39 +276,41 @@ macro_rules! impl_replace_lane {
         impl $id {
             /// Replace lane value
             ///
-            /// Return a new vector with lanes identical to `a`, except for lane
-            /// specified in the immediate mode argument `i` which has the value
-            /// `x`.
+            /// Return a new vector with lanes identical to `a`, except for
+            /// lane specified in the immediate mode argument `i` which
+            /// has the value `x`.
             #[inline]
             // #[target_feature(enable = "simd128")]
             // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u))]
             #[rustc_args_required_const(1)]
-            pub unsafe fn replace_lane(a: v128, imm: $lane_idx, x: $x_ty) -> v128 {
+            pub unsafe fn replace_lane(
+                a: v128, imm: $lane_idx, x: $x_ty,
+            ) -> v128 {
                 use coresimd::simd_llvm::simd_insert;
                 union U {
                     vec: self::sealed::$ivec_ty,
-                    a: v128
+                    a: v128,
                 }
                 // the vectors store a signed integer => extract into it
-                ::mem::transmute(
-                    simd_insert(U { a }.vec,
-                                imm as u32  /* zero-extends index */,
-                                x as $ielem_ty)
-                )
+                ::mem::transmute(simd_insert(
+                    U { a }.vec,
+                    imm as u32, /* zero-extends index */
+                    x as $ielem_ty,
+                ))
             }
         }
     };
 }
 
-impl_replace_lane!(i8x16[v8x16:i8](LaneIdx16) <= i32);
-impl_replace_lane!(i16x8[v16x8:i16](LaneIdx8) <= i32);
-impl_replace_lane!(i32x4[v32x4:i32](LaneIdx4) <= i32);
-impl_replace_lane!(i64x2[v64x2:i64](LaneIdx2) <= i64);
-impl_replace_lane!(f32x4[f32x4:f32](LaneIdx4) <= f32);
-impl_replace_lane!(f64x2[f64x2:f64](LaneIdx2) <= f64);
+impl_replace_lane!(i8x16[v8x16: i8](LaneIdx16) <= i32);
+impl_replace_lane!(i16x8[v16x8: i16](LaneIdx8) <= i32);
+impl_replace_lane!(i32x4[v32x4: i32](LaneIdx4) <= i32);
+impl_replace_lane!(i64x2[v64x2: i64](LaneIdx2) <= i64);
+impl_replace_lane!(f32x4[f32x4: f32](LaneIdx4) <= f32);
+impl_replace_lane!(f64x2[f64x2: f64](LaneIdx2) <= f64);
 
-pub use ::coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle;
 pub use self::sealed::v8x16 as __internal_v8x16;
+pub use coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle;
 
 /// Shuffle lanes
 ///
@@ -308,15 +339,31 @@ macro_rules! v8x16_shuffle {
             let b = U { e: b }.i;
 
             let r: $crate::arch::wasm32::__internal_v8x16 =
-                $crate::arch::wasm32::__internal_v8x16_shuffle(a, b, [
-                    $imm0 as u32, $imm1, $imm2, $imm3,
-                    $imm4, $imm5, $imm6, $imm7,
-                    $imm8, $imm9, $imm10, $imm11,
-                    $imm12, $imm13, $imm14, $imm15
-                ]);
+                $crate::arch::wasm32::__internal_v8x16_shuffle(
+                    a,
+                    b,
+                    [
+                        $imm0 as u32,
+                        $imm1,
+                        $imm2,
+                        $imm3,
+                        $imm4,
+                        $imm5,
+                        $imm6,
+                        $imm7,
+                        $imm8,
+                        $imm9,
+                        $imm10,
+                        $imm11,
+                        $imm12,
+                        $imm13,
+                        $imm14,
+                        $imm15,
+                    ],
+                );
             U { i: r }.e
         }
-    }
+    };
 }
 
 macro_rules! impl_wrapping_add_sub_neg {
@@ -357,7 +404,7 @@ macro_rules! impl_wrapping_add_sub_neg {
 
             // note: multiplication explicitly omitted (see below)
         }
-    }
+    };
 }
 
 impl_wrapping_add_sub_neg!(i8x16[v8x16]);
@@ -403,56 +450,57 @@ macro_rules! impl_shl_scalar {
             pub unsafe fn shl(a: v128, y: i32) -> v128 {
                 use coresimd::simd_llvm::simd_shl;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute($id::splat(y as $t));
+                let b: sealed::$ivec_ty =
+                    ::mem::transmute($id::splat(y as $t));
                 ::mem::transmute(simd_shl(a, b))
             }
         }
-    }
+    };
 }
 
-impl_shl_scalar!(i8x16[v8x16:i32]);
-impl_shl_scalar!(i16x8[v16x8:i32]);
-impl_shl_scalar!(i32x4[v32x4:i32]);
-impl_shl_scalar!(i64x2[v64x2:i64]);
+impl_shl_scalar!(i8x16[v8x16: i32]);
+impl_shl_scalar!(i16x8[v16x8: i32]);
+impl_shl_scalar!(i32x4[v32x4: i32]);
+impl_shl_scalar!(i64x2[v64x2: i64]);
 
 macro_rules! impl_shr_scalar {
     ($id:ident[$svec_ty:ident : $uvec_ty:ident : $t:ty]) => {
         impl $id {
             /// Arithmetic right shift by scalar.
             ///
-            /// Shift the bits in each lane to the right by the same amount. 
+            /// Shift the bits in each lane to the right by the same amount.
             #[inline]
             // #[target_feature(enable = "simd128")]
             // FIXME: #[cfg_attr(test, assert_instr($id.shr))]
             pub unsafe fn shr_s(a: v128, y: i32) -> v128 {
                 use coresimd::simd_llvm::simd_shr;
                 let a: sealed::$svec_ty = ::mem::transmute(a);
-                let b: sealed::$svec_ty = ::mem::transmute($id::splat(y as $t));
+                let b: sealed::$svec_ty =
+                    ::mem::transmute($id::splat(y as $t));
                 ::mem::transmute(simd_shr(a, b))
             }
 
             /// Logical right shift by scalar.
             ///
-            /// Shift the bits in each lane to the right by the same amount. 
+            /// Shift the bits in each lane to the right by the same amount.
             #[inline]
             // #[target_feature(enable = "simd128")]
             // FIXME: #[cfg_attr(test, assert_instr($id.shr))]
             pub unsafe fn shr_u(a: v128, y: i32) -> v128 {
                 use coresimd::simd_llvm::simd_shr;
                 let a: sealed::$uvec_ty = ::mem::transmute(a);
-                let b: sealed::$uvec_ty = ::mem::transmute($id::splat(y as $t));
+                let b: sealed::$uvec_ty =
+                    ::mem::transmute($id::splat(y as $t));
                 ::mem::transmute(simd_shr(a, b))
             }
-
         }
-    }
+    };
 }
 
-impl_shr_scalar!(i8x16[v8x16:u8x16:i32]);
-impl_shr_scalar!(i16x8[v16x8:u16x8:i32]);
-impl_shr_scalar!(i32x4[v32x4:u32x4:i32]);
-impl_shr_scalar!(i64x2[v64x2:u64x2:i64]);
-
+impl_shr_scalar!(i8x16[v8x16: u8x16: i32]);
+impl_shr_scalar!(i16x8[v16x8: u16x8: i32]);
+impl_shr_scalar!(i32x4[v32x4: u32x4: i32]);
+impl_shr_scalar!(i64x2[v64x2: u64x2: i64]);
 
 // Bitwise logical operations
 impl v128 {
@@ -490,10 +538,14 @@ impl v128 {
     pub unsafe fn not(a: v128) -> v128 {
         union U {
             v: u128,
-            c: [ImmByte; 16]
+            c: [ImmByte; 16],
         }
         // FIXME: https://github.com/rust-lang/rust/issues/53193
-        const C: [ImmByte; 16] = unsafe { U { v: ::_core::u128::MAX }.c };
+        const C: [ImmByte; 16] = unsafe {
+            U {
+                v: ::_core::u128::MAX,
+            }.c
+        };
         Self::xor(v128::const_(C), a)
     }
 
@@ -507,8 +559,8 @@ impl v128 {
     pub unsafe fn bitselect(v1: v128, v2: v128, c: v128) -> v128 {
         // FIXME: use llvm.select instead - we need to add a `simd_bitselect`
         // intrinsic to rustc that converts a v128 vector into a i1x128. The
-        // `simd_select` intrinsic converts e.g. a i8x16 into a i1x16 which is not
-        // what we want here:
+        // `simd_select` intrinsic converts e.g. a i8x16 into a i1x16 which is
+        // not what we want here:
         Self::or(Self::and(v1, c), Self::and(v2, Self::not(c)))
     }
 }
@@ -525,7 +577,11 @@ macro_rules! impl_boolean_reduction {
             pub unsafe fn any_true(a: v128) -> i32 {
                 use coresimd::simd_llvm::simd_reduce_any;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
-                if simd_reduce_any(a) { 1 } else { 0 }
+                if simd_reduce_any(a) {
+                    1
+                } else {
+                    0
+                }
             }
 
             /// All lanes true
@@ -537,10 +593,14 @@ macro_rules! impl_boolean_reduction {
             pub unsafe fn all_true(a: v128) -> i32 {
                 use coresimd::simd_llvm::simd_reduce_all;
                 let a: sealed::$ivec_ty = ::mem::transmute(a);
-                if simd_reduce_all(a) { 1 } else { 0 }
+                if simd_reduce_all(a) {
+                    1
+                } else {
+                    0
+                }
             }
         }
-    }
+    };
 }
 
 impl_boolean_reduction!(i8x16[v8x16]);
@@ -642,7 +702,6 @@ impl v128 {
     // FIXME: #[cfg_attr(test, assert_instr($id.load))]
     pub unsafe fn load(m: *const v128) -> v128 {
         ::_core::ptr::read(m)
-
     }
 
     /// Store a `v128` vector to the given heap address.
@@ -779,7 +838,7 @@ macro_rules! impl_conversion {
                 ::mem::transmute(b)
             }
         }
-    }
+    };
 }
 
 // Integer to floating point
@@ -793,3 +852,593 @@ impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 |
 impl_conversion!(trunc_u_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => u32x4 | i32x4);
 impl_conversion!(trunc_s_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => v64x2 | i64x2);
 impl_conversion!(trunc_u_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => u64x2 | i64x2);
+
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+    use std;
+    use std::mem;
+    use std::prelude::v1::*;
+    use wasm_bindgen_test::*;
+
+    fn compare_bytes(a: v128, b: v128) {
+        let a: [u8; 16] = unsafe { mem::transmute(a) };
+        let b: [u8; 16] = unsafe { mem::transmute(b) };
+        assert_eq!(a, b);
+    }
+
+    #[wasm_bindgen_test]
+    fn v128_const() {
+        const A: v128 = unsafe {
+            v128::const_([
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+            ])
+        };
+        compare_bytes(A, A);
+    }
+
+    macro_rules! test_splat {
+        ($test_id:ident: $id:ident($val:expr) => $($vals:expr),*) => {
+            #[wasm_bindgen_test]
+            fn $test_id() {
+                const A: v128 = unsafe {
+                    $id::splat($val)
+                };
+                const B: v128 = unsafe {
+                    v128::const_([$($vals),*])
+                };
+                compare_bytes(A, B);
+            }
+        }
+    }
+
+    test_splat!(i8x16_splat: i8x16(42) => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42);
+    test_splat!(i16x8_splat: i16x8(42) => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0);
+    test_splat!(i32x4_splat: i32x4(42) => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0);
+    test_splat!(i64x2_splat: i64x2(42) => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0);
+    test_splat!(f32x4_splat: f32x4(42.) => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66);
+    test_splat!(f64x2_splat: f64x2(42.) => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64);
+
+    // tests extract and replace lanes
+    macro_rules! test_extract {
+        ($test_id:ident: $id:ident[$ety:ident] => $extract_fn:ident | [$val:expr; $count:expr]
+         | [$($vals:expr),*] => ($other:expr)
+         | $($ids:expr),*) => {
+            #[wasm_bindgen_test]
+            fn $test_id() {
+                unsafe {
+                    // splat vector and check that all indices contain the same value
+                    // splatted:
+                    const A: v128 = unsafe {
+                        $id::splat($val)
+                    };
+                    $(
+                        assert_eq!($id::$extract_fn(A, $ids) as $ety, $val);
+                    )*;
+
+                    // create a vector from array and check that the indices contain
+                    // the same values as in the array:
+                    let arr: [$ety; $count] = [$($vals),*];
+                    let mut vec: v128 = mem::transmute(arr);
+                    $(
+                        assert_eq!($id::$extract_fn(vec, $ids) as $ety, arr[$ids]);
+                    )*;
+
+                    // replace lane 0 with another value
+                    vec = $id::replace_lane(vec, 0, $other);
+                    assert_ne!($id::$extract_fn(vec, 0) as $ety, arr[0]);
+                    assert_eq!($id::$extract_fn(vec, 0) as $ety, $other);
+                }
+            }
+        }
+    }
+
+    test_extract!(i8x16_extract_u: i8x16[u8] => extract_lane_u | [255; 16]
+                  | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] => (42)
+                  | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+    );
+    test_extract!(i8x16_extract_s: i8x16[i8] => extract_lane_s | [-122; 16]
+                  | [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15] => (-42)
+                  | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+    );
+
+    test_extract!(i16x8_extract_u: i16x8[u16] => extract_lane_u | [255; 8]
+                  | [0, 1, 2, 3, 4, 5, 6, 7]  => (42) | 0, 1, 2, 3, 4, 5, 6, 7
+    );
+    test_extract!(i16x8_extract_s: i16x8[i16] => extract_lane_s | [-122; 8]
+                  | [0, -1, 2, -3, 4, -5, 6, -7]  => (-42) | 0, 1, 2, 3, 4, 5, 6, 7
+    );
+    test_extract!(i32x4_extract: i32x4[i32] => extract_lane | [-122; 4]
+                  | [0, -1, 2, -3]  => (42) | 0, 1, 2, 3
+    );
+    test_extract!(i64x2_extract: i64x2[i64] => extract_lane | [-122; 2]
+                  | [0, -1]  => (42) | 0, 1
+    );
+    test_extract!(f32x4_extract: f32x4[f32] => extract_lane | [-122.; 4]
+                  | [0., -1., 2., -3.]  => (42.) | 0, 1, 2, 3
+    );
+    test_extract!(f64x2_extract: f64x2[f64] => extract_lane | [-122.; 2]
+                  | [0., -1.]  => (42.) | 0, 1
+    );
+
+    #[wasm_bindgen_test]
+    fn v8x16_shuffle() {
+        unsafe {
+            let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
+            let b = [
+                16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                31,
+            ];
+
+            let vec_a: v128 = mem::transmute(a);
+            let vec_b: v128 = mem::transmute(b);
+
+            let vec_r = v8x16_shuffle!(
+                vec_a,
+                vec_b,
+                [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+            );
+
+            let e =
+                [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30];
+            let vec_e: v128 = mem::transmute(e);
+            compare_bytes(vec_r, vec_e);
+        }
+    }
+
+    macro_rules! floating_point {
+        (f32) => {
+            true
+        };
+        (f64) => {
+            true
+        };
+        ($id:ident) => {
+            false
+        };
+    }
+
+    trait IsNan: Sized {
+        fn is_nan(self) -> bool {
+            false
+        }
+    }
+    impl IsNan for i8 {}
+    impl IsNan for i16 {}
+    impl IsNan for i32 {}
+    impl IsNan for i64 {}
+
+    macro_rules! test_bop {
+        ($id:ident[$ety:ident; $ecount:expr] |
+         $binary_op:ident [$op_test_id:ident] :
+         ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
+            test_bop!(
+                $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]:
+                ([$($in_a),*], [$($in_b),*]) => [$($out),*]
+            );
+
+        };
+        ($id:ident[$ety:ident; $ecount:expr] => $oty:ident |
+         $binary_op:ident [$op_test_id:ident] :
+         ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
+            #[wasm_bindgen_test]
+            fn $op_test_id() {
+                unsafe {
+                    let a_input: [$ety; $ecount] = [$($in_a),*];
+                    let b_input: [$ety; $ecount] = [$($in_b),*];
+                    let output: [$oty; $ecount] = [$($out),*];
+
+                    let a_vec_in: v128 = mem::transmute(a_input);
+                    let b_vec_in: v128 = mem::transmute(b_input);
+                    let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in);
+
+                    let res: [$oty; $ecount] = mem::transmute(vec_res);
+
+                    if !floating_point!($ety) {
+                        assert_eq!(res, output);
+                    } else {
+                        for i in 0..$ecount {
+                            let r = res[i];
+                            let o = output[i];
+                            assert_eq!(r.is_nan(), o.is_nan());
+                            if !r.is_nan() {
+                                assert_eq!(r, o);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    macro_rules! test_bops {
+        ($id:ident[$ety:ident; $ecount:expr] |
+         $binary_op:ident [$op_test_id:ident]:
+         ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => {
+            #[wasm_bindgen_test]
+            fn $op_test_id() {
+                unsafe {
+                    let a_input: [$ety; $ecount] = [$($in_a),*];
+                    let output: [$ety; $ecount] = [$($out),*];
+
+                    let a_vec_in: v128 = mem::transmute(a_input);
+                    let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b);
+
+                    let res: [$ety; $ecount] = mem::transmute(vec_res);
+                    assert_eq!(res, output);
+                }
+            }
+        }
+    }
+
+    macro_rules! test_uop {
+        ($id:ident[$ety:ident; $ecount:expr] |
+         $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => {
+            #[wasm_bindgen_test]
+            fn $op_test_id() {
+                unsafe {
+                    let a_input: [$ety; $ecount] = [$($in_a),*];
+                    let output: [$ety; $ecount] = [$($out),*];
+
+                    let a_vec_in: v128 = mem::transmute(a_input);
+                    let vec_res: v128 = $id::$unary_op(a_vec_in);
+
+                    let res: [$ety; $ecount] = mem::transmute(vec_res);
+                    assert_eq!(res, output);
+                }
+            }
+        }
+    }
+
+    test_bop!(i8x16[i8; 16] | add[i8x16_add_test]:
+              ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1],
+               [8, i8::min_value(), 10, 11, 12, 13, 14, 1, 1, 1, 1, 1, 1, 1, 1, 1]) =>
+              [8, i8::max_value(), 12, 14, 16, 18, 20, i8::min_value(), 2, 2, 2, 2, 2, 2, 2, 2]);
+    test_bop!(i8x16[i8; 16] | sub[i8x16_sub_test]:
+              ([0, -1, 2, 3, 4, 5, 6, -1, 1, 1, 1, 1, 1, 1, 1, 1],
+               [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) =>
+              [-8, i8::max_value(), -8, -8, -8, -8, -8, i8::min_value(), 0, 0, 0, 0, 0, 0, 0, 0]);
+    test_bop!(i8x16[i8; 16] | mul[i8x16_mul_test]:
+              ([0, -2, 2, 3, 4, 5, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1],
+               [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) =>
+              [0, 0, 20, 33, 48, 65, 84, -2, 1, 1, 1, 1, 1, 1, 1, 1]);
+    test_uop!(i8x16[i8; 16] | neg[i8x16_neg_test]:
+              [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1] =>
+              [-8, i8::min_value(), -10, -11, -12, -13, -14, i8::min_value() + 1, -1, -1, -1, -1, -1, -1, -1, -1]);
+
+    test_bop!(i16x8[i16; 8] | add[i16x8_add_test]:
+              ([0, -1, 2, 3, 4, 5, 6, i16::max_value()],
+               [8, i16::min_value(), 10, 11, 12, 13, 14, 1]) =>
+              [8, i16::max_value(), 12, 14, 16, 18, 20, i16::min_value()]);
+    test_bop!(i16x8[i16; 8] | sub[i16x8_sub_test]:
+              ([0, -1, 2, 3, 4, 5, 6, -1],
+               [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) =>
+              [-8, i16::max_value(), -8, -8, -8, -8, -8, i16::min_value()]);
+    test_bop!(i16x8[i16; 8] | mul[i16x8_mul_test]:
+              ([0, -2, 2, 3, 4, 5, 6, 2],
+               [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) =>
+              [0, 0, 20, 33, 48, 65, 84, -2]);
+    test_uop!(i16x8[i16; 8] | neg[i16x8_neg_test]:
+              [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()] =>
+              [-8, i16::min_value(), -10, -11, -12, -13, -14, i16::min_value() + 1]);
+
+    test_bop!(i32x4[i32; 4] | add[i32x4_add_test]:
+              ([0, -1, 2, i32::max_value()],
+               [8, i32::min_value(), 10, 1]) =>
+              [8, i32::max_value(), 12, i32::min_value()]);
+    test_bop!(i32x4[i32; 4] | sub[i32x4_sub_test]:
+              ([0, -1, 2, -1],
+               [8, i32::min_value(), 10, i32::max_value()]) =>
+              [-8, i32::max_value(), -8, i32::min_value()]);
+    test_bop!(i32x4[i32; 4] | mul[i32x4_mul_test]:
+              ([0, -2, 2, 2],
+               [8, i32::min_value(), 10, i32::max_value()]) =>
+              [0, 0, 20, -2]);
+    test_uop!(i32x4[i32; 4] | neg[i32x4_neg_test]:
+              [8, i32::min_value(), 10, i32::max_value()] =>
+              [-8, i32::min_value(), -10, i32::min_value() + 1]);
+
+    test_bop!(i64x2[i64; 2] | add[i64x2_add_test]:
+              ([-1, i64::max_value()],
+               [i64::min_value(), 1]) =>
+              [i64::max_value(), i64::min_value()]);
+    test_bop!(i64x2[i64; 2] | sub[i64x2_sub_test]:
+              ([-1, -1],
+               [i64::min_value(), i64::max_value()]) =>
+              [ i64::max_value(), i64::min_value()]);
+    // note: mul for i64x2 is not part of the spec
+    test_uop!(i64x2[i64; 2] | neg[i64x2_neg_test]:
+              [i64::min_value(), i64::max_value()] =>
+              [i64::min_value(), i64::min_value() + 1]);
+
+    test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+               [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]);
+    test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
+               [0, -2, 4, 6, 8, 10, 12, -2]);
+    test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]:
+               ([0, -1, 2, 3], 1) => [0, -2, 4, 6]);
+    test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]:
+               ([0, -1], 1) => [0, -2]);
+
+    test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+               [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
+    test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
+               [0, -1, 1, 1, 2, 2, 3, i16::max_value() / 2]);
+    test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]:
+               ([0, -1, 2, 3], 1) => [0, -1, 1, 1]);
+    test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]:
+               ([0, -1], 1) => [0, -1]);
+
+    test_bops!(i8x16[i8; 16] | shr_u[i8x16_shr_u_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+               [0, i8::max_value(), 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
+    test_bops!(i16x8[i16; 8] | shr_u[i16x8_shr_u_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
+               [0, i16::max_value(), 1, 1, 2, 2, 3, i16::max_value() / 2]);
+    test_bops!(i32x4[i32; 4] | shr_u[i32x4_shr_u_test]:
+               ([0, -1, 2, 3], 1) => [0, i32::max_value(), 1, 1]);
+    test_bops!(i64x2[i64; 2] | shr_u[i64x2_shr_u_test]:
+               ([0, -1], 1) => [0, i64::max_value()]);
+
+    #[wasm_bindgen_test]
+    fn v128_bitwise_logical_ops() {
+        unsafe {
+            let a: [u32; 4] = [u32::max_value(), 0, u32::max_value(), 0];
+            let b: [u32; 4] = [u32::max_value(); 4];
+            let c: [u32; 4] = [0; 4];
+
+            let vec_a: v128 = mem::transmute(a);
+            let vec_b: v128 = mem::transmute(b);
+            let vec_c: v128 = mem::transmute(c);
+
+            let r: v128 = v128::and(vec_a, vec_a);
+            compare_bytes(r, vec_a);
+            let r: v128 = v128::and(vec_a, vec_b);
+            compare_bytes(r, vec_a);
+            let r: v128 = v128::or(vec_a, vec_b);
+            compare_bytes(r, vec_b);
+            let r: v128 = v128::not(vec_b);
+            compare_bytes(r, vec_c);
+            let r: v128 = v128::xor(vec_a, vec_c);
+            compare_bytes(r, vec_a);
+
+            let r: v128 = v128::bitselect(vec_b, vec_c, vec_b);
+            compare_bytes(r, vec_b);
+            let r: v128 = v128::bitselect(vec_b, vec_c, vec_c);
+            compare_bytes(r, vec_c);
+            let r: v128 = v128::bitselect(vec_b, vec_c, vec_a);
+            compare_bytes(r, vec_a);
+        }
+    }
+
+    macro_rules! test_bool_red {
+        ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => {
+            #[wasm_bindgen_test]
+            fn $test_id() {
+                unsafe {
+                    let vec_a: v128 = mem::transmute([$($true),*]); // true
+                    let vec_b: v128 = mem::transmute([$($false),*]); // false
+                    let vec_c: v128 = mem::transmute([$($alt),*]); // alternating
+
+                    assert_eq!($id::any_true(vec_a), 1);
+                    assert_eq!($id::any_true(vec_b), 0);
+                    assert_eq!($id::any_true(vec_c), 1);
+
+                    assert_eq!($id::all_true(vec_a), 1);
+                    assert_eq!($id::all_true(vec_b), 0);
+                    assert_eq!($id::all_true(vec_c), 0);
+                }
+            }
+        }
+    }
+
+    test_bool_red!(
+        i8x16[i8x16_boolean_reductions]
+            | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+            | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+            | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
+    );
+    test_bool_red!(
+        i16x8[i16x8_boolean_reductions]
+            | [1_i16, 1, 1, 1, 1, 1, 1, 1]
+            | [0_i16, 0, 0, 0, 0, 0, 0, 0]
+            | [1_i16, 0, 1, 0, 1, 0, 1, 0]
+    );
+    test_bool_red!(
+        i32x4[i32x4_boolean_reductions]
+            | [1_i32, 1, 1, 1]
+            | [0_i32, 0, 0, 0]
+            | [1_i32, 0, 1, 0]
+    );
+    test_bool_red!(
+        i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0]
+    );
+
+    test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+               [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+              [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
+    test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+              [-1, 0, -1, 0 ,-1, 0, -1, -1]);
+    test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]:
+              ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
+    test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]);
+    test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]:
+              ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
+    test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
+
+    test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+               [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+              [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
+    test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+              [0, -1, 0, -1 ,0, -1, 0, 0]);
+    test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]:
+              ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
+    test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]);
+    test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]:
+              ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
+    test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]);
+
+    test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+               [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+              [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
+    test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+              [0, -1, 0, -1 ,0, -1, 0, 0]);
+    test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]:
+              ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
+    test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) => [0, -1]);
+    test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]:
+              ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
+    test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]);
+
+    test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]:
+          ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
+           [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) =>
+              [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
+    test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]:
+              ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
+              [0, -1, 0, -1 ,0, -1, 0, 0]);
+    test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]:
+              ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]);
+    test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]);
+    test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]:
+              ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]);
+    test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]);
+
+    test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+               [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
+              [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
+    test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]:
+              ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
+              [-1, 0, -1, 0 ,-1, 0, -1, -1]);
+    test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]:
+              ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
+    test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]);
+    test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]:
+              ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
+    test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
+
+    test_bop!(i8x16[i8; 16] | le[i8x16_le_test]:
+              ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
+               [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+              ) =>
+              [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
+    test_bop!(i16x8[i16; 8] | le[i16x8_le_test]:
+              ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
+              [-1, 0, -1, 0 ,-1, 0, -1, -1]);
+    test_bop!(i32x4[i32; 4] | le[i32x4_le_test]:
+              ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]);
+    test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]);
+    test_bop!(f32x4[f32; 4] => i32 | le[f32x4_le_test]:
+              ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, 0]);
+    test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]);
+
+    #[wasm_bindgen_test]
+    fn v128_bitwise_load_store() {
+        unsafe {
+            let mut arr: [i32; 4] = [0, 1, 2, 3];
+
+            let vec = v128::load(arr.as_ptr() as *const v128);
+            let vec = i32x4::add(vec, vec);
+            v128::store(arr.as_mut_ptr() as *mut v128, vec);
+
+            assert_eq!(arr, [0, 2, 4, 6]);
+        }
+    }
+
+    test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]);
+    test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]);
+    test_bop!(f32x4[f32; 4] | min[f32x4_min_test]:
+              ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]);
+    test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]:
+              ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
+              => [0., -3., -4., std::f32::NAN]);
+    test_bop!(f32x4[f32; 4] | max[f32x4_max_test]:
+              ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]);
+    test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]:
+              ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
+              => [1., -1., 7., std::f32::NAN]);
+    test_bop!(f32x4[f32; 4] | add[f32x4_add_test]:
+              ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]);
+    test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]:
+              ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]);
+    test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]:
+              ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]);
+    test_bop!(f32x4[f32; 4] | div[f32x4_div_test]:
+              ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]);
+
+    test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]);
+    test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] => [ 0., 1.]);
+    test_bop!(f64x2[f64; 2] | min[f64x2_min_test]:
+              ([0., -1.], [1., -3.]) => [0., -3.]);
+    test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]:
+              ([7., 8.], [-4., std::f64::NAN])
+              => [ -4., std::f64::NAN]);
+    test_bop!(f64x2[f64; 2] | max[f64x2_max_test]:
+              ([0., -1.], [1., -3.]) => [1., -1.]);
+    test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]:
+              ([7., 8.], [ -4., std::f64::NAN])
+              => [7., std::f64::NAN]);
+    test_bop!(f64x2[f64; 2] | add[f64x2_add_test]:
+              ([0., -1.], [1., -3.]) => [1., -4.]);
+    test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]:
+              ([0., -1.], [1., -3.]) => [-1., 2.]);
+    test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]:
+              ([0., -1.], [1., -3.]) => [0., 3.]);
+    test_bop!(f64x2[f64; 2] | div[f64x2_div_test]:
+              ([0., -8.], [1., 4.]) => [0., -2.]);
+
+    macro_rules! test_conv {
+        ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr,  $to:expr) => {
+            #[wasm_bindgen_test]
+            fn $test_id() {
+                unsafe {
+                    let from: v128 = mem::transmute($from);
+                    let to: v128 = mem::transmute($to);
+
+                    let r: v128 = $to_ty::$conv_id(from);
+
+                    compare_bytes(r, to);
+                }
+            }
+        };
+    }
+
+    test_conv!(
+        f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4],
+        [1_f32, 2., 3., 4.]
+    );
+    test_conv!(
+        f32x4_convert_u_i32x4
+            | convert_u_i32x4
+            | f32x4
+            | [u32::max_value(), 2, 3, 4],
+        [u32::max_value() as f32, 2., 3., 4.]
+    );
+    test_conv!(
+        f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2],
+        [1_f64, 2.]
+    );
+    test_conv!(
+        f64x2_convert_u_i64x2
+            | convert_u_i64x2
+            | f64x2
+            | [u64::max_value(), 2],
+        [18446744073709552000.0, 2.]
+    );
+
+    // FIXME: this fails, and produces -2147483648 instead of saturating at
+    // i32::max_value(). FIXME: add other saturating tests.
+    // test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat | i32x4
+    //     | [1_f32, 2., (i32::max_value() as f32 + 1.), 4.], [1_i32, 2, i32::max_value(), 4]);
+}
diff --git a/crates/coresimd/Cargo.toml b/crates/coresimd/Cargo.toml
index 5bc2e5d7ef..15f2eb15a4 100644
--- a/crates/coresimd/Cargo.toml
+++ b/crates/coresimd/Cargo.toml
@@ -22,6 +22,9 @@ maintenance = { status = "experimental" }
 stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
 stdsimd = { version = "0.0.3", path = "../stdsimd" }
 
+[target.wasm32-unknown-unknown.dev-dependencies]
+wasm-bindgen-test = "=0.2.15"
+
 [features]
 # Internal-usage only: denies all warnings.
 strict = []
diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs
index 1c5f185a8a..6411fbb78e 100644
--- a/crates/coresimd/src/lib.rs
+++ b/crates/coresimd/src/lib.rs
@@ -81,6 +81,9 @@ extern crate stdsimd_test;
 #[cfg(test)]
 extern crate test;
 
+#[cfg(all(test, target_arch = "wasm32"))]
+extern crate wasm_bindgen_test;
+
 #[path = "../../../coresimd/mod.rs"]
 mod coresimd;
 
diff --git a/crates/wasm-test/.cargo/config b/crates/wasm-test/.cargo/config
deleted file mode 100644
index 908f2d6dde..0000000000
--- a/crates/wasm-test/.cargo/config
+++ /dev/null
@@ -1,2 +0,0 @@
-[target.wasm32-unknown-unknown]
-runner = 'wasm-bindgen-test-runner'
\ No newline at end of file
diff --git a/crates/wasm-test/Cargo.toml b/crates/wasm-test/Cargo.toml
deleted file mode 100644
index 7910113447..0000000000
--- a/crates/wasm-test/Cargo.toml
+++ /dev/null
@@ -1,11 +0,0 @@
-[package]
-name = "wasm-test"
-version = "0.1.0"
-authors = ["gnzlbg <gonzalobg88@gmail.com>"]
-
-[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
-wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
-coresimd = { path = "../coresimd" }
-
-[patch.crates-io]
-wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' }
diff --git a/crates/wasm-test/src/lib.rs b/crates/wasm-test/src/lib.rs
deleted file mode 100644
index 0c9ac1ac8e..0000000000
--- a/crates/wasm-test/src/lib.rs
+++ /dev/null
@@ -1 +0,0 @@
-#![no_std]
diff --git a/crates/wasm-test/tests/simd128.rs b/crates/wasm-test/tests/simd128.rs
deleted file mode 100644
index 111049f239..0000000000
--- a/crates/wasm-test/tests/simd128.rs
+++ /dev/null
@@ -1,583 +0,0 @@
-#![feature(use_extern_macros, stdsimd, asm, stmt_expr_attributes)]
-
-#[macro_use]
-extern crate coresimd;
-extern crate wasm_bindgen_test;
-
-use coresimd::arch::wasm32::*;
-use std::mem;
-use wasm_bindgen_test::*;
-
-fn compare_bytes(a: v128, b: v128) {
-    let a: [u8; 16] = unsafe { mem::transmute(a) };
-    let b: [u8; 16] = unsafe { mem::transmute(b) };
-    assert_eq!(a, b);
-}
-
-#[wasm_bindgen_test]
-fn v128_const() {
-    const A: v128 = unsafe {
-        v128::const_([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
-    };
-    compare_bytes(A, A);
-}
-
-macro_rules! test_splat {
-    ($test_id:ident: $id:ident($val:expr) => $($vals:expr),*) => {
-        #[wasm_bindgen_test]
-        fn $test_id() {
-            const A: v128 = unsafe {
-                $id::splat($val)
-            };
-            const B: v128 = unsafe {
-                v128::const_([$($vals),*])
-            };
-            compare_bytes(A, B);
-        }
-    }
-}
-
-test_splat!(i8x16_splat: i8x16(42) => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42);
-test_splat!(i16x8_splat: i16x8(42) => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0);
-test_splat!(i32x4_splat: i32x4(42) => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0);
-test_splat!(i64x2_splat: i64x2(42) => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0);
-test_splat!(f32x4_splat: f32x4(42.) => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66);
-test_splat!(f64x2_splat: f64x2(42.) => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64);
-
-// tests extract and replace lanes
-macro_rules! test_extract {
-    ($test_id:ident: $id:ident[$ety:ident] => $extract_fn:ident | [$val:expr; $count:expr]
-     | [$($vals:expr),*] => ($other:expr)
-     | $($ids:expr),*) => {
-        #[wasm_bindgen_test]
-        fn $test_id() {
-            unsafe {
-                // splat vector and check that all indices contain the same value
-                // splatted:
-                const A: v128 = unsafe {
-                    $id::splat($val)
-                };
-                $(
-                    assert_eq!($id::$extract_fn(A, $ids) as $ety, $val);
-                )*;
-
-                // create a vector from array and check that the indices contain
-                // the same values as in the array:
-                let arr: [$ety; $count] = [$($vals),*];
-                let mut vec: v128 = mem::transmute(arr);
-                $(
-                    assert_eq!($id::$extract_fn(vec, $ids) as $ety, arr[$ids]);
-                )*;
-
-                // replace lane 0 with another value
-                vec = $id::replace_lane(vec, 0, $other);
-                assert_ne!($id::$extract_fn(vec, 0) as $ety, arr[0]);
-                assert_eq!($id::$extract_fn(vec, 0) as $ety, $other);
-            }
-        }
-    }
-}
-
-test_extract!(i8x16_extract_u: i8x16[u8] => extract_lane_u | [255; 16]
-              | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] => (42)
-              | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-);
-test_extract!(i8x16_extract_s: i8x16[i8] => extract_lane_s | [-122; 16]
-              | [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15] => (-42)
-              | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-);
-
-test_extract!(i16x8_extract_u: i16x8[u16] => extract_lane_u | [255; 8]
-              | [0, 1, 2, 3, 4, 5, 6, 7]  => (42) | 0, 1, 2, 3, 4, 5, 6, 7
-);
-test_extract!(i16x8_extract_s: i16x8[i16] => extract_lane_s | [-122; 8]
-              | [0, -1, 2, -3, 4, -5, 6, -7]  => (-42) | 0, 1, 2, 3, 4, 5, 6, 7
-);
-test_extract!(i32x4_extract: i32x4[i32] => extract_lane | [-122; 4]
-              | [0, -1, 2, -3]  => (42) | 0, 1, 2, 3
-);
-test_extract!(i64x2_extract: i64x2[i64] => extract_lane | [-122; 2]
-              | [0, -1]  => (42) | 0, 1
-);
-test_extract!(f32x4_extract: f32x4[f32] => extract_lane | [-122.; 4]
-              | [0., -1., 2., -3.]  => (42.) | 0, 1, 2, 3
-);
-test_extract!(f64x2_extract: f64x2[f64] => extract_lane | [-122.; 2]
-              | [0., -1.]  => (42.) | 0, 1
-);
-
-#[wasm_bindgen_test]
-fn v8x16_shuffle() {
-    unsafe {
-        let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
-        let b = [
-            16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-        ];
-
-        let vec_a: v128 = mem::transmute(a);
-        let vec_b: v128 = mem::transmute(b);
-
-        let vec_r = v8x16_shuffle!(
-            vec_a,
-            vec_b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
-        );
-
-        let e = [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30];
-        let vec_e: v128 = mem::transmute(e);
-        compare_bytes(vec_r, vec_e);
-    }
-}
-
-macro_rules! floating_point {
-    (f32) => {
-        true
-    };
-    (f64) => {
-        true
-    };
-    ($id:ident) => {
-        false
-    };
-}
-
-trait IsNan: Sized {
-    fn is_nan(self) -> bool {
-        false
-    }
-}
-impl IsNan for i8 {}
-impl IsNan for i16 {}
-impl IsNan for i32 {}
-impl IsNan for i64 {}
-
-macro_rules! test_bop {
-    ($id:ident[$ety:ident; $ecount:expr] |
-     $binary_op:ident [$op_test_id:ident] :
-     ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
-        test_bop!(
-            $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]:
-            ([$($in_a),*], [$($in_b),*]) => [$($out),*]
-        );
-
-    };
-    ($id:ident[$ety:ident; $ecount:expr] => $oty:ident |
-     $binary_op:ident [$op_test_id:ident] :
-     ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
-        #[wasm_bindgen_test]
-        fn $op_test_id() {
-            unsafe {
-                let a_input: [$ety; $ecount] = [$($in_a),*];
-                let b_input: [$ety; $ecount] = [$($in_b),*];
-                let output: [$oty; $ecount] = [$($out),*];
-
-                let a_vec_in: v128 = mem::transmute(a_input);
-                let b_vec_in: v128 = mem::transmute(b_input);
-                let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in);
-
-                let res: [$oty; $ecount] = mem::transmute(vec_res);
-
-                if !floating_point!($ety) {
-                    assert_eq!(res, output);
-                } else {
-                    for i in 0..$ecount {
-                        let r = res[i];
-                        let o = output[i];
-                        assert_eq!(r.is_nan(), o.is_nan());
-                        if !r.is_nan() {
-                            assert_eq!(r, o);
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-macro_rules! test_bops {
-    ($id:ident[$ety:ident; $ecount:expr] |
-     $binary_op:ident [$op_test_id:ident]:
-     ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => {
-        #[wasm_bindgen_test]
-        fn $op_test_id() {
-            unsafe {
-                let a_input: [$ety; $ecount] = [$($in_a),*];
-                let output: [$ety; $ecount] = [$($out),*];
-
-                let a_vec_in: v128 = mem::transmute(a_input);
-                let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b);
-
-                let res: [$ety; $ecount] = mem::transmute(vec_res);
-                assert_eq!(res, output);
-            }
-        }
-    }
-}
-
-macro_rules! test_uop {
-    ($id:ident[$ety:ident; $ecount:expr] |
-     $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => {
-        #[wasm_bindgen_test]
-        fn $op_test_id() {
-            unsafe {
-                let a_input: [$ety; $ecount] = [$($in_a),*];
-                let output: [$ety; $ecount] = [$($out),*];
-
-                let a_vec_in: v128 = mem::transmute(a_input);
-                let vec_res: v128 = $id::$unary_op(a_vec_in);
-
-                let res: [$ety; $ecount] = mem::transmute(vec_res);
-                assert_eq!(res, output);
-            }
-        }
-    }
-}
-
-test_bop!(i8x16[i8; 16] | add[i8x16_add_test]:
-          ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1],
-           [8, i8::min_value(), 10, 11, 12, 13, 14, 1, 1, 1, 1, 1, 1, 1, 1, 1]) =>
-          [8, i8::max_value(), 12, 14, 16, 18, 20, i8::min_value(), 2, 2, 2, 2, 2, 2, 2, 2]);
-test_bop!(i8x16[i8; 16] | sub[i8x16_sub_test]:
-          ([0, -1, 2, 3, 4, 5, 6, -1, 1, 1, 1, 1, 1, 1, 1, 1],
-           [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) =>
-          [-8, i8::max_value(), -8, -8, -8, -8, -8, i8::min_value(), 0, 0, 0, 0, 0, 0, 0, 0]);
-test_bop!(i8x16[i8; 16] | mul[i8x16_mul_test]:
-          ([0, -2, 2, 3, 4, 5, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1],
-           [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) =>
-          [0, 0, 20, 33, 48, 65, 84, -2, 1, 1, 1, 1, 1, 1, 1, 1]);
-test_uop!(i8x16[i8; 16] | neg[i8x16_neg_test]:
-          [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1] =>
-          [-8, i8::min_value(), -10, -11, -12, -13, -14, i8::min_value() + 1, -1, -1, -1, -1, -1, -1, -1, -1]);
-
-test_bop!(i16x8[i16; 8] | add[i16x8_add_test]:
-          ([0, -1, 2, 3, 4, 5, 6, i16::max_value()],
-           [8, i16::min_value(), 10, 11, 12, 13, 14, 1]) =>
-          [8, i16::max_value(), 12, 14, 16, 18, 20, i16::min_value()]);
-test_bop!(i16x8[i16; 8] | sub[i16x8_sub_test]:
-          ([0, -1, 2, 3, 4, 5, 6, -1],
-           [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) =>
-          [-8, i16::max_value(), -8, -8, -8, -8, -8, i16::min_value()]);
-test_bop!(i16x8[i16; 8] | mul[i16x8_mul_test]:
-          ([0, -2, 2, 3, 4, 5, 6, 2],
-           [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) =>
-          [0, 0, 20, 33, 48, 65, 84, -2]);
-test_uop!(i16x8[i16; 8] | neg[i16x8_neg_test]:
-          [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()] =>
-          [-8, i16::min_value(), -10, -11, -12, -13, -14, i16::min_value() + 1]);
-
-test_bop!(i32x4[i32; 4] | add[i32x4_add_test]:
-          ([0, -1, 2, i32::max_value()],
-           [8, i32::min_value(), 10, 1]) =>
-          [8, i32::max_value(), 12, i32::min_value()]);
-test_bop!(i32x4[i32; 4] | sub[i32x4_sub_test]:
-          ([0, -1, 2, -1],
-           [8, i32::min_value(), 10, i32::max_value()]) =>
-          [-8, i32::max_value(), -8, i32::min_value()]);
-test_bop!(i32x4[i32; 4] | mul[i32x4_mul_test]:
-          ([0, -2, 2, 2],
-           [8, i32::min_value(), 10, i32::max_value()]) =>
-          [0, 0, 20, -2]);
-test_uop!(i32x4[i32; 4] | neg[i32x4_neg_test]:
-          [8, i32::min_value(), 10, i32::max_value()] =>
-          [-8, i32::min_value(), -10, i32::min_value() + 1]);
-
-test_bop!(i64x2[i64; 2] | add[i64x2_add_test]:
-          ([-1, i64::max_value()],
-           [i64::min_value(), 1]) =>
-          [i64::max_value(), i64::min_value()]);
-test_bop!(i64x2[i64; 2] | sub[i64x2_sub_test]:
-          ([-1, -1],
-           [i64::min_value(), i64::max_value()]) =>
-          [ i64::max_value(), i64::min_value()]);
-// note: mul for i64x2 is not part of the spec
-test_uop!(i64x2[i64; 2] | neg[i64x2_neg_test]:
-          [i64::min_value(), i64::max_value()] =>
-          [i64::min_value(), i64::min_value() + 1]);
-
-test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]:
-          ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
-           [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]);
-test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]:
-          ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
-           [0, -2, 4, 6, 8, 10, 12, -2]);
-test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]:
-           ([0, -1, 2, 3], 1) => [0, -2, 4, 6]);
-test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]:
-           ([0, -1], 1) => [0, -2]);
-
-test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]:
-           ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
-           [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
-test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]:
-           ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
-           [0, -1, 1, 1, 2, 2, 3, i16::max_value() / 2]);
-test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]:
-           ([0, -1, 2, 3], 1) => [0, -1, 1, 1]);
-test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]:
-           ([0, -1], 1) => [0, -1]);
-
-test_bops!(i8x16[i8; 16] | shr_u[i8x16_uhr_u_test]:
-           ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
-           [0, i8::max_value(), 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
-test_bops!(i16x8[i16; 8] | shr_u[i16x8_uhr_u_test]:
-           ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) =>
-           [0, i16::max_value(), 1, 1, 2, 2, 3, i16::max_value() / 2]);
-test_bops!(i32x4[i32; 4] | shr_u[i32x4_uhr_u_test]:
-           ([0, -1, 2, 3], 1) => [0, i32::max_value(), 1, 1]);
-test_bops!(i64x2[i64; 2] | shr_u[i64x2_uhr_u_test]:
-           ([0, -1], 1) => [0, i64::max_value()]);
-
-#[wasm_bindgen_test]
-fn v128_bitwise_logical_ops() {
-    unsafe {
-        let a: [u32; 4] = [u32::max_value(), 0, u32::max_value(), 0];
-        let b: [u32; 4] = [u32::max_value(); 4];
-        let c: [u32; 4] = [0; 4];
-
-        let vec_a: v128 = mem::transmute(a);
-        let vec_b: v128 = mem::transmute(b);
-        let vec_c: v128 = mem::transmute(c);
-
-        let r: v128 = v128::and(vec_a, vec_a);
-        compare_bytes(r, vec_a);
-        let r: v128 = v128::and(vec_a, vec_b);
-        compare_bytes(r, vec_a);
-        let r: v128 = v128::or(vec_a, vec_b);
-        compare_bytes(r, vec_b);
-        let r: v128 = v128::not(vec_b);
-        compare_bytes(r, vec_c);
-        let r: v128 = v128::xor(vec_a, vec_c);
-        compare_bytes(r, vec_a);
-
-        let r: v128 = v128::bitselect(vec_b, vec_c, vec_b);
-        compare_bytes(r, vec_b);
-        let r: v128 = v128::bitselect(vec_b, vec_c, vec_c);
-        compare_bytes(r, vec_c);
-        let r: v128 = v128::bitselect(vec_b, vec_c, vec_a);
-        compare_bytes(r, vec_a);
-    }
-}
-
-macro_rules! test_bool_red {
-    ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => {
-        #[wasm_bindgen_test]
-        fn $test_id() {
-            unsafe {
-                let vec_a: v128 = mem::transmute([$($true),*]); // true
-                let vec_b: v128 = mem::transmute([$($false),*]); // false
-                let vec_c: v128 = mem::transmute([$($alt),*]); // alternating
-
-                assert_eq!($id::any_true(vec_a), 1);
-                assert_eq!($id::any_true(vec_b), 0);
-                assert_eq!($id::any_true(vec_c), 1);
-
-                assert_eq!($id::all_true(vec_a), 1);
-                assert_eq!($id::all_true(vec_b), 0);
-                assert_eq!($id::all_true(vec_c), 0);
-            }
-        }
-    }
-}
-
-test_bool_red!(
-    i8x16[i8x16_boolean_reductions]
-        | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-        | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-        | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
-);
-test_bool_red!(
-    i16x8[i16x8_boolean_reductions]
-        | [1_i16, 1, 1, 1, 1, 1, 1, 1]
-        | [0_i16, 0, 0, 0, 0, 0, 0, 0]
-        | [1_i16, 0, 1, 0, 1, 0, 1, 0]
-);
-test_bool_red!(
-    i32x4[i32x4_boolean_reductions]
-        | [1_i32, 1, 1, 1]
-        | [0_i32, 0, 0, 0]
-        | [1_i32, 0, 1, 0]
-);
-test_bool_red!(
-    i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0]
-);
-
-test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
-          [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
-test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
-          [-1, 0, -1, 0 ,-1, 0, -1, -1]);
-test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]:
-          ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
-test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]);
-test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]:
-          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
-test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
-
-test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
-          [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
-test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
-          [0, -1, 0, -1 ,0, -1, 0, 0]);
-test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]:
-          ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
-test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]);
-test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]:
-          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
-test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]);
-
-test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
-          [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
-test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
-          [0, -1, 0, -1 ,0, -1, 0, 0]);
-test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]:
-          ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]);
-test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) => [0, -1]);
-test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]:
-          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]);
-test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]);
-
-test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]:
-          ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
-           [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) =>
-          [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]);
-test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]:
-          ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
-          [0, -1, 0, -1 ,0, -1, 0, 0]);
-test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]:
-          ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]);
-test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]);
-test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]:
-          ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]);
-test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]);
-
-test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-           [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) =>
-          [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
-test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]:
-          ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) =>
-          [-1, 0, -1, 0 ,-1, 0, -1, -1]);
-test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]:
-          ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]);
-test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]);
-test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]:
-          ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]);
-test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]);
-
-test_bop!(i8x16[i8; 16] | le[i8x16_le_test]:
-          ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15],
-           [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-           ) =>
-          [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]);
-test_bop!(i16x8[i16; 8] | le[i16x8_le_test]:
-          ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) =>
-          [-1, 0, -1, 0 ,-1, 0, -1, -1]);
-test_bop!(i32x4[i32; 4] | le[i32x4_le_test]:
-          ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]);
-test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]);
-test_bop!(f32x4[f32; 4] => i32 | le[f32x4_le_test]:
-          ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]);
-test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]);
-
-#[wasm_bindgen_test]
-fn v128_bitwise_load_store() {
-    unsafe {
-        let mut arr: [i32; 4] = [0, 1, 2, 3];
-
-        let vec = v128::load(arr.as_ptr() as *const v128);
-        let vec = i32x4::add(vec, vec);
-        v128::store(arr.as_mut_ptr() as *mut v128, vec);
-
-        assert_eq!(arr, [0, 2, 4, 6]);
-    }
-}
-
-test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]);
-test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]);
-test_bop!(f32x4[f32; 4] | min[f32x4_min_test]:
-          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]);
-test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]:
-          ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
-          => [0., -3., -4., std::f32::NAN]);
-test_bop!(f32x4[f32; 4] | max[f32x4_max_test]:
-          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]);
-test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]:
-          ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN])
-          => [1., -1., 7., std::f32::NAN]);
-test_bop!(f32x4[f32; 4] | add[f32x4_add_test]:
-          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]);
-test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]:
-          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]);
-test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]:
-          ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]);
-test_bop!(f32x4[f32; 4] | div[f32x4_div_test]:
-          ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]);
-
-test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]);
-test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] => [ 0., 1.]);
-test_bop!(f64x2[f64; 2] | min[f64x2_min_test]:
-          ([0., -1.], [1., -3.]) => [0., -3.]);
-test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]:
-          ([7., 8.], [-4., std::f64::NAN])
-          => [ -4., std::f64::NAN]);
-test_bop!(f64x2[f64; 2] | max[f64x2_max_test]:
-          ([0., -1.], [1., -3.]) => [1., -1.]);
-test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]:
-          ([7., 8.], [ -4., std::f64::NAN])
-          => [7., std::f64::NAN]);
-test_bop!(f64x2[f64; 2] | add[f64x2_add_test]:
-          ([0., -1.], [1., -3.]) => [1., -4.]);
-test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]:
-          ([0., -1.], [1., -3.]) => [-1., 2.]);
-test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]:
-          ([0., -1.], [1., -3.]) => [0., 3.]);
-test_bop!(f64x2[f64; 2] | div[f64x2_div_test]:
-          ([0., -8.], [1., 4.]) => [0., -2.]);
-
-macro_rules! test_conv {
-    ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr,  $to:expr) => {
-        #[wasm_bindgen_test]
-        fn $test_id() {
-            unsafe {
-                let from: v128 = mem::transmute($from);
-                let to: v128 = mem::transmute($to);
-
-                let r: v128 = $to_ty::$conv_id(from);
-
-                compare_bytes(r, to);
-            }
-        }
-    };
-}
-
-test_conv!(
-    f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4],
-    [1_f32, 2., 3., 4.]
-);
-test_conv!(
-    f32x4_convert_u_i32x4
-        | convert_u_i32x4
-        | f32x4
-        | [u32::max_value(), 2, 3, 4],
-    [u32::max_value() as f32, 2., 3., 4.]
-);
-test_conv!(
-    f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2],
-    [1_f64, 2.]
-);
-test_conv!(
-    f64x2_convert_u_i64x2 | convert_u_i64x2 | f64x2 | [u64::max_value(), 2],
-    [18446744073709552000.0, 2.]
-);
-
-// FIXME: this fails, and produces -2147483648 instead of saturating at
-// i32::max_value() test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat |
-// i32x4 | [1_f32, 2., (i32::max_value() as f32 + 1.), 4.],
-// [1_i32, 2, i32::max_value(), 4]); FIXME: add other saturating tests

From 50c7143ddcbaf6c8c1fd2547488d366872cba8a7 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 9 Aug 2018 18:15:12 +0200
Subject: [PATCH 04/18] use modules instead of types to access wasm simd128
 interpretations

---
 coresimd/wasm32/simd128.rs | 1185 ++++++++++++++++++------------------
 1 file changed, 580 insertions(+), 605 deletions(-)

diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs
index 2c72dc0ebc..bce9755068 100644
--- a/coresimd/wasm32/simd128.rs
+++ b/coresimd/wasm32/simd128.rs
@@ -2,9 +2,17 @@
 //!
 //! [WebAssembly `SIMD128` ISA]:
 //! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md
-
+//
+// This file is structured as follows:
+// * first the types are defined
+// * then macros implementing the different APIs are provided
+// * finally the API of each type is implemented
+//
 #![allow(non_camel_case_types)]
 
+////////////////////////////////////////////////////////////////////////////////
+// Types
+
 /// A single unconstrained byte (0-255).
 pub type ImmByte = u8;
 /// A byte with values in the range 0–1 identifying a lane.
@@ -97,413 +105,491 @@ mod sealed {
     }
 }
 
-/// WASM-specific v8x16 instructions
-pub struct v8x16;
-/// WASM-specific v16x8 instructions
-pub struct v16x8;
-/// WASM-specific v32x4 instructions
-pub struct v32x4;
-/// WASM-specific v64x2instructions
-pub struct v64x2;
-
-/// WASM-specific v8x16 instructions with modulo-arithmetic semantics
-pub struct i8x16;
-/// WASM-specific v16x8 instructions with modulo-arithmetic semantics
-pub struct i16x8;
-/// WASM-specific v32x4 instructions with modulo-arithmetic semantics
-pub struct i32x4;
-/// WASM-specific v64x2 instructions with modulo-arithmetic semantics
-pub struct i64x2;
-
-/// WASM-specific v32x4 floating-point instructions
-pub struct f32x4;
-/// WASM-specific v64x2 floating-point instructions
-pub struct f64x2;
-
-impl v128 {
-    /// Materialize a constant SIMD value from the immediate operands.
-    ///
-    /// The `v128.const` instruction is encoded with 16 immediate bytes
-    /// `imm` which provide the bits of the vector directly.
-    #[inline]
-    // #[target_feature(enable = "simd128")]
-    // FIXME: #[cfg_attr(test, assert_instr(v128.const, imm =
-    // [ImmByte::new(42); 16]))]
-    #[rustc_args_required_const(0)]
-    pub const unsafe fn const_(imm: [ImmByte; 16]) -> v128 {
-        union U {
-            imm: [ImmByte; 16],
-            vec: v128,
-        }
-        U { imm }.vec
-    }
-}
+////////////////////////////////////////////////////////////////////////////////
+// Macros implementing the spec APIs:
 
 macro_rules! impl_splat {
     ($id:ident[$ivec_ty:ident : $elem_ty:ident] <= $x_ty:ident | $($lane_id:ident),*) => {
-        impl $id {
-            /// Create vector with identical lanes
-            ///
-            /// Construct a vector with `x` replicated to all lanes.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($ident.splat))]
-            pub const unsafe fn splat(x: $x_ty) -> v128 {
-                union U {
-                    vec: self::sealed::$ivec_ty,
-                    res: v128
-                }
-                U { vec: self::sealed::$ivec_ty($({ struct $lane_id; x as $elem_ty}),*) }.res
+        /// Create vector with identical lanes
+        ///
+        /// Construct a vector with `x` replicated to all lanes.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($ident.splat))]
+        pub const unsafe fn splat(x: $x_ty) -> v128 {
+            union U {
+                vec: self::sealed::$ivec_ty,
+                res: v128
             }
+            U { vec: self::sealed::$ivec_ty($({ struct $lane_id; x as $elem_ty}),*) }.res
         }
     }
 }
-impl_splat!(
-    i8x16[v8x16: i8] <= i32 | x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-impl_splat!(i16x8[v16x8: i16] <= i32 | x0, x1, x2, x3, x4, x5, x6, x7);
-impl_splat!(i32x4[v32x4: i32] <= i32 | x0, x1, x2, x3);
-impl_splat!(i64x2[v64x2: i64] <= i64 | x0, x1);
-impl_splat!(f32x4[f32x4: f32] <= f32 | x0, x1, x2, x3);
-impl_splat!(f64x2[f64x2: f64] <= f64 | x0, x1);
 
 macro_rules! impl_extract_lane {
     ($id:ident[$ivec_ty:ident : $selem_ty:ident|$uelem_ty:ident]($lane_idx:ty)
      => $x_ty:ident) => {
-        impl $id {
-            /// Extract lane as a scalar (sign-extend)
-            ///
-            /// Extract the scalar value of lane specified in the immediate
-            /// mode operand `imm` from `a` by sign-extending it.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_s, imm =
-            // 0))]
-            #[rustc_args_required_const(1)]
-            pub unsafe fn extract_lane_s(a: v128, imm: $lane_idx) -> $x_ty {
-                use coresimd::simd_llvm::simd_extract;
-                union U {
-                    vec: self::sealed::$ivec_ty,
-                    a: v128,
-                }
-                // the vectors store a signed integer => extract into it
-                let v: $selem_ty = simd_extract(
-                    U { a }.vec,
-                    imm as u32, /* zero-extends index */
-                );
-                v as $x_ty
+        /// Extract lane as a scalar (sign-extend)
+        ///
+        /// Extract the scalar value of lane specified in the immediate
+        /// mode operand `imm` from `a` by sign-extending it.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_s, imm =
+        // 0))]
+        #[rustc_args_required_const(1)]
+        pub unsafe fn extract_lane_s(a: v128, imm: $lane_idx) -> $x_ty {
+            use coresimd::simd_llvm::simd_extract;
+            union U {
+                vec: self::sealed::$ivec_ty,
+                a: v128,
             }
+            // the vectors store a signed integer => extract into it
+            let v: $selem_ty = simd_extract(
+                U { a }.vec,
+                imm as u32, /* zero-extends index */
+            );
+            v as $x_ty
+        }
 
-            /// Extract lane as a scalar (zero-extend)
-            ///
-            /// Extract the scalar value of lane specified in the immediate
-            /// mode operand `imm` from `a` by zero-extending it.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm =
-            // 0))]
-            #[rustc_args_required_const(1)]
-            pub unsafe fn extract_lane_u(a: v128, imm: $lane_idx) -> $x_ty {
-                use coresimd::simd_llvm::simd_extract;
-                union U {
-                    vec: self::sealed::$ivec_ty,
-                    a: v128,
-                }
-                // the vectors store a signed integer => extract into it
-                let v: $selem_ty = simd_extract(
-                    U { a }.vec,
-                    imm as u32, /* zero-extends index */
-                );
-                // re-interpret the signed integer as an unsigned one of the
-                // same size (no-op)
-                let v: $uelem_ty = ::mem::transmute(v);
-                // cast the internal unsigned integer to a larger signed
-                // integer (zero-extends)
-                v as $x_ty
+        /// Extract lane as a scalar (zero-extend)
+        ///
+        /// Extract the scalar value of lane specified in the immediate
+        /// mode operand `imm` from `a` by zero-extending it.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm =
+        // 0))]
+        #[rustc_args_required_const(1)]
+        pub unsafe fn extract_lane_u(a: v128, imm: $lane_idx) -> $x_ty {
+            use coresimd::simd_llvm::simd_extract;
+            union U {
+                vec: self::sealed::$ivec_ty,
+                a: v128,
             }
+            // the vectors store a signed integer => extract into it
+            let v: $selem_ty = simd_extract(
+                U { a }.vec,
+                imm as u32, /* zero-extends index */
+            );
+            // re-interpret the signed integer as an unsigned one of the
+            // same size (no-op)
+            let v: $uelem_ty = ::mem::transmute(v);
+            // cast the internal unsigned integer to a larger signed
+            // integer (zero-extends)
+            v as $x_ty
         }
     };
     ($id:ident[$ivec_ty:ident]($lane_idx:ty) => $x_ty:ident) => {
-        impl $id {
-            /// Extract lane as a scalar
-            ///
-            /// Extract the scalar value of lane specified in the immediate
-            /// mode operand `imm` from `a`.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u, imm =
-            // 0))]
-            #[rustc_args_required_const(1)]
-            pub unsafe fn extract_lane(a: v128, imm: $lane_idx) -> $x_ty {
-                use coresimd::simd_llvm::simd_extract;
-                union U {
-                    vec: self::sealed::$ivec_ty,
-                    a: v128,
-                }
-                // the vectors store a signed integer => extract into it
-                simd_extract(
-                    U { a }.vec,
-                    imm as u32, /* zero-extends index */
-                )
+        /// Extract lane as a scalar
+        ///
+        /// Extract the scalar value of lane specified in the immediate
+        /// mode operand `imm` from `a`.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane, imm =
+        // 0))]
+        #[rustc_args_required_const(1)]
+        pub unsafe fn extract_lane(a: v128, imm: $lane_idx) -> $x_ty {
+            use coresimd::simd_llvm::simd_extract;
+            union U {
+                vec: self::sealed::$ivec_ty,
+                a: v128,
             }
+            // the vectors store a signed integer => extract into it
+            simd_extract(U { a }.vec, imm as u32 /* zero-extends index */)
         }
     };
 }
-impl_extract_lane!(i8x16[v8x16:i8|u8](LaneIdx16) => i32);
-impl_extract_lane!(i16x8[v16x8:i16|u16](LaneIdx8) => i32);
-impl_extract_lane!(i32x4[v32x4](LaneIdx4) => i32);
-impl_extract_lane!(i64x2[v64x2](LaneIdx2) => i64);
-impl_extract_lane!(f32x4[f32x4](LaneIdx4) => f32);
-impl_extract_lane!(f64x2[f64x2](LaneIdx2) => f64);
 
 macro_rules! impl_replace_lane {
     ($id:ident[$ivec_ty:ident:$ielem_ty:ident]($lane_idx:ty) <= $x_ty:ident) => {
-        impl $id {
-            /// Replace lane value
-            ///
-            /// Return a new vector with lanes identical to `a`, except for
-            /// lane specified in the immediate mode argument `i` which
-            /// has the value `x`.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.extract_lane_u))]
-            #[rustc_args_required_const(1)]
-            pub unsafe fn replace_lane(
-                a: v128, imm: $lane_idx, x: $x_ty,
-            ) -> v128 {
-                use coresimd::simd_llvm::simd_insert;
-                union U {
-                    vec: self::sealed::$ivec_ty,
-                    a: v128,
-                }
-                // the vectors store a signed integer => extract into it
-                ::mem::transmute(simd_insert(
-                    U { a }.vec,
-                    imm as u32, /* zero-extends index */
-                    x as $ielem_ty,
-                ))
-            }
-        }
-    };
-}
-
-impl_replace_lane!(i8x16[v8x16: i8](LaneIdx16) <= i32);
-impl_replace_lane!(i16x8[v16x8: i16](LaneIdx8) <= i32);
-impl_replace_lane!(i32x4[v32x4: i32](LaneIdx4) <= i32);
-impl_replace_lane!(i64x2[v64x2: i64](LaneIdx2) <= i64);
-impl_replace_lane!(f32x4[f32x4: f32](LaneIdx4) <= f32);
-impl_replace_lane!(f64x2[f64x2: f64](LaneIdx2) <= f64);
-
-pub use self::sealed::v8x16 as __internal_v8x16;
-pub use coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle;
-
-/// Shuffle lanes
-///
-/// Create vector with lanes selected from the lanes of two input vectors
-/// `a` and `b` by the indices specified in the immediate mode operand
-/// `imm`. Each index selects an element of the result vector, where the
-/// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and
-/// the indices in range `[16, 31]` select the `i - 16`-th element of `b`.
-#[macro_export]
-macro_rules! v8x16_shuffle {
-    ($a:expr, $b:expr, [
-        $imm0:expr, $imm1:expr, $imm2:expr, $imm3:expr,
-        $imm4:expr, $imm5:expr, $imm6:expr, $imm7:expr,
-        $imm8:expr, $imm9:expr, $imm10:expr, $imm11:expr,
-        $imm12:expr, $imm13:expr, $imm14:expr, $imm15:expr
-    ]) => {
-        #[allow(unused_unsafe)]
-        unsafe {
-            let a: $crate::arch::wasm32::v128 = $a;
-            let b: $crate::arch::wasm32::v128 = $b;
+        /// Replace lane value
+        ///
+        /// Return a new vector with lanes identical to `a`, except for
+        /// lane specified in the immediate mode argument `imm` which
+        /// has the value `x`.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.replace_lane))]
+        #[rustc_args_required_const(1)]
+        pub unsafe fn replace_lane(a: v128, imm: $lane_idx, x: $x_ty) -> v128 {
+            use coresimd::simd_llvm::simd_insert;
             union U {
-                e: v128,
-                i: $crate::arch::wasm32::__internal_v8x16,
+                vec: self::sealed::$ivec_ty,
+                a: v128,
             }
-            let a = U { e: a }.i;
-            let b = U { e: b }.i;
-
-            let r: $crate::arch::wasm32::__internal_v8x16 =
-                $crate::arch::wasm32::__internal_v8x16_shuffle(
-                    a,
-                    b,
-                    [
-                        $imm0 as u32,
-                        $imm1,
-                        $imm2,
-                        $imm3,
-                        $imm4,
-                        $imm5,
-                        $imm6,
-                        $imm7,
-                        $imm8,
-                        $imm9,
-                        $imm10,
-                        $imm11,
-                        $imm12,
-                        $imm13,
-                        $imm14,
-                        $imm15,
-                    ],
-                );
-            U { i: r }.e
+            // cast `x` to the lane element type => insert it
+            ::mem::transmute(simd_insert(
+                U { a }.vec,
+                imm as u32, /* zero-extends index */
+                x as $ielem_ty,
+            ))
         }
     };
 }
 
 macro_rules! impl_wrapping_add_sub_neg {
     ($id:ident[$ivec_ty:ident]) => {
-        impl $id {
-            /// Lane-wise wrapping integer addition
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.add))]
-            pub unsafe fn add(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_add;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                ::mem::transmute(simd_add(a, b))
-            }
-
-            /// Lane-wise wrapping integer subtraction
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.sub))]
-            pub unsafe fn sub(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_sub;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                ::mem::transmute(simd_sub(a, b))
-            }
+        /// Lane-wise wrapping integer addition
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.add))]
+        pub unsafe fn add(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_add;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            ::mem::transmute(simd_add(a, b))
+        }
 
-            /// Lane-wise wrapping integer negation
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.neg))]
-            pub unsafe fn neg(a: v128) -> v128 {
-                use coresimd::simd_llvm::simd_mul;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute($id::splat(-1));
-                ::mem::transmute(simd_mul(b, a))
-            }
+        /// Lane-wise wrapping integer subtraction
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.sub))]
+        pub unsafe fn sub(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_sub;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            ::mem::transmute(simd_sub(a, b))
+        }
 
-            // note: multiplication explicitly omitted (see below)
+        /// Lane-wise wrapping integer negation
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.neg))]
+        pub unsafe fn neg(a: v128) -> v128 {
+            use coresimd::simd_llvm::simd_mul;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute($id::splat(-1));
+            ::mem::transmute(simd_mul(b, a))
         }
+
+        // note: multiplication explicitly omitted because i64x2 does
+        // not implement it
     };
 }
 
-impl_wrapping_add_sub_neg!(i8x16[v8x16]);
-impl_wrapping_add_sub_neg!(i16x8[v16x8]);
-impl_wrapping_add_sub_neg!(i32x4[v32x4]);
-impl_wrapping_add_sub_neg!(i64x2[v64x2]);
+// TODO: Saturating integer arithmetic
+// need to add intrinsics to rustc
+
+// note: multiplication explicitly implemented separately because i64x2 does
+// not implement it
 
 macro_rules! impl_wrapping_mul {
     ($id:ident[$ivec_ty:ident]) => {
-        impl $id {
-            /// Lane-wise wrapping integer multiplication
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.mul))]
-            pub unsafe fn mul(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_mul;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                ::mem::transmute(simd_mul(a, b))
-            }
+        /// Lane-wise wrapping integer multiplication
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.mul))]
+        pub unsafe fn mul(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_mul;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            ::mem::transmute(simd_mul(a, b))
         }
     };
 }
 
-impl_wrapping_mul!(i8x16[v8x16]);
-impl_wrapping_mul!(i16x8[v16x8]);
-impl_wrapping_mul!(i32x4[v32x4]);
-// note: wrapping multiplication for i64x2 is not part of the spec
-
-// TODO: Saturating integer arithmetic
-// need to add intrinsics to rustc
-
 macro_rules! impl_shl_scalar {
     ($id:ident[$ivec_ty:ident : $t:ty]) => {
-        impl $id {
-            /// Left shift by scalar.
-            ///
-            /// Shift the bits in each lane to the left by the same amount.
-            /// Only the low bits of the shift amount are used.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.shl))]
-            pub unsafe fn shl(a: v128, y: i32) -> v128 {
-                use coresimd::simd_llvm::simd_shl;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty =
-                    ::mem::transmute($id::splat(y as $t));
-                ::mem::transmute(simd_shl(a, b))
-            }
+        /// Left shift by scalar.
+        ///
+        /// Shift the bits in each lane to the left by the same amount.
+        /// Only the low bits of the shift amount are used.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.shl))]
+        pub unsafe fn shl(a: v128, y: i32) -> v128 {
+            use coresimd::simd_llvm::simd_shl;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute($id::splat(y as $t));
+            ::mem::transmute(simd_shl(a, b))
         }
     };
 }
 
-impl_shl_scalar!(i8x16[v8x16: i32]);
-impl_shl_scalar!(i16x8[v16x8: i32]);
-impl_shl_scalar!(i32x4[v32x4: i32]);
-impl_shl_scalar!(i64x2[v64x2: i64]);
-
 macro_rules! impl_shr_scalar {
     ($id:ident[$svec_ty:ident : $uvec_ty:ident : $t:ty]) => {
-        impl $id {
-            /// Arithmetic right shift by scalar.
-            ///
-            /// Shift the bits in each lane to the right by the same amount.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.shr))]
-            pub unsafe fn shr_s(a: v128, y: i32) -> v128 {
-                use coresimd::simd_llvm::simd_shr;
-                let a: sealed::$svec_ty = ::mem::transmute(a);
-                let b: sealed::$svec_ty =
-                    ::mem::transmute($id::splat(y as $t));
-                ::mem::transmute(simd_shr(a, b))
+        /// Arithmetic right shift by scalar.
+        ///
+        /// Shift the bits in each lane to the right by the same amount.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.shr_s))]
+        pub unsafe fn shr_s(a: v128, y: i32) -> v128 {
+            use coresimd::simd_llvm::simd_shr;
+            let a: sealed::$svec_ty = ::mem::transmute(a);
+            let b: sealed::$svec_ty = ::mem::transmute($id::splat(y as $t));
+            ::mem::transmute(simd_shr(a, b))
+        }
+
+        /// Logical right shift by scalar.
+        ///
+        /// Shift the bits in each lane to the right by the same amount.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.shr_u))]
+        pub unsafe fn shr_u(a: v128, y: i32) -> v128 {
+            use coresimd::simd_llvm::simd_shr;
+            let a: sealed::$uvec_ty = ::mem::transmute(a);
+            let b: sealed::$uvec_ty = ::mem::transmute($id::splat(y as $t));
+            ::mem::transmute(simd_shr(a, b))
+        }
+    };
+}
+
+macro_rules! impl_boolean_reduction {
+    ($id:ident[$ivec_ty:ident]) => {
+        /// Any lane true
+        ///
+        /// Returns `1` if any lane in `a` is non-zero, `0` otherwise.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.any_true))]
+        pub unsafe fn any_true(a: v128) -> i32 {
+            use coresimd::simd_llvm::simd_reduce_any;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            if simd_reduce_any(a) {
+                1
+            } else {
+                0
             }
+        }
 
-            /// Logical right shift by scalar.
-            ///
-            /// Shift the bits in each lane to the right by the same amount.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.shr))]
-            pub unsafe fn shr_u(a: v128, y: i32) -> v128 {
-                use coresimd::simd_llvm::simd_shr;
-                let a: sealed::$uvec_ty = ::mem::transmute(a);
-                let b: sealed::$uvec_ty =
-                    ::mem::transmute($id::splat(y as $t));
-                ::mem::transmute(simd_shr(a, b))
+        /// All lanes true
+        ///
+        /// Returns `1` if all lanes in `a` are non-zero, `0` otherwise.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.all_true))]
+        pub unsafe fn all_true(a: v128) -> i32 {
+            use coresimd::simd_llvm::simd_reduce_all;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            if simd_reduce_all(a) {
+                1
+            } else {
+                0
             }
         }
     };
 }
 
-impl_shr_scalar!(i8x16[v8x16: u8x16: i32]);
-impl_shr_scalar!(i16x8[v16x8: u16x8: i32]);
-impl_shr_scalar!(i32x4[v32x4: u32x4: i32]);
-impl_shr_scalar!(i64x2[v64x2: u64x2: i64]);
+macro_rules! impl_comparisons {
+    ($id:ident[$ivec_ty:ident]) => {
+        impl_comparisons!($id[$ivec_ty=>$ivec_ty]);
+    };
+    ($id:ident[$ivec_ty:ident=>$rvec_ty:ident]) => {
+        /// Equality
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.eq))]
+        pub unsafe fn eq(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_eq;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            let c: sealed::$rvec_ty = simd_eq(a, b);
+            ::mem::transmute(c)
+        }
+        /// Non-Equality
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.ne))]
+        pub unsafe fn ne(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_ne;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            let c: sealed::$rvec_ty = simd_ne(a, b);
+            ::mem::transmute(c)
+        }
+        /// Less-than
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.lt))]
+        pub unsafe fn lt(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_lt;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            let c: sealed::$rvec_ty = simd_lt(a, b);
+            ::mem::transmute(c)
+        }
+        /// Less-than or equal
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.le))]
+        pub unsafe fn le(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_le;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            let c: sealed::$rvec_ty = simd_le(a, b);
+            ::mem::transmute(c)
+        }
+        /// Greater-than
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.gt))]
+        pub unsafe fn gt(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_gt;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            let c: sealed::$rvec_ty = simd_gt(a, b);
+            ::mem::transmute(c)
+        }
+        /// Greater-than or equal
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.ge))]
+        pub unsafe fn ge(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_ge;
+            let a: sealed::$ivec_ty = ::mem::transmute(a);
+            let b: sealed::$ivec_ty = ::mem::transmute(b);
+            let c: sealed::$rvec_ty = simd_ge(a, b);
+            ::mem::transmute(c)
+        }
+    }
+}
+
+// Floating-point operations
+macro_rules! impl_floating_point_ops {
+    ($id:ident) => {
+        /// Negation
+        ///
+        /// Apply the IEEE `negate(x)` function to each lane. This simply
+        /// inverts the sign bit, preserving all other bits, even for `NaN`
+        /// inputs.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.neg))]
+        pub unsafe fn neg(a: v128) -> v128 {
+            use coresimd::simd_llvm::simd_mul;
+            let a: sealed::$id = ::mem::transmute(a);
+            let b: sealed::$id = ::mem::transmute($id::splat(-1.));
+            ::mem::transmute(simd_mul(b, a))
+        }
+        /// Absolute value
+        ///
+        /// Apply the IEEE `abs(x)` function to each lane. This simply
+        /// clears the sign bit, preserving all other bits, even for `NaN`
+        /// inputs.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.abs))]
+        pub unsafe fn abs(a: v128) -> v128 {
+            let a: sealed::$id = ::mem::transmute(a);
+            ::mem::transmute(a.abs())
+        }
+        /// NaN-propagating minimum
+        ///
+        /// Lane-wise minimum value, propagating `NaN`s.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.min))]
+        pub unsafe fn min(a: v128, b: v128) -> v128 {
+            v128::bitselect(a, b, $id::lt(a, b))
+        }
+        /// NaN-propagating maximum
+        ///
+        /// Lane-wise maximum value, propagating `NaN`s.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.max))]
+        pub unsafe fn max(a: v128, b: v128) -> v128 {
+            v128::bitselect(a, b, $id::gt(a, b))
+        }
+        /// Square-root
+        ///
+        /// Lane-wise square-root.
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.sqrt))]
+        pub unsafe fn sqrt(a: v128) -> v128 {
+            let a: sealed::$id = ::mem::transmute(a);
+            ::mem::transmute(a.sqrt())
+        }
+        /// Lane-wise addition
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.add))]
+        pub unsafe fn add(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_add;
+            let a: sealed::$id = ::mem::transmute(a);
+            let b: sealed::$id = ::mem::transmute(b);
+            ::mem::transmute(simd_add(a, b))
+        }
+        /// Lane-wise subtraction
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.sub))]
+        pub unsafe fn sub(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_sub;
+            let a: sealed::$id = ::mem::transmute(a);
+            let b: sealed::$id = ::mem::transmute(b);
+            ::mem::transmute(simd_sub(a, b))
+        }
+        /// Lane-wise multiplication
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.mul))]
+        pub unsafe fn mul(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_mul;
+            let a: sealed::$id = ::mem::transmute(a);
+            let b: sealed::$id = ::mem::transmute(b);
+            ::mem::transmute(simd_mul(a, b))
+        }
+        /// Lane-wise division
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($id.div))]
+        pub unsafe fn div(a: v128, b: v128) -> v128 {
+            use coresimd::simd_llvm::simd_div;
+            let a: sealed::$id = ::mem::transmute(a);
+            let b: sealed::$id = ::mem::transmute(b);
+            ::mem::transmute(simd_div(a, b))
+        }
+    };
+}
+
+macro_rules! impl_conversion {
+    ($conversion:ident[$instr:expr]: $from_ty:ident => $to_ty:ident | $id:ident) => {
+        #[inline]
+        // #[target_feature(enable = "simd128")]
+        // FIXME: #[cfg_attr(test, assert_instr($instr))]
+        pub unsafe fn $conversion(a: v128) -> v128 {
+            use coresimd::simd_llvm::simd_cast;
+            let a: sealed::$from_ty = ::mem::transmute(a);
+            let b: sealed::$to_ty = simd_cast(a);
+            ::mem::transmute(b)
+        }
+    };
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations:
 
-// Bitwise logical operations
+// v128
 impl v128 {
+    ///////////////////////////////////////////////////////////////////////////
+    // Const constructor:
+
+    /// Materialize a constant SIMD value from the immediate operands.
+    ///
+    /// The `v128.const` instruction is encoded with 16 immediate bytes
+    /// `imm` which provide the bits of the vector directly.
+    #[inline]
+    // #[target_feature(enable = "simd128")]
+    // FIXME: #[cfg_attr(test, assert_instr(v128.const, imm =
+    // [ImmByte::new(42); 16]))]
+    #[rustc_args_required_const(0)]
+    pub const unsafe fn const_(imm: [ImmByte; 16]) -> v128 {
+        union U {
+            imm: [ImmByte; 16],
+            vec: v128,
+        }
+        U { imm }.vec
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Bitwise logical operations:
+
     /// Bitwise logical and
     #[inline]
     // #[target_feature(enable = "simd128")]
@@ -563,139 +649,10 @@ impl v128 {
         // not what we want here:
         Self::or(Self::and(v1, c), Self::and(v2, Self::not(c)))
     }
-}
-
-macro_rules! impl_boolean_reduction {
-    ($id:ident[$ivec_ty:ident]) => {
-        impl $id {
-            /// Any lane true
-            ///
-            /// Returns `1` if any lane in `a` is non-zero, `0` otherwise.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.any_true))]
-            pub unsafe fn any_true(a: v128) -> i32 {
-                use coresimd::simd_llvm::simd_reduce_any;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                if simd_reduce_any(a) {
-                    1
-                } else {
-                    0
-                }
-            }
-
-            /// All lanes true
-            ///
-            /// Returns `1` if all lanes in `a` are non-zero, `0` otherwise.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.all_true))]
-            pub unsafe fn all_true(a: v128) -> i32 {
-                use coresimd::simd_llvm::simd_reduce_all;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                if simd_reduce_all(a) {
-                    1
-                } else {
-                    0
-                }
-            }
-        }
-    };
-}
-
-impl_boolean_reduction!(i8x16[v8x16]);
-impl_boolean_reduction!(i16x8[v16x8]);
-impl_boolean_reduction!(i32x4[v32x4]);
-impl_boolean_reduction!(i64x2[v64x2]);
-
-macro_rules! impl_comparisons {
-    ($id:ident[$ivec_ty:ident]) => {
-        impl_comparisons!($id[$ivec_ty=>$ivec_ty]);
-    };
-    ($id:ident[$ivec_ty:ident=>$rvec_ty:ident]) => {
-        impl $id {
-            /// Equality
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.eq))]
-            pub unsafe fn eq(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_eq;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$rvec_ty = simd_eq(a, b);
-                ::mem::transmute(c)
-            }
-            /// Non-Equality
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.ne))]
-            pub unsafe fn ne(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_ne;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$rvec_ty = simd_ne(a, b);
-                ::mem::transmute(c)
-            }
-            /// Less-than
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.lt))]
-            pub unsafe fn lt(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_lt;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$rvec_ty = simd_lt(a, b);
-                ::mem::transmute(c)
-            }
-
-            /// Less-than or equal
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.le))]
-            pub unsafe fn le(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_le;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$rvec_ty = simd_le(a, b);
-                ::mem::transmute(c)
-            }
-
-            /// Greater-than
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.gt))]
-            pub unsafe fn gt(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_gt;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$rvec_ty = simd_gt(a, b);
-                ::mem::transmute(c)
-            }
-
-            /// Greater-than or equal
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.ge))]
-            pub unsafe fn ge(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_ge;
-                let a: sealed::$ivec_ty = ::mem::transmute(a);
-                let b: sealed::$ivec_ty = ::mem::transmute(b);
-                let c: sealed::$rvec_ty = simd_ge(a, b);
-                ::mem::transmute(c)
-            }
-        }
-    }
-}
 
-impl_comparisons!(i8x16[v8x16]);
-impl_comparisons!(i16x8[v16x8]);
-impl_comparisons!(i32x4[v32x4]);
-impl_comparisons!(i64x2[v64x2]);
-impl_comparisons!(f32x4[f32x4=>v32x4]);
-impl_comparisons!(f64x2[f64x2=>v64x2]);
+    ///////////////////////////////////////////////////////////////////////////
+    // Memory load/stores:
 
-// Load and store
-impl v128 {
     /// Load a `v128` vector from the given heap address.
     #[inline]
     // #[target_feature(enable = "simd128")]
@@ -713,145 +670,163 @@ impl v128 {
     }
 }
 
-// Floating-point operations
-macro_rules! impl_floating_point_ops {
-    ($id:ident) => {
-        impl $id {
-            /// Negation
-            ///
-            /// Apply the IEEE `negate(x)` function to each lane. This simply
-            /// inverts the sign bit, preserving all other bits, even for `NaN`
-            /// inputs.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.neg))]
-            pub unsafe fn neg(a: v128) -> v128 {
-                use coresimd::simd_llvm::simd_mul;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute($id::splat(-1.));
-                ::mem::transmute(simd_mul(b, a))
-            }
-            /// Absolute value
-            ///
-            /// Apply the IEEE `abs(x)` function to each lane. This simply
-            /// clears the sign bit, preserving all other bits, even for `NaN`
-            /// inputs.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.abs))]
-            pub unsafe fn abs(a: v128) -> v128 {
-                let a: sealed::$id = ::mem::transmute(a);
-                ::mem::transmute(a.abs())
-            }
-
-            /// NaN-propagating minimum
-            ///
-            /// Lane-wise minimum value, propagating `NaN`s.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.min))]
-            pub unsafe fn min(a: v128, b: v128) -> v128 {
-                v128::bitselect(a, b, $id::lt(a, b))
-            }
-
-            /// NaN-propagating maximum
-            ///
-            /// Lane-wise maximum value, propagating `NaN`s.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.max))]
-            pub unsafe fn max(a: v128, b: v128) -> v128 {
-                v128::bitselect(a, b, $id::gt(a, b))
+pub use self::sealed::v8x16 as __internal_v8x16;
+pub use coresimd::simd_llvm::simd_shuffle16 as __internal_v8x16_shuffle;
+/// Shuffle lanes
+///
+/// Create vector with lanes selected from the lanes of two input vectors
+/// `a` and `b` by the indices specified in the immediate mode operand
+/// `imm`. Each index selects an element of the result vector, where the
+/// indices `i` in range `[0, 15]` select the `i`-th elements of `a`, and
+/// the indices in range `[16, 31]` select the `i - 16`-th element of `b`.
+#[macro_export]
+macro_rules! v8x16_shuffle {
+    ($a:expr, $b:expr, [
+        $imm0:expr, $imm1:expr, $imm2:expr, $imm3:expr,
+        $imm4:expr, $imm5:expr, $imm6:expr, $imm7:expr,
+        $imm8:expr, $imm9:expr, $imm10:expr, $imm11:expr,
+        $imm12:expr, $imm13:expr, $imm14:expr, $imm15:expr
+    ]) => {
+        #[allow(unused_unsafe)]
+        unsafe {
+            let a: $crate::arch::wasm32::v128 = $a;
+            let b: $crate::arch::wasm32::v128 = $b;
+            union U {
+                e: $crate::arch::wasm32::v128, // `$crate` path: a bare `v128` would resolve at the call site
+                i: $crate::arch::wasm32::__internal_v8x16,
             }
+            let a = U { e: a }.i;
+            let b = U { e: b }.i;
 
-            /// Square-root
-            ///
-            /// Lane-wise square-root.
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.sqrt))]
-            pub unsafe fn sqrt(a: v128) -> v128 {
-                let a: sealed::$id = ::mem::transmute(a);
-                ::mem::transmute(a.sqrt())
-            }
+            let r: $crate::arch::wasm32::__internal_v8x16 =
+                $crate::arch::wasm32::__internal_v8x16_shuffle(
+                    a,
+                    b,
+                    [
+                        $imm0 as u32,
+                        $imm1,
+                        $imm2,
+                        $imm3,
+                        $imm4,
+                        $imm5,
+                        $imm6,
+                        $imm7,
+                        $imm8,
+                        $imm9,
+                        $imm10,
+                        $imm11,
+                        $imm12,
+                        $imm13,
+                        $imm14,
+                        $imm15,
+                    ],
+                );
+            U { i: r }.e
+        }
+    };
+}
 
-            /// Lane-wise addition
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.add))]
-            pub unsafe fn add(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_add;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute(b);
-                ::mem::transmute(simd_add(a, b))
-            }
+/// WASM-specific v8x16 instructions with modulo-arithmetic semantics
+pub mod i8x16 {
+    use super::*;
+    impl_splat!(
+        i8x16[v8x16: i8] <= i32 | x0,
+        x1,
+        x2,
+        x3,
+        x4,
+        x5,
+        x6,
+        x7,
+        x8,
+        x9,
+        x10,
+        x11,
+        x12,
+        x13,
+        x14,
+        x15
+    );
+    impl_extract_lane!(i8x16[v8x16:i8|u8](LaneIdx16) => i32);
+    impl_replace_lane!(i8x16[v8x16: i8](LaneIdx16) <= i32);
+    impl_wrapping_add_sub_neg!(i8x16[v8x16]);
+    impl_wrapping_mul!(i8x16[v8x16]);
+    impl_shl_scalar!(i8x16[v8x16: i32]);
+    impl_shr_scalar!(i8x16[v8x16: u8x16: i32]);
+    impl_boolean_reduction!(i8x16[v8x16]);
+    impl_comparisons!(i8x16[v8x16]);
+}
 
-            /// Lane-wise subtraction
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.sub))]
-            pub unsafe fn sub(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_sub;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute(b);
-                ::mem::transmute(simd_sub(a, b))
-            }
+/// WASM-specific v16x8 instructions with modulo-arithmetic semantics
+pub mod i16x8 {
+    use super::*;
+    impl_splat!(i16x8[v16x8: i16] <= i32 | x0, x1, x2, x3, x4, x5, x6, x7);
+    impl_extract_lane!(i16x8[v16x8:i16|u16](LaneIdx8) => i32);
+    impl_replace_lane!(i16x8[v16x8: i16](LaneIdx8) <= i32);
+    impl_wrapping_add_sub_neg!(i16x8[v16x8]);
+    impl_wrapping_mul!(i16x8[v16x8]);
+    impl_shl_scalar!(i16x8[v16x8: i32]);
+    impl_shr_scalar!(i16x8[v16x8: u16x8: i32]);
+    impl_boolean_reduction!(i16x8[v16x8]);
+    impl_comparisons!(i16x8[v16x8]);
+}
 
-            /// Lane-wise multiplication
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.mul))]
-            pub unsafe fn mul(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_mul;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute(b);
-                ::mem::transmute(simd_mul(a, b))
-            }
+/// WASM-specific v32x4 instructions with modulo-arithmetic semantics
+pub mod i32x4 {
+    use super::*;
+    impl_splat!(i32x4[v32x4: i32] <= i32 | x0, x1, x2, x3);
+    impl_extract_lane!(i32x4[v32x4](LaneIdx4) => i32);
+    impl_replace_lane!(i32x4[v32x4: i32](LaneIdx4) <= i32);
+    impl_wrapping_add_sub_neg!(i32x4[v32x4]);
+    impl_wrapping_mul!(i32x4[v32x4]);
+    impl_shl_scalar!(i32x4[v32x4: i32]);
+    impl_shr_scalar!(i32x4[v32x4: u32x4: i32]);
+    impl_boolean_reduction!(i32x4[v32x4]);
+    impl_comparisons!(i32x4[v32x4]);
+    impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 | i32x4);
+    impl_conversion!(trunc_u_f32x4_sat["i32x4.trunc_u/f32x4:sat"]: f32x4 => u32x4 | i32x4);
+}
 
-            /// Lane-wise division
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($id.div))]
-            pub unsafe fn div(a: v128, b: v128) -> v128 {
-                use coresimd::simd_llvm::simd_div;
-                let a: sealed::$id = ::mem::transmute(a);
-                let b: sealed::$id = ::mem::transmute(b);
-                ::mem::transmute(simd_div(a, b))
-            }
-        }
-    };
+/// WASM-specific v64x2 instructions with modulo-arithmetic semantics
+pub mod i64x2 {
+    use super::*;
+    impl_splat!(i64x2[v64x2: i64] <= i64 | x0, x1);
+    impl_extract_lane!(i64x2[v64x2](LaneIdx2) => i64);
+    impl_replace_lane!(i64x2[v64x2: i64](LaneIdx2) <= i64);
+    impl_wrapping_add_sub_neg!(i64x2[v64x2]);
+    // note: wrapping multiplication for i64x2 is not part of the spec
+    impl_shl_scalar!(i64x2[v64x2: i64]);
+    impl_shr_scalar!(i64x2[v64x2: u64x2: i64]);
+    impl_boolean_reduction!(i64x2[v64x2]);
+    impl_comparisons!(i64x2[v64x2]);
+    impl_conversion!(trunc_s_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => v64x2 | i64x2);
+    impl_conversion!(trunc_u_f64x2_sat["i64x2.trunc_u/f64x2:sat"]: f64x2 => u64x2 | i64x2);
 }
 
-impl_floating_point_ops!(f32x4);
-impl_floating_point_ops!(f64x2);
+/// WASM-specific v32x4 floating-point instructions
+pub mod f32x4 {
+    use super::*;
+    impl_splat!(f32x4[f32x4: f32] <= f32 | x0, x1, x2, x3);
+    impl_extract_lane!(f32x4[f32x4](LaneIdx4) => f32);
+    impl_replace_lane!(f32x4[f32x4: f32](LaneIdx4) <= f32);
+    impl_comparisons!(f32x4[f32x4=>v32x4]);
+    impl_floating_point_ops!(f32x4);
+    impl_conversion!(convert_s_i32x4["f32x4.convert_s/i32x4"]: v32x4 => f32x4 | f32x4);
+    impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: u32x4 => f32x4 | f32x4);
 
-macro_rules! impl_conversion {
-    ($conversion:ident[$instr:expr]: $from_ty:ident => $to_ty:ident | $id:ident) => {
-        impl $id {
-            #[inline]
-            // #[target_feature(enable = "simd128")]
-            // FIXME: #[cfg_attr(test, assert_instr($instr))]
-            pub unsafe fn $conversion(a: v128) -> v128 {
-                use coresimd::simd_llvm::simd_cast;
-                let a: sealed::$from_ty = ::mem::transmute(a);
-                let b: sealed::$to_ty = simd_cast(a);
-                ::mem::transmute(b)
-            }
-        }
-    };
 }
 
-// Integer to floating point
-impl_conversion!(convert_s_i32x4["f32x4.convert_s/i32x4"]: v32x4 => f32x4 | f32x4);
-impl_conversion!(convert_u_i32x4["f32x4.convert_u/i32x4"]: u32x4 => f32x4 | f32x4);
-impl_conversion!(convert_s_i64x2["f64x2.convert_s/i64x2"]: v64x2 => f64x2 | f64x2);
-impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: u64x2 => f64x2 | f64x2);
-
-// Floating point to integer with saturation
-impl_conversion!(trunc_s_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => v32x4 | i32x4);
-impl_conversion!(trunc_u_f32x4_sat["i32x4.trunc_s/f32x4:sat"]: f32x4 => u32x4 | i32x4);
-impl_conversion!(trunc_s_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => v64x2 | i64x2);
-impl_conversion!(trunc_u_f64x2_sat["i64x2.trunc_s/f64x2:sat"]: f64x2 => u64x2 | i64x2);
+/// WASM-specific v64x2 floating-point instructions
+pub mod f64x2 {
+    use super::*;
+    impl_splat!(f64x2[f64x2: f64] <= f64 | x0, x1);
+    impl_extract_lane!(f64x2[f64x2](LaneIdx2) => f64);
+    impl_replace_lane!(f64x2[f64x2: f64](LaneIdx2) <= f64);
+    impl_comparisons!(f64x2[f64x2=>v64x2]);
+    impl_floating_point_ops!(f64x2);
+    impl_conversion!(convert_s_i64x2["f64x2.convert_s/i64x2"]: v64x2 => f64x2 | f64x2);
+    impl_conversion!(convert_u_i64x2["f64x2.convert_u/i64x2"]: u64x2 => f64x2 | f64x2);
+}
 
 #[cfg(test)]
 pub mod tests {

From 3ef01972e8c545b611d13af6d005903b5c38eee6 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 9 Aug 2018 18:53:23 +0200
Subject: [PATCH 05/18] generate docs for wasm32-unknown-unknown

---
 ci/dox.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/dox.sh b/ci/dox.sh
index a604fb541d..fe7e04711d 100755
--- a/ci/dox.sh
+++ b/ci/dox.sh
@@ -44,6 +44,7 @@ dox aarch64 aarch64-unknown-linux-gnu
 dox powerpc64le powerpc64le-unknown-linux-gnu
 dox mips mips-unknown-linux-gnu
 dox mips64 mips64-unknown-linux-gnuabi64
+dox wasm32 wasm32-unknown-unknown
 
 # If we're on travis, not a PR, and on the right branch, publish!
 if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then

From 8ae36ae231d1757db184f673bbc0b238ea5b3c9e Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Thu, 9 Aug 2018 19:25:20 +0200
Subject: [PATCH 06/18] fix typo

---
 coresimd/wasm32/simd128.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs
index bce9755068..905b610a65 100644
--- a/coresimd/wasm32/simd128.rs
+++ b/coresimd/wasm32/simd128.rs
@@ -641,7 +641,7 @@ impl v128 {
     /// from `v1` when `1` and `v2` when `0`.
     #[inline]
     // #[target_feature(enable = "simd128")]
-    // FIXME: #[cfg_attr(test, assert_instr($id.bitselectnot))]
+    // FIXME: #[cfg_attr(test, assert_instr($id.bitselect))]
     pub unsafe fn bitselect(v1: v128, v2: v128, c: v128) -> v128 {
         // FIXME: use llvm.select instead - we need to add a `simd_bitselect`
         // intrinsic to rustc that converts a v128 vector into a i1x128. The

From 123837597afa958f60b20a311f66064fac660907 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Thu, 9 Aug 2018 22:11:38 -0700
Subject: [PATCH 07/18] Enable #[assert_instr] on wasm32

* Shell out to Node's `execSync` to execute `wasm2wat` over our wasm file
* Parse the wasm file line-by-line, looking for various function markers and
  such
* Use the `elem` section to build a function pointer table, allowing us to map
  exactly from function pointer to a function
* Avoid losing debug info (the names section) in release mode by removing the
  `--strip-debug` flag from the arguments passed to `rust-lld`.
---
 ci/docker/wasm32-unknown-unknown/Dockerfile |  10 ++
 ci/lld-shim.rs                              |  11 ++
 coresimd/wasm32/simd128.rs                  |   5 +
 crates/assert-instr-macro/src/lib.rs        |  26 +--
 crates/stdsimd-test/Cargo.toml              |   1 +
 crates/stdsimd-test/src/lib.rs              | 169 +++++++++++++++++---
 crates/stdsimd/src/lib.rs                   |   1 +
 7 files changed, 186 insertions(+), 37 deletions(-)
 create mode 100644 ci/lld-shim.rs

diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile
index 35183554e3..453af264a2 100644
--- a/ci/docker/wasm32-unknown-unknown/Dockerfile
+++ b/ci/docker/wasm32-unknown-unknown/Dockerfile
@@ -25,3 +25,13 @@ ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
 # Install `node`
 RUN curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf -
 ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin
+
+# We use a shim linker that removes `--strip-debug` when passed to LLD. While
+# this typically results in invalid debug information in release mode it doesn't
+# result in an invalid names section which is what we're interested in.
+COPY lld-shim.rs /
+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim
+
+# Rustc isn't available until this container starts, so defer compilation of the
+# shim.
+ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@"
diff --git a/ci/lld-shim.rs b/ci/lld-shim.rs
new file mode 100644
index 0000000000..10263869e8
--- /dev/null
+++ b/ci/lld-shim.rs
@@ -0,0 +1,11 @@
+use std::os::unix::prelude::*;
+use std::process::Command;
+use std::env;
+
+fn main() {
+    let args = env::args()
+        .skip(1)
+        .filter(|s| s != "--strip-debug")
+        .collect::<Vec<_>>();
+    panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec());
+}
diff --git a/coresimd/wasm32/simd128.rs b/coresimd/wasm32/simd128.rs
index 905b610a65..3c23189942 100644
--- a/coresimd/wasm32/simd128.rs
+++ b/coresimd/wasm32/simd128.rs
@@ -10,6 +10,11 @@
 //
 #![allow(non_camel_case_types)]
 
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+#[cfg(test)]
+use wasm_bindgen_test::wasm_bindgen_test;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Types
 
diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs
index 25b5572ad8..cd4077c4f8 100644
--- a/crates/assert-instr-macro/src/lib.rs
+++ b/crates/assert-instr-macro/src/lib.rs
@@ -38,17 +38,9 @@ pub fn assert_instr(
     // testing for.
     let disable_assert_instr =
         std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok();
-    let maybe_ignore = if cfg!(optimized) && !disable_assert_instr {
-        TokenStream::new()
-    } else {
-        (quote! { #[ignore] }).into()
-    };
 
     use quote::ToTokens;
     let instr_str = instr
-        .clone()
-        .into_token_stream()
-        .to_string()
         .replace('.', "_")
         .replace(|c: char| c.is_whitespace(), "");
     let assert_name = syn::Ident::new(
@@ -124,10 +116,16 @@ pub fn assert_instr(
         }
     };
 
+    // If instruction tests are disabled avoid emitting this shim at all, just
+    // return the original item without our attribute.
+    if !cfg!(optimized) || disable_assert_instr {
+        return (quote! { #item }).into()
+    }
+
     let tts: TokenStream = quote! {
-        #[test]
+        #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+        #[cfg_attr(not(target_arch = "wasm32"), test)]
         #[allow(non_snake_case)]
-        #maybe_ignore
         fn #assert_name() {
             #to_test
 
@@ -148,13 +146,17 @@ pub fn assert_instr(
 }
 
 struct Invoc {
-    instr: syn::Expr,
+    instr: String,
     args: Vec<(syn::Ident, syn::Expr)>,
 }
 
 impl syn::synom::Synom for Invoc {
     named!(parse -> Self, do_parse!(
-        instr: syn!(syn::Expr) >>
+        instr: alt!(
+            map!(syn!(syn::Ident), |s| s.to_string())
+            |
+            map!(syn!(syn::LitStr), |s| s.value())
+        ) >>
         args: many0!(do_parse!(
             syn!(syn::token::Comma) >>
             name: syn!(syn::Ident) >>
diff --git a/crates/stdsimd-test/Cargo.toml b/crates/stdsimd-test/Cargo.toml
index e2fc6e30d3..fa905937d7 100644
--- a/crates/stdsimd-test/Cargo.toml
+++ b/crates/stdsimd-test/Cargo.toml
@@ -10,3 +10,4 @@ backtrace = "0.3"
 cc = "1.0"
 lazy_static = "1.0"
 rustc-demangle = "0.1.8"
+wasm-bindgen = "0.2.15"
diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs
index 06d1db5136..7364e11ead 100644
--- a/crates/stdsimd-test/src/lib.rs
+++ b/crates/stdsimd-test/src/lib.rs
@@ -17,12 +17,16 @@ extern crate cc;
 extern crate lazy_static;
 extern crate rustc_demangle;
 extern crate simd_test_macro;
+extern crate wasm_bindgen;
 
 use std::collections::HashMap;
 use std::env;
+use std::path::Path;
 use std::process::Command;
 use std::str;
 
+use wasm_bindgen::prelude::*;
+
 pub use assert_instr_macro::*;
 pub use simd_test_macro::*;
 
@@ -32,6 +36,7 @@ lazy_static! {
 }
 
 struct Function {
+    addr: Option<usize>,
     instrs: Vec<Instruction>,
 }
 
@@ -40,6 +45,10 @@ struct Instruction {
 }
 
 fn disassemble_myself() -> HashMap<String, Vec<Function>> {
+    if cfg!(target_arch = "wasm32") {
+        return parse_wasm2wat();
+    }
+
     let me = env::current_exe().expect("failed to get current exe");
 
     if cfg!(target_arch = "x86_64")
@@ -145,6 +154,7 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
         ret.entry(normalize(symbol))
             .or_insert_with(Vec::new)
             .push(Function {
+                addr: None,
                 instrs: instructions,
             });
     }
@@ -189,6 +199,7 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
         ret.entry(normalize(symbol))
             .or_insert_with(Vec::new)
             .push(Function {
+                addr: None,
                 instrs: instructions,
             });
     }
@@ -239,6 +250,7 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
         ret.entry(normalize(symbol))
             .or_insert_with(Vec::new)
             .push(Function {
+                addr: None,
                 instrs: instructions,
             });
     }
@@ -246,6 +258,97 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
     ret
 }
 
+#[wasm_bindgen(module = "child_process", version = "*")]
+extern {
+    #[wasm_bindgen(js_name = execSync)]
+    fn exec_sync(cmd: &str) -> Buffer;
+}
+
+#[wasm_bindgen(module = "buffer", version = "*")]
+extern {
+    type Buffer;
+    #[wasm_bindgen(method, js_name = toString)]
+    fn to_string(this: &Buffer) -> String;
+}
+
+#[wasm_bindgen]
+extern {
+    #[wasm_bindgen(js_namespace = require)]
+    fn resolve(module: &str) -> String;
+    #[wasm_bindgen(js_namespace = console, js_name = log)]
+    fn js_console_log(s: &str);
+}
+
+// println! doesn't work on wasm32 right now, so shadow the compiler's println!
+// macro with our own shim that redirects to `console.log`.
+#[cfg(target_arch = "wasm32")]
+macro_rules! println {
+    ($($args:tt)*) => (js_console_log(&format!($($args)*)))
+}
+
+fn parse_wasm2wat() -> HashMap<String, Vec<Function>> {
+    // Our wasm module in the wasm-bindgen test harness is called
+    // "wasm-bindgen-test_bg". When running in node this is actually a shim JS
+    // file. Ask node where that JS file is, and then we use that with a wasm
+    // extension to find the wasm file itself.
+    let js_shim = resolve("wasm-bindgen-test_bg");
+    let js_shim = Path::new(&js_shim)
+        .with_extension("wasm");
+
+    // Execute `wasm2wat` synchronously, waiting for and capturing all of its
+    // output.
+    let output = exec_sync(&format!("wasm2wat {}", js_shim.display()))
+        .to_string();
+
+    let mut ret: HashMap<String, Vec<Function>> = HashMap::new();
+    let mut lines = output.lines().map(|s| s.trim());
+    while let Some(line) = lines.next() {
+        // If we found the table of function pointers, fill in the known address
+        // for all our `Function` instances
+        if line.starts_with("(elem") {
+            for (i, name) in line.split_whitespace().skip(3).enumerate() {
+                let name = name.trim_right_matches(")");
+                for f in ret.get_mut(name).unwrap() {
+                    f.addr = Some(i + 1);
+                }
+            }
+            continue
+        }
+
+        // If this isn't a function, we don't care about it.
+        if !line.starts_with("(func ") {
+            continue
+        }
+
+        let mut function = Function {
+            instrs: Vec::new(),
+            addr: None,
+        };
+
+        // Empty functions will end in `))` so there's nothing to do, otherwise
+        // we'll have a bunch of following lines which are instructions.
+        //
+        // Lines that have an imbalanced `)` mark the end of a function.
+        if !line.ends_with("))") {
+            while let Some(line) = lines.next() {
+                function.instrs.push(Instruction {
+                    parts: line.split_whitespace().map(|s| s.to_string()).collect(),
+                });
+                if !line.starts_with("(") && line.ends_with(")") {
+                    break
+                }
+            }
+        }
+
+        // The second element here split on whitespace should be the name of the
+        // function, skipping the type/params/results
+        ret.entry(line.split_whitespace().nth(1).unwrap().to_string())
+            .or_insert(Vec::new())
+            .push(function);
+    }
+    return ret
+}
+
 fn normalize(symbol: &str) -> String {
     let symbol = rustc_demangle::demangle(symbol).to_string();
     match symbol.rfind("::h") {
@@ -259,27 +362,8 @@ fn normalize(symbol: &str) -> String {
 /// This asserts that the function at `fnptr` contains the instruction
 /// `expected` provided.
 pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
-    // Translate this function pointer to a symbolic name that we'd have found
-    // in the disassembly.
-    let mut sym = None;
-    backtrace::resolve(fnptr as *mut _, |name| {
-        sym = name.name().and_then(|s| s.as_str()).map(normalize);
-    });
-
-    let functions =
-        if let Some(s) = sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) {
-            s
-        } else {
-            if let Some(sym) = sym {
-                println!("assumed symbol name: `{}`", sym);
-            }
-            println!("maybe related functions");
-            for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) {
-                println!("\t- {}", f);
-            }
-            panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname);
-        };
-
+    let mut fnname = fnname.to_string();
+    let functions = get_functions(fnptr, &mut fnname);
     assert_eq!(functions.len(), 1);
     let function = &functions[0];
 
@@ -364,14 +448,15 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
     // didn't find the instruction.
     println!(
         "disassembly for {}: ",
-        sym.as_ref().expect("symbol not found")
+        fnname,
     );
     for (i, instr) in instrs.iter().enumerate() {
-        print!("\t{:2}: ", i);
+        let mut s = format!("\t{:2}: ", i);
         for part in &instr.parts {
-            print!("{} ", part);
+            s.push_str(part);
+            s.push_str(" ");
         }
-        println!();
+        println!("{}", s);
     }
 
     if !found {
@@ -394,6 +479,40 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
     }
 }
 
+fn get_functions(fnptr: usize, fnname: &mut String) -> &'static [Function] {
+    // Translate this function pointer to a symbolic name that we'd have found
+    // in the disassembly.
+    let mut sym = None;
+    backtrace::resolve(fnptr as *mut _, |name| {
+        sym = name.name().and_then(|s| s.as_str()).map(normalize);
+    });
+
+    if let Some(sym) = &sym {
+        if let Some(s) = DISASSEMBLY.get(sym) {
+            *fnname = sym.to_string();
+            return s
+        }
+    }
+
+    let exact_match = DISASSEMBLY.iter().find(|(_, list)| {
+        list.iter().any(|f| f.addr == Some(fnptr))
+    });
+    if let Some((name, list)) = exact_match {
+        *fnname = name.to_string();
+        return list
+    }
+
+    if let Some(sym) = sym {
+        println!("assumed symbol name: `{}`", sym);
+    }
+    println!("maybe related functions");
+    for f in DISASSEMBLY.keys().filter(|k| k.contains(&**fnname)) {
+        println!("\t- {}", f);
+    }
+    panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname);
+}
+
+
 pub fn assert_skip_test_ok(name: &str) {
     if env::var("STDSIMD_TEST_EVERYTHING").is_err() {
         return;
diff --git a/crates/stdsimd/src/lib.rs b/crates/stdsimd/src/lib.rs
index 65871cc5eb..021dc06ae3 100644
--- a/crates/stdsimd/src/lib.rs
+++ b/crates/stdsimd/src/lib.rs
@@ -17,6 +17,7 @@ extern crate libc;
 extern crate std as __do_not_use_this_import;
 
 #[cfg(test)]
+#[allow(unused_imports)]
 #[macro_use(println, print)]
 extern crate std;
 

From a95e75c06fd77129b5836152d1d642de4ff9c262 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Fri, 10 Aug 2018 09:49:18 +0200
Subject: [PATCH 08/18] remove exclude list from Cargo.toml

---
 Cargo.toml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 2dbf903acb..d789fed9aa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,9 +3,6 @@ members = [
   "crates/stdsimd-verify",
   "crates/stdsimd",
 ]
-exclude = [
-  "crates/wasm-test"
-]
 
 [profile.release]
 debug = true

From 645fa00bde776078ffe792e1ec0e2b4ab1f2b355 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Fri, 10 Aug 2018 13:08:24 +0200
Subject: [PATCH 09/18] fix assert_instr for non-wasm targets

---
 crates/stdsimd-test/src/lib.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs
index 7364e11ead..712dac09ac 100644
--- a/crates/stdsimd-test/src/lib.rs
+++ b/crates/stdsimd-test/src/lib.rs
@@ -362,6 +362,12 @@ fn normalize(symbol: &str) -> String {
 /// This asserts that the function at `fnptr` contains the instruction
 /// `expected` provided.
 pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
+    // The string in expected is surrounded by '"', strip these:
+    let expected = {
+        assert!(expected.len() > 2 && expected.starts_with('"')
+                && expected.ends_with('"'));
+        expected.get(1..expected.len()-1).unwrap()
+    };
     let mut fnname = fnname.to_string();
     let functions = get_functions(fnptr, &mut fnname);
     assert_eq!(functions.len(), 1);

From a5ecef9880a1842a9fc47da0650a3a38fcc5f295 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Fri, 10 Aug 2018 13:08:40 +0200
Subject: [PATCH 10/18] re-format assert-instr changes

---
 crates/assert-instr-macro/src/lib.rs |  2 +-
 crates/stdsimd-test/src/lib.rs       | 59 ++++++++++++++--------------
 2 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs
index cd4077c4f8..b80d931b84 100644
--- a/crates/assert-instr-macro/src/lib.rs
+++ b/crates/assert-instr-macro/src/lib.rs
@@ -119,7 +119,7 @@ pub fn assert_instr(
     // If instruction tests are disabled avoid emitting this shim at all, just
     // return the original item without our attribute.
     if !cfg!(optimized) || disable_assert_instr {
-        return (quote! { #item }).into()
+        return (quote! { #item }).into();
     }
 
     let tts: TokenStream = quote! {
diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs
index 712dac09ac..09a2b27420 100644
--- a/crates/stdsimd-test/src/lib.rs
+++ b/crates/stdsimd-test/src/lib.rs
@@ -259,20 +259,20 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
 }
 
 #[wasm_bindgen(module = "child_process", version = "*")]
-extern {
+extern "C" {
     #[wasm_bindgen(js_name = execSync)]
     fn exec_sync(cmd: &str) -> Buffer;
 }
 
 #[wasm_bindgen(module = "buffer", version = "*")]
-extern {
+extern "C" {
     type Buffer;
     #[wasm_bindgen(method, js_name = toString)]
     fn to_string(this: &Buffer) -> String;
 }
 
 #[wasm_bindgen]
-extern {
+extern "C" {
     #[wasm_bindgen(js_namespace = require)]
     fn resolve(module: &str) -> String;
     #[wasm_bindgen(js_namespace = console, js_name = log)]
@@ -292,19 +292,18 @@ fn parse_wasm2wat() -> HashMap<String, Vec<Function>> {
     // file. Ask node where that JS file is, and then we use that with a wasm
     // extension to find the wasm file itself.
     let js_shim = resolve("wasm-bindgen-test_bg");
-    let js_shim = Path::new(&js_shim)
-        .with_extension("wasm");
+    let js_shim = Path::new(&js_shim).with_extension("wasm");
 
     // Execute `wasm2wat` synchronously, waiting for and capturing all of its
     // output.
-    let output = exec_sync(&format!("wasm2wat {}", js_shim.display()))
-        .to_string();
+    let output =
+        exec_sync(&format!("wasm2wat {}", js_shim.display())).to_string();
 
     let mut ret: HashMap<String, Vec<Function>> = HashMap::new();
     let mut lines = output.lines().map(|s| s.trim());
     while let Some(line) = lines.next() {
-        // If we found the table of function pointers, fill in the known address
-        // for all our `Function` instances
+        // If we found the table of function pointers, fill in the known
+        // address for all our `Function` instances
         if line.starts_with("(elem") {
             for (i, name) in line.split_whitespace().skip(3).enumerate() {
                 let name = name.trim_right_matches(")");
@@ -312,12 +311,12 @@ fn parse_wasm2wat() -> HashMap<String, Vec<Function>> {
                     f.addr = Some(i + 1);
                 }
             }
-            continue
+            continue;
         }
 
         // If this isn't a function, we don't care about it.
         if !line.starts_with("(func ") {
-            continue
+            continue;
         }
 
         let mut function = Function {
@@ -332,21 +331,24 @@ fn parse_wasm2wat() -> HashMap<String, Vec<Function>> {
         if !line.ends_with("))") {
             while let Some(line) = lines.next() {
                 function.instrs.push(Instruction {
-                    parts: line.split_whitespace().map(|s| s.to_string()).collect(),
+                    parts: line
+                        .split_whitespace()
+                        .map(|s| s.to_string())
+                        .collect(),
                 });
                 if !line.starts_with("(") && line.ends_with(")") {
-                    break
+                    break;
                 }
             }
         }
 
-        // The second element here split on whitespace should be the name of the
-        // function, skipping the type/params/results
+        // The second element here split on whitespace should be the name of
+        // the function, skipping the type/params/results
         ret.entry(line.split_whitespace().nth(1).unwrap().to_string())
             .or_insert(Vec::new())
             .push(function);
     }
-    return ret
+    return ret;
 }
 
 fn normalize(symbol: &str) -> String {
@@ -364,9 +366,12 @@ fn normalize(symbol: &str) -> String {
 pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
     // The string in expected is surrounded by '"', strip these:
     let expected = {
-        assert!(expected.len() > 2 && expected.starts_with('"')
-                && expected.ends_with('"'));
-        expected.get(1..expected.len()-1).unwrap()
+        assert!(
+            expected.len() > 2
+                && expected.starts_with('"')
+                && expected.ends_with('"')
+        );
+        expected.get(1..expected.len() - 1).unwrap()
     };
     let mut fnname = fnname.to_string();
     let functions = get_functions(fnptr, &mut fnname);
@@ -452,10 +457,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
 
     // Help debug by printing out the found disassembly, and then panic as we
     // didn't find the instruction.
-    println!(
-        "disassembly for {}: ",
-        fnname,
-    );
+    println!("disassembly for {}: ", fnname,);
     for (i, instr) in instrs.iter().enumerate() {
         let mut s = format!("\t{:2}: ", i);
         for part in &instr.parts {
@@ -496,16 +498,16 @@ fn get_functions(fnptr: usize, fnname: &mut String) -> &'static [Function] {
     if let Some(sym) = &sym {
         if let Some(s) = DISASSEMBLY.get(sym) {
             *fnname = sym.to_string();
-            return s
+            return s;
         }
     }
 
-    let exact_match = DISASSEMBLY.iter().find(|(_, list)| {
-        list.iter().any(|f| f.addr == Some(fnptr))
-    });
+    let exact_match = DISASSEMBLY
+        .iter()
+        .find(|(_, list)| list.iter().any(|f| f.addr == Some(fnptr)));
     if let Some((name, list)) = exact_match {
         *fnname = name.to_string();
-        return list
+        return list;
     }
 
     if let Some(sym) = sym {
@@ -518,7 +520,6 @@ fn get_functions(fnptr: usize, fnname: &mut String) -> &'static [Function] {
     panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname);
 }
 
-
 pub fn assert_skip_test_ok(name: &str) {
     if env::var("STDSIMD_TEST_EVERYTHING").is_err() {
         return;

From 861dd73029d4a549860b9b32844d95f361556c42 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Fri, 10 Aug 2018 14:25:31 +0200
Subject: [PATCH 11/18] add crate that uses assert_instr

---
 Cargo.toml                                |  3 +
 crates/stdsimd-test/Cargo.toml            |  4 ++
 crates/stdsimd-test/src/lib.rs            | 16 +++--
 crates/wasm-assert-instr-tests/Cargo.toml | 16 +++++
 crates/wasm-assert-instr-tests/readme.md  | 71 +++++++++++++++++++++++
 crates/wasm-assert-instr-tests/src/lib.rs | 22 +++++++
 6 files changed, 128 insertions(+), 4 deletions(-)
 create mode 100644 crates/wasm-assert-instr-tests/Cargo.toml
 create mode 100644 crates/wasm-assert-instr-tests/readme.md
 create mode 100644 crates/wasm-assert-instr-tests/src/lib.rs

diff --git a/Cargo.toml b/Cargo.toml
index d789fed9aa..4e96e5f494 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,9 @@ members = [
   "crates/stdsimd-verify",
   "crates/stdsimd",
 ]
+exclude = [
+  "crates/wasm-assert-instr-tests"
+]
 
 [profile.release]
 debug = true
diff --git a/crates/stdsimd-test/Cargo.toml b/crates/stdsimd-test/Cargo.toml
index fa905937d7..8c98fd1c56 100644
--- a/crates/stdsimd-test/Cargo.toml
+++ b/crates/stdsimd-test/Cargo.toml
@@ -11,3 +11,7 @@ cc = "1.0"
 lazy_static = "1.0"
 rustc-demangle = "0.1.8"
 wasm-bindgen = "0.2.15"
+
+[features]
+default = []
+git_wasm_bindgen = []
\ No newline at end of file
diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs
index 09a2b27420..6efd69d27e 100644
--- a/crates/stdsimd-test/src/lib.rs
+++ b/crates/stdsimd-test/src/lib.rs
@@ -258,13 +258,20 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
     ret
 }
 
-#[wasm_bindgen(module = "child_process", version = "*")]
+
+#[cfg_attr(feature = "git_wasm_bindgen",
+           wasm_bindgen(module = "child_process"))]
+#[cfg_attr(not(feature = "git_wasm_bindgen"),
+           wasm_bindgen(module = "child_process", version = "*"))]
 extern "C" {
     #[wasm_bindgen(js_name = execSync)]
     fn exec_sync(cmd: &str) -> Buffer;
 }
 
-#[wasm_bindgen(module = "buffer", version = "*")]
+#[cfg_attr(feature = "git_wasm_bindgen",
+           wasm_bindgen(module = "buffer"))]
+#[cfg_attr(not(feature = "git_wasm_bindgen"),
+           wasm_bindgen(module = "buffer", version = "*"))]
 extern "C" {
     type Buffer;
     #[wasm_bindgen(method, js_name = toString)]
@@ -307,7 +314,7 @@ fn parse_wasm2wat() -> HashMap<String, Vec<Function>> {
         if line.starts_with("(elem") {
             for (i, name) in line.split_whitespace().skip(3).enumerate() {
                 let name = name.trim_right_matches(")");
-                for f in ret.get_mut(name).unwrap() {
+                for f in ret.get_mut(name).expect("ret.get_mut(name) failed") {
                     f.addr = Some(i + 1);
                 }
             }
@@ -371,7 +378,8 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
                 && expected.starts_with('"')
                 && expected.ends_with('"')
         );
-        expected.get(1..expected.len() - 1).unwrap()
+        expected.get(1..expected.len() - 1)
+            .expect("expected must be a '\"' delimited string, e.g., \"nop\"")
     };
     let mut fnname = fnname.to_string();
     let functions = get_functions(fnptr, &mut fnname);
diff --git a/crates/wasm-assert-instr-tests/Cargo.toml b/crates/wasm-assert-instr-tests/Cargo.toml
new file mode 100644
index 0000000000..07885fa839
--- /dev/null
+++ b/crates/wasm-assert-instr-tests/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "wasm-assert-instr-tests"
+version = "0.1.0"
+authors = ["gnzlbg <gonzalobg88@gmail.com>"]
+
+[dependencies]
+coresimd = { path = "../coresimd" }
+[dev-dependencies]
+stdsimd-test = { path = "../stdsimd-test", features = ["git_wasm_bindgen"] }
+
+[target.wasm32-unknown-unknown.dev-dependencies]
+wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
+
+[patch.crates-io]
+wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' }
+wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
\ No newline at end of file
diff --git a/crates/wasm-assert-instr-tests/readme.md b/crates/wasm-assert-instr-tests/readme.md
new file mode 100644
index 0000000000..165c38d638
--- /dev/null
+++ b/crates/wasm-assert-instr-tests/readme.md
@@ -0,0 +1,71 @@
+# assert_instr on WASM32
+
+This crate uses `assert_instr` to verify the assembly of wasm functions.
+
+# Set up
+
+This crate needs a couple of tools installed:
+
+1. Install latest version of `wasm-bindgen` CLI tools
+
+```
+git clone git@github.com:rustwasm/wasm-bindgen
+cd wasm-bindgen
+cargo install --path crates/cli
+
+# This makes wasm-bindgen-test-runner the test runner for wasm32-unknown-unknown:
+``` 
+
+2. Install WABT
+
+```
+# MacOSX
+brew install wabt
+
+# From source: 
+git clone --recursive https://github.com/WebAssembly/wabt
+make -C wabt -j
+
+# Add it to the path
+PATH=$PATH:/wabt/bin
+```
+
+The `stdsimd-test` proc macro needs to be able to find these in the path. We
+could add an environment variable to configure these.
+
+3. Install Node
+
+Using `nvm`, homebrew, or manually. The test runner needs to find a recent
+enough node in the `PATH`:
+
+```
+# MacOSX
+brew install node
+
+# Other
+curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf -
+PATH=$PATH:/node-v10.8.0-linux-x64/bin
+```
+
+4. Compile and install linker shim
+
+```
+# In stdsimd/
+cd ci
+rustc lld-shim -o lld-shim
+```
+
+# Running the tests
+
+This is how you can run the tests:
+
+```
+CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=PATH/TO/lld-shim \
+CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner \
+cargo test --target=wasm32-unknown-unknown --release
+```
+
+you can also set the `CARGO_TARGET_WASM32_...` linker and test runner globally
+with `export ...`.
+
+To see the build fail, pass it `RUSTFLAGS="-C target-feature=+simd128"`.
diff --git a/crates/wasm-assert-instr-tests/src/lib.rs b/crates/wasm-assert-instr-tests/src/lib.rs
new file mode 100644
index 0000000000..f5d82a328f
--- /dev/null
+++ b/crates/wasm-assert-instr-tests/src/lib.rs
@@ -0,0 +1,22 @@
+//! Includes assert_instr tests for wasm that currently pass.
+#![feature(stdsimd)]
+#![cfg_attr(test, feature(use_extern_macros))]
+
+extern crate coresimd;
+#[cfg(test)]
+extern crate stdsimd_test;
+#[cfg(all(test, target_arch = "wasm32"))]
+extern crate wasm_bindgen_test;
+
+use coresimd::arch::wasm32::*;
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+#[cfg(test)]
+use wasm_bindgen_test::*;
+
+#[cfg_attr(test, assert_instr(foo))]
+pub fn i8x16_add(a: v128, b: v128) -> v128 {
+    unsafe { i8x16::add(a, b) }
+}

From eff5fedd71a395adab8506d334ccbc05abcab00b Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Fri, 10 Aug 2018 07:32:09 -0700
Subject: [PATCH 12/18] Fix instructions having extra quotes

---
 crates/assert-instr-macro/src/lib.rs |  2 +-
 crates/stdsimd-test/src/lib.rs       | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs
index b80d931b84..e5575e85a3 100644
--- a/crates/assert-instr-macro/src/lib.rs
+++ b/crates/assert-instr-macro/src/lib.rs
@@ -131,7 +131,7 @@ pub fn assert_instr(
 
             ::stdsimd_test::assert(#shim_name as usize,
                                    stringify!(#shim_name),
-                                   stringify!(#instr));
+                                   #instr);
         }
     }.into();
     // why? necessary now to get tests to work?
diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs
index 6efd69d27e..97d0825ec6 100644
--- a/crates/stdsimd-test/src/lib.rs
+++ b/crates/stdsimd-test/src/lib.rs
@@ -371,16 +371,6 @@ fn normalize(symbol: &str) -> String {
 /// This asserts that the function at `fnptr` contains the instruction
 /// `expected` provided.
 pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
-    // The string in expected is surrounded by '"', strip these:
-    let expected = {
-        assert!(
-            expected.len() > 2
-                && expected.starts_with('"')
-                && expected.ends_with('"')
-        );
-        expected.get(1..expected.len() - 1)
-            .expect("expected must be a '\"' delimited string, e.g., \"nop\"")
-    };
     let mut fnname = fnname.to_string();
     let functions = get_functions(fnptr, &mut fnname);
     assert_eq!(functions.len(), 1);

From da980732dc061989598e9e420aba4b9363cbc1e3 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Fri, 10 Aug 2018 08:05:05 -0700
Subject: [PATCH 13/18] Add assert_instr for wasm memory intrinsics

---
 Cargo.toml                                  |  4 ++++
 ci/docker/wasm32-unknown-unknown/Dockerfile | 14 +++++++++++---
 ci/run-docker.sh                            |  6 +++---
 coresimd/wasm32/mod.rs                      |  7 +++++++
 crates/stdsimd-test/src/lib.rs              | 10 ++--------
 5 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 4e96e5f494..4e7b31daa9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,3 +14,7 @@ opt-level = 3
 [profile.bench]
 debug = 1
 opt-level = 3
+
+[patch.crates-io]
+wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' }
+wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile
index 453af264a2..734d0bf2ad 100644
--- a/ci/docker/wasm32-unknown-unknown/Dockerfile
+++ b/ci/docker/wasm32-unknown-unknown/Dockerfile
@@ -17,9 +17,17 @@ RUN make -C wabt -j$(nproc)
 ENV PATH=$PATH:/wabt/bin
 
 # Install `wasm-bindgen-test-runner`
-RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.15/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl.tar.gz \
-  | tar xzf -
-ENV PATH=$PATH:/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl
+# RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.15/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl.tar.gz \
+#   | tar xzf -
+# ENV PATH=$PATH:/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl
+# TODO: remove these lines once we can use published releases
+ENV CARGO_HOME=/cargo RUSTUP_HOME=/rustup
+ENV PATH=$PATH:/cargo/bin
+RUN curl https://sh.rustup.rs | sh -s -- -y
+ENV LIBZ_SYS_STATIC=1
+RUN cargo install --git https://github.com/rustwasm/wasm-bindgen \
+  wasm-bindgen-cli --features vendored-openssl
+
 ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
 
 # Install `node`
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index 0c560c825c..5226363410 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -13,8 +13,8 @@ run() {
       --user `id -u`:`id -g` \
       --rm \
       --init \
-      --volume $HOME/.cargo:/cargo \
-      --env CARGO_HOME=/cargo \
+      --volume $HOME/.cargo:/cargo-h \
+      --env CARGO_HOME=/cargo-h \
       --volume `rustc --print sysroot`:/rust:ro \
       --env TARGET=$target \
       --env STDSIMD_TEST_EVERYTHING \
@@ -25,7 +25,7 @@ run() {
       --privileged \
       stdsimd \
       bash \
-      -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
+      -c 'PATH=/rust/bin:$PATH exec ci/run.sh'
 }
 
 if [ -z "$1" ]; then
diff --git a/coresimd/wasm32/mod.rs b/coresimd/wasm32/mod.rs
index 1b0c82bb4e..054e187f9d 100644
--- a/coresimd/wasm32/mod.rs
+++ b/coresimd/wasm32/mod.rs
@@ -7,6 +7,11 @@ mod simd128;
 pub mod simd128;
 pub use self::simd128::*;
 
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+#[cfg(test)]
+use wasm_bindgen_test::wasm_bindgen_test;
+
 extern "C" {
     #[link_name = "llvm.wasm.grow.memory.i32"]
     fn llvm_grow_memory(pages: i32) -> i32;
@@ -21,6 +26,7 @@ extern "C" {
 ///
 /// [instr]: https://github.com/WebAssembly/design/blob/master/Semantics.md#resizing
 #[inline]
+#[cfg_attr(test, assert_instr("memory.size"))]
 pub unsafe fn current_memory() -> i32 {
     llvm_current_memory()
 }
@@ -34,6 +40,7 @@ pub unsafe fn current_memory() -> i32 {
 ///
 /// [instr]: https://github.com/WebAssembly/design/blob/master/Semantics.md#resizing
 #[inline]
+#[cfg_attr(test, assert_instr("memory.grow"))]
 pub unsafe fn grow_memory(delta: i32) -> i32 {
     llvm_grow_memory(delta)
 }
diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs
index 97d0825ec6..9f56363835 100644
--- a/crates/stdsimd-test/src/lib.rs
+++ b/crates/stdsimd-test/src/lib.rs
@@ -259,19 +259,13 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
 }
 
 
-#[cfg_attr(feature = "git_wasm_bindgen",
-           wasm_bindgen(module = "child_process"))]
-#[cfg_attr(not(feature = "git_wasm_bindgen"),
-           wasm_bindgen(module = "child_process", version = "*"))]
+#[wasm_bindgen(module = "child_process")]
 extern "C" {
     #[wasm_bindgen(js_name = execSync)]
     fn exec_sync(cmd: &str) -> Buffer;
 }
 
-#[cfg_attr(feature = "git_wasm_bindgen",
-           wasm_bindgen(module = "buffer"))]
-#[cfg_attr(not(feature = "git_wasm_bindgen"),
-           wasm_bindgen(module = "buffer", version = "*"))]
+#[wasm_bindgen(module = "buffer")]
 extern "C" {
     type Buffer;
     #[wasm_bindgen(method, js_name = toString)]

From 131eccdb37eea83c8881adba1c9d3cccab487494 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Mon, 13 Aug 2018 16:43:28 -0700
Subject: [PATCH 14/18] Remove hacks for git wasm-bindgen

---
 Cargo.toml                                  |  4 ----
 ci/docker/wasm32-unknown-unknown/Dockerfile | 14 +++-----------
 crates/coresimd/Cargo.toml                  |  2 +-
 crates/stdsimd-test/Cargo.toml              |  3 +--
 4 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 4e7b31daa9..4e96e5f494 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,3 @@ opt-level = 3
 [profile.bench]
 debug = 1
 opt-level = 3
-
-[patch.crates-io]
-wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' }
-wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile
index 734d0bf2ad..56eef71204 100644
--- a/ci/docker/wasm32-unknown-unknown/Dockerfile
+++ b/ci/docker/wasm32-unknown-unknown/Dockerfile
@@ -17,17 +17,9 @@ RUN make -C wabt -j$(nproc)
 ENV PATH=$PATH:/wabt/bin
 
 # Install `wasm-bindgen-test-runner`
-# RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.15/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl.tar.gz \
-#   | tar xzf -
-# ENV PATH=$PATH:/wasm-bindgen-0.2.15-x86_64-unknown-linux-musl
-# TODO: remove these lines once we can use published releases
-ENV CARGO_HOME=/cargo RUSTUP_HOME=/rustup
-ENV PATH=$PATH:/cargo/bin
-RUN curl https://sh.rustup.rs | sh -s -- -y
-ENV LIBZ_SYS_STATIC=1
-RUN cargo install --git https://github.com/rustwasm/wasm-bindgen \
-  wasm-bindgen-cli --features vendored-openssl
-
+RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.16/wasm-bindgen-0.2.16-x86_64-unknown-linux-musl.tar.gz \
+  | tar xzf -
+ENV PATH=$PATH:/wasm-bindgen-0.2.16-x86_64-unknown-linux-musl
 ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
 
 # Install `node`
diff --git a/crates/coresimd/Cargo.toml b/crates/coresimd/Cargo.toml
index 15f2eb15a4..f564ad1388 100644
--- a/crates/coresimd/Cargo.toml
+++ b/crates/coresimd/Cargo.toml
@@ -23,7 +23,7 @@ stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
 stdsimd = { version = "0.0.3", path = "../stdsimd" }
 
 [target.wasm32-unknown-unknown.dev-dependencies]
-wasm-bindgen-test = "=0.2.15"
+wasm-bindgen-test = "0.2.16"
 
 [features]
 # Internal-usage only: denies all warnings.
diff --git a/crates/stdsimd-test/Cargo.toml b/crates/stdsimd-test/Cargo.toml
index 8c98fd1c56..cf459b6e04 100644
--- a/crates/stdsimd-test/Cargo.toml
+++ b/crates/stdsimd-test/Cargo.toml
@@ -10,8 +10,7 @@ backtrace = "0.3"
 cc = "1.0"
 lazy_static = "1.0"
 rustc-demangle = "0.1.8"
-wasm-bindgen = "0.2.15"
+wasm-bindgen = "0.2.16"
 
 [features]
 default = []
-git_wasm_bindgen = []
\ No newline at end of file

From 70bf4edcb0bb48ebd7038460e691a7326fe411e7 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Wed, 15 Aug 2018 11:09:26 +0200
Subject: [PATCH 15/18] add wasm_simd128 feature

---
 ci/run.sh                  | 10 +++++-----
 coresimd/wasm32/mod.rs     |  7 ++++---
 crates/coresimd/Cargo.toml |  2 ++
 crates/stdsimd/Cargo.toml  |  4 ++++
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 875a206e84..2cde13c105 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -39,10 +39,6 @@ case ${TARGET} in
     *android*)
         export STDSIMD_DISABLE_ASSERT_INSTR=1
         ;;
-    wasm32*)
-        # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128"
-        ;;
-
     *)
         ;;
 esac
@@ -63,13 +59,17 @@ cargo_test() {
 cargo_test
 cargo_test "--release"
 
-# Test x86 targets compiled with AVX.
+# Test targets compiled with extra features.
 case ${TARGET} in
     x86*)
         RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
         export STDSIMD_DISABLE_ASSERT_INSTR=1
         cargo_test "--release"
         ;;
+    wasm32-unknown-unknown*)
+        # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128"
+        cargo_test "--release --features=wasm32_simd128"
+        ;;
     *)
         ;;
 esac
diff --git a/coresimd/wasm32/mod.rs b/coresimd/wasm32/mod.rs
index 054e187f9d..ea5a8e0ba0 100644
--- a/coresimd/wasm32/mod.rs
+++ b/coresimd/wasm32/mod.rs
@@ -1,11 +1,12 @@
 //! WASM32 intrinsics
 
 #[macro_use]
-#[cfg(not(test))]
+#[cfg(all(not(test), feature = "wasm_simd128"))]
 mod simd128;
-#[cfg(test)]
+
+#[cfg(all(test, feature = "wasm_simd128"))]
 pub mod simd128;
-pub use self::simd128::*;
+#[cfg(all(test, feature = "wasm_simd128"))]
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
diff --git a/crates/coresimd/Cargo.toml b/crates/coresimd/Cargo.toml
index f564ad1388..dac4d916da 100644
--- a/crates/coresimd/Cargo.toml
+++ b/crates/coresimd/Cargo.toml
@@ -31,3 +31,5 @@ strict = []
 # Internal-usage only: enables only those intrinsics supported by Intel's
 # Software Development Environment (SDE).
 intel_sde = []
+# Enables wasm simd128 intrinsics
+wasm_simd128 = []
diff --git a/crates/stdsimd/Cargo.toml b/crates/stdsimd/Cargo.toml
index 4ab553db48..3db3ed1187 100644
--- a/crates/stdsimd/Cargo.toml
+++ b/crates/stdsimd/Cargo.toml
@@ -37,3 +37,7 @@ path = "../../examples/hex.rs"
 name = "wasm"
 crate-type = ["cdylib"]
 path = "../../examples/wasm.rs"
+
+[features]
+default = []
+wasm_simd128 = ["coresimd/wasm_simd128"]
\ No newline at end of file

From cd091fef78bf3ac54bfee6e3cf8eb65228e6037a Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Wed, 15 Aug 2018 15:56:39 +0200
Subject: [PATCH 16/18] make wasm32 build correctly

---
 coresimd/wasm32/mod.rs     | 2 ++
 crates/coresimd/src/lib.rs | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/coresimd/wasm32/mod.rs b/coresimd/wasm32/mod.rs
index ea5a8e0ba0..2862d296a9 100644
--- a/coresimd/wasm32/mod.rs
+++ b/coresimd/wasm32/mod.rs
@@ -1,5 +1,6 @@
 //! WASM32 intrinsics
 
+
 #[macro_use]
 #[cfg(all(not(test), feature = "wasm_simd128"))]
 mod simd128;
@@ -7,6 +8,7 @@ mod simd128;
 #[cfg(all(test, feature = "wasm_simd128"))]
 pub mod simd128;
 #[cfg(all(test, feature = "wasm_simd128"))]
+pub use self::simd128::*;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs
index 6411fbb78e..8d892e3b47 100644
--- a/crates/coresimd/src/lib.rs
+++ b/crates/coresimd/src/lib.rs
@@ -11,6 +11,7 @@
 #![allow(unused_features)]
 #![feature(
     const_fn,
+    const_fn_union,
     link_llvm_intrinsics,
     platform_intrinsics,
     repr_simd,
@@ -34,7 +35,7 @@
     arm_target_feature,
     aarch64_target_feature,
     mips_target_feature,
-    powerpc_target_feature
+    powerpc_target_feature,
 )]
 #![cfg_attr(
     test,

From 5be369bc9a3e80d0c87439635c5140683319f84f Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Wed, 15 Aug 2018 16:08:57 +0200
Subject: [PATCH 17/18] run simd128 tests on ci

---
 ci/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/run.sh b/ci/run.sh
index 2cde13c105..8bc915d38b 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -68,7 +68,7 @@ case ${TARGET} in
         ;;
     wasm32-unknown-unknown*)
         # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128"
-        cargo_test "--release --features=wasm32_simd128"
+        cargo_test "--release --features=wasm_simd128"
         ;;
     *)
         ;;

From 1d3683db8e66ee0d6ccf388fcc61231b1cfa51b0 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Wed, 15 Aug 2018 17:24:11 +0200
Subject: [PATCH 18/18] remove wasm-assert-instr-tests

---
 crates/wasm-assert-instr-tests/Cargo.toml | 16 -----
 crates/wasm-assert-instr-tests/readme.md  | 71 -----------------------
 crates/wasm-assert-instr-tests/src/lib.rs | 22 -------
 3 files changed, 109 deletions(-)
 delete mode 100644 crates/wasm-assert-instr-tests/Cargo.toml
 delete mode 100644 crates/wasm-assert-instr-tests/readme.md
 delete mode 100644 crates/wasm-assert-instr-tests/src/lib.rs

diff --git a/crates/wasm-assert-instr-tests/Cargo.toml b/crates/wasm-assert-instr-tests/Cargo.toml
deleted file mode 100644
index 07885fa839..0000000000
--- a/crates/wasm-assert-instr-tests/Cargo.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-[package]
-name = "wasm-assert-instr-tests"
-version = "0.1.0"
-authors = ["gnzlbg <gonzalobg88@gmail.com>"]
-
-[dependencies]
-coresimd = { path = "../coresimd" }
-[dev-dependencies]
-stdsimd-test = { path = "../stdsimd-test", features = ["git_wasm_bindgen"] }
-
-[target.wasm32-unknown-unknown.dev-dependencies]
-wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
-
-[patch.crates-io]
-wasm-bindgen = { git = 'https://github.com/rustwasm/wasm-bindgen' }
-wasm-bindgen-test = { git = 'https://github.com/rustwasm/wasm-bindgen' }
\ No newline at end of file
diff --git a/crates/wasm-assert-instr-tests/readme.md b/crates/wasm-assert-instr-tests/readme.md
deleted file mode 100644
index 165c38d638..0000000000
--- a/crates/wasm-assert-instr-tests/readme.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# assert_instr on WASM32
-
-This crate uses `assert_instr` to verify the assembly of wasm functions.
-
-# Set up
-
-This crate needs a couple of tools installed:
-
-1. Install latest version of `wasm-bindgen` CLI tools
-
-```
-git clone git@github.com:rustwasm/wasm-bindgen
-cd wasm-bindgen
-cargo install --path crates/cli
-
-# This makes wasm-bindgen-test-runner the test runner for wasm32-unknown-unknown:
-``` 
-
-2. Install WABT
-
-```
-# MacOSX
-brew install wabt
-
-# From source: 
-git clone --recursive https://github.com/WebAssembly/wabt
-make -C wabt -j
-
-# Add it to the path
-PATH=$PATH:/wabt/bin
-```
-
-The `stdsimd-test` proc macro needs to be able to find these in the path. We
-could add an environment variable to configure these.
-
-3. Install Node
-
-Using `nvm`, homebrew, or manually. The test runner needs to find a recent
-enough node in the `PATH`:
-
-```
-# MacOSX
-brew install node
-
-# Other
-curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf -
-PATH=$PATH:/node-v10.8.0-linux-x64/bin
-```
-
-4. Compile and install linker shim
-
-```
-# In stdsimd/
-cd ci
-rustc lld-shim -o lld-shim
-```
-
-# Running the tests
-
-This is how you can run the tests:
-
-```
-CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=PATH/TO/lld-shim \
-CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner \
-cargo test --target=wasm32-unknown-unknown --release
-```
-
-you can also set the `CARGO_TARGET_WASM32_...` linker and test runner globally
-with `export ...`.
-
-To see the build fail, pass it `RUSTFLAGS="-C target-feature=+simd128"`.
diff --git a/crates/wasm-assert-instr-tests/src/lib.rs b/crates/wasm-assert-instr-tests/src/lib.rs
deleted file mode 100644
index f5d82a328f..0000000000
--- a/crates/wasm-assert-instr-tests/src/lib.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-//! Includes assert_instr tests for wasm that currently pass.
-#![feature(stdsimd)]
-#![cfg_attr(test, feature(use_extern_macros))]
-
-extern crate coresimd;
-#[cfg(test)]
-extern crate stdsimd_test;
-#[cfg(all(test, target_arch = "wasm32"))]
-extern crate wasm_bindgen_test;
-
-use coresimd::arch::wasm32::*;
-
-#[cfg(test)]
-use stdsimd_test::assert_instr;
-
-#[cfg(test)]
-use wasm_bindgen_test::*;
-
-#[cfg_attr(test, assert_instr(foo))]
-pub fn i8x16_add(a: v128, b: v128) -> v128 {
-    unsafe { i8x16::add(a, b) }
-}