Merge pull request #563 from robertknight/simd-to-array

Make `Simd::to_array` impls a simple transmute
robertknight · Jan 30, 2025 · f4b954a · f4b954a
2 parents 68a1f9d + ec309cb
commit f4b954a
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 36 deletions.
diff --git a/rten-simd/src/arch/aarch64.rs b/rten-simd/src/arch/aarch64.rs
@@ -4,13 +4,11 @@ use std::arch::aarch64::{
     vcleq_s32, vcltq_f32, vcltq_s32, vcombine_s16, vcvtnq_s32_f32, vcvtq_s32_f32, vdivq_f32,
     vdupq_n_f32, vdupq_n_s32, veorq_s32, vfmaq_f32, vld1q_f32, vld1q_s32, vld1q_u32, vmaxq_f32,
     vmaxq_s32, vminq_f32, vminq_s32, vmulq_f32, vmulq_s32, vqmovn_s32, vqmovun_s16,
-    vreinterpretq_f32_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vst1q_u32, vsubq_f32, vsubq_s32,
-};
-
-use core::arch::aarch64::{
-    vreinterpretq_s16_s32, vreinterpretq_s32_s16, vreinterpretq_s32_s8, vreinterpretq_s8_s32,
-    vzip1q_s16, vzip1q_s8, vzip2q_s16, vzip2q_s8,
+    vreinterpretq_f32_s32, vreinterpretq_s16_s32, vreinterpretq_s32_s16, vreinterpretq_s32_s8,
+    vreinterpretq_s8_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32, vzip1q_s16,
+    vzip1q_s8, vzip2q_s16, vzip2q_s8,
 };
+use std::mem::transmute;
 
 use crate::{Simd, SimdFloat, SimdInt, SimdMask};
 
@@ -30,8 +28,7 @@ impl SimdMask for uint32x4_t {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; 4];
-        vst1q_u32(array.as_mut_ptr(), self);
+        let array = transmute::<Self, [u32; 4]>(self);
         std::array::from_fn(|i| array[i] != 0)
     }
 }
@@ -70,9 +67,7 @@ impl Simd for int32x4_t {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<Self, Self::Array>(self)
     }
 }
 
@@ -225,9 +220,7 @@ impl Simd for float32x4_t {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0.; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<Self, Self::Array>(self)
     }
 }
 

diff --git a/rten-simd/src/arch/wasm.rs b/rten-simd/src/arch/wasm.rs
@@ -9,6 +9,8 @@ use std::arch::wasm32::{
 #[cfg(target_feature = "relaxed-simd")]
 use std::arch::wasm32::f32x4_relaxed_madd;
 
+use std::mem::transmute;
+
 use crate::{Simd, SimdFloat, SimdInt, SimdMask};
 
 /// Wrapper around a WASM v128 type that marks it as containing integers.
@@ -41,8 +43,7 @@ impl SimdMask for v128i {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; Self::LEN];
-        self.store(array.as_mut_ptr());
+        let array = transmute::<v128, [u32; 4]>(self.0);
         std::array::from_fn(|i| array[i] != 0)
     }
 }
@@ -76,9 +77,7 @@ impl Simd for v128i {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<v128, Self::Array>(self.0)
     }
 }
 
@@ -249,9 +248,7 @@ impl Simd for v128f {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0.; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<v128, Self::Array>(self.0)
     }
 }
 

diff --git a/rten-simd/src/arch/x86_64.rs b/rten-simd/src/arch/x86_64.rs
@@ -42,8 +42,7 @@ impl SimdMask for __m256i {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; Self::LEN];
-        self.store(array.as_mut_ptr());
+        let array = <Self as Simd>::to_array(self);
         std::array::from_fn(|i| array[i] != 0)
     }
 }
@@ -93,9 +92,7 @@ impl Simd for __m256i {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<Self, Self::Array>(self)
     }
 }
 
@@ -292,9 +289,7 @@ impl Simd for __m256 {
 
     #[inline]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0.; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<Self, Self::Array>(self)
     }
 }
 
@@ -494,9 +489,7 @@ impl Simd for __m512i {
     #[inline]
     #[target_feature(enable = "avx512f")]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<Self, Self::Array>(self)
     }
 }
 
@@ -699,9 +692,7 @@ impl Simd for __m512 {
     #[inline]
     #[target_feature(enable = "avx512f")]
     unsafe fn to_array(self) -> Self::Array {
-        let mut array = [0.; Self::LEN];
-        self.store(array.as_mut_ptr());
-        array
+        transmute::<Self, Self::Array>(self)
     }
 }
 

diff --git a/rten-simd/src/vec.rs b/rten-simd/src/vec.rs
@@ -140,6 +140,10 @@ pub trait Simd: Copy + Sized {
     }
 
     /// Return the contents of this vector as an array.
+    ///
+    /// This is a cheap transmute for most implementations because the SIMD
+    /// type and the array have the same size and element order. An array cannot
+    /// be reinterpreted in place as the SIMD type, which may have greater alignment.
     unsafe fn to_array(self) -> Self::Array;
 
     /// Return a new vector with all elements set to zero.
@@ -176,6 +180,10 @@ pub trait SimdMask: Copy {
     }
 
     /// Convert this SIMD mask to a boolean array.
+    ///
+    /// Unlike [`Simd::to_array`] this is not a simple transmute because
+    /// the elements need to be converted from the architecture-specific
+    /// representation of a mask to a `bool` array.
     unsafe fn to_array(self) -> Self::Array;
 
     /// Create a SIMD mask from a boolean array.