Skip to content

Commit

Permalink
Merge pull request #563 from robertknight/simd-to-array
Browse files Browse the repository at this point in the history
Make `Simd::to_array` impls a simple transmute
  • Loading branch information
robertknight authored Jan 30, 2025
2 parents 68a1f9d + ec309cb commit f4b954a
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 36 deletions.
21 changes: 7 additions & 14 deletions rten-simd/src/arch/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@ use std::arch::aarch64::{
vcleq_s32, vcltq_f32, vcltq_s32, vcombine_s16, vcvtnq_s32_f32, vcvtq_s32_f32, vdivq_f32,
vdupq_n_f32, vdupq_n_s32, veorq_s32, vfmaq_f32, vld1q_f32, vld1q_s32, vld1q_u32, vmaxq_f32,
vmaxq_s32, vminq_f32, vminq_s32, vmulq_f32, vmulq_s32, vqmovn_s32, vqmovun_s16,
vreinterpretq_f32_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vst1q_u32, vsubq_f32, vsubq_s32,
};

use core::arch::aarch64::{
vreinterpretq_s16_s32, vreinterpretq_s32_s16, vreinterpretq_s32_s8, vreinterpretq_s8_s32,
vzip1q_s16, vzip1q_s8, vzip2q_s16, vzip2q_s8,
vreinterpretq_f32_s32, vreinterpretq_s16_s32, vreinterpretq_s32_s16, vreinterpretq_s32_s8,
vreinterpretq_s8_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32, vzip1q_s16,
vzip1q_s8, vzip2q_s16, vzip2q_s8,
};
use std::mem::transmute;

use crate::{Simd, SimdFloat, SimdInt, SimdMask};

Expand All @@ -30,8 +28,7 @@ impl SimdMask for uint32x4_t {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; 4];
vst1q_u32(array.as_mut_ptr(), self);
let array = transmute::<Self, [u32; 4]>(self);
std::array::from_fn(|i| array[i] != 0)
}
}
Expand Down Expand Up @@ -70,9 +67,7 @@ impl Simd for int32x4_t {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -225,9 +220,7 @@ impl Simd for float32x4_t {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down
13 changes: 5 additions & 8 deletions rten-simd/src/arch/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::arch::wasm32::{
#[cfg(target_feature = "relaxed-simd")]
use std::arch::wasm32::f32x4_relaxed_madd;

use std::mem::transmute;

use crate::{Simd, SimdFloat, SimdInt, SimdMask};

/// Wrapper around a WASM v128 type that marks it as containing integers.
Expand Down Expand Up @@ -41,8 +43,7 @@ impl SimdMask for v128i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
let array = transmute::<v128, [u32; 4]>(self.0);
std::array::from_fn(|i| array[i] != 0)
}
}
Expand Down Expand Up @@ -76,9 +77,7 @@ impl Simd for v128i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<v128, Self::Array>(self.0)
}
}

Expand Down Expand Up @@ -249,9 +248,7 @@ impl Simd for v128f {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<v128, Self::Array>(self.0)
}
}

Expand Down
19 changes: 5 additions & 14 deletions rten-simd/src/arch/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ impl SimdMask for __m256i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
let array = <Self as Simd>::to_array(self);
std::array::from_fn(|i| array[i] != 0)
}
}
Expand Down Expand Up @@ -93,9 +92,7 @@ impl Simd for __m256i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -292,9 +289,7 @@ impl Simd for __m256 {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -494,9 +489,7 @@ impl Simd for __m512i {
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -699,9 +692,7 @@ impl Simd for __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down
8 changes: 8 additions & 0 deletions rten-simd/src/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ pub trait Simd: Copy + Sized {
}

/// Return the contents of this vector as an array.
///
/// This is a cheap transmute for most implementations because the SIMD
/// type and the array have the same layout. The converse (treating an
/// array as the SIMD type) is not true in general, because the SIMD type
/// may have greater alignment than the array.
unsafe fn to_array(self) -> Self::Array;

/// Return a new vector with all elements set to zero.
Expand Down Expand Up @@ -176,6 +180,10 @@ pub trait SimdMask: Copy {
}

/// Convert this SIMD mask to a boolean array.
///
/// Unlike [`Simd::to_array`], this is not a simple transmute: each
/// element must be converted from the architecture-specific mask
/// representation to a `bool`.
unsafe fn to_array(self) -> Self::Array;

/// Create a SIMD mask from a boolean array.
Expand Down

0 comments on commit f4b954a

Please sign in to comment.