Skip to content

Commit

Permalink
Merge pull request #562 from robertknight/remove-load-interleave-i8
Browse files Browse the repository at this point in the history
Remove unused `SimdInt::load_interleave_i8`
  • Loading branch information
robertknight authored Jan 29, 2025
2 parents 775d4c7 + 588107b commit 68a1f9d
Show file tree
Hide file tree
Showing 5 changed files with 0 additions and 141 deletions.
23 changes: 0 additions & 23 deletions rten-simd/src/arch/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,29 +158,6 @@ impl SimdInt for int32x4_t {
Self::load(lanes.as_ptr())
}

/// Load 4 i8 values from each of the four pointers and interleave them.
///
/// The result, viewed as 16 bytes, is
/// `[a[0], b[0], c[0], d[0], a[1], b[1], c[1], d[1], ..., a[3], b[3], c[3], d[3]]`.
///
/// # Safety
///
/// Each pointer must be valid for reading 4 bytes.
///
/// NOTE(review): the pointers are cast to `*const i32` before loading; confirm
/// that `vld1_dup_s32` tolerates addresses that are not 4-byte aligned.
#[inline]
unsafe fn load_interleave_i8(
a_ptr: *const i8,
b_ptr: *const i8,
c_ptr: *const i8,
d_ptr: *const i8,
) -> Self {
use core::arch::aarch64::{
vcombine_s32, vld1_dup_s32, vreinterpret_s16_s8, vreinterpret_s32_s16,
vreinterpret_s8_s32, vzip1_s8, vzip_s16,
};

// Read 4 bytes from each input as a single i32, duplicated into both lanes
// of a 64-bit vector.
let a = vld1_dup_s32(a_ptr as *const i32);
let b = vld1_dup_s32(b_ptr as *const i32);
let c = vld1_dup_s32(c_ptr as *const i32);
let d = vld1_dup_s32(d_ptr as *const i32);

// Byte-wise zip of the low halves: A0 B0 A1 B1 A2 B2 A3 B3 (and likewise C/D).
let ab = vzip1_s8(vreinterpret_s8_s32(a), vreinterpret_s8_s32(b));
let cd = vzip1_s8(vreinterpret_s8_s32(c), vreinterpret_s8_s32(d));
// 16-bit zip of the AB and CD pairs:
// `.0` = A0 B0 C0 D0 A1 B1 C1 D1, `.1` = A2 B2 C2 D2 A3 B3 C3 D3.
let abcd = vzip_s16(vreinterpret_s16_s8(ab), vreinterpret_s16_s8(cd));
// Join the two 64-bit halves into the final 128-bit vector.
vcombine_s32(vreinterpret_s32_s16(abcd.0), vreinterpret_s32_s16(abcd.1))
}

#[inline]
unsafe fn zip_lo_i8(self, rhs: Self) -> Self {
vreinterpretq_s32_s8(vzip1q_s8(
Expand Down
10 changes: 0 additions & 10 deletions rten-simd/src/arch/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,6 @@ impl SimdInt for i32 {
self.clamp(0, 255) as u8
}

/// Pack one byte read from each of the four pointers into a single `i32`.
///
/// The byte at `a0` becomes the least significant byte of the result and the
/// byte at `a3` the most significant.
///
/// # Safety
///
/// Each pointer must be valid for reading one `i8`.
#[inline]
unsafe fn load_interleave_i8(
    a0: *const i8,
    a1: *const i8,
    a2: *const i8,
    a3: *const i8,
) -> Self {
    // Read the pointers in argument order, reinterpreting each i8 as a raw byte.
    let bytes = [a0, a1, a2, a3].map(|src| *src as u8);
    i32::from_le_bytes(bytes)
}

#[inline]
unsafe fn load_extend_i8(ptr: *const i8) -> Self {
*ptr as i32
Expand Down
17 changes: 0 additions & 17 deletions rten-simd/src/arch/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,23 +160,6 @@ impl SimdInt for v128i {
Self(tmp)
}

/// Load `Self::LEN` i8 values from each of the four pointers and interleave
/// them as `[a[0], b[0], c[0], d[0], a[1], b[1], c[1], d[1], ...]`.
///
/// # Safety
///
/// Each pointer must be valid for reading `Self::LEN` i8 values.
#[inline]
unsafe fn load_interleave_i8(
    a_ptr: *const i8,
    b_ptr: *const i8,
    c_ptr: *const i8,
    d_ptr: *const i8,
) -> Self {
    let mut interleaved = [0i8; 16];
    for lane in 0..Self::LEN {
        // One output group holds the `lane`-th element of each input, in order.
        let quad = [
            *a_ptr.add(lane),
            *b_ptr.add(lane),
            *c_ptr.add(lane),
            *d_ptr.add(lane),
        ];
        let start = lane * 4;
        interleaved[start..start + 4].copy_from_slice(&quad);
    }
    Self(v128_load(interleaved.as_ptr() as *const v128))
}

#[inline]
unsafe fn xor(self, rhs: Self) -> Self {
Self(v128_xor(self.0, rhs.0))
Expand Down
51 changes: 0 additions & 51 deletions rten-simd/src/arch/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,32 +200,6 @@ impl SimdInt for __m256i {
transmute::<[MaybeUninit<u8>; 8], [u8; 8]>(dest)
}

/// Load 8 i8 values from each of the four pointers and interleave them.
///
/// The result, viewed as 32 bytes, is
/// `[a[0], b[0], c[0], d[0], a[1], b[1], c[1], d[1], ..., a[7], b[7], c[7], d[7]]`.
///
/// # Safety
///
/// Each pointer must be valid for reading 8 bytes.
#[inline]
unsafe fn load_interleave_i8(
a_ptr: *const i8,
b_ptr: *const i8,
c_ptr: *const i8,
d_ptr: *const i8,
) -> Self {
use core::arch::x86_64::{
_mm256_castsi128_si256, _mm256_insertf128_si256, _mm_unpackhi_epi16,
_mm_unpacklo_epi16, _mm_unpacklo_epi8,
};
// Load 8 bytes from each input into the low half of a 128-bit register.
let a = _mm_loadl_epi64(a_ptr as *const __m128i);
let b = _mm_loadl_epi64(b_ptr as *const __m128i);
let c = _mm_loadl_epi64(c_ptr as *const __m128i);
let d = _mm_loadl_epi64(d_ptr as *const __m128i);

let ab = _mm_unpacklo_epi8(a, b); // A0 B0 ... A7 B7
let cd = _mm_unpacklo_epi8(c, d); // C0 D0 ... C7 D7

let abcd_lo = _mm_unpacklo_epi16(ab, cd); // A0 B0 C0 D0 ... A3 B3 C3 D3
let abcd_hi = _mm_unpackhi_epi16(ab, cd); // A4 B4 C4 D4 ... A7 B7 C7 D7

// Place `abcd_lo` in the low 128 bits and `abcd_hi` in the high 128 bits.
let lo = _mm256_castsi128_si256(abcd_lo);
_mm256_insertf128_si256(lo, abcd_hi, 1)
}

#[inline]
unsafe fn load_extend_i8(ptr: *const i8) -> Self {
use core::arch::x86_64::_mm256_cvtepi8_epi32;
Expand Down Expand Up @@ -610,31 +584,6 @@ impl SimdInt for __m512i {
self.to_array().map(|c| c.clamp(0, u8::MAX as i32) as u8)
}

#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn load_interleave_i8(
a_ptr: *const i8,
b_ptr: *const i8,
c_ptr: *const i8,
d_ptr: *const i8,
) -> Self {
use core::arch::x86_64::{_mm512_castsi256_si512, _mm512_insertf32x8};
let lo = <__m256i as SimdInt>::load_interleave_i8(a_ptr, b_ptr, c_ptr, d_ptr);
let lo = _mm512_castsi256_si512(lo);
let hi = <__m256i as SimdInt>::load_interleave_i8(
a_ptr.add(8),
b_ptr.add(8),
c_ptr.add(8),
d_ptr.add(8),
);
let result = _mm512_insertf32x8(
transmute::<__m512i, __m512>(lo),
transmute::<__m256i, __m256>(hi),
1,
);
transmute::<__m512, __m512i>(result)
}

#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn load_extend_i8(ptr: *const i8) -> Self {
Expand Down
40 changes: 0 additions & 40 deletions rten-simd/src/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,6 @@ pub trait SimdInt: Simd<Elem = i32> {
/// Load `Self::LEN` i8 values from `ptr` and sign-extend each to i32.
///
/// # Safety
///
/// `ptr` must be valid for reading `Self::LEN` i8 values.
unsafe fn load_extend_i8(ptr: *const i8) -> Self;

/// Load and interleave 4 groups of i8 values.
///
/// `N` values are read from each pointer, where `N == Self::LEN`. The
/// returned vector, viewed as bytes, contains `[a[0], b[0], c[0], d[0], ...
/// a[N-1], b[N-1], c[N-1], d[N-1]]`.
///
/// # Safety
///
/// Each pointer must be valid for reading `Self::LEN` i8 values.
unsafe fn load_interleave_i8(a: *const i8, b: *const i8, c: *const i8, d: *const i8) -> Self;

/// Interleave i8 values from the low half of `self` and `rhs`.
///
/// Both vectors are viewed as sequences of i8 lanes rather than i32 lanes.
///
/// NOTE(review): presumably the output alternates `self[0], rhs[0], self[1],
/// rhs[1], ...` — confirm against the per-architecture implementations.
unsafe fn zip_lo_i8(self, rhs: Self) -> Self;

Expand Down Expand Up @@ -388,40 +382,6 @@ pub mod tests {
assert_eq!(actual.as_ref(), expected);
}

/// Check that `load_interleave_i8` emits one `[a, b, c, d]` byte group per
/// i32 lane, in lane order.
#[test]
fn test_load_interleave_i8() {
    let group_step = 5;

    // Input `k` holds `k, k + group_step, k + 2 * group_step, ...`, so every
    // byte of the interleaved output can be traced back to its source.
    let make_input = |offset: usize| -> Vec<i8> {
        (0..LEN)
            .map(|lane| (lane * group_step + offset) as i8)
            .collect()
    };
    let [a, b, c, d] = [0, 1, 2, 3].map(make_input);

    let expected: Vec<i8> = (0..LEN)
        .flat_map(|lane| (0..4).map(move |offset| (lane * group_step + offset) as i8))
        .collect();

    let vec = unsafe {
        <SimdVec as SimdInt>::load_interleave_i8(a.as_ptr(), b.as_ptr(), c.as_ptr(), d.as_ptr())
    };
    let lanes = unsafe { vec.to_array() };

    // Split each i32 lane back into its 4 constituent bytes.
    let actual: Vec<i8> = lanes
        .as_ref()
        .iter()
        .flat_map(|lane| lane.to_le_bytes().map(|byte| byte as i8))
        .collect();

    assert_eq!(actual, expected);
}

#[test]
fn test_zip_lo_hi_i8() {
let a_start = 0i8;
Expand Down

0 comments on commit 68a1f9d

Please sign in to comment.