Merge pull request #562 from robertknight/remove-load-interleave-i8

Remove unused `SimdInt::load_interleave_i8`
robertknight · Jan 29, 2025 · 68a1f9d · 68a1f9d
2 parents 775d4c7 + 588107b
commit 68a1f9d
Show file tree

Hide file tree

Showing 5 changed files with 0 additions and 141 deletions.
diff --git a/rten-simd/src/arch/aarch64.rs b/rten-simd/src/arch/aarch64.rs
@@ -158,29 +158,6 @@ impl SimdInt for int32x4_t {
         Self::load(lanes.as_ptr())
     }
 
-    #[inline]
-    unsafe fn load_interleave_i8(
-        a_ptr: *const i8,
-        b_ptr: *const i8,
-        c_ptr: *const i8,
-        d_ptr: *const i8,
-    ) -> Self {
-        use core::arch::aarch64::{
-            vcombine_s32, vld1_dup_s32, vreinterpret_s16_s8, vreinterpret_s32_s16,
-            vreinterpret_s8_s32, vzip1_s8, vzip_s16,
-        };
-
-        let a = vld1_dup_s32(a_ptr as *const i32);
-        let b = vld1_dup_s32(b_ptr as *const i32);
-        let c = vld1_dup_s32(c_ptr as *const i32);
-        let d = vld1_dup_s32(d_ptr as *const i32);
-
-        let ab = vzip1_s8(vreinterpret_s8_s32(a), vreinterpret_s8_s32(b));
-        let cd = vzip1_s8(vreinterpret_s8_s32(c), vreinterpret_s8_s32(d));
-        let abcd = vzip_s16(vreinterpret_s16_s8(ab), vreinterpret_s16_s8(cd));
-        vcombine_s32(vreinterpret_s32_s16(abcd.0), vreinterpret_s32_s16(abcd.1))
-    }
-
     #[inline]
     unsafe fn zip_lo_i8(self, rhs: Self) -> Self {
         vreinterpretq_s32_s8(vzip1q_s8(

diff --git a/rten-simd/src/arch/scalar.rs b/rten-simd/src/arch/scalar.rs
@@ -135,16 +135,6 @@ impl SimdInt for i32 {
         self.clamp(0, 255) as u8
     }
 
-    #[inline]
-    unsafe fn load_interleave_i8(
-        a0: *const i8,
-        a1: *const i8,
-        a2: *const i8,
-        a3: *const i8,
-    ) -> Self {
-        i32::from_le_bytes([*a0 as u8, *a1 as u8, *a2 as u8, *a3 as u8])
-    }
-
     #[inline]
     unsafe fn load_extend_i8(ptr: *const i8) -> Self {
         *ptr as i32

diff --git a/rten-simd/src/arch/wasm.rs b/rten-simd/src/arch/wasm.rs
@@ -160,23 +160,6 @@ impl SimdInt for v128i {
         Self(tmp)
     }
 
-    #[inline]
-    unsafe fn load_interleave_i8(
-        a_ptr: *const i8,
-        b_ptr: *const i8,
-        c_ptr: *const i8,
-        d_ptr: *const i8,
-    ) -> Self {
-        let mut bytes: [i8; 16] = [0; 16];
-        for i in 0..Self::LEN {
-            bytes[i * 4] = *a_ptr.add(i);
-            bytes[i * 4 + 1] = *b_ptr.add(i);
-            bytes[i * 4 + 2] = *c_ptr.add(i);
-            bytes[i * 4 + 3] = *d_ptr.add(i);
-        }
-        Self(v128_load(bytes.as_ptr() as *const v128))
-    }
-
     #[inline]
     unsafe fn xor(self, rhs: Self) -> Self {
         Self(v128_xor(self.0, rhs.0))

diff --git a/rten-simd/src/arch/x86_64.rs b/rten-simd/src/arch/x86_64.rs
@@ -200,32 +200,6 @@ impl SimdInt for __m256i {
         transmute::<[MaybeUninit<u8>; 8], [u8; 8]>(dest)
     }
 
-    #[inline]
-    unsafe fn load_interleave_i8(
-        a_ptr: *const i8,
-        b_ptr: *const i8,
-        c_ptr: *const i8,
-        d_ptr: *const i8,
-    ) -> Self {
-        use core::arch::x86_64::{
-            _mm256_castsi128_si256, _mm256_insertf128_si256, _mm_unpackhi_epi16,
-            _mm_unpacklo_epi16, _mm_unpacklo_epi8,
-        };
-        let a = _mm_loadl_epi64(a_ptr as *const __m128i);
-        let b = _mm_loadl_epi64(b_ptr as *const __m128i);
-        let c = _mm_loadl_epi64(c_ptr as *const __m128i);
-        let d = _mm_loadl_epi64(d_ptr as *const __m128i);
-
-        let ab = _mm_unpacklo_epi8(a, b); // A0 B0 ... A7 B7
-        let cd = _mm_unpacklo_epi8(c, d); // C0 D0 ... C7 D7
-
-        let abcd_lo = _mm_unpacklo_epi16(ab, cd); // A0 B0 C0 D0 ...
-        let abcd_hi = _mm_unpackhi_epi16(ab, cd); // A4 B4 C4 D4 ...
-
-        let lo = _mm256_castsi128_si256(abcd_lo);
-        _mm256_insertf128_si256(lo, abcd_hi, 1)
-    }
-
     #[inline]
     unsafe fn load_extend_i8(ptr: *const i8) -> Self {
         use core::arch::x86_64::_mm256_cvtepi8_epi32;
@@ -610,31 +584,6 @@ impl SimdInt for __m512i {
         self.to_array().map(|c| c.clamp(0, u8::MAX as i32) as u8)
     }
 
-    #[inline]
-    #[target_feature(enable = "avx512f")]
-    unsafe fn load_interleave_i8(
-        a_ptr: *const i8,
-        b_ptr: *const i8,
-        c_ptr: *const i8,
-        d_ptr: *const i8,
-    ) -> Self {
-        use core::arch::x86_64::{_mm512_castsi256_si512, _mm512_insertf32x8};
-        let lo = <__m256i as SimdInt>::load_interleave_i8(a_ptr, b_ptr, c_ptr, d_ptr);
-        let lo = _mm512_castsi256_si512(lo);
-        let hi = <__m256i as SimdInt>::load_interleave_i8(
-            a_ptr.add(8),
-            b_ptr.add(8),
-            c_ptr.add(8),
-            d_ptr.add(8),
-        );
-        let result = _mm512_insertf32x8(
-            transmute::<__m512i, __m512>(lo),
-            transmute::<__m256i, __m256>(hi),
-            1,
-        );
-        transmute::<__m512, __m512i>(result)
-    }
-
     #[inline]
     #[target_feature(enable = "avx512f")]
     unsafe fn load_extend_i8(ptr: *const i8) -> Self {

diff --git a/rten-simd/src/vec.rs b/rten-simd/src/vec.rs
@@ -231,12 +231,6 @@ pub trait SimdInt: Simd<Elem = i32> {
     /// Load `S::LEN` i8 values from `ptr` and sign-extend to i32.
     unsafe fn load_extend_i8(ptr: *const i8) -> Self;
 
-    /// Load and interleave 4 groups of i8 values.
-    ///
-    /// The returned vector contains `[a[0], b[0], c[0], d[0], ...
-    /// a[N - 1], b[N - 1], c[N - 1], d[N - 1]]` where `N == Self::LEN`.
-    unsafe fn load_interleave_i8(a: *const i8, b: *const i8, c: *const i8, d: *const i8) -> Self;
-
     /// Interleave i8 values from the low half of `self` and `rhs`.
     unsafe fn zip_lo_i8(self, rhs: Self) -> Self;
 
@@ -388,40 +382,6 @@ pub mod tests {
                     assert_eq!(actual.as_ref(), expected);
                 }
 
-                #[test]
-                fn test_load_interleave_i8() {
-                    let group_step = 5;
-                    let a: Vec<_> = (0..).step_by(group_step).take(LEN).collect();
-                    let b: Vec<_> = (1..).step_by(group_step).take(LEN).collect();
-                    let c: Vec<_> = (2..).step_by(group_step).take(LEN).collect();
-                    let d: Vec<_> = (3..).step_by(group_step).take(LEN).collect();
-
-                    let mut expected = Vec::new();
-                    for step in 0..LEN {
-                        let base = step * group_step;
-                        for i in 0..4 {
-                            expected.push((base + i) as i8);
-                        }
-                    }
-
-                    let vec = unsafe {
-                        <SimdVec as SimdInt>::load_interleave_i8(
-                            a.as_ptr(),
-                            b.as_ptr(),
-                            c.as_ptr(),
-                            d.as_ptr(),
-                        )
-                    };
-                    let actual = unsafe { vec.to_array() };
-                    let actual: Vec<i8> = actual
-                        .as_ref()
-                        .iter()
-                        .flat_map(|x| x.to_le_bytes().map(|b| b as i8))
-                        .collect();
-
-                    assert_eq!(actual.as_ref(), expected);
-                }
-
                 #[test]
                 fn test_zip_lo_hi_i8() {
                     let a_start = 0i8;