diff --git a/crates/core_arch/MISSING.md b/crates/core_arch/MISSING.md index 99eb794a55..c948f3f8c9 100644 --- a/crates/core_arch/MISSING.md +++ b/crates/core_arch/MISSING.md @@ -1,59 +1,5 @@ ## The following neon instructions are currently not implemented in stdarch -### Can be implemented next: - -`vcls_u16` - -`vcls_u32` - -`vcls_u8` - -`vclsq_u16` - -`vclsq_u32` - -`vclsq_u8` - -`vcreate_s16` - -`vcreate_u16` - -`vpaddq_s64` - -`vpaddq_u64` - -`vreinterpretq_p128_f32` - -`vreinterpretq_p128_f64` - -`vreinterpretq_p128_p16` - -`vreinterpretq_p128_p8` - -`vreinterpretq_p128_s16` - -`vreinterpretq_p128_s32` - -`vreinterpretq_p128_s64` - -`vreinterpretq_p128_s8` - -`vreinterpretq_p128_u16` - -`vreinterpretq_p128_u32` - -`vreinterpretq_p128_u64` - -`vreinterpretq_p128_u8` - -`vslid_n_s64` - -`vslid_n_u64` - -`vsrid_n_s64` - -`vsrid_n_u64` - ### Not implemented on arm: `vcadd_rot270_f32` @@ -168,27 +114,3 @@ `vusdotq_s32v` -`vqshlu_n_s16` - -`vqshlu_n_s32` - -`vqshlu_n_s64` - -`vqshlu_n_s8` - -`vqshlub_n_s8` - -`vqshlud_n_s64` - -`vqshluh_n_s16` - -`vqshluq_n_s16` - -`vqshluq_n_s32` - -`vqshluq_n_s64` - -`vqshluq_n_s8` - -`vqshlus_n_s32` - diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index bdf6158bb2..5bfa4fa59b 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -10388,6 +10388,46 @@ pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 { simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0) } +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 { + static_assert_imm3!(N); + simd_extract(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0) +} + +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 { + static_assert_imm4!(N); + simd_extract(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0) +} + +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 { + static_assert_imm5!(N); + simd_extract(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0) +} + +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 { + static_assert_imm6!(N); + simd_extract(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0) +} + /// Signed saturating shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -10950,262 +10990,6 @@ pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { transmute(a) } -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn 
vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, 
assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { - transmute(a) -} - /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -11398,6 +11182,14 @@ pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -11590,6 +11382,14 @@ pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -13393,6 +13193,46 @@ pub unsafe fn 
vqabsd_s64(a: i64) -> i64 { vqabsd_s64_(a) } +/// Shift left and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N : i32 where N >= 0 && N <= 63); + transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) +} + +/// Shift left and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { + static_assert!(N : i32 where N >= 0 && N <= 63); + transmute(vsli_n_u64::<N>(transmute(a), transmute(b))) +} + +/// Shift right and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + transmute(vsri_n_s64::<N>(transmute(a), transmute(b))) +} + +/// Shift right and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + transmute(vsri_n_u64::<N>(transmute(a), transmute(b))) +} + #[cfg(test)] mod test { use super::*; @@ -21742,6 +21582,38 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshlub_n_s8() { + let a: i8 = 1; + let e: u8 = 4; + let r: u8 = transmute(vqshlub_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshluh_n_s16() { + let a: i16 = 1; + let e: u16 = 4; + let r: u16 = transmute(vqshluh_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlus_n_s32() { + let a: i32 = 1; + let e: u32 = 4; + let r: u32 = transmute(vqshlus_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlud_n_s64() { + let a: i64 = 1; + let e: u64 = 4; + let r: u64 = transmute(vqshlud_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqshrnd_n_s64() { let a: i64 = 0; @@ -22179,262 +22051,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_p64() { - let a: i64x1 = i64x1::new(0); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vreinterpret_s32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_p64() { - let a: i64x1 = i64x1::new(0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vreinterpret_u32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i32x4 = i32x4::new(0, 0, 1, 0); - let r: i32x4 = transmute(vreinterpretq_s32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u32x4 = u32x4::new(0, 0, 1, 0); - let r: u32x4 = transmute(vreinterpretq_u32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s32() { - let a: i32x2 = i32x2::new(0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s32(transmute(a))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u32() { - let a: u32x2 = u32x2::new(0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_s32() { - let a: i32x4 = i32x4::new(0, 0, 1, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u32() { - let a: u32x4 = u32x4::new(0, 0, 1, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_p64() { - let a: i64x1 = i64x1::new(0); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_s16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_p64() { - let a: i64x1 = i64x1::new(0); - let e: u16x4 = u16x4::new(0, 0, 0, 0); - let r: u16x4 = transmute(vreinterpret_u16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p16_p64() { - let a: i64x1 = i64x1::new(0); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_p16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_s16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: u16x8 = transmute(vreinterpretq_u16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = 
i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_p64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_s8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_p64() { - let a: i64x1 = i64x1::new(0); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vreinterpret_u8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_p64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_s8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u8(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_s8_f64() { let a: f64 = 0.; @@ -22627,6 +22243,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_f64() { + let a: f64x2 = f64x2::new(0., 0.); + let e: p128 = 0; + let r: p128 = 
transmute(vreinterpretq_p128_f64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_f64_s8() { let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); @@ -22819,6 +22443,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f64_p128() { + let a: p128 = 0; + let e: f64x2 = f64x2::new(0., 0.); + let r: f64x2 = transmute(vreinterpretq_f64_p128(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_f64_f32() { let a: f32x2 = f32x2::new(0., 0.); @@ -24610,4 +24242,40 @@ mod test { let r: i64 = transmute(vqabsd_s64(transmute(a))); assert_eq!(r, e); } + + #[simd_test(enable = "neon")] + unsafe fn test_vslid_n_s64() { + let a: i64 = 333; + let b: i64 = 2042; + let e: i64 = 8169; + let r: i64 = transmute(vslid_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vslid_n_u64() { + let a: u64 = 333; + let b: u64 = 2042; + let e: u64 = 8169; + let r: u64 = transmute(vslid_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsrid_n_s64() { + let a: i64 = 333; + let b: i64 = 2042; + let e: i64 = 510; + let r: i64 = transmute(vsrid_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsrid_n_u64() { + let a: u64 = 333; + let b: u64 = 2042; + let e: u64 = 510; + let r: u64 = transmute(vsrid_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } } diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index d23e43c435..ff895f9875 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -103,6 +103,8 @@ extern "unadjusted" { fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; #[link_name = "llvm.aarch64.neon.addp.v4i32"] fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[link_name = "llvm.aarch64.neon.addp.v2i64"] + fn vpaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -1137,6 +1139,20 @@ pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(addp))] +pub unsafe fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + vpaddq_s64_(a, b) +} +/// Add pairwise +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(addp))] +pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + transmute(vpaddq_s64_(transmute(a), transmute(b))) +} +/// Add pairwise +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(addp))] pub unsafe fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { vpaddq_s8_(a, b) } @@ -3488,6 +3504,14 @@ mod tests { assert_eq!(r, e); } #[simd_test(enable = "neon")] + unsafe fn test_vpaddq_s64() { + let a = i64x2::new(1, 2); + let b = i64x2::new(0, -1); + let r: i64x2 = transmute(vpaddq_s64(transmute(a), transmute(b))); + let e = i64x2::new(3, -1); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vpaddq_s8() { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let b = i8x16::new( @@ -3516,6 +3540,14 @@ mod tests { assert_eq!(r, e); } #[simd_test(enable = "neon")] + unsafe fn test_vpaddq_u64() { + let a = u64x2::new(0, 1); + let b = u64x2::new(17, 18); + let r: u64x2 
= transmute(vpaddq_u64(transmute(a), transmute(b))); + let e = u64x2::new(1, 35); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vpaddq_u8() { let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b = i8x16::new( diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index bd78a973e8..2f93c65262 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -1780,7 +1780,67 @@ pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t { vclsq_s32_(a) } -/// Signed count leading sign bits +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_u8(a: uint8x8_t) -> uint8x8_t { + transmute(vcls_s8(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_u8(a: uint8x16_t) -> uint8x16_t { + transmute(vclsq_s8(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_u16(a: uint16x4_t) -> uint16x4_t { + transmute(vcls_s16(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_u16(a: uint16x8_t) -> uint16x8_t { + transmute(vclsq_s16(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_u32(a: uint32x2_t) -> uint32x2_t { + transmute(vcls_s32(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_u32(a: uint32x4_t) -> uint32x4_t { + transmute(vclsq_s32(transmute(a))) +} + +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1790,7 +1850,7 @@ pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t { vclz_s8_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1800,7 +1860,7 @@ pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t { vclzq_s8_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1810,7 +1870,7 @@ 
pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t { vclz_s16_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1820,7 +1880,7 @@ pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t { vclzq_s16_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1830,7 +1890,7 @@ pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t { vclz_s32_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1840,7 +1900,7 @@ pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t { vclzq_s32_(a) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1850,7 +1910,7 @@ pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t { transmute(vclz_s8_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1860,7 +1920,7 @@ pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { transmute(vclzq_s8_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1870,7 +1930,7 @@ pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t { transmute(vclz_s16_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1880,7 +1940,7 @@ pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { transmute(vclzq_s16_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1890,7 +1950,7 @@ pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t { transmute(vclz_s32_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -2014,6 +2074,16 @@ pub unsafe fn vcreate_s8(a: u64) -> int8x8_t { transmute(a) } +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vcreate_s16(a: u64) -> int16x4_t { + transmute(a) +} + /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] @@ -2044,6 +2114,16 @@ pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t { transmute(a) } +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t { + transmute(a) +} + /// Insert vector element from another vector element #[inline] 
#[target_feature(enable = "neon")] @@ -17504,6 +17584,262 @@ pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t { vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap())) } +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] + fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } +vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i8")] + fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } +vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] + fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } +vqshlu_n_s16_(a, int16x4_t(N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v4i16")] + fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } +vqshlu_n_s16_(a, int16x4_t(N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] + fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } +vqshlu_n_s32_(a, int32x2_t(N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t { + 
static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v2i32")] + fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } +vqshlu_n_s32_(a, int32x2_t(N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] + fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } +vqshlu_n_s64_(a, int64x1_t(N as i64)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v1i64")] + fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } +vqshlu_n_s64_(a, int64x1_t(N as i64)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] + fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } +vqshluq_n_s8_(a, int8x16_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v16i8")] + fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } +vqshluq_n_s8_(a, int8x16_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] + fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } +vqshluq_n_s16_(a, int16x8_t(N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] 
+#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i16")] + fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } +vqshluq_n_s16_(a, int16x8_t(N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] + fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } +vqshluq_n_s32_(a, int32x4_t(N as i32, N as i32, N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v4i32")] + fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } +vqshluq_n_s32_(a, int32x4_t(N as i32, N as i32, N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] + fn vqshluq_n_s64_(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } +vqshluq_n_s64_(a, int64x2_t(N as i64, N as i64)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v2i64")] + fn vqshluq_n_s64_(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } +vqshluq_n_s64_(a, int64x2_t(N as i64, N as i64)) +} + /// Signed saturating shift right narrow #[inline] #[cfg(target_arch = "arm")] @@ -18684,6 +19020,76 @@ pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + transmute(a) 
+} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19064,6 +19470,76 @@ pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> 
poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19304,6 +19780,86 @@ pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn 
vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19544,6 +20100,86 @@ pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19664,6 +20300,96 @@ pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19784,6 +20510,156 @@ pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+    transmute(a)
+}
+
 /// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
@@ -19984,6 +20860,16 @@ pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     transmute(a)
 }
 
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+    transmute(a)
+}
+
 /// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
@@ -20184,6 +21070,16 @@ pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     transmute(a)
 }
 
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+    transmute(a)
+}
+
 /// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
@@ -24765,6 +25661,54 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_u8() {
+        let a: u8x8 = u8x8::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: u8x8 = u8x8::new(7, 7, 7, 7, 7, 7, 7, 7);
+        let r: u8x8 = transmute(vcls_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_u8() {
+        let a: u8x16 = u8x16::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF);
+        let e: u8x16 = u8x16::new(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7);
+        let r: u8x16 = transmute(vclsq_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_u16() {
+        let a: u16x4 = u16x4::new(0, 0xFF_FF, 0x00, 0x00);
+        let e: u16x4 = u16x4::new(15, 15, 15, 15);
+        let r: u16x4 = transmute(vcls_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_u16() {
+        let a: u16x8 = u16x8::new(0, 0xFF_FF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: u16x8 = u16x8::new(15, 15, 15, 15, 15, 15, 15, 15);
+        let r: u16x8 = transmute(vclsq_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_u32() {
+        let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let e: u32x2 = u32x2::new(31, 31);
+        let r: u32x2 = transmute(vcls_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_u32() {
+        let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x00, 0x00);
+        let e: u32x4 = u32x4::new(31, 31, 31, 31);
+        let r: u32x4 = transmute(vclsq_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vclz_s8() {
         let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
@@ -24941,6 +25885,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcreate_s16() {
+        let a: u64 = 1;
+        let e: i16x4 = i16x4::new(1, 0, 0, 0);
+        let r: i16x4 = transmute(vcreate_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vcreate_s32() {
         let a: u64 = 1;
@@ -24965,6 +25917,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcreate_u16() {
+        let a: u64 = 1;
+        let e: u16x4 = u16x4::new(1, 0, 0, 0);
+        let r: u16x4 = transmute(vcreate_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vcreate_u32() {
         let a: u64 = 1;
@@ -33993,6 +34953,70 @@ mod test {
         assert_eq!(r, e);
    }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqshlu_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
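The expected vectors in these vqshlu tests follow from the scalar semantics of SQSHLU: shift the signed input left by N, then saturate into the unsigned range. A reference model for one 8-bit lane, assuming N ≤ 7 (the helper name is invented, not part of the patch):

```rust
// Hypothetical reference model for one lane of vqshlu_n_s8::<N>.
fn sqshlu_lane_i8(a: i8, n: u32) -> u8 {
    if a < 0 {
        0 // negative inputs saturate to zero
    } else {
        let wide = (a as u16) << n; // widen first so no bits are lost
        if wide > u8::MAX as u16 { u8::MAX } else { wide as u8 }
    }
}
// e.g. sqshlu_lane_i8(7, 2) == 28, matching the last u8x8 lane above.
```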
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u16x4 = u16x4::new(0, 4, 8, 12);
+        let r: u16x4 = transmute(vqshlu_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: u32x2 = u32x2::new(0, 4);
+        let r: u32x2 = transmute(vqshlu_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vqshlu_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqshluq_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqshluq_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: u32x4 = u32x4::new(0, 4, 8, 12);
+        let r: u32x4 = transmute(vqshluq_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u64x2 = u64x2::new(0, 4);
+        let r: u64x2 = transmute(vqshluq_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqshrn_n_s16() {
         let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
@@ -34725,6 +35749,62 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vreinterpret_s32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vreinterpret_u32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i32x4 = i32x4::new(0, 0, 1, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u32x4 = u32x4::new(0, 0, 1, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_p128() {
+        let a: p128 = 0;
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vreinterpretq_s64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_p128() {
+        let a: p128 = 0;
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vreinterpretq_u64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p128() {
+        let a: p128 = 0;
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vreinterpretq_p64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
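The expectations in tests like test_vreinterpretq_s32_p64 encode little-endian lane splitting: one 64-bit lane becomes two 32-bit lanes with its low half in the lower-numbered lane, so i64x2(0, 1) reads back as i32x4(0, 0, 1, 0). A hedged sketch of the same check through the public API (function name invented):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,aes")]
unsafe fn lane_split_sketch() {
    use core::arch::aarch64::*;
    let p: poly64x2_t = core::mem::transmute([0u64, 1u64]);
    let r: uint32x4_t = vreinterpretq_u32_p64(p);
    // Lane 2 holds the low 32 bits of source lane 1 on little-endian.
    assert_eq!(vgetq_lane_u32::<2>(r), 1);
}
```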
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s16_p8() {
         let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
@@ -35029,6 +36109,62 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s32() {
+        let a: i32x2 = i32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u32() {
+        let a: u32x2 = u32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s64() {
+        let a: i64x2 = i64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u64() {
+        let a: u64x2 = u64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p64() {
+        let a: i64x2 = i64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s8_s32() {
         let a: i32x2 = i32x2::new(0, 1);
@@ -35221,6 +36357,70 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u16x4 = u16x4::new(0, 0, 0, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p128() {
+        let a: p128 = 0;
+        let e: i32x4 = i32x4::new(0, 0, 0, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p128() {
+        let a: p128 = 0;
+        let e: u32x4 = u32x4::new(0, 0, 0, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s32_p8() {
         let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
@@ -35413,6 +36613,70 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s8_s64() {
         let a: i64x1 = i64x1::new(0);
@@ -35509,6 +36773,78 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p128() {
+        let a: p128 = 0;
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p128() {
+        let a: p128 = 0;
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_p128() {
+        let a: p128 = 0;
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s64_p8() {
         let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -35605,6 +36941,126 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
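Since `p128` is carried as a plain `u128`, the p128 cases need no vector constructor: the integer's least-significant byte lines up with byte lane 0 on little-endian, which is why `p128 = 1` round-trips to a vector with 1 in lane 0 in the tests below. A small sketch (function name invented):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,aes")]
unsafe fn p128_roundtrip_sketch() {
    use core::arch::aarch64::*;
    let a: p128 = 1;
    let v: uint8x16_t = vreinterpretq_u8_p128(a);
    assert_eq!(vgetq_lane_u8::<0>(v), 1); // little-endian low byte
    assert_eq!(vreinterpretq_p128_u8(v), a); // lossless round-trip
}
```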
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s8() {
+        let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 1;
+        let r: p128 = transmute(vreinterpretq_p128_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u8() {
+        let a: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 1;
+        let r: p128 = transmute(vreinterpretq_p128_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p8() {
+        let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 1;
+        let r: p128 = transmute(vreinterpretq_p128_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_p128() {
+        let a: p128 = 1;
+        let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_p128() {
+        let a: p128 = 1;
+        let e: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_p128() {
+        let a: p128 = 1;
+        let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s8_f32() {
         let a: f32x2 = f32x2::new(0., 0.);
@@ -35765,6 +37221,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_f32() {
+        let a: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_f32_s8() {
         let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -35925,6 +37389,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_f32_p128() {
+        let a: p128 = 0;
+        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let r: f32x4 = transmute(vreinterpretq_f32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vrshl_s8() {
         let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index 933d2bc41f..729602f724 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -888,7 +888,17 @@ link-arm = vcls._EXT_
 link-aarch64 = cls._EXT_
 generate int*_t
 
-/// Signed count leading sign bits
+/// Count leading sign bits
+name = vcls
+multi_fn = transmute, {vcls-signed-noext, {transmute, a}}
+a = MIN, MAX, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
+validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1
+
+arm = vcls
+aarch64 = cls
+generate uint*_t
+
+/// Count leading zero bits
 name = vclz
 multi_fn = self-signed-ext, a
 a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
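The new unsigned vcls entry reuses the signed implementation through a transmute, which is sound because CLS only inspects the bit pattern. A scalar reference model for one 8-bit lane (helper name hypothetical); it reproduces the `BITS_M1` expectations, e.g. 7 for both 0x00 and 0xFF:

```rust
// Count leading sign bits: the bits after the sign bit that equal it.
// Equivalent to CLZ(x ^ (x >> 1)) - 1 with an arithmetic shift.
fn cls_u8_lane(x: u8) -> u8 {
    let s = x as i8;
    (((s ^ (s >> 1)) as u8).leading_zeros() - 1) as u8
}
// cls_u8_lane(0x00) == 7 and cls_u8_lane(0xFF) == 7.
```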
@@ -898,7 +908,7 @@ arm = vclz.
 aarch64 = clz
 generate int*_t
 
-/// Unsigned count leading sign bits
+/// Count leading zero bits
 name = vclz
 multi_fn = transmute, {self-signed-ext, transmute(a)}
 a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
@@ -1089,8 +1099,8 @@ validate 1, 0, 0, 0, 0, 0, 0, 0
 
 aarch64 = nop
 arm = nop
-generate u64:int8x8_t, u64:int16x4_t: u64:int32x2_t, u64:int64x1_t
-generate u64:uint8x8_t, u64:uint16x4_t: u64:uint32x2_t, u64:uint64x1_t
+generate u64:int8x8_t, u64:int16x4_t, u64:int32x2_t, u64:int64x1_t
+generate u64:uint8x8_t, u64:uint16x4_t, u64:uint32x2_t, u64:uint64x1_t
 generate u64:poly8x8_t, u64:poly16x4_t
 target = aes
 generate u64:poly64x1_t
@@ -5933,6 +5943,38 @@ validate 4
 aarch64 = uqshl
 generate u8, u16, u32, u64
 
+/// Signed saturating shift left unsigned
+name = vqshlu
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+n = 2
+validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+arm-aarch64-separate
+
+aarch64 = sqshlu
+link-aarch64 = sqshlu._EXT_
+const-aarch64 = {dup-in_len-N as ttn}
+arm = vqshlu
+link-arm = vqshiftsu._EXT_
+const-arm = N as ttn
+generate int8x8_t:uint8x8_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
+generate int8x16_t:uint8x16_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
+
+/// Signed saturating shift left unsigned
+name = vqshlu
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+multi_fn = simd_extract, {vqshlu_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
+a = 1
+n = 2
+validate 4
+
+aarch64 = sqshlu
+generate i8:u8, i16:u16, i32:u32, i64:u64
+
 /// Signed saturating shift right narrow
 name = vqshrn
 noq-n-suffix
@@ -6216,9 +6258,6 @@ a = 0, 1, 2, 3, 4, 5, 6, 7
 validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
 
 aarch64 = nop
-generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
-generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
-
 arm = nop
 generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
 generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
@@ -6226,6 +6265,10 @@ generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x
 generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
 generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
 generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
+target = aes
+generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
+generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
+generate p128:int64x2_t, p128:uint64x2_t, p128:poly64x2_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6235,9 +6278,6 @@ a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
 validate 0, 1, 2, 3, 4, 5, 6, 7
 
 aarch64 = nop
-generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
-generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
-
 arm = nop
 generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
 generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
@@ -6245,6 +6285,10 @@ generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16
 generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
 generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
 generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
+target = aes
+generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
+generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
+generate int64x2_t:p128, uint64x2_t:p128, poly64x2_t:p128
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6254,9 +6298,6 @@ a = 0, 1, 2, 3
 validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
 
 aarch64 = nop
-generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
-generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
-
 arm = nop
 generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
 generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
@@ -6264,6 +6305,10 @@ generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64
 generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
 generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
 generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
+target = aes
+generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
+generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
+generate p128:int32x4_t, p128:uint32x4_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6273,14 +6318,15 @@ a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
 validate 0, 1, 2, 3
 
 aarch64 = nop
-generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
-generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
-
 arm = nop
 generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
 generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
 generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
 generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
+target = aes
+generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
+generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
+generate int32x4_t:p128, uint32x4_t:p128
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6290,12 +6336,13 @@ a = 0, 1
 validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
 
 aarch64 = nop
-generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
-generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
-
 arm = nop
 generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
 generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
+target = aes
+generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
+generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
+generate p128:int16x8_t, p128:uint16x8_t, p128:poly16x8_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6305,12 +6352,37 @@ a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
 validate 0, 1
 
 aarch64 = nop
+arm = nop
+generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
+generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
+target = aes
 generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
 generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
-
+generate int16x8_t:p128, uint16x8_t:p128, poly16x8_t:p128
+
+/// Vector reinterpret cast operation
+name = vreinterpret
+double-suffixes
+fn = transmute
+a = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+validate 1
+target = aes
+
+aarch64 = nop
 arm = nop
-generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
-generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
+generate int8x16_t:p128, uint8x16_t:p128, poly8x16_t:p128
+
+/// Vector reinterpret cast operation
+name = vreinterpret
+double-suffixes
+fn = transmute
+a = 1
+validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+target = aes
+
+aarch64 = nop
+arm = nop
+generate p128:int8x16_t, p128:uint8x16_t, p128:poly8x16_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6326,6 +6398,7 @@ generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t,
 generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
 generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
 generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
+generate float64x2_t:p128
 
 arm = nop
 generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
@@ -6334,6 +6407,7 @@ generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t,
 generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
 generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
 generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
+generate float32x4_t:p128
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6349,6 +6423,7 @@ generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t,
 generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
 generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
 generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
+generate p128:float64x2_t
 
 arm = nop
 generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
@@ -6357,6 +6432,7 @@ generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t,
 generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
 generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
 generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
+generate p128:float32x4_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -7447,3 +7523,31 @@ validate 7
 aarch64 = sqabs
 link-aarch64 = sqabs._EXT_
 generate i32:i32, i64:i64
+
+/// Shift left and insert
+name = vsli
+n-suffix
+constn = N
+multi_fn = static_assert-N-0-63
+multi_fn = transmute, {vsli_n-in_ntt-::<N>, transmute(a), transmute(b)}
+a = 333
+b = 2042
+n = 2
+validate 8169
+
+aarch64 = sli
+generate i64, u64
+
+/// Shift right and insert
+name = vsri
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = transmute, {vsri_n-in_ntt-::<N>, transmute(a), transmute(b)}
+a = 333
+b = 2042
+n = 2
+validate 510
+
+aarch64 = sri
+generate i64, u64
\ No newline at end of file
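The vsli/vsri expectations check out by hand: SLI with N = 2 keeps the low two bits of `a` and inserts `b << 2`, so (2042 << 2) | (333 & 0b11) = 8168 | 1 = 8169; SRI with N = 2 keeps the top two bits of `a` (zero here for 333) and inserts `b >> 2` = 510. A hedged model of the two operations on u64 (helper names invented):

```rust
// Shift-and-insert reference models matching the spec's validate lines.
fn sli_u64(a: u64, b: u64, n: u32) -> u64 {
    let mask = (1u64 << n) - 1; // the low n bits of `a` survive
    (b << n) | (a & mask)
}
fn sri_u64(a: u64, b: u64, n: u32) -> u64 {
    let mask = !(u64::MAX >> n); // the high n bits of `a` survive
    (b >> n) | (a & mask)
}
// sli_u64(333, 2042, 2) == 8169 and sri_u64(333, 2042, 2) == 510.
```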
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
index 4ef3cb091d..2142e7a5a0 100644
--- a/crates/stdarch-gen/src/main.rs
+++ b/crates/stdarch-gen/src/main.rs
@@ -2022,7 +2022,7 @@ fn gen_arm(
         link_arm, current_fn, arm_ext_inputs, arm_ext_output,
     ));
     let (aarch64_ext_inputs, aarch64_ext_output) = {
-        if const_aarch64.is_some() {
+        if let Some(const_aarch64) = const_aarch64 {
             if !matches!(fn_type, Fntype::Normal) {
                 let ptr_type = match fn_type {
                     Fntype::Load => "*const i8",
@@ -2047,6 +2047,19 @@ fn gen_arm(
                 format!(" -> {}", out_t)
             };
             (inputs, out)
+        } else if const_aarch64.contains("dup-in_len-N as ttn") {
+            (
+                match para_num {
+                    1 => format!("a: {}, n: {}", in_t[0], in_t[0]),
+                    2 => format!("a: {}, b: {}, n: {}", in_t[0], in_t[1], in_t[1]),
+                    3 => format!(
+                        "a: {}, b: {}, c: {}, n: {}",
+                        in_t[0], in_t[1], in_t[2], in_t[1]
+                    ),
+                    _ => unimplemented!("unknown para_num"),
+                },
+                format!(" -> {}", out_t),
+            )
         } else {
             (
                 match para_num {
@@ -2268,6 +2281,18 @@ fn gen_arm(
             subs,
             constn.as_deref().unwrap()
         )
+    } else if const_aarch64.contains("dup-in_len-N as ttn") {
+        let const_aarch64 = format!("N as {}", type_to_native_type(in_t[1]));
+        let mut cnt = String::from(in_t[1]);
+        cnt.push_str("(");
+        for i in 0..type_len(in_t[1]) {
+            if i != 0 {
+                cnt.push_str(", ");
+            }
+            cnt.push_str(&const_aarch64);
+        }
+        cnt.push_str(")");
+        format!("{}(a, {})", current_fn, cnt)
     } else {
         match para_num {
             1 => format!("{}(a, {})", current_fn, const_aarch64),
diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs
index ce7039ce72..a170ea4e62 100644
--- a/crates/stdarch-verify/tests/arm.rs
+++ b/crates/stdarch-verify/tests/arm.rs
@@ -454,6 +454,9 @@ fn verify_all_signatures() {
         "vreinterpret_p64_s64",
         "vreinterpret_f32_p64",
         "vreinterpretq_f32_p64",
+        "vreinterpretq_p64_p128",
+        "vreinterpretq_p128_p64",
+        "vreinterpretq_f32_p128",
         "vqrdmlahh_s16",
         "vqrdmlahs_s32",
         "vqrdmlahh_lane_s16",
@@ -585,6 +588,12 @@ fn verify_all_signatures() {
         "vrnd64xq_f32",
         "vrnd64z_f32",
         "vrnd64zq_f32",
+        "vcls_u8",
+        "vcls_u16",
+        "vcls_u32",
+        "vclsq_u8",
+        "vclsq_u16",
+        "vclsq_u32",
         "__dbg",
     ];
     let arm = match map.get(rust.name) {
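For orientation, the `dup-in_len-N as ttn` branch added above makes the generated aarch64 binding pass the immediate as a splatted vector constant, because the underlying LLVM intrinsic takes a vector shift operand. A rough sketch of the shape stdarch-gen produces for vqshlu_n_s8 (illustrative fragment only; the exact generated text and link name are assumptions based on the spec's `link-aarch64 = sqshlu._EXT_`):

```rust
// Approximate generated binding: note the vector-typed shift operand.
#[allow(improper_ctypes)]
extern "unadjusted" {
    #[link_name = "llvm.aarch64.neon.sqshlu.v8i8"]
    fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
}
// The public wrapper then splats the const generic, which is exactly the
// string the new `cnt` loop in gen_arm assembles:
//     vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8,
//                              N as i8, N as i8, N as i8, N as i8))
```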