diff --git a/crates/core_arch/MISSING.md b/crates/core_arch/MISSING.md index 99eb794a55..c948f3f8c9 100644 --- a/crates/core_arch/MISSING.md +++ b/crates/core_arch/MISSING.md @@ -1,59 +1,5 @@ ## The following neon instructions are currently not implemented in stdarch -### Can be implemented next: - -`vcls_u16` - -`vcls_u32` - -`vcls_u8` - -`vclsq_u16` - -`vclsq_u32` - -`vclsq_u8` - -`vcreate_s16` - -`vcreate_u16` - -`vpaddq_s64` - -`vpaddq_u64` - -`vreinterpretq_p128_f32` - -`vreinterpretq_p128_f64` - -`vreinterpretq_p128_p16` - -`vreinterpretq_p128_p8` - -`vreinterpretq_p128_s16` - -`vreinterpretq_p128_s32` - -`vreinterpretq_p128_s64` - -`vreinterpretq_p128_s8` - -`vreinterpretq_p128_u16` - -`vreinterpretq_p128_u32` - -`vreinterpretq_p128_u64` - -`vreinterpretq_p128_u8` - -`vslid_n_s64` - -`vslid_n_u64` - -`vsrid_n_s64` - -`vsrid_n_u64` - ### Not implemented on arm: `vcadd_rot270_f32` @@ -168,27 +114,3 @@ `vusdotq_s32v` -`vqshlu_n_s16` - -`vqshlu_n_s32` - -`vqshlu_n_s64` - -`vqshlu_n_s8` - -`vqshlub_n_s8` - -`vqshlud_n_s64` - -`vqshluh_n_s16` - -`vqshluq_n_s16` - -`vqshluq_n_s32` - -`vqshluq_n_s64` - -`vqshluq_n_s8` - -`vqshlus_n_s32` - diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index bdf6158bb2..5bfa4fa59b 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -10388,6 +10388,46 @@ pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 { simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0) } +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 { + static_assert_imm3!(N); + simd_extract(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0) +} + +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 { + static_assert_imm4!(N); + simd_extract(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0) +} + +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 { + static_assert_imm5!(N); + simd_extract(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0) +} + +/// Signed saturating shift left unsigned +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 { + static_assert_imm6!(N); + simd_extract(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0) +} + /// Signed saturating shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -10950,262 +10990,6 @@ pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { transmute(a) } -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn 
vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, 
assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { - transmute(a) -} - /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -11398,6 +11182,14 @@ pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -11590,6 +11382,14 @@ pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -13393,6 +13193,46 @@ pub unsafe fn 
vqabsd_s64(a: i64) -> i64 { vqabsd_s64_(a) } +/// Shift left and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N : i32 where N >= 0 && N <= 63); + transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) +} + +/// Shift left and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { + static_assert!(N : i32 where N >= 0 && N <= 63); + transmute(vsli_n_u64::<N>(transmute(a), transmute(b))) +} + +/// Shift right and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + transmute(vsri_n_s64::<N>(transmute(a), transmute(b))) +} + +/// Shift right and insert +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + transmute(vsri_n_u64::<N>(transmute(a), transmute(b))) +} + #[cfg(test)] mod test { use super::*; @@ -21742,6 +21582,38 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshlub_n_s8() { + let a: i8 = 1; + let e: u8 = 4; + let r: u8 = transmute(vqshlub_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshluh_n_s16() { + let a: i16 = 1; + let e: u16 = 4; + let r: u16 = transmute(vqshluh_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlus_n_s32() { + let a: i32 = 1; + let e: u32 = 4; + let r: u32 = transmute(vqshlus_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlud_n_s64() { + let a: i64 = 1; + let e: u64 = 4; + let r: u64 = transmute(vqshlud_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqshrnd_n_s64() { let a: i64 = 0; @@ -22179,262 +22051,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_p64() { - let a: i64x1 = i64x1::new(0); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vreinterpret_s32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_p64() { - let a: i64x1 = i64x1::new(0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vreinterpret_u32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i32x4 = i32x4::new(0, 0, 1, 0); - let r: i32x4 = transmute(vreinterpretq_s32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u32x4 = u32x4::new(0, 0, 1, 0); - let r: u32x4 = transmute(vreinterpretq_u32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s32() { - let a: i32x2 = i32x2::new(0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s32(transmute(a))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u32() { - let a: u32x2 = u32x2::new(0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_s32() { - let a: i32x4 = i32x4::new(0, 0, 1, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u32() { - let a: u32x4 = u32x4::new(0, 0, 1, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_p64() { - let a: i64x1 = i64x1::new(0); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_s16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_p64() { - let a: i64x1 = i64x1::new(0); - let e: u16x4 = u16x4::new(0, 0, 0, 0); - let r: u16x4 = transmute(vreinterpret_u16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p16_p64() { - let a: i64x1 = i64x1::new(0); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_p16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_s16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: u16x8 = transmute(vreinterpretq_u16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = 
i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_p64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_s8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_p64() { - let a: i64x1 = i64x1::new(0); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vreinterpret_u8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_p64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_s8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u8(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_s8_f64() { let a: f64 = 0.; @@ -22627,6 +22243,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_f64() { + let a: f64x2 = f64x2::new(0., 0.); + let e: p128 = 0; + let r: p128 = 
transmute(vreinterpretq_p128_f64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_f64_s8() { let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); @@ -22819,6 +22443,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f64_p128() { + let a: p128 = 0; + let e: f64x2 = f64x2::new(0., 0.); + let r: f64x2 = transmute(vreinterpretq_f64_p128(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_f64_f32() { let a: f32x2 = f32x2::new(0., 0.); @@ -24610,4 +24242,40 @@ mod test { let r: i64 = transmute(vqabsd_s64(transmute(a))); assert_eq!(r, e); } + + #[simd_test(enable = "neon")] + unsafe fn test_vslid_n_s64() { + let a: i64 = 333; + let b: i64 = 2042; + let e: i64 = 8169; + let r: i64 = transmute(vslid_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vslid_n_u64() { + let a: u64 = 333; + let b: u64 = 2042; + let e: u64 = 8169; + let r: u64 = transmute(vslid_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsrid_n_s64() { + let a: i64 = 333; + let b: i64 = 2042; + let e: i64 = 510; + let r: i64 = transmute(vsrid_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsrid_n_u64() { + let a: u64 = 333; + let b: u64 = 2042; + let e: u64 = 510; + let r: u64 = transmute(vsrid_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } } diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index d23e43c435..ff895f9875 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -103,6 +103,8 @@ extern "unadjusted" { fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; #[link_name = "llvm.aarch64.neon.addp.v4i32"] fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[link_name = "llvm.aarch64.neon.addp.v2i64"] + fn vpaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -1137,6 +1139,20 @@ pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(addp))] +pub unsafe fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + vpaddq_s64_(a, b) +} +/// Add pairwise +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(addp))] +pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + transmute(vpaddq_s64_(transmute(a), transmute(b))) +} +/// Add pairwise +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(addp))] pub unsafe fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { vpaddq_s8_(a, b) } @@ -3488,6 +3504,14 @@ mod tests { assert_eq!(r, e); } #[simd_test(enable = "neon")] + unsafe fn test_vpaddq_s64() { + let a = i64x2::new(1, 2); + let b = i64x2::new(0, -1); + let r: i64x2 = transmute(vpaddq_s64(transmute(a), transmute(b))); + let e = i64x2::new(3, -1); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vpaddq_s8() { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let b = i8x16::new( @@ -3516,6 +3540,14 @@ mod tests { assert_eq!(r, e); } #[simd_test(enable = "neon")] + unsafe fn test_vpaddq_u64() { + let a = u64x2::new(0, 1); + let b = u64x2::new(17, 18); + let r: u64x2 
= transmute(vpaddq_u64(transmute(a), transmute(b))); + let e = u64x2::new(1, 35); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vpaddq_u8() { let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b = i8x16::new( diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index bd78a973e8..2f93c65262 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -1780,7 +1780,67 @@ pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t { vclsq_s32_(a) } -/// Signed count leading sign bits +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_u8(a: uint8x8_t) -> uint8x8_t { + transmute(vcls_s8(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_u8(a: uint8x16_t) -> uint8x16_t { + transmute(vclsq_s8(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_u16(a: uint16x4_t) -> uint16x4_t { + transmute(vcls_s16(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_u16(a: uint16x8_t) -> uint16x8_t { + transmute(vclsq_s16(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_u32(a: uint32x2_t) -> uint32x2_t { + transmute(vcls_s32(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_u32(a: uint32x4_t) -> uint32x4_t { + transmute(vclsq_s32(transmute(a))) +} + +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1790,7 +1850,7 @@ pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t { vclz_s8_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1800,7 +1860,7 @@ pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t { vclzq_s8_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1810,7 +1870,7 @@ 
pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t { vclz_s16_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1820,7 +1880,7 @@ pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t { vclzq_s16_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1830,7 +1890,7 @@ pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t { vclz_s32_(a) } -/// Signed count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1840,7 +1900,7 @@ pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t { vclzq_s32_(a) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1850,7 +1910,7 @@ pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t { transmute(vclz_s8_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1860,7 +1920,7 @@ pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { transmute(vclzq_s8_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1870,7 +1930,7 @@ pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t { transmute(vclz_s16_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1880,7 +1940,7 @@ pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { transmute(vclzq_s16_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -1890,7 +1950,7 @@ pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t { transmute(vclz_s32_(transmute(a))) } -/// Unsigned count leading sign bits +/// Count leading zero bits #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -2014,6 +2074,16 @@ pub unsafe fn vcreate_s8(a: u64) -> int8x8_t { transmute(a) } +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vcreate_s16(a: u64) -> int16x4_t { + transmute(a) +} + /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] @@ -2044,6 +2114,16 @@ pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t { transmute(a) } +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t { + transmute(a) +} + /// Insert vector element from another vector element #[inline] 
#[target_feature(enable = "neon")] @@ -17504,6 +17584,262 @@ pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t { vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap())) } +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] + fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } +vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i8")] + fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } +vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] + fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } +vqshlu_n_s16_(a, int16x4_t(N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v4i16")] + fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } +vqshlu_n_s16_(a, int16x4_t(N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] + fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } +vqshlu_n_s32_(a, int32x2_t(N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t { + 
static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v2i32")] + fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } +vqshlu_n_s32_(a, int32x2_t(N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] + fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } +vqshlu_n_s64_(a, int64x1_t(N as i64)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v1i64")] + fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } +vqshlu_n_s64_(a, int64x1_t(N as i64)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] + fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } +vqshluq_n_s8_(a, int8x16_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v16i8")] + fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } +vqshluq_n_s8_(a, int8x16_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] + fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } +vqshluq_n_s16_(a, int16x8_t(N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] 
+#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i16")] + fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } +vqshluq_n_s16_(a, int16x8_t(N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] + fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } +vqshluq_n_s32_(a, int32x4_t(N as i32, N as i32, N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v4i32")] + fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } +vqshluq_n_s32_(a, int32x4_t(N as i32, N as i32, N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] + fn vqshluq_n_s64_(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } +vqshluq_n_s64_(a, int64x2_t(N as i64, N as i64)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v2i64")] + fn vqshluq_n_s64_(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } +vqshluq_n_s64_(a, int64x2_t(N as i64, N as i64)) +} + /// Signed saturating shift right narrow #[inline] #[cfg(target_arch = "arm")] @@ -18684,6 +19020,76 @@ pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + transmute(a) 
+} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19064,6 +19470,76 @@ pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> 
poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19304,6 +19780,86 @@ pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn 
vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19544,6 +20100,86 @@ pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19664,6 +20300,96 @@ pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + transmute(a) +} + /// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] @@ -19784,6 +20510,156 @@ pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { transmute(a) } +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+    transmute(a)
+}
+
 /// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
@@ -19984,6 +20860,16 @@ pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     transmute(a)
 }
 
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+    transmute(a)
+}
+
 /// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
@@ -20184,6 +21070,16 @@ pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     transmute(a)
 }
 
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+    transmute(a)
+}
+
 /// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
@@ -24765,6 +25661,54 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_u8() {
+        let a: u8x8 = u8x8::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: u8x8 = u8x8::new(7, 7, 7, 7, 7, 7, 7, 7);
+        let r: u8x8 = transmute(vcls_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_u8() {
+        let a: u8x16 = u8x16::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF);
+        let e: u8x16 = u8x16::new(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7);
+        let r: u8x16 = transmute(vclsq_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_u16() {
+        let a: u16x4 = u16x4::new(0, 0xFF_FF, 0x00, 0x00);
+        let e: u16x4 = u16x4::new(15, 15, 15, 15);
+        let r: u16x4 = transmute(vcls_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_u16() {
+        let a: u16x8 = u16x8::new(0, 0xFF_FF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: u16x8 = u16x8::new(15, 15, 15, 15, 15, 15, 15, 15);
+        let r: u16x8 = transmute(vclsq_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_u32() {
+        let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let e: u32x2 = u32x2::new(31, 31);
+        let r: u32x2 = transmute(vcls_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_u32() {
+        let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x00, 0x00);
+        let e: u32x4 = u32x4::new(31, 31, 31, 31);
+        let r: u32x4 = transmute(vclsq_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vclz_s8() {
         let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
@@ -24941,6 +25885,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcreate_s16() {
+        let a: u64 = 1;
+        let e: i16x4 = i16x4::new(1, 0, 0, 0);
+        let r: i16x4 = transmute(vcreate_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vcreate_s32() {
         let a: u64 = 1;
@@ -24965,6 +25917,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcreate_u16() {
+        let a: u64 = 1;
+        let e: u16x4 = u16x4::new(1, 0, 0, 0);
+        let r: u16x4 = transmute(vcreate_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vcreate_u32() {
         let a: u64 = 1;
@@ -33993,6 +34953,70 @@ mod test {
         assert_eq!(r, e);
    }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqshlu_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
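The expected vectors in these vqshlu tests follow from the scalar semantics of SQSHLU: shift the signed input left by N, then saturate into the unsigned range. A reference model for one 8-bit lane, assuming N ≤ 7 (the helper name is invented, not part of the patch):

```rust
// Hypothetical reference model for one lane of vqshlu_n_s8::<N>.
fn sqshlu_lane_i8(a: i8, n: u32) -> u8 {
    if a < 0 {
        0 // negative inputs saturate to zero
    } else {
        let wide = (a as u16) << n; // widen first so no bits are lost
        if wide > u8::MAX as u16 { u8::MAX } else { wide as u8 }
    }
}
// e.g. sqshlu_lane_i8(7, 2) == 28, matching the last u8x8 lane above.
```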
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u16x4 = u16x4::new(0, 4, 8, 12);
+        let r: u16x4 = transmute(vqshlu_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: u32x2 = u32x2::new(0, 4);
+        let r: u32x2 = transmute(vqshlu_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vqshlu_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqshluq_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqshluq_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: u32x4 = u32x4::new(0, 4, 8, 12);
+        let r: u32x4 = transmute(vqshluq_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u64x2 = u64x2::new(0, 4);
+        let r: u64x2 = transmute(vqshluq_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqshrn_n_s16() {
         let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
@@ -34725,6 +35749,62 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vreinterpret_s32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vreinterpret_u32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i32x4 = i32x4::new(0, 0, 1, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u32x4 = u32x4::new(0, 0, 1, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_p128() {
+        let a: p128 = 0;
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vreinterpretq_s64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_p128() {
+        let a: p128 = 0;
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vreinterpretq_u64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p128() {
+        let a: p128 = 0;
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vreinterpretq_p64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
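The expectations in tests like test_vreinterpretq_s32_p64 encode little-endian lane splitting: one 64-bit lane becomes two 32-bit lanes with its low half in the lower-numbered lane, so i64x2(0, 1) reads back as i32x4(0, 0, 1, 0). A hedged sketch of the same check through the public API (function name invented):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,aes")]
unsafe fn lane_split_sketch() {
    use core::arch::aarch64::*;
    let p: poly64x2_t = core::mem::transmute([0u64, 1u64]);
    let r: uint32x4_t = vreinterpretq_u32_p64(p);
    // Lane 2 holds the low 32 bits of source lane 1 on little-endian.
    assert_eq!(vgetq_lane_u32::<2>(r), 1);
}
```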
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s16_p8() {
         let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
@@ -35029,6 +36109,62 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s32() {
+        let a: i32x2 = i32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u32() {
+        let a: u32x2 = u32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s64() {
+        let a: i64x2 = i64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u64() {
+        let a: u64x2 = u64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p64() {
+        let a: i64x2 = i64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s8_s32() {
         let a: i32x2 = i32x2::new(0, 1);
@@ -35221,6 +36357,70 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u16x4 = u16x4::new(0, 0, 0, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p128() {
+        let a: p128 = 0;
+        let e: i32x4 = i32x4::new(0, 0, 0, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p128() {
+        let a: p128 = 0;
+        let e: u32x4 = u32x4::new(0, 0, 0, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s32_p8() {
         let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
@@ -35413,6 +36613,70 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s8_s64() {
         let a: i64x1 = i64x1::new(0);
@@ -35509,6 +36773,78 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p128() {
+        let a: p128 = 0;
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p128() {
+        let a: p128 = 0;
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_p128() {
+        let a: p128 = 0;
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s64_p8() {
         let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -35605,6 +36941,126 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
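Since `p128` is carried as a plain `u128`, the p128 cases need no vector constructor: the integer's least-significant byte lines up with byte lane 0 on little-endian, which is why `p128 = 1` round-trips to a vector with 1 in lane 0 in the tests below. A small sketch (function name invented):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,aes")]
unsafe fn p128_roundtrip_sketch() {
    use core::arch::aarch64::*;
    let a: p128 = 1;
    let v: uint8x16_t = vreinterpretq_u8_p128(a);
    assert_eq!(vgetq_lane_u8::<0>(v), 1); // little-endian low byte
    assert_eq!(vreinterpretq_p128_u8(v), a); // lossless round-trip
}
```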
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s8() {
+        let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 1;
+        let r: p128 = transmute(vreinterpretq_p128_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u8() {
+        let a: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 1;
+        let r: p128 = transmute(vreinterpretq_p128_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p8() {
+        let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: p128 = 1;
+        let r: p128 = transmute(vreinterpretq_p128_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_p128() {
+        let a: p128 = 1;
+        let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_p128() {
+        let a: p128 = 1;
+        let e: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_p128() {
+        let a: p128 = 1;
+        let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_s8_f32() {
         let a: f32x2 = f32x2::new(0., 0.);
@@ -35765,6 +37221,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_f32() {
+        let a: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vreinterpret_f32_s8() {
         let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -35925,6 +37389,14 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_f32_p128() {
+        let a: p128 = 0;
+        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let r: f32x4 = transmute(vreinterpretq_f32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vrshl_s8() {
         let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index 933d2bc41f..729602f724 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -888,7 +888,17 @@ link-arm = vcls._EXT_
 link-aarch64 = cls._EXT_
 generate int*_t
 
-/// Signed count leading sign bits
+/// Count leading sign bits
+name = vcls
+multi_fn = transmute, {vcls-signed-noext, {transmute, a}}
+a = MIN, MAX, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
+validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1
+
+arm = vcls
+aarch64 = cls
+generate uint*_t
+
+/// Count leading zero bits
 name = vclz
 multi_fn = self-signed-ext, a
 a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
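The new unsigned vcls entry reuses the signed implementation through a transmute, which is sound because CLS only inspects the bit pattern. A scalar reference model for one 8-bit lane (helper name hypothetical); it reproduces the `BITS_M1` expectations, e.g. 7 for both 0x00 and 0xFF:

```rust
// Count leading sign bits: the bits after the sign bit that equal it.
// Equivalent to CLZ(x ^ (x >> 1)) - 1 with an arithmetic shift.
fn cls_u8_lane(x: u8) -> u8 {
    let s = x as i8;
    (((s ^ (s >> 1)) as u8).leading_zeros() - 1) as u8
}
// cls_u8_lane(0x00) == 7 and cls_u8_lane(0xFF) == 7.
```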
@@ -898,7 +908,7 @@ arm = vclz.
 aarch64 = clz
 generate int*_t
 
-/// Unsigned count leading sign bits
+/// Count leading zero bits
 name = vclz
 multi_fn = transmute, {self-signed-ext, transmute(a)}
 a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
@@ -1089,8 +1099,8 @@ validate 1, 0, 0, 0, 0, 0, 0, 0
 
 aarch64 = nop
 arm = nop
-generate u64:int8x8_t, u64:int16x4_t: u64:int32x2_t, u64:int64x1_t
-generate u64:uint8x8_t, u64:uint16x4_t: u64:uint32x2_t, u64:uint64x1_t
+generate u64:int8x8_t, u64:int16x4_t, u64:int32x2_t, u64:int64x1_t
+generate u64:uint8x8_t, u64:uint16x4_t, u64:uint32x2_t, u64:uint64x1_t
 generate u64:poly8x8_t, u64:poly16x4_t
 target = aes
 generate u64:poly64x1_t
@@ -5933,6 +5943,38 @@ validate 4
 aarch64 = uqshl
 generate u8, u16, u32, u64
 
+/// Signed saturating shift left unsigned
+name = vqshlu
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+n = 2
+validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+arm-aarch64-separate
+
+aarch64 = sqshlu
+link-aarch64 = sqshlu._EXT_
+const-aarch64 = {dup-in_len-N as ttn}
+arm = vqshlu
+link-arm = vqshiftsu._EXT_
+const-arm = N as ttn
+generate int8x8_t:uint8x8_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
+generate int8x16_t:uint8x16_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
+
+/// Signed saturating shift left unsigned
+name = vqshlu
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+multi_fn = simd_extract, {vqshlu_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
+a = 1
+n = 2
+validate 4
+
+aarch64 = sqshlu
+generate i8:u8, i16:u16, i32:u32, i64:u64
+
 /// Signed saturating shift right narrow
 name = vqshrn
 noq-n-suffix
@@ -6216,9 +6258,6 @@ a = 0, 1, 2, 3, 4, 5, 6, 7
 validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
 
 aarch64 = nop
-generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
-generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
-
 arm = nop
 generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
 generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
@@ -6226,6 +6265,10 @@ generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x
 generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
 generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
 generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
+target = aes
+generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
+generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
+generate p128:int64x2_t, p128:uint64x2_t, p128:poly64x2_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6235,9 +6278,6 @@ a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
 validate 0, 1, 2, 3, 4, 5, 6, 7
 
 aarch64 = nop
-generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
-generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
-
 arm = nop
 generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
 generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
@@ -6245,6 +6285,10 @@ generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16
 generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
 generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
 generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
+target = aes
+generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
+generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
+generate int64x2_t:p128, uint64x2_t:p128, poly64x2_t:p128
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6254,9 +6298,6 @@ a = 0, 1, 2, 3
 validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
 
 aarch64 = nop
-generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
-generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
-
 arm = nop
 generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
 generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
@@ -6264,6 +6305,10 @@ generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64
 generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
 generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
 generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
+target = aes
+generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
+generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
+generate p128:int32x4_t, p128:uint32x4_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6273,14 +6318,15 @@ a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
 validate 0, 1, 2, 3
 
 aarch64 = nop
-generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
-generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
-
 arm = nop
 generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
 generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
 generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
 generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
+target = aes
+generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
+generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
+generate int32x4_t:p128, uint32x4_t:p128
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6290,12 +6336,13 @@ a = 0, 1
 validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
 
 aarch64 = nop
-generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
-generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
-
 arm = nop
 generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
 generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
+target = aes
+generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
+generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
+generate p128:int16x8_t, p128:uint16x8_t, p128:poly16x8_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6305,12 +6352,37 @@ a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
 validate 0, 1
 
 aarch64 = nop
+arm = nop
+generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
+generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
+target = aes
 generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
 generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
-
+generate int16x8_t:p128, uint16x8_t:p128, poly16x8_t:p128
+
+/// Vector reinterpret cast operation
+name = vreinterpret
+double-suffixes
+fn = transmute
+a = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+validate 1
+target = aes
+
+aarch64 = nop
 arm = nop
-generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
-generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
+generate int8x16_t:p128, uint8x16_t:p128, poly8x16_t:p128
+
+/// Vector reinterpret cast operation
+name = vreinterpret
+double-suffixes
+fn = transmute
+a = 1
+validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+target = aes
+
+aarch64 = nop
+arm = nop
+generate p128:int8x16_t, p128:uint8x16_t, p128:poly8x16_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6326,6 +6398,7 @@ generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t,
 generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
 generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
 generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
+generate float64x2_t:p128
 
 arm = nop
 generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
@@ -6334,6 +6407,7 @@ generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t,
 generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
 generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
 generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
+generate float32x4_t:p128
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -6349,6 +6423,7 @@ generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t,
 generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
 generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
 generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
+generate p128:float64x2_t
 
 arm = nop
 generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
@@ -6357,6 +6432,7 @@ generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t,
 generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
 generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
 generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
+generate p128:float32x4_t
 
 /// Vector reinterpret cast operation
 name = vreinterpret
@@ -7447,3 +7523,31 @@ validate 7
 aarch64 = sqabs
 link-aarch64 = sqabs._EXT_
 generate i32:i32, i64:i64
+
+/// Shift left and insert
+name = vsli
+n-suffix
+constn = N
+multi_fn = static_assert-N-0-63
+multi_fn = transmute, {vsli_n-in_ntt-::<N>, transmute(a), transmute(b)}
+a = 333
+b = 2042
+n = 2
+validate 8169
+
+aarch64 = sli
+generate i64, u64
+
+/// Shift right and insert
+name = vsri
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = transmute, {vsri_n-in_ntt-::<N>, transmute(a), transmute(b)}
+a = 333
+b = 2042
+n = 2
+validate 510
+
+aarch64 = sri
+generate i64, u64
\ No newline at end of file
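The vsli/vsri expectations check out by hand: SLI with N = 2 keeps the low two bits of `a` and inserts `b << 2`, so (2042 << 2) | (333 & 0b11) = 8168 | 1 = 8169; SRI with N = 2 keeps the top two bits of `a` (zero here for 333) and inserts `b >> 2` = 510. A hedged model of the two operations on u64 (helper names invented):

```rust
// Shift-and-insert reference models matching the spec's validate lines.
fn sli_u64(a: u64, b: u64, n: u32) -> u64 {
    let mask = (1u64 << n) - 1; // the low n bits of `a` survive
    (b << n) | (a & mask)
}
fn sri_u64(a: u64, b: u64, n: u32) -> u64 {
    let mask = !(u64::MAX >> n); // the high n bits of `a` survive
    (b >> n) | (a & mask)
}
// sli_u64(333, 2042, 2) == 8169 and sri_u64(333, 2042, 2) == 510.
```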
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
index 4ef3cb091d..2142e7a5a0 100644
--- a/crates/stdarch-gen/src/main.rs
+++ b/crates/stdarch-gen/src/main.rs
@@ -2022,7 +2022,7 @@ fn gen_arm(
         link_arm, current_fn, arm_ext_inputs, arm_ext_output,
     ));
     let (aarch64_ext_inputs, aarch64_ext_output) = {
-        if const_aarch64.is_some() {
+        if let Some(const_aarch64) = const_aarch64 {
             if !matches!(fn_type, Fntype::Normal) {
                 let ptr_type = match fn_type {
                     Fntype::Load => "*const i8",
@@ -2047,6 +2047,19 @@ fn gen_arm(
                 format!(" -> {}", out_t)
             };
             (inputs, out)
+        } else if const_aarch64.contains("dup-in_len-N as ttn") {
+            (
+                match para_num {
+                    1 => format!("a: {}, n: {}", in_t[0], in_t[0]),
+                    2 => format!("a: {}, b: {}, n: {}", in_t[0], in_t[1], in_t[1]),
+                    3 => format!(
+                        "a: {}, b: {}, c: {}, n: {}",
+                        in_t[0], in_t[1], in_t[2], in_t[1]
+                    ),
+                    _ => unimplemented!("unknown para_num"),
+                },
+                format!(" -> {}", out_t),
+            )
         } else {
             (
                 match para_num {
@@ -2268,6 +2281,18 @@ fn gen_arm(
             subs,
             constn.as_deref().unwrap()
         )
+    } else if const_aarch64.contains("dup-in_len-N as ttn") {
+        let const_aarch64 = format!("N as {}", type_to_native_type(in_t[1]));
+        let mut cnt = String::from(in_t[1]);
+        cnt.push_str("(");
+        for i in 0..type_len(in_t[1]) {
+            if i != 0 {
+                cnt.push_str(", ");
+            }
+            cnt.push_str(&const_aarch64);
+        }
+        cnt.push_str(")");
+        format!("{}(a, {})", current_fn, cnt)
     } else {
         match para_num {
             1 => format!("{}(a, {})", current_fn, const_aarch64),
diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs
index ce7039ce72..a170ea4e62 100644
--- a/crates/stdarch-verify/tests/arm.rs
+++ b/crates/stdarch-verify/tests/arm.rs
@@ -454,6 +454,9 @@ fn verify_all_signatures() {
         "vreinterpret_p64_s64",
         "vreinterpret_f32_p64",
         "vreinterpretq_f32_p64",
+        "vreinterpretq_p64_p128",
+        "vreinterpretq_p128_p64",
+        "vreinterpretq_f32_p128",
         "vqrdmlahh_s16",
         "vqrdmlahs_s32",
         "vqrdmlahh_lane_s16",
@@ -585,6 +588,12 @@ fn verify_all_signatures() {
         "vrnd64xq_f32",
         "vrnd64z_f32",
         "vrnd64zq_f32",
+        "vcls_u8",
+        "vcls_u16",
+        "vcls_u32",
+        "vclsq_u8",
+        "vclsq_u16",
+        "vclsq_u32",
         "__dbg",
     ];
     let arm = match map.get(rust.name) {
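For orientation, the `dup-in_len-N as ttn` branch added above makes the generated aarch64 binding pass the immediate as a splatted vector constant, because the underlying LLVM intrinsic takes a vector shift operand. A rough sketch of the shape stdarch-gen produces for vqshlu_n_s8 (illustrative fragment only; the exact generated text and link name are assumptions based on the spec's `link-aarch64 = sqshlu._EXT_`):

```rust
// Approximate generated binding: note the vector-typed shift operand.
#[allow(improper_ctypes)]
extern "unadjusted" {
    #[link_name = "llvm.aarch64.neon.sqshlu.v8i8"]
    fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
}
// The public wrapper then splats the const generic, which is exactly the
// string the new `cnt` loop in gen_arm assembles:
//     vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8,
//                              N as i8, N as i8, N as i8, N as i8))
```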