From 31eb38c1d0a3b257dab8a234968e609e3643425c Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 2 Nov 2022 11:58:35 +0100 Subject: [PATCH] Merge raw_bitcast and bitcast - Allow bitcast for vectors with differing lane widths - Remove raw_bitcast IR instruction - Change all users of raw_bitcast to bitcast - Implement support for no-op bitcast cases across backends This implements the second step of the plan outlined here: https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394 --- .../codegen/meta/src/shared/instructions.rs | 28 ------------------- cranelift/codegen/src/isa/aarch64/lower.isle | 9 ++---- .../codegen/src/isa/aarch64/lower_inst.rs | 2 -- cranelift/codegen/src/isa/riscv64/lower.isle | 5 ---- cranelift/codegen/src/isa/s390x/lower.isle | 21 ++++++++++---- cranelift/codegen/src/isa/s390x/lower.rs | 1 - cranelift/codegen/src/isa/x64/lower.isle | 17 ++++++----- cranelift/codegen/src/isa/x64/lower.rs | 1 - cranelift/codegen/src/nan_canonicalization.rs | 8 +++--- cranelift/codegen/src/simple_preopt.rs | 8 +++--- cranelift/codegen/src/verifier/mod.rs | 12 +------- .../filetests/isa/x64/move-elision.clif | 6 ++-- .../filetests/isa/x64/simd-issue-3951.clif | 2 +- .../filetests/runtests/bitcast-ref64.clif | 5 ++-- .../filetests/runtests/bitcast-same-type.clif | 3 +- .../runtests/ref64-invalid-null.clif | 4 +-- .../runtests/simd-bitcast-aarch64.clif | 21 ++++++++++++++ .../filetests/runtests/simd-bitcast.clif | 19 ++----------- .../runtests/simd-bitselect-to-vselect.clif | 4 +-- .../filetests/runtests/simd-comparison.clif | 6 ++-- .../filetests/runtests/simd-lane-access.clif | 10 +++---- .../filetests/simple_preopt/bitselect.clif | 12 ++++---- .../filetests/filetests/verifier/bitcast.clif | 4 +-- cranelift/interpreter/src/step.rs | 2 +- cranelift/interpreter/src/value.rs | 2 +- cranelift/wasm/src/code_translator.rs | 16 +++++------ 26 files changed, 96 insertions(+), 132 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 2a1dd3655e74..c8aeaa50dec1 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -683,8 +683,6 @@ pub(crate) fn define( .build(), ); - let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string()); - let Mem = &TypeVar::new( "Mem", "Any type that can be stored in memory", @@ -3148,32 +3146,6 @@ pub(crate) fn define( The input and output types must be storable to memory and of the same size. A bitcast is equivalent to storing one type and loading the other type from the same address. - - For vector types, the lane types must also be the same size (see - `raw_bitcast` for changing the lane size). - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", Any); - let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted"); - - ig.push( - Inst::new( - "raw_bitcast", - r#" - Cast the bits in `x` as a different type of the same bit width. - - This instruction does not change the data's representation but allows - data in registers to be used as different types, e.g. an i32x4 as a - b8x16. The only constraint on the result `a` is that it can be - `raw_bitcast` back to the original type. Also, in a raw_bitcast between - vector types with the same number of lanes, the value of each result - lane is a raw_bitcast of the corresponding operand lane. 
TODO there is - currently no mechanism for enforcing the bit width constraint. "#, &formats.unary, ) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index cd1e699a3db6..5d6fc3b0b35b 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -2212,8 +2212,8 @@ ;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; SIMD&FP <=> SIMD&FP -(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _))))) - (fpu_move out_ty x)) +(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _))))) + x) ; GPR => SIMD&FP (rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty)))) @@ -2232,11 +2232,6 @@ x) (rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x) -;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (raw_bitcast val)) - val) - ;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; extractlane with lane 0 can pass through the value unchanged; upper diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index d92fcc432f15..3d54e1873721 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs( Opcode::Vconst => implemented_in_isle(ctx), - Opcode::RawBitcast => implemented_in_isle(ctx), - Opcode::Extractlane => implemented_in_isle(ctx), Opcode::Insertlane => implemented_in_isle(ctx), diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 62f3b03acc18..abd193055f13 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -814,11 +814,6 @@ (lower (has_type out (bitcast v @ (value_type in_ty)))) (gen_moves v in_ty out)) -;;;;; Rules for `raw_bitcast`;;;;;;;;; -(rule - (lower (has_type out (raw_bitcast v @ (value_type in_ty)))) - (gen_moves v in_ty out)) - ;;;;; Rules for `ceil`;;;;;;;;; (rule (lower (has_type ty (ceil x))) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index b757d58f121f..e52d4ce41520 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -1760,16 +1760,25 @@ (rule (lower (has_type $I32 (bitcast x @ (value_type $F32)))) (vec_extract_lane $F32X4 x 0 (zero_reg))) - -;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; FIXME: There are two flavors of raw_bitcast, which are currently not +;; Bitcast between types residing in GPRs is a no-op. +(rule 1 (lower (has_type (gpr32_ty _) + (bitcast x @ (value_type (gpr32_ty _))))) x) +(rule 2 (lower (has_type (gpr64_ty _) + (bitcast x @ (value_type (gpr64_ty _))))) x) + +;; Bitcast between types residing in FPRs is a no-op. +(rule 3 (lower (has_type (ty_scalar_float _) + (bitcast x @ (value_type (ty_scalar_float _))))) x) + +;; Bitcast between types residing in VRs is a no-op. +;; FIXME: There are two flavors of vector bitcast, which are currently not ;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian ;; lane order, and those generated elsewhere assume big-endian lane order. -;; Raw bitcast is a no-op if current lane order matches that assumed lane order. 
+;; Bitcast is a no-op if current lane order matches that assumed lane order. ;; However, due to our choice of lane order depending on the current function ;; ABI, every bitcast we currently see here is indeed a no-op. -(rule (lower (raw_bitcast x)) x) +(rule 4 (lower (has_type (vr128_ty _) + (bitcast x @ (value_type (vr128_ty _))))) x) ;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 391f6bfee3d7..6bb59cfb4fbe 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend { | Opcode::ScalarToVector | Opcode::VhighBits | Opcode::Bitcast - | Opcode::RawBitcast | Opcode::Load | Opcode::Uload8 | Opcode::Sload8 diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 89a8b5cbda1b..41ef7eb1db49 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3303,6 +3303,14 @@ (rule (lower (has_type $F64 (bitcast src @ (value_type $I64)))) (bitcast_gpr_to_xmm $I64 src)) +;; Bitcast between types residing in GPR registers is a no-op. +(rule 1 (lower (has_type (is_gpr_type _) + (bitcast x @ (value_type (is_gpr_type _))))) x) + +;; Bitcast between types residing in XMM registers is a no-op. +(rule 2 (lower (has_type (is_xmm_type _) + (bitcast x @ (value_type (is_xmm_type _))))) x) + ;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b))) @@ -3472,15 +3480,6 @@ ;; TODO use Inst::gen_constant() instead. (x64_xmm_load_const ty (const_to_vconst const))) -;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see -;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR -;; instruction should emit no machine code but a move is necessary to give the -;; register allocator a definition for the output virtual register. 
-(rule (lower (raw_bitcast val)) - (put_in_regs val)) - ;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 747f7f7f9c76..db9fa4eba492 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -453,7 +453,6 @@ fn lower_insn_to_regs( | Opcode::GetPinnedReg | Opcode::SetPinnedReg | Opcode::Vconst - | Opcode::RawBitcast | Opcode::Insertlane | Opcode::Shuffle | Opcode::Swizzle diff --git a/cranelift/codegen/src/nan_canonicalization.rs b/cranelift/codegen/src/nan_canonicalization.rs index 107985e27e83..6b8a882aa3b5 100644 --- a/cranelift/codegen/src/nan_canonicalization.rs +++ b/cranelift/codegen/src/nan_canonicalization.rs @@ -70,11 +70,11 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) { .select(is_nan, canon_nan, new_res); }; let vector_select = |pos: &mut FuncCursor, canon_nan: Value| { - let cond = pos.ins().raw_bitcast(types::I8X16, is_nan); - let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan); - let result = pos.ins().raw_bitcast(types::I8X16, new_res); + let cond = pos.ins().bitcast(types::I8X16, is_nan); + let canon_nan = pos.ins().bitcast(types::I8X16, canon_nan); + let result = pos.ins().bitcast(types::I8X16, new_res); let bitmask = pos.ins().bitselect(cond, canon_nan, result); - pos.ins().with_result(val).raw_bitcast(val_type, bitmask); + pos.ins().with_result(val).bitcast(val_type, bitmask); }; match val_type { diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs index 7c381e928c93..d107f1554c1d 100644 --- a/cranelift/codegen/src/simple_preopt.rs +++ b/cranelift/codegen/src/simple_preopt.rs @@ -863,7 +863,7 @@ mod simplify { return; } let new_type = I8.by(old_cond_type.bytes()).unwrap(); - (pos.ins().raw_bitcast(new_type, args[0]), new_type) + (pos.ins().bitcast(new_type, args[0]), new_type) } _ => return, }; @@ -874,10 +874,10 @@ mod simplify { if arg_type != old_arg_type { // Operands types must match, we need to add bitcasts. 
- let arg1 = pos.ins().raw_bitcast(arg_type, args[1]); - let arg2 = pos.ins().raw_bitcast(arg_type, args[2]); + let arg1 = pos.ins().bitcast(arg_type, args[1]); + let arg2 = pos.ins().bitcast(arg_type, args[2]); let ret = pos.ins().vselect(cond_val, arg1, arg2); - pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret); + pos.func.dfg.replace(inst).bitcast(old_arg_type, ret); } else { pos.func .dfg diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index 7e24011db9ac..00855f46d197 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -1078,17 +1078,7 @@ impl<'a> Verifier<'a> { let typ = self.func.dfg.ctrl_typevar(inst); let value_type = self.func.dfg.value_type(arg); - if typ.lane_bits() != value_type.lane_bits() { - errors.fatal(( - inst, - format!( - "The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits", - arg, - value_type.lane_bits(), - typ.lane_bits() - ), - )) - } else if typ.bits() != value_type.bits() { + if typ.bits() != value_type.bits() { errors.fatal(( inst, format!( diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index f570f7005103..08280bae2dd3 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -7,9 +7,9 @@ block0(v0: i32x4): ;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA, ;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue ;; and epilogue. - v1 = raw_bitcast.f32x4 v0 - v2 = raw_bitcast.f64x2 v1 - v3 = raw_bitcast.i8x16 v2 + v1 = bitcast.f32x4 v0 + v2 = bitcast.f64x2 v1 + v3 = bitcast.i8x16 v2 return v3 } diff --git a/cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif b/cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif index e7076cedf53e..5353bf3cfa4d 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif @@ -12,7 +12,7 @@ function %check_issue_3951(i64 vmctx) -> i8x16 fast { v4 = global_value.i64 gv0 v5 = load.i8x16 notrap aligned v4+8 v6 = icmp ugt v3, v5 - v7 = raw_bitcast.i8x16 v6 + v7 = bitcast.i8x16 v6 jump block1(v7) block1(v1: i8x16): return v1 diff --git a/cranelift/filetests/filetests/runtests/bitcast-ref64.clif b/cranelift/filetests/filetests/runtests/bitcast-ref64.clif index 73d9a177437a..546152b5c4e4 100644 --- a/cranelift/filetests/filetests/runtests/bitcast-ref64.clif +++ b/cranelift/filetests/filetests/runtests/bitcast-ref64.clif @@ -1,7 +1,8 @@ test run target aarch64 -; the interpreter, x86_64, and s390x do not support bitcasting to/from -; references +target x86_64 +target s390x +; the interpreter does not support bitcasting to/from references function %bitcast_ir64(i64) -> i8 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/runtests/bitcast-same-type.clif b/cranelift/filetests/filetests/runtests/bitcast-same-type.clif index 9e88a9bc7bc0..19aa53517737 100644 --- a/cranelift/filetests/filetests/runtests/bitcast-same-type.clif +++ b/cranelift/filetests/filetests/runtests/bitcast-same-type.clif @@ -1,7 +1,8 @@ test interpret test run target aarch64 -; x86_64 and s390x do not support bitcasting to the same type as the input. 
+target x86_64 +target s390x function %bitcast_i8(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif b/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif index 39b44d0ce162..ff321aa279da 100644 --- a/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif +++ b/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif @@ -14,7 +14,7 @@ block0: function %is_null_r64(i64) -> i8 { block0(v0: i64): - v1 = raw_bitcast.r64 v0 + v1 = bitcast.r64 v0 v2 = is_null v1 return v2 } @@ -24,7 +24,7 @@ block0(v0: i64): function %is_invalid_r64(i64) -> i8 { block0(v0: i64): - v1 = raw_bitcast.r64 v0 + v1 = bitcast.r64 v0 v2 = is_invalid v1 return v2 } diff --git a/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif new file mode 100644 index 000000000000..edee1e35240e --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif @@ -0,0 +1,21 @@ +test interpret +test run +target aarch64 +;; 64-bit vector types only supported on aarch64 + +function %bitcast_if32x2(i32x2) -> f32x2 { +block0(v0: i32x2): + v1 = bitcast.f32x2 v0 + return v1 +} +; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff] +; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126] + +function %bitcast_fi32x2(f32x2) -> i32x2 { +block0(v0: f32x2): + v1 = bitcast.i32x2 v0 + return v1 +} +; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295] +; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127] + diff --git a/cranelift/filetests/filetests/runtests/simd-bitcast.clif b/cranelift/filetests/filetests/runtests/simd-bitcast.clif index 5541f743456e..81e3d2ae6640 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitcast.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitcast.clif @@ -1,23 +1,8 @@ test interpret test run target aarch64 -; x86_64 and s390x do not support vector bitcasts. 
- -function %bitcast_if32x2(i32x2) -> f32x2 { -block0(v0: i32x2): - v1 = bitcast.f32x2 v0 - return v1 -} -; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff] -; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126] - -function %bitcast_fi32x2(f32x2) -> i32x2 { -block0(v0: f32x2): - v1 = bitcast.i32x2 v0 - return v1 -} -; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295] -; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127] +target x86_64 +target s390x function %bitcast_if32x4(i32x4) -> f32x4 { block0(v0: i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif index 26c8911cf719..20d7c05e38ae 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif @@ -8,7 +8,7 @@ target x86_64 skylake function %mask_from_icmp(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): v2 = icmp sge v0, v1 - v3 = raw_bitcast.i32x4 v2 + v3 = bitcast.i32x4 v2 v4 = bitselect v3, v0, v1 return v4 } @@ -16,7 +16,7 @@ block0(v0: i32x4, v1: i32x4): function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 { block0(v0: i64x2, v1: i64x2, v2: i32x4): - v3 = raw_bitcast.i64x2 v2 + v3 = bitcast.i64x2 v2 v4 = bitselect v3, v0, v1 return v4 } diff --git a/cranelift/filetests/filetests/runtests/simd-comparison.clif b/cranelift/filetests/filetests/runtests/simd-comparison.clif index 3ed38f9a71ed..4a724210abf9 100644 --- a/cranelift/filetests/filetests/runtests/simd-comparison.clif +++ b/cranelift/filetests/filetests/runtests/simd-comparison.clif @@ -51,7 +51,7 @@ block0: v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0] v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff] v2 = icmp sgt v0, v1 - v3 = raw_bitcast.i8x16 v2 + v3 = bitcast.i8x16 v2 v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff] v7 = icmp eq v3, v4 v8 = vall_true v7 @@ -126,7 +126,7 @@ block0: v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1] v2 = icmp ult v0, v1 v3 = vconst.i16x8 0x00 - v4 = raw_bitcast.i16x8 v2 + v4 = bitcast.i16x8 v2 v5 = icmp eq v3, v4 v8 = vall_true v5 return v8 @@ -200,7 +200,7 @@ block0: v2 = fcmp gt v0, v1 ; now check that the result v2 is all zeroes v3 = vconst.i32x4 0x00 - v4 = raw_bitcast.i32x4 v2 + v4 = bitcast.i32x4 v2 v5 = icmp eq v3, v4 v8 = vall_true v5 return v8 diff --git a/cranelift/filetests/filetests/runtests/simd-lane-access.clif b/cranelift/filetests/filetests/runtests/simd-lane-access.clif index 45145f9c1486..a73e6dca5d29 100644 --- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif +++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif @@ -26,10 +26,10 @@ block0: function %shuffle_i32x4_in_same_place() -> i32x4 { block0: v1 = vconst.i32x4 [0 1 2 3] - v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 ; keep each lane in place from the first vector v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v4 = raw_bitcast.i32x4 v3 + v4 = bitcast.i32x4 v3 return v4 } ; run: %shuffle_in_same_place() == [0 1 2 3] @@ -37,10 +37,10 @@ block0: function %shuffle_i32x4_to_all_true() -> i32x4 { block0: v1 = vconst.i32x4 [-1 0 -1 0] - v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 ; pair up the true values to make the 
entire vector true v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] - v4 = raw_bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237 + v4 = bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237 return v4 } ; run: %shuffle_i32x4_to_all_true() == [0xffffffff 0xffffffff 0xffffffff 0xffffffff] @@ -100,7 +100,7 @@ block0: v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0] v2 = extractlane v1, 10 - v3 = raw_bitcast.i8 v2 + v3 = bitcast.i8 v2 return v3 } ; run: %extractlane_i8x16_last() == 0xff diff --git a/cranelift/filetests/filetests/simple_preopt/bitselect.clif b/cranelift/filetests/filetests/simple_preopt/bitselect.clif index e55c46fd2bc7..b7ba46f5c052 100644 --- a/cranelift/filetests/filetests/simple_preopt/bitselect.clif +++ b/cranelift/filetests/filetests/simple_preopt/bitselect.clif @@ -16,7 +16,7 @@ block0(v0: i8x16, v1: i8x16): ;; can't remove the bitselect in this case. function %mask_casted(i8x16, i8x16, i32x4) -> i8x16 { block0(v0: i8x16, v1: i8x16, v2: i32x4): - v3 = raw_bitcast.i8x16 v2 + v3 = bitcast.i8x16 v2 v4 = bitselect v3, v0, v1 ; check: v4 = bitselect v3, v0, v1 return v4 @@ -26,7 +26,7 @@ function %good_const_mask_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): v3 = vconst.i8x16 [0 0 0xFF 0 0 0xFF 0 0 0 0 0xFF 0 0 0 0 0xFF] v4 = bitselect v3, v0, v1 - ; check: v5 = raw_bitcast.i8x16 v3 + ; check: v5 = bitcast.i8x16 v3 ; nextln: v4 = vselect v5, v0, v1 return v4 } @@ -35,11 +35,11 @@ function %good_const_mask_i16x8(i16x8, i16x8) -> i16x8 { block0(v0: i16x8, v1: i16x8): v3 = vconst.i16x8 [0x0000 0xFF00 0x0000 0x00FF 0x0000 0xFFFF 0x00FF 0xFFFF] v4 = bitselect v3, v0, v1 - ; check: v5 = raw_bitcast.i8x16 v3 - ; nextln: v6 = raw_bitcast.i8x16 v0 - ; nextln: v7 = raw_bitcast.i8x16 v1 + ; check: v5 = bitcast.i8x16 v3 + ; nextln: v6 = bitcast.i8x16 v0 + ; nextln: v7 = bitcast.i8x16 v1 ; nextln: v8 = vselect v5, v6, v7 - ; nextln: v4 = raw_bitcast.i16x8 v8 + ; nextln: v4 = bitcast.i16x8 v8 return v4 } diff --git a/cranelift/filetests/filetests/verifier/bitcast.clif b/cranelift/filetests/filetests/verifier/bitcast.clif index 6c6dacdc5e78..5ed7b8386d7f 100644 --- a/cranelift/filetests/filetests/verifier/bitcast.clif +++ b/cranelift/filetests/filetests/verifier/bitcast.clif @@ -10,14 +10,14 @@ block0(v0: i32): ; bitcast to a type larger than the operand is not ok function %valid_bitcast2(i32) -> i64 { block0(v0: i32): - v1 = bitcast.i64 v0 ; error: The bitcast argument v0 has a lane type of 32 bits, which doesn't match an expected type of 64 bits + v1 = bitcast.i64 v0 ; error: The bitcast argument v0 has a type of 32 bits, which doesn't match an expected type of 64 bits return v1 } ; bitcast to a smaller type is not ok function %bad_bitcast(i64) -> i32 { block0(v0: i64): - v1 = bitcast.i32 v0 ; error: The bitcast argument v0 has a lane type of 64 bits, which doesn't match an expected type of 32 bits + v1 = bitcast.i32 v0 ; error: The bitcast argument v0 has a type of 64 bits, which doesn't match an expected type of 32 bits return v1 } diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 18ecc22a76e9..b6d05530b7c0 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -947,7 +947,7 @@ where Opcode::Nearest => assign(Value::nearest(arg(0)?)?), Opcode::IsNull => unimplemented!("IsNull"), Opcode::IsInvalid => unimplemented!("IsInvalid"), - Opcode::Bitcast | Opcode::RawBitcast 
| Opcode::ScalarToVector => { + Opcode::Bitcast | Opcode::ScalarToVector => { let input_ty = inst_context.type_of(inst_context.args()[0]).unwrap(); let arg0 = extractlanes(&arg(0)?, input_ty)?; diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 279564d8c7e3..e42cef700643 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -335,7 +335,7 @@ impl Value for DataValue { fn convert(self, kind: ValueConversionKind) -> ValueResult { Ok(match kind { ValueConversionKind::Exact(ty) => match (self, ty) { - // TODO a lot to do here: from bmask to ireduce to raw_bitcast... + // TODO a lot to do here: from bmask to ireduce to bitcast... (val, ty) if val.ty().is_int() && ty.is_int() => { DataValue::from_integer(val.into_int()?, ty)? } diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 60a3cf8e0a9b..7842eacba06d 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1427,7 +1427,7 @@ pub fn translate_operator( let data = value.bytes().to_vec().into(); let handle = builder.func.dfg.constants.insert(data); let value = builder.ins().vconst(I8X16, handle); - // the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type + // the v128.const is typed in CLIF as a I8x16 but bitcast to a different type // before use state.push1(value) } @@ -1536,7 +1536,7 @@ pub fn translate_operator( let shuffled = builder.ins().shuffle(a, b, mask); state.push1(shuffled) // At this point the original types of a and b are lost; users of this value (i.e. this - // WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due + // WASM-to-CLIF translator) may need to bitcast for type-correctness. This is due // to WASM using the less specific v128 type for certain operations and more specific // types (e.g. i8x16) for others. } @@ -2895,14 +2895,14 @@ fn type_of(operator: &Operator) -> Type { } /// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by -/// adding a raw_bitcast if necessary. +/// adding a bitcast if necessary. fn optionally_bitcast_vector( value: Value, needed_type: Type, builder: &mut FunctionBuilder, ) -> Value { if builder.func.dfg.value_type(value) != needed_type { - builder.ins().raw_bitcast(needed_type, value) + builder.ins().bitcast(needed_type, value) } else { value } @@ -2937,7 +2937,7 @@ fn canonicalise_v128_values<'a>( // Otherwise we'll have to cast, and push the resulting `Value`s into `canonicalised`. for v in values { tmp_canonicalised.push(if is_non_canonical_v128(builder.func.dfg.value_type(*v)) { - builder.ins().raw_bitcast(I8X16, *v) + builder.ins().bitcast(I8X16, *v) } else { *v }); @@ -3048,7 +3048,7 @@ fn bitcast_arguments<'a>( /// A helper for bitcasting a sequence of return values for the function currently being built. If /// a value is a vector type that does not match its expected type, this will modify the value in -/// place to point to the result of a `raw_bitcast`. This conversion is necessary to translate Wasm +/// place to point to the result of a `bitcast`. This conversion is necessary to translate Wasm /// code that uses `V128` as function parameters (or implicitly in block parameters) and still use /// specific CLIF types (e.g. `I32X4`) in the function body. 
pub fn bitcast_wasm_returns( @@ -3060,7 +3060,7 @@ pub fn bitcast_wasm_returns( environ.is_wasm_return(&builder.func.signature, i) }); for (t, arg) in changes { - *arg = builder.ins().raw_bitcast(t, *arg); + *arg = builder.ins().bitcast(t, *arg); } } @@ -3076,6 +3076,6 @@ fn bitcast_wasm_params( environ.is_wasm_parameter(&callee_signature, i) }); for (t, arg) in changes { - *arg = builder.ins().raw_bitcast(t, *arg); + *arg = builder.ins().bitcast(t, *arg); } }
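
For reference, a minimal CLIF sketch (not part of the patch; the function name is illustrative and the pattern mirrors the filetests above) of the cast this change newly permits: a plain `bitcast` between same-sized vector types whose lane widths differ, which previously had to be written as `raw_bitcast`:

function %bitcast_i32x4_to_i8x16(i32x4) -> i8x16 {
block0(v0: i32x4):
    ; the total width matches (128 bits) even though the lane widths differ
    ; (32 vs. 8 bits), so the relaxed verifier check now accepts this; as the
    ; instruction documentation states, the result is the same as storing the
    ; i32x4 and loading an i8x16 from the same address
    v1 = bitcast.i8x16 v0
    return v1
}

On each backend this flows through the no-op lowering rules added above whenever source and destination live in the same register class (SIMD&FP on aarch64, VRs on s390x, XMM on x64), so no move or conversion code is emitted for it.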