Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge raw_bitcast and bitcast #5175

Merged
merged 1 commit into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,6 @@ pub(crate) fn define(
.build(),
);

let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string());

let Mem = &TypeVar::new(
"Mem",
"Any type that can be stored in memory",
Expand Down Expand Up @@ -3148,32 +3146,6 @@ pub(crate) fn define(
The input and output types must be storable to memory and of the same
size. A bitcast is equivalent to storing one type and loading the other
type from the same address.

For vector types, the lane types must also be the same size (see
`raw_bitcast` for changing the lane size).
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let x = &Operand::new("x", Any);
let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted");

ig.push(
Inst::new(
"raw_bitcast",
r#"
Cast the bits in `x` as a different type of the same bit width.

This instruction does not change the data's representation but allows
data in registers to be used as different types, e.g. an i32x4 as a
b8x16. The only constraint on the result `a` is that it can be
`raw_bitcast` back to the original type. Also, in a raw_bitcast between
vector types with the same number of lanes, the value of each result
lane is a raw_bitcast of the corresponding operand lane. TODO there is
currently no mechanism for enforcing the bit width constraint.
"#,
&formats.unary,
)
Expand Down
9 changes: 2 additions & 7 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2212,8 +2212,8 @@
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; SIMD&FP <=> SIMD&FP
(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
(fpu_move out_ty x))
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
x)

; GPR => SIMD&FP
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
Expand All @@ -2232,11 +2232,6 @@
x)
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)

;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (raw_bitcast val))
val)

;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; extractlane with lane 0 can pass through the value unchanged; upper
Expand Down
2 changes: 0 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs(

Opcode::Vconst => implemented_in_isle(ctx),

Opcode::RawBitcast => implemented_in_isle(ctx),

Opcode::Extractlane => implemented_in_isle(ctx),

Opcode::Insertlane => implemented_in_isle(ctx),
Expand Down
5 changes: 0 additions & 5 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -814,11 +814,6 @@
(lower (has_type out (bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))

;;;;; Rules for `raw_bitcast`;;;;;;;;;
(rule
(lower (has_type out (raw_bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))

;;;;; Rules for `ceil`;;;;;;;;;
(rule
(lower (has_type ty (ceil x)))
Expand Down
21 changes: 15 additions & 6 deletions cranelift/codegen/src/isa/s390x/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1760,16 +1760,25 @@
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
(vec_extract_lane $F32X4 x 0 (zero_reg)))


;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; FIXME: There are two flavors of raw_bitcast, which are currently not
;; Bitcast between types residing in GPRs is a no-op.
(rule 1 (lower (has_type (gpr32_ty _)
(bitcast x @ (value_type (gpr32_ty _))))) x)
(rule 2 (lower (has_type (gpr64_ty _)
(bitcast x @ (value_type (gpr64_ty _))))) x)

;; Bitcast between types residing in FPRs is a no-op.
(rule 3 (lower (has_type (ty_scalar_float _)
(bitcast x @ (value_type (ty_scalar_float _))))) x)

;; Bitcast between types residing in VRs is a no-op.
;; FIXME: There are two flavors of vector bitcast, which are currently not
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
;; lane order, and those generated elsewhere assume big-endian lane order.
;; Raw bitcast is a no-op if current lane order matches that assumed lane order.
;; Bitcast is a no-op if the current lane order matches the assumed lane order.
;; However, due to our choice of lane order depending on the current function
;; ABI, every bitcast we currently see here is indeed a no-op.
(rule (lower (raw_bitcast x)) x)
(rule 4 (lower (has_type (vr128_ty _)
(bitcast x @ (value_type (vr128_ty _))))) x)


;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend {
| Opcode::ScalarToVector
| Opcode::VhighBits
| Opcode::Bitcast
| Opcode::RawBitcast
| Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
Expand Down
17 changes: 8 additions & 9 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3303,6 +3303,14 @@
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
(bitcast_gpr_to_xmm $I64 src))

;; Bitcast between types residing in GPRs is a no-op.
(rule 1 (lower (has_type (is_gpr_type _)
(bitcast x @ (value_type (is_gpr_type _))))) x)

;; Bitcast between types residing in XMM registers is a no-op.
(rule 2 (lower (has_type (is_xmm_type _)
(bitcast x @ (value_type (is_xmm_type _))))) x)

;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
Expand Down Expand Up @@ -3472,15 +3480,6 @@
;; TODO use Inst::gen_constant() instead.
(x64_xmm_load_const ty (const_to_vconst const)))

;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see
;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
;; instruction should emit no machine code but a move is necessary to give the
;; register allocator a definition for the output virtual register.
(rule (lower (raw_bitcast val))
(put_in_regs val))

;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,6 @@ fn lower_insn_to_regs(
| Opcode::GetPinnedReg
| Opcode::SetPinnedReg
| Opcode::Vconst
| Opcode::RawBitcast
| Opcode::Insertlane
| Opcode::Shuffle
| Opcode::Swizzle
Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/nan_canonicalization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
.select(is_nan, canon_nan, new_res);
};
let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
let cond = pos.ins().raw_bitcast(types::I8X16, is_nan);
let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan);
let result = pos.ins().raw_bitcast(types::I8X16, new_res);
let cond = pos.ins().bitcast(types::I8X16, is_nan);
let canon_nan = pos.ins().bitcast(types::I8X16, canon_nan);
let result = pos.ins().bitcast(types::I8X16, new_res);
let bitmask = pos.ins().bitselect(cond, canon_nan, result);
pos.ins().with_result(val).raw_bitcast(val_type, bitmask);
pos.ins().with_result(val).bitcast(val_type, bitmask);
};

match val_type {
Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/simple_preopt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ mod simplify {
return;
}
let new_type = I8.by(old_cond_type.bytes()).unwrap();
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
(pos.ins().bitcast(new_type, args[0]), new_type)
}
_ => return,
};
Expand All @@ -874,10 +874,10 @@ mod simplify {

if arg_type != old_arg_type {
// Operands types must match, we need to add bitcasts.
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
let arg1 = pos.ins().bitcast(arg_type, args[1]);
let arg2 = pos.ins().bitcast(arg_type, args[2]);
let ret = pos.ins().vselect(cond_val, arg1, arg2);
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
pos.func.dfg.replace(inst).bitcast(old_arg_type, ret);
} else {
pos.func
.dfg
Expand Down
12 changes: 1 addition & 11 deletions cranelift/codegen/src/verifier/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1078,17 +1078,7 @@ impl<'a> Verifier<'a> {
let typ = self.func.dfg.ctrl_typevar(inst);
let value_type = self.func.dfg.value_type(arg);

if typ.lane_bits() != value_type.lane_bits() {
errors.fatal((
inst,
format!(
"The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits",
arg,
value_type.lane_bits(),
typ.lane_bits()
),
))
} else if typ.bits() != value_type.bits() {
if typ.bits() != value_type.bits() {
errors.fatal((
inst,
format!(
Expand Down
6 changes: 3 additions & 3 deletions cranelift/filetests/filetests/isa/x64/move-elision.clif
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ block0(v0: i32x4):
;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
;; and epilogue.
v1 = raw_bitcast.f32x4 v0
v2 = raw_bitcast.f64x2 v1
v3 = raw_bitcast.i8x16 v2
v1 = bitcast.f32x4 v0
v2 = bitcast.f64x2 v1
v3 = bitcast.i8x16 v2
return v3
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function %check_issue_3951(i64 vmctx) -> i8x16 fast {
v4 = global_value.i64 gv0
v5 = load.i8x16 notrap aligned v4+8
v6 = icmp ugt v3, v5
v7 = raw_bitcast.i8x16 v6
v7 = bitcast.i8x16 v6
jump block1(v7)
block1(v1: i8x16):
return v1
Expand Down
5 changes: 3 additions & 2 deletions cranelift/filetests/filetests/runtests/bitcast-ref64.clif
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
test run
target aarch64
; the interpreter, x86_64, and s390x do not support bitcasting to/from
; references
target x86_64
target s390x
; the interpreter does not support bitcasting to/from references

function %bitcast_ir64(i64) -> i8 {
block0(v0: i64):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
; x86_64 and s390x do not support bitcasting to the same type as the input.
target x86_64
target s390x

function %bitcast_i8(i8) -> i8 {
block0(v0: i8):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ block0:

function %is_null_r64(i64) -> i8 {
block0(v0: i64):
v1 = raw_bitcast.r64 v0
v1 = bitcast.r64 v0
v2 = is_null v1
return v2
}
Expand All @@ -24,7 +24,7 @@ block0(v0: i64):

function %is_invalid_r64(i64) -> i8 {
block0(v0: i64):
v1 = raw_bitcast.r64 v0
v1 = bitcast.r64 v0
v2 = is_invalid v1
return v2
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
test interpret
test run
target aarch64
;; 64-bit vector types are only supported on aarch64

;; Reinterprets an i32x2 vector as f32x2 with a single `bitcast`.
;; The expectations that follow this function pin the lane-wise mapping:
;; integer 0 reads back as 0x0.0, 4294967295 and -1 read back as
;; -NaN:0x3fffff, and 127 reads back as 0x0.0000fep-126.
function %bitcast_if32x2(i32x2) -> f32x2 {
block0(v0: i32x2):
v1 = bitcast.f32x2 v0
return v1
}
; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]

;; Reinterprets an f32x2 vector as i32x2 with a single `bitcast` (the reverse
;; direction of %bitcast_if32x2). The expectations that follow this function
;; pin the lane-wise mapping: 0x0.0 reads back as 0, -NaN:0x3fffff reads back
;; as 4294967295 / -1, and 0x0.0000fep-126 reads back as 127.
function %bitcast_fi32x2(f32x2) -> i32x2 {
block0(v0: f32x2):
v1 = bitcast.i32x2 v0
return v1
}
; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]

19 changes: 2 additions & 17 deletions cranelift/filetests/filetests/runtests/simd-bitcast.clif
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
test interpret
test run
target aarch64
; x86_64 and s390x do not support vector bitcasts.

function %bitcast_if32x2(i32x2) -> f32x2 {
block0(v0: i32x2):
v1 = bitcast.f32x2 v0
return v1
}
; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]

function %bitcast_fi32x2(f32x2) -> i32x2 {
block0(v0: f32x2):
v1 = bitcast.i32x2 v0
return v1
}
; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
target x86_64
target s390x

function %bitcast_if32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ target x86_64 skylake
function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp sge v0, v1
v3 = raw_bitcast.i32x4 v2
v3 = bitcast.i32x4 v2
v4 = bitselect v3, v0, v1
return v4
}
; run: %mask_from_icmp([5 6 7 8], [1 10 20 7]) == [5 10 20 8]

function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
block0(v0: i64x2, v1: i64x2, v2: i32x4):
v3 = raw_bitcast.i64x2 v2
v3 = bitcast.i64x2 v2
v4 = bitselect v3, v0, v1
return v4
}
Expand Down
6 changes: 3 additions & 3 deletions cranelift/filetests/filetests/runtests/simd-comparison.clif
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ block0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v2 = icmp sgt v0, v1
v3 = raw_bitcast.i8x16 v2
v3 = bitcast.i8x16 v2
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
v8 = vall_true v7
Expand Down Expand Up @@ -126,7 +126,7 @@ block0:
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v2 = icmp ult v0, v1
v3 = vconst.i16x8 0x00
v4 = raw_bitcast.i16x8 v2
v4 = bitcast.i16x8 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
Expand Down Expand Up @@ -200,7 +200,7 @@ block0:
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
v4 = raw_bitcast.i32x4 v2
v4 = bitcast.i32x4 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
Expand Down
Loading