diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 69ed608d130e..bc6771d35fa9 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -2970,6 +2970,13 @@ (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtsd2ss) x dst)))) dst)) +;; Helper for creating `cvtdq2ps` instructions. +(decl x64_cvtdq2ps (Xmm) Xmm) +(rule (x64_cvtdq2ps x) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtdq2ps) x dst)))) + dst)) + ;; Helper for creating `cvtps2pd` instructions. (decl x64_cvtps2pd (Xmm) Xmm) (rule (x64_cvtps2pd x) @@ -2984,6 +2991,29 @@ (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtpd2ps) x dst)))) dst)) +;; Helper for creating `cvtdq2pd` instructions. +(decl x64_cvtdq2pd (Type Xmm) Xmm) +(rule (x64_cvtdq2pd ty x) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtdq2pd) x dst)))) + dst)) + +;; Helper for creating `cvtsi2ss` instructions. +(decl x64_cvtsi2ss (Type GprMem) Xmm) +(rule (x64_cvtsi2ss ty x) + (let ((dst WritableXmm (temp_writable_xmm)) + (size OperandSize (raw_operand_size_of_type ty)) + (_ Unit (emit (MInst.GprToXmm (SseOpcode.Cvtsi2ss) x dst size)))) + dst)) + +;; Helper for creating `cvtsi2sd` instructions. +(decl x64_cvtsi2sd (Type GprMem) Xmm) +(rule (x64_cvtsi2sd ty x) + (let ((dst WritableXmm (temp_writable_xmm)) + (size OperandSize (raw_operand_size_of_type ty)) + (_ Unit (emit (MInst.GprToXmm (SseOpcode.Cvtsi2sd) x dst size)))) + dst)) + ;; Helpers for creating `pcmpeq*` instructions. (decl x64_pcmpeq (Type Xmm XmmMem) Xmm) (rule (x64_pcmpeq $I8X16 x y) (x64_pcmpeqb x y)) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 6f542d343d16..fc02bd7571a0 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1527,8 +1527,11 @@ pub(crate) fn emit( SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2), SseOpcode::Cvtpd2ps => (LegacyPrefixes::_66, 0x0F5A, 2), SseOpcode::Cvtps2pd => (LegacyPrefixes::None, 0x0F5A, 2), + SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2), SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2), SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2), + SseOpcode::Cvttpd2dq => (LegacyPrefixes::_66, 0x0FE6, 2), + SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2), SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2), SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2), SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2), @@ -1623,9 +1626,6 @@ pub(crate) fn emit( SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3), SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3), - SseOpcode::Cvttpd2dq => (LegacyPrefixes::_66, 0x0FE6, 2), - SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2), - SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index be3b4b03f9ea..088c0bd15f39 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3998,21 +3998,21 @@ fn test_x64_emit() { // ======================================================== // XMM_RM_R: Integer Conversion insns.push(( - Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8), + Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8), "440F5BC1", - "cvtdq2ps %xmm8, %xmm1, %xmm8", + "cvtdq2ps %xmm1, %xmm8", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Cvttpd2dq, RegMem::reg(xmm15), w_xmm7), + Inst::xmm_unary_rm_r(SseOpcode::Cvttpd2dq, RegMem::reg(xmm15), w_xmm7), "66410FE6FF", - "cvttpd2dq %xmm7, %xmm15, %xmm7", + "cvttpd2dq %xmm15, %xmm7", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8), + Inst::xmm_unary_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8), "F3450F5BC1", - "cvttps2dq %xmm8, %xmm9, %xmm8", + "cvttps2dq %xmm9, %xmm8", )); // XMM_Mov_R_M: float stores diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 5188dd322af4..01915b35a900 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2985,3 +2985,31 @@ (rule (lower (selectif_spectre_guard cc (ifcmp a b) x y)) (select_icmp (emit_cmp cc a b) x y)) + +;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $F32 (fcvt_from_sint a @ (value_type $I8)))) + (x64_cvtsi2ss $I32 (extend_to_gpr a $I32 (ExtendKind.Sign)))) + +(rule (lower (has_type $F32 (fcvt_from_sint a @ (value_type $I16)))) + (x64_cvtsi2ss $I32 (extend_to_gpr a $I32 (ExtendKind.Sign)))) + +(rule (lower (has_type $F32 (fcvt_from_sint a @ (value_type (ty_int (fits_in_64 ty)))))) + (x64_cvtsi2ss ty a)) + +(rule (lower (has_type $F64 (fcvt_from_sint a @ (value_type $I8)))) + (x64_cvtsi2sd $I32 (extend_to_gpr a $I32 (ExtendKind.Sign)))) + +(rule (lower (has_type $F64 (fcvt_from_sint a @ (value_type $I16)))) + (x64_cvtsi2sd $I32 (extend_to_gpr a $I32 (ExtendKind.Sign)))) + +(rule (lower (has_type $F64 (fcvt_from_sint a @ (value_type (ty_int (fits_in_64 ty)))))) + (x64_cvtsi2sd ty a)) + +(rule (lower (fcvt_from_sint a @ (value_type $I32X4))) + (x64_cvtdq2ps a)) + +;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (fcvt_low_from_sint a @ (value_type ty))) + (x64_cvtdq2pd ty a)) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index cbad5dd376fb..ebf2eca43572 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -172,6 +172,7 @@ enum ExtSpec { #[allow(dead_code)] ZeroExtendTo32, ZeroExtendTo64, + #[allow(dead_code)] SignExtendTo32, #[allow(dead_code)] // not used just yet but may be used in the future! SignExtendTo64, @@ -626,56 +627,12 @@ fn lower_insn_to_regs>( | Opcode::GetReturnAddress | Opcode::Select | Opcode::Selectif - | Opcode::SelectifSpectreGuard => { + | Opcode::SelectifSpectreGuard + | Opcode::FcvtFromSint + | Opcode::FcvtLowFromSint => { implemented_in_isle(ctx); } - Opcode::FcvtFromSint => { - let output_ty = ty.unwrap(); - if !output_ty.is_vector() { - let (ext_spec, src_size) = match ctx.input_ty(insn, 0) { - types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32), - types::I32 => (None, OperandSize::Size32), - types::I64 => (None, OperandSize::Size64), - _ => unreachable!(), - }; - - let src = match ext_spec { - Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)), - None => RegMem::reg(put_input_in_reg(ctx, inputs[0])), - }; - - let opcode = if output_ty == types::F32 { - SseOpcode::Cvtsi2ss - } else { - assert_eq!(output_ty, types::F64); - SseOpcode::Cvtsi2sd - }; - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst)); - } else { - let ty = ty.unwrap(); - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let opcode = match ctx.input_ty(insn, 0) { - types::I32X4 => SseOpcode::Cvtdq2ps, - _ => { - unimplemented!("unable to use type {} for op {}", ctx.input_ty(insn, 0), op) - } - }; - ctx.emit(Inst::gen_move(dst, src, ty)); - ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst)); - } - } - Opcode::FcvtLowFromSint => { - let src = RegMem::reg(put_input_in_reg(ctx, inputs[0])); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::xmm_unary_rm_r( - SseOpcode::Cvtdq2pd, - RegMem::from(src), - dst, - )); - } Opcode::FcvtFromUint => { let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); @@ -844,11 +801,19 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst)); // Convert the low 16 bits - ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(tmp), tmp)); + ctx.emit(Inst::xmm_unary_rm_r( + SseOpcode::Cvtdq2ps, + RegMem::from(tmp), + tmp, + )); // Shift the high bits by 1, convert, and double to get the correct value. ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_unary_rm_r( + SseOpcode::Cvtdq2ps, + RegMem::from(dst), + dst, + )); ctx.emit(Inst::xmm_rm_r( SseOpcode::Addps, RegMem::reg(dst.to_reg()), @@ -938,7 +903,7 @@ fn lower_insn_to_regs>( )); // Convert the packed float to packed doubleword. - ctx.emit(Inst::xmm_rm_r( + ctx.emit(Inst::xmm_unary_rm_r( SseOpcode::Cvttps2dq, RegMem::reg(dst.to_reg()), dst, @@ -1031,7 +996,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_rm_r(SseOpcode::Pcmpeqd, RegMem::from(tmp2), tmp2)); ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), tmp2)); - ctx.emit(Inst::xmm_rm_r( + ctx.emit(Inst::xmm_unary_rm_r( SseOpcode::Cvtdq2ps, RegMem::from(tmp2), tmp2, @@ -1041,7 +1006,11 @@ fn lower_insn_to_regs>( // Overflow lanes greater than the maximum allowed signed value will // set to 0x80000000. Negative and NaN lanes will be 0x0 ctx.emit(Inst::xmm_mov(SseOpcode::Movaps, RegMem::from(dst), tmp1)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_unary_rm_r( + SseOpcode::Cvttps2dq, + RegMem::from(dst), + dst, + )); // Set lanes to src - max_signed_int ctx.emit(Inst::xmm_rm_r(SseOpcode::Subps, RegMem::from(tmp2), tmp1)); @@ -1058,7 +1027,7 @@ fn lower_insn_to_regs>( )); // Convert those set of lanes that have the max_signed_int factored out. - ctx.emit(Inst::xmm_rm_r( + ctx.emit(Inst::xmm_unary_rm_r( SseOpcode::Cvttps2dq, RegMem::from(tmp1), tmp1, @@ -1416,7 +1385,7 @@ fn lower_insn_to_regs>( tmp1, )); ctx.emit(Inst::xmm_rm_r(SseOpcode::Minpd, RegMem::from(tmp1), dst)); - ctx.emit(Inst::xmm_rm_r( + ctx.emit(Inst::xmm_unary_rm_r( SseOpcode::Cvttpd2dq, RegMem::from(dst), dst, diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif new file mode 100644 index 000000000000..65d257050d35 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -0,0 +1,133 @@ +test compile precise-output +target x86_64 + +function %f1(i8) -> f32 { +block0(v0: i8): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbl %dil, %ecx +; cvtsi2ss %ecx, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i16) -> f32 { +block0(v0: i16): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movswl %di, %ecx +; cvtsi2ss %ecx, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cvtsi2ss %edi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cvtsi2ss %rdi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f5(i8) -> f64 { +block0(v0: i8): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbl %dil, %ecx +; cvtsi2sd %ecx, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f6(i16) -> f64 { +block0(v0: i16): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movswl %di, %ecx +; cvtsi2sd %ecx, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f7(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cvtsi2sd %edi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f8(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cvtsi2sd %rdi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f9(i32x4) -> f64x2 { +block0(v0: i32x4): + v1 = fcvt_low_from_sint.f64x2 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cvtdq2pd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +