diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 49cec3080eb9..bd25f388ea2c 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -486,16 +486,19 @@ (XmmUninitializedValue (dst WritableXmm)) ;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol - ;; in `rax`. - (ElfTlsGetAddr (symbol ExternalName)) + ;; in `dst`, which is constrained to `rax`. + (ElfTlsGetAddr (symbol ExternalName) + (dst WritableGpr)) ;; A Mach-O TLS symbol access. Returns address of the TLS symbol in - ;; `rax`. - (MachOTlsGetAddr (symbol ExternalName)) + ;; `dst`, which is constrained to `rax`. + (MachOTlsGetAddr (symbol ExternalName) + (dst WritableGpr)) ;; A Coff TLS symbol access. Returns address of the TLS symbol in - ;; `rax`. - (CoffTlsGetAddr (symbol ExternalName)) + ;; `dst`, which is constrained to `rax`. + (CoffTlsGetAddr (symbol ExternalName) + (dst WritableGpr)) ;; An unwind pseudoinstruction describing the state of the machine at ;; this program point. @@ -2275,6 +2278,11 @@ (rule (x64_pmulhw src1 src2) (xmm_rm_r $I16X8 (SseOpcode.Pmulhw) src1 src2)) +;; Helper for creating `pmulhrsw` instructions. +(decl x64_pmulhrsw (Xmm XmmMem) Xmm) +(rule (x64_pmulhrsw src1 src2) + (xmm_rm_r $I16X8 (SseOpcode.Pmulhrsw) src1 src2)) + ;; Helper for creating `pmulhuw` instructions. (decl x64_pmulhuw (Xmm XmmMem) Xmm) (rule (x64_pmulhuw src1 src2) @@ -2683,6 +2691,15 @@ dst)))) dst)) +;; Helper for creating `shufps` instructions. +(decl x64_shufps (Xmm XmmMem u8) Xmm) +(rule (x64_shufps src1 src2 byte) + (xmm_rm_r_imm (SseOpcode.Shufps) + src1 + src2 + byte + (OperandSize.Size32))) + ;; Helper for creating `MInst.XmmUnaryRmR` instructions. 
(decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm) (rule (xmm_unary_rm_r op src) @@ -3733,6 +3750,42 @@ (decl swizzle_zero_mask () VCodeConstant) (extern constructor swizzle_zero_mask swizzle_zero_mask) +;;;; TLS Values ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Helper for emitting ElfTlsGetAddr. +(decl elf_tls_get_addr (ExternalName) Gpr) +(rule (elf_tls_get_addr name) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.ElfTlsGetAddr name dst)))) + dst)) + +;; Helper for emitting MachOTlsGetAddr. +(decl macho_tls_get_addr (ExternalName) Gpr) +(rule (macho_tls_get_addr name) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.MachOTlsGetAddr name dst)))) + dst)) + +;; Helper for emitting CoffTlsGetAddr. +(decl coff_tls_get_addr (ExternalName) Gpr) +(rule (coff_tls_get_addr name) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.CoffTlsGetAddr name dst)))) + dst)) + +;;;; sqmul_round_sat ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl sqmul_round_sat_mask () VCodeConstant) +(extern constructor sqmul_round_sat_mask sqmul_round_sat_mask) + +;;;; uunarrow ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl uunarrow_umax_mask () VCodeConstant) +(extern constructor uunarrow_umax_mask uunarrow_umax_mask) + +(decl uunarrow_uint_mask () VCodeConstant) +(extern constructor uunarrow_uint_mask uunarrow_uint_mask) + ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (convert Gpr InstOutput output_gpr) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index c6b8aad38660..6e6ef44bd58b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2915,7 +2915,10 @@ pub(crate) fn emit( } } - Inst::ElfTlsGetAddr { ref symbol } => { + Inst::ElfTlsGetAddr { ref symbol, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); + 
debug_assert_eq!(dst, regs::rax()); + // N.B.: Must be exactly this byte sequence; the linker requires it, // because it must know how to rewrite the bytes. @@ -2941,7 +2944,10 @@ pub(crate) fn emit( sink.put4(0); // offset } - Inst::MachOTlsGetAddr { ref symbol } => { + Inst::MachOTlsGetAddr { ref symbol, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(dst, regs::rax()); + // movq gv@tlv(%rip), %rdi sink.put1(0x48); // REX.w sink.put1(0x8b); // MOV @@ -2954,7 +2960,10 @@ pub(crate) fn emit( sink.put1(0x17); } - Inst::CoffTlsGetAddr { ref symbol } => { + Inst::CoffTlsGetAddr { ref symbol, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(dst, regs::rax()); + // See: https://gcc.godbolt.org/z/M8or9x6ss // And: https://github.com/bjorn3/rustc_codegen_cranelift/issues/388#issuecomment-532930282 diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index f848e0f9cfec..d0dde7472762 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -95,6 +95,24 @@ impl Inst { let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::Setcc { cc, dst } } + + fn xmm_rm_r_imm( + op: SseOpcode, + src: RegMem, + dst: Writable, + imm: u8, + size: OperandSize, + ) -> Inst { + debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); + Inst::XmmRmRImm { + op, + src1: dst.to_reg(), + src2: src, + dst, + imm, + size, + } + } } #[test] @@ -4738,6 +4756,7 @@ fn test_x64_emit() { insns.push(( Inst::ElfTlsGetAddr { symbol: ExternalName::User(UserExternalNameRef::new(0)), + dst: WritableGpr::from_writable_reg(w_rax).unwrap(), }, "66488D3D00000000666648E800000000", "%rax = elf_tls_get_addr User(userextname0)", @@ -4746,6 +4765,7 @@ fn test_x64_emit() { insns.push(( Inst::MachOTlsGetAddr { symbol: ExternalName::User(UserExternalNameRef::new(0)), + dst: WritableGpr::from_writable_reg(w_rax).unwrap(), 
}, "488B3D00000000FF17", "%rax = macho_tls_get_addr User(userextname0)", @@ -4754,6 +4774,7 @@ fn test_x64_emit() { insns.push(( Inst::CoffTlsGetAddr { symbol: ExternalName::User(UserExternalNameRef::new(0)), + dst: WritableGpr::from_writable_reg(w_rax).unwrap(), }, "8B050000000065488B0C2558000000488B04C1488D8000000000", "%rax = coff_tls_get_addr User(userextname0)", diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 950c635a94a8..9d7f1bd0f4bf 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -263,12 +263,6 @@ impl Inst { Inst::MovRR { size, src, dst } } - pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable, ty: Type) -> Inst { - debug_assert!(dst.to_reg().class() == RegClass::Float); - debug_assert!(ty.is_vector() && ty.bits() == 128); - Inst::XmmLoadConst { src, dst, ty } - } - /// Convenient helper for unary float operations. pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { src.assert_regclass_is(RegClass::Float); @@ -377,24 +371,6 @@ impl Inst { } } - pub(crate) fn xmm_rm_r_imm( - op: SseOpcode, - src: RegMem, - dst: Writable, - imm: u8, - size: OperandSize, - ) -> Inst { - debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - Inst::XmmRmRImm { - op, - src1: dst.to_reg(), - src2: src, - dst, - imm, - size, - } - } - pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { src.assert_regclass_is(RegClass::Int); debug_assert!(dst.to_reg().class() == RegClass::Int); @@ -1544,16 +1520,19 @@ impl PrettyPrint for Inst { Inst::Ud2 { trap_code } => format!("ud2 {}", trap_code), - Inst::ElfTlsGetAddr { ref symbol } => { - format!("%rax = elf_tls_get_addr {:?}", symbol) + Inst::ElfTlsGetAddr { ref symbol, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} = elf_tls_get_addr {:?}", dst, symbol) } - Inst::MachOTlsGetAddr { ref symbol } 
=> { - format!("%rax = macho_tls_get_addr {:?}", symbol) + Inst::MachOTlsGetAddr { ref symbol, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} = macho_tls_get_addr {:?}", dst, symbol) } - Inst::CoffTlsGetAddr { ref symbol } => { - format!("%rax = coff_tls_get_addr {:?}", symbol) + Inst::CoffTlsGetAddr { ref symbol, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} = coff_tls_get_addr {:?}", dst, symbol) } Inst::Unwind { inst } => { @@ -1994,8 +1973,8 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol // No registers are used. } - Inst::ElfTlsGetAddr { .. } | Inst::MachOTlsGetAddr { .. } => { - collector.reg_def(Writable::from_reg(regs::rax())); + Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => { + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); // All caller-saves are clobbered. // // We use the SysV calling convention here because the @@ -2007,12 +1986,12 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_clobbers(clobbers); } - Inst::CoffTlsGetAddr { .. } => { + Inst::CoffTlsGetAddr { dst, .. } => { // We also use the gs register. But that register is not allocatable by the // register allocator, so we don't need to mark it as used here. 
// We use %rax to set the address - collector.reg_def(Writable::from_reg(regs::rax())); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); // We use %rcx as a temporary variable to load the _tls_index collector.reg_def(Writable::from_reg(regs::rcx())); diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index ec1cc3baec0c..1c413f148a13 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3694,3 +3694,66 @@ (lo Reg (value_regs_get regs 0)) (hi Reg (value_regs_get regs 1))) (output_pair lo hi))) + +;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (tls_value (symbol_value_data name _ _))) + (if (tls_model_is_elf_gd)) + (elf_tls_get_addr name)) + +(rule (lower (tls_value (symbol_value_data name _ _))) + (if (tls_model_is_macho)) + (macho_tls_get_addr name)) + +(rule (lower (tls_value (symbol_value_data name _ _))) + (if (tls_model_is_coff)) + (coff_tls_get_addr name)) + +;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (sqmul_round_sat qx @ (value_type $I16X8) qy)) + (let ((src1 Xmm qx) + (src2 Xmm qy) + + (mask Xmm (x64_xmm_load_const $I16X8 (sqmul_round_sat_mask))) + (dst Xmm (x64_pmulhrsw src1 src2)) + (cmp Xmm (x64_pcmpeqw mask dst))) + (x64_pxor dst cmp))) + +;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; TODO: currently we only lower a special case of `uunarrow` needed to support +;; the translation of wasm's i32x4.trunc_sat_f64x2_u_zero operation. 
+;; https://github.com/bytecodealliance/wasmtime/issues/4791 +;; +;; y = i32x4.trunc_sat_f64x2_u_zero(x) is lowered to: +;; MOVAPD xmm_y, xmm_x +;; XORPD xmm_tmp, xmm_tmp +;; MAXPD xmm_y, xmm_tmp +;; MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)] +;; ROUNDPD xmm_y, xmm_y, 0x0B +;; ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)] +;; SHUFPS xmm_y, xmm_tmp, 0x88 +(rule (lower (uunarrow (fcvt_to_uint_sat src @ (value_type $F64X2)) + (vconst (u128_from_constant 0)))) + (let ((src Xmm src) + + ;; MOVAPD xmm_y, xmm_x + ;; XORPD xmm_tmp, xmm_tmp + (zeros Xmm (x64_xorpd src src)) + (dst Xmm (x64_maxpd src zeros)) + + (umax_mask Xmm (x64_xmm_load_const $F64X2 (uunarrow_umax_mask))) + + ;; MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)] + (dst Xmm (x64_minpd dst umax_mask)) + + ;; ROUNDPD xmm_y, xmm_y, 0x0B + (dst Xmm (x64_roundpd dst (RoundImm.RoundZero))) + + ;; ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)] + (uint_mask Xmm (x64_xmm_load_const $F64X2 (uunarrow_uint_mask))) + (dst Xmm (x64_addpd dst uint_mask))) + + ;; SHUFPS xmm_y, xmm_tmp, 0x88 + (x64_shufps dst zeros 0x88))) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f646f51ab3d4..6d1a0bec61bb 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -11,7 +11,7 @@ use crate::isa::{x64::settings as x64_settings, x64::X64Backend, CallConv}; use crate::machinst::lower::*; use crate::machinst::*; use crate::result::CodegenResult; -use crate::settings::{Flags, TlsModel}; +use crate::settings::Flags; use smallvec::SmallVec; use target_lexicon::Triple; @@ -304,33 +304,15 @@ fn lower_insn_to_regs( isa_flags: &x64_settings::Flags, triple: &Triple, ) -> CodegenResult<()> { - let op = ctx.data(insn).opcode(); - - let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) - .map(|i| InsnInput { insn, input: i }) - .collect(); let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn)) .map(|i| InsnOutput { insn, output: i }) 
.collect(); - let ty = if outputs.len() > 0 { - Some(ctx.output_ty(insn, 0)) - } else { - None - }; - if let Ok(()) = isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) { return Ok(()); } - let implemented_in_isle = |ctx: &mut Lower| { - unreachable!( - "implemented in ISLE: inst = `{}`, type = `{:?}`", - ctx.dfg().display_inst(insn), - ty - ) - }; - + let op = ctx.data(insn).opcode(); match op { Opcode::Iconst | Opcode::Bconst @@ -474,151 +456,24 @@ fn lower_insn_to_regs( | Opcode::VallTrue | Opcode::VhighBits | Opcode::Iconcat - | Opcode::Isplit => { - implemented_in_isle(ctx); - } - - Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), - - Opcode::TlsValue => { - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let (name, _, _) = ctx.symbol_value(insn).unwrap(); - let symbol = name.clone(); - - match flags.tls_model() { - TlsModel::ElfGd => { - ctx.emit(Inst::ElfTlsGetAddr { symbol }); - ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); - } - TlsModel::Macho => { - ctx.emit(Inst::MachOTlsGetAddr { symbol }); - ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); - } - TlsModel::Coff => { - ctx.emit(Inst::CoffTlsGetAddr { symbol }); - ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); - } - _ => todo!( - "Unimplemented TLS model in x64 backend: {:?}", - flags.tls_model() - ), - } - } + | Opcode::Isplit + | Opcode::TlsValue + | Opcode::SqmulRoundSat + | Opcode::Uunarrow => { + let ty = if outputs.len() > 0 { + Some(ctx.output_ty(insn, 0)) + } else { + None + }; - Opcode::SqmulRoundSat => { - // Lane-wise saturating rounding multiplication in Q15 format - // Optimal lowering taken from instruction proposal https://github.com/WebAssembly/simd/pull/365 - // y = i16x8.q15mulr_sat_s(a, b) is lowered to: - //MOVDQA xmm_y, xmm_a - //MOVDQA xmm_tmp, wasm_i16x8_splat(0x8000) - //PMULHRSW xmm_y, xmm_b - //PCMPEQW xmm_tmp, xmm_y - //PXOR xmm_y, xmm_tmp - let input_ty = ctx.input_ty(insn, 0); - let src1 = put_input_in_reg(ctx, 
inputs[0]); - let src2 = put_input_in_reg(ctx, inputs[1]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - ctx.emit(Inst::gen_move(dst, src1, input_ty)); - static SAT_MASK: [u8; 16] = [ - 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, - 0x00, 0x80, - ]; - let mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&SAT_MASK)); - let mask = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(mask_const, mask, types::I16X8)); - - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhrsw, RegMem::reg(src2), dst)); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pcmpeqw, - RegMem::reg(dst.to_reg()), - mask, - )); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pxor, - RegMem::reg(mask.to_reg()), - dst, - )); + unreachable!( + "implemented in ISLE: inst = `{}`, type = `{:?}`", + ctx.dfg().display_inst(insn), + ty + ) } - Opcode::Uunarrow => { - if let Some(fcvt_inst) = matches_input(ctx, inputs[0], Opcode::FcvtToUintSat) { - //y = i32x4.trunc_sat_f64x2_u_zero(x) is lowered to: - //MOVAPD xmm_y, xmm_x - //XORPD xmm_tmp, xmm_tmp - //MAXPD xmm_y, xmm_tmp - //MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)] - //ROUNDPD xmm_y, xmm_y, 0x0B - //ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)] - //SHUFPS xmm_y, xmm_xmp, 0x88 - - let fcvt_input = InsnInput { - insn: fcvt_inst, - input: 0, - }; - let input_ty = ctx.input_ty(fcvt_inst, 0); - let output_ty = ctx.output_ty(insn, 0); - let src = put_input_in_reg(ctx, fcvt_input); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - ctx.emit(Inst::gen_move(dst, src, input_ty)); - let tmp1 = ctx.alloc_tmp(output_ty).only_reg().unwrap(); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::from(tmp1), tmp1)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Maxpd, RegMem::from(tmp1), dst)); - - // 4294967295.0 is equivalent to 0x41EFFFFFFFE00000 - static UMAX_MASK: [u8; 16] = [ - 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF, - 0xFF, 0xEF, 0x41, - ]; - 
let umax_const = ctx.use_constant(VCodeConstantData::WellKnown(&UMAX_MASK)); - let umax_mask = ctx.alloc_tmp(types::F64X2).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(umax_const, umax_mask, types::F64X2)); - - //MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)] - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Minpd, - RegMem::from(umax_mask), - dst, - )); - //ROUNDPD xmm_y, xmm_y, 0x0B - ctx.emit(Inst::xmm_rm_r_imm( - SseOpcode::Roundpd, - RegMem::reg(dst.to_reg()), - dst, - RoundImm::RoundZero.encode(), - OperandSize::Size32, - )); - //ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)] - static UINT_MASK: [u8; 16] = [ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x30, 0x43, - ]; - let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK)); - let uint_mask = ctx.alloc_tmp(types::F64X2).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const( - uint_mask_const, - uint_mask, - types::F64X2, - )); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Addpd, - RegMem::from(uint_mask), - dst, - )); - - //SHUFPS xmm_y, xmm_xmp, 0x88 - ctx.emit(Inst::xmm_rm_r_imm( - SseOpcode::Shufps, - RegMem::reg(tmp1.to_reg()), - dst, - 0x88, - OperandSize::Size32, - )); - } else { - println!("Did not match fcvt input!"); - } - } + Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), // Unimplemented opcodes below. 
These are not currently used by Wasm // lowering or other known embeddings, but should be either supported or diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index c9a09d5db214..d76e72f88c5d 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -911,6 +911,39 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { .use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE)) } + #[inline] + fn sqmul_round_sat_mask(&mut self) -> VCodeConstant { + static SAT_MASK: [u8; 16] = [ + 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, + 0x00, 0x80, + ]; + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&SAT_MASK)) + } + + #[inline] + fn uunarrow_umax_mask(&mut self) -> VCodeConstant { + // 4294967295.0 is equivalent to 0x41EFFFFFFFE00000 + static UMAX_MASK: [u8; 16] = [ + 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, + 0xEF, 0x41, + ]; + + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK)) + } + + #[inline] + fn uunarrow_uint_mask(&mut self) -> VCodeConstant { + static UINT_MASK: [u8; 16] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x43, + ]; + + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&UINT_MASK)) + } + fn emit_div_or_rem( &mut self, kind: &DivOrRemKind, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 8a121791c7ed..458c2c1f35c3 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -664,6 +664,24 @@ macro_rules! 
isle_prelude_methods { } } + #[inline] + fn tls_model_is_macho(&mut self) -> Option<()> { + if self.flags.tls_model() == TlsModel::Macho { + Some(()) + } else { + None + } + } + + #[inline] + fn tls_model_is_coff(&mut self) -> Option<()> { + if self.flags.tls_model() == TlsModel::Coff { + Some(()) + } else { + None + } + } + #[inline] fn func_ref_data(&mut self, func_ref: FuncRef) -> (SigRef, ExternalName, RelocDistance) { let funcdata = &self.lower_ctx.dfg().ext_funcs[func_ref]; diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index ab0ca58ba3ab..d622de334a3f 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -790,6 +790,12 @@ (decl pure tls_model_is_elf_gd () Unit) (extern constructor tls_model_is_elf_gd tls_model_is_elf_gd) +(decl pure tls_model_is_macho () Unit) +(extern constructor tls_model_is_macho tls_model_is_macho) + +(decl pure tls_model_is_coff () Unit) +(extern constructor tls_model_is_coff tls_model_is_coff) + ;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Accessor for `FuncRef`. 
diff --git a/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif b/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif new file mode 100644 index 000000000000..a1e220fb66aa --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif @@ -0,0 +1,19 @@ +test compile precise-output +target x86_64 + +function %f1(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = sqmul_round_sat v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm7 +; pmulhrsw %xmm0, %xmm1, %xmm0 +; pcmpeqw %xmm7, %xmm0, %xmm7 +; pxor %xmm0, %xmm7, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/uunarrow.clif b/cranelift/filetests/filetests/isa/x64/uunarrow.clif new file mode 100644 index 000000000000..9893a51224d2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/uunarrow.clif @@ -0,0 +1,26 @@ +test compile precise-output +target x86_64 + +function %f1(f64x2) -> i32x4 { +block0(v0: f64x2): + v1 = fcvt_to_uint_sat.i64x2 v0 + v2 = vconst.i64x2 [0 0] + v3 = uunarrow v1, v2 + return v3 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorpd %xmm3, %xmm3, %xmm3 +; maxpd %xmm0, %xmm3, %xmm0 +; load_const VCodeConstant(0), %xmm7 +; minpd %xmm0, %xmm7, %xmm0 +; roundpd $3, %xmm0, %xmm0 +; load_const VCodeConstant(1), %xmm13 +; addpd %xmm0, %xmm13, %xmm0 +; shufps $136, %xmm0, %xmm3, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +