From d94f1805ff2eab9df228ca9ffda941b6209ecfd4 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 Aug 2020 15:54:21 -0700 Subject: [PATCH] x64 new backend: port ABI implementation to shared infrastructure with AArch64. Previously, in #2128, we factored out a common "vanilla 64-bit ABI" implementation from the AArch64 ABI code, with the idea that this should be largely compatible with x64. This PR alters the new x64 backend to make use of the shared infrastructure, removing the duplication that existed previously. The generated code is nearly (not exactly) the same; the only difference relates to how the clobber-save region is padded in the prologue. This also changes some register allocations in the aarch64 code because call support in the shared ABI infra now passes a temp vreg in, rather than requiring use of a fixed, non-allocable temp; tests have been updated, and the runtime behavior is unchanged. --- cranelift/codegen/src/isa/aarch64/abi.rs | 57 +- cranelift/codegen/src/isa/x64/abi.rs | 1706 ++++++----------- cranelift/codegen/src/machinst/abi_impl.rs | 87 +- .../filetests/vcode/aarch64/call.clif | 16 +- .../filetests/vcode/aarch64/reftypes.clif | 4 +- .../filetests/vcode/aarch64/stack-limit.clif | 8 +- 6 files changed, 683 insertions(+), 1195 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 5f918dfe59c2..a035f8cb5c58 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -13,16 +13,15 @@ use alloc::boxed::Box; use alloc::vec::Vec; use regalloc::{RealReg, Reg, RegClass, Set, Writable}; use smallvec::SmallVec; -use std::convert::TryFrom; // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because // these ABIs are very similar. /// Support for the AArch64 ABI from the callee side (within a function body). -pub type AArch64ABIBody = ABIBodyImpl; +pub(crate) type AArch64ABIBody = ABIBodyImpl; /// Support for the AArch64 ABI from the caller side (at a callsite). -pub type AArch64ABICall = ABICallImpl; +pub(crate) type AArch64ABICall = ABICallImpl; // Spidermonkey specific ABI convention. @@ -105,7 +104,7 @@ impl Into for StackAMode { /// AArch64-specific ABI behavior. This struct just serves as an implementation /// point for the trait; it is never actually instantiated. 
-pub struct AArch64MachineImpl; +pub(crate) struct AArch64MachineImpl; impl ABIMachineImpl for AArch64MachineImpl { type I = Inst; @@ -276,7 +275,8 @@ impl ABIMachineImpl for AArch64MachineImpl { Inst::Ret } - fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Inst; 4]> { + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> { + let imm = imm as u64; let mut insts = SmallVec::new(); if let Some(imm12) = Imm12::maybe_from_u64(imm) { insts.push(Inst::AluRRImm12 { @@ -287,6 +287,7 @@ impl ABIMachineImpl for AArch64MachineImpl { }); } else { let scratch2 = writable_tmp2_reg(); + assert_ne!(scratch2.to_reg(), from_reg); insts.extend(Inst::load_constant(scratch2, imm.into())); insts.push(Inst::AluRRRExtend { alu_op: ALUOp::Add64, @@ -325,29 +326,29 @@ impl ABIMachineImpl for AArch64MachineImpl { Inst::LoadAddr { rd: into_reg, mem } } - fn get_fixed_tmp_reg() -> Reg { + fn get_stacklimit_reg() -> Reg { spilltmp_reg() } - fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Inst { - let mem = AMode::RegOffset(base, offset, ty); + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); Inst::gen_load(into_reg, mem, ty) } - fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Inst { - let mem = AMode::RegOffset(base, offset, ty); + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); Inst::gen_store(mem, from_reg, ty) } - fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Inst; 2]> { + fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> { if amount == 0 { return SmallVec::new(); } let (amount, is_sub) = if amount > 0 { - (u64::try_from(amount).unwrap(), false) + (amount as u64, false) } else { - (u64::try_from(-amount).unwrap(), true) + (-amount as u64, true) }; let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; @@ -380,8 +381,10 @@ impl ABIMachineImpl for AArch64MachineImpl { ret } - fn gen_nominal_sp_adj(offset: i64) -> Inst { - Inst::VirtualSPOffsetAdj { offset } + fn gen_nominal_sp_adj(offset: i32) -> Inst { + Inst::VirtualSPOffsetAdj { + offset: offset as i64, + } } fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> { @@ -544,11 +547,12 @@ impl ABIMachineImpl for AArch64MachineImpl { defs: Vec>, loc: SourceLoc, opcode: ir::Opcode, - ) -> SmallVec<[(/* is_safepoint = */ bool, Inst); 2]> { + tmp: Writable, + ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { let mut insts = SmallVec::new(); match &dest { &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( - true, + InstIsSafepoint::Yes, Inst::Call { info: Box::new(CallInfo { dest: name.clone(), @@ -561,19 +565,19 @@ impl ABIMachineImpl for AArch64MachineImpl { )), &CallDest::ExtName(ref name, RelocDistance::Far) => { insts.push(( - false, + InstIsSafepoint::No, Inst::LoadExtName { - rd: writable_spilltmp_reg(), + rd: tmp, name: Box::new(name.clone()), offset: 0, srcloc: loc, }, )); insts.push(( - true, + InstIsSafepoint::Yes, Inst::CallInd { info: Box::new(CallIndInfo { - rn: spilltmp_reg(), + rn: tmp.to_reg(), uses, defs, loc, @@ -583,7 +587,7 @@ impl ABIMachineImpl for AArch64MachineImpl { )); } &CallDest::Reg(reg) => insts.push(( - true, + InstIsSafepoint::Yes, Inst::CallInd { info: Box::new(CallIndInfo { rn: *reg, @@ -599,7 +603,7 @@ impl ABIMachineImpl for AArch64MachineImpl { insts } - fn get_spillslot_size(rc: RegClass, ty: Type) -> u32 { + fn 
get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { (RegClass::I64, _) => 1, @@ -689,9 +693,10 @@ fn get_callee_saves( } } } - // Sort registers for deterministic code output. - int_saves.sort_by_key(|r| r.to_reg().get_index()); - vec_saves.sort_by_key(|r| r.to_reg().get_index()); + // Sort registers for deterministic code output. We can do an unstable sort because the + // registers will be unique (there are no dups). + int_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); + vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); (int_saves, vec_saves) } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4bc22357fd02..4fe12866ce61 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,73 +1,602 @@ //! Implementation of the standard x64 ABI. -use crate::binemit::StackMap; -use crate::ir::{self, types, ArgumentExtension, StackSlot, Type}; +use crate::ir::types::*; +use crate::ir::{self, types, SourceLoc, TrapCode, Type}; +use crate::isa; use crate::isa::{x64::inst::*, CallConv}; +use crate::machinst::abi_impl::*; use crate::machinst::*; use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use log::trace; -use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; -use std::mem; +use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use smallvec::{smallvec, SmallVec}; +use std::convert::TryFrom; /// This is the limit for the size of argument and return-value areas on the /// stack. We place a reasonable limit here to avoid integer overflow issues /// with 32-bit arithmetic: for now, 128 MB. static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; -#[derive(Clone, Debug)] -enum ABIArg { - Reg(RealReg, ir::Type, ir::ArgumentExtension), - Stack(i64, ir::Type, ir::ArgumentExtension), +/// Try to fill a Baldrdash register, returning it if it was found. +fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { + if call_conv.extends_baldrdash() { + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext => { + // This is SpiderMonkey's `WasmTlsReg`. + Some(ABIArg::Reg( + regs::r14().to_real_reg(), + types::I64, + param.extension, + )) + } + &ir::ArgumentPurpose::SignatureId => { + // This is SpiderMonkey's `WasmTableCallSigReg`. + Some(ABIArg::Reg( + regs::r10().to_real_reg(), + types::I64, + param.extension, + )) + } + _ => None, + } + } else { + None + } } -/// X64 ABI information shared between body (callee) and caller. -struct ABISig { - /// Argument locations (regs or stack slots). Stack offsets are relative to - /// SP on entry to function. - args: Vec, - /// Return-value locations. Stack offsets are relative to the return-area - /// pointer. - rets: Vec, - /// Space on stack used to store arguments. - stack_arg_space: i64, - /// Space on stack used to store return values. - stack_ret_space: i64, - /// Index in `args` of the stack-return-value-area argument. - stack_ret_arg: Option, - /// Calling convention used. - call_conv: CallConv, -} +/// Support for the x64 ABI from the callee side (within a function body). +pub(crate) type X64ABIBody = ABIBodyImpl; -pub(crate) struct X64ABIBody { - sig: ABISig, +/// Support for the x64 ABI from the caller side (at a callsite). +pub(crate) type X64ABICall = ABICallImpl; - /// Offsets to each stack slot. - stack_slots: Vec, +/// Implementation of ABI primitives for x64. 
+pub(crate) struct X64ABIMachineImpl; - /// Total stack size of all the stack slots. - stack_slots_size: usize, +impl ABIMachineImpl for X64ABIMachineImpl { + type I = Inst; - /// The register holding the return-area pointer, if needed. - ret_area_ptr: Option>, + fn compute_arg_locs( + call_conv: isa::CallConv, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + ) -> CodegenResult<(Vec, i64, Option)> { + let is_baldrdash = call_conv.extends_baldrdash(); + + let mut next_gpr = 0; + let mut next_vreg = 0; + let mut next_stack: u64 = 0; + let mut ret = vec![]; + + for i in 0..params.len() { + // Process returns backward, according to the SpiderMonkey ABI (which we + // adopt internally if `is_baldrdash` is set). + let param = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => ¶ms[i], + (ArgsOrRets::Rets, false) => ¶ms[i], + (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], + }; - /// Clobbered registers, as indicated by regalloc. - clobbered: Set>, + // Validate "purpose". + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext + | &ir::ArgumentPurpose::Normal + | &ir::ArgumentPurpose::StackLimit + | &ir::ArgumentPurpose::SignatureId => {} + _ => panic!( + "Unsupported argument purpose {:?} in signature: {:?}", + param.purpose, params + ), + } - /// Total number of spill slots, as indicated by regalloc. - num_spill_slots: Option, + let intreg = in_int_reg(param.value_type); + let vecreg = in_vec_reg(param.value_type); + debug_assert!(intreg || vecreg); + debug_assert!(!(intreg && vecreg)); - /// Calculated while creating the prologue, and used when creating the epilogue. Amount by - /// which RSP is adjusted downwards to allocate the spill area. - frame_size_bytes: Option, + let (next_reg, candidate) = if intreg { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), + ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i), + }; + debug_assert!(candidate + .map(|r| r.get_class() == RegClass::I64) + .unwrap_or(true)); + (&mut next_gpr, candidate) + } else { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), + ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i), + }; + debug_assert!(candidate + .map(|r| r.get_class() == RegClass::V128) + .unwrap_or(true)); + (&mut next_vreg, candidate) + }; + + if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { + assert!(intreg); + ret.push(param); + } else if let Some(reg) = candidate { + ret.push(ABIArg::Reg( + reg.to_real_reg(), + param.value_type, + param.extension, + )); + *next_reg += 1; + } else { + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (param.value_type.bits() / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. 
+ debug_assert!(size.is_power_of_two()); + next_stack = (next_stack + size - 1) & !(size - 1); + ret.push(ABIArg::Stack( + next_stack as i64, + param.value_type, + param.extension, + )); + next_stack += size; + } + } + + if args_or_rets == ArgsOrRets::Rets && is_baldrdash { + ret.reverse(); + } - call_conv: CallConv, + let extra_arg = if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { + ret.push(ABIArg::Reg( + reg.to_real_reg(), + types::I64, + ir::ArgumentExtension::None, + )); + } else { + ret.push(ABIArg::Stack( + next_stack as i64, + types::I64, + ir::ArgumentExtension::None, + )); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + + next_stack = (next_stack + 15) & !15; + + // To avoid overflow issues, limit the arg/return size to something reasonable. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) + } + + fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 { + if call_conv.extends_baldrdash() { + let num_words = flags.baldrdash_prologue_words() as i64; + debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); + num_words * 8 + } else { + 16 // frame pointer + return address. + } + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I { + let (is_int, ext_mode) = match ty { + types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), + types::B16 | types::I16 => (true, Some(ExtMode::WQ)), + types::B32 | types::I32 => (true, Some(ExtMode::LQ)), + types::B64 | types::I64 | types::R64 => (true, None), + types::F32 | types::F64 => (false, None), + _ => panic!("load_stack({})", ty), + }; + + let mem = SyntheticAmode::from(mem); + + if is_int { + match ext_mode { + Some(ext_mode) => Inst::movsx_rm_r( + ext_mode, + RegMem::mem(mem), + into_reg, + /* infallible load */ None, + ), + None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), + } + } else { + let sse_op = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov( + sse_op, + RegMem::mem(mem), + into_reg, + None, /* infallible */ + ) + } + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I { + let (is_int, size) = match ty { + types::B1 | types::B8 | types::I8 => (true, 1), + types::B16 | types::I16 => (true, 2), + types::B32 | types::I32 => (true, 4), + types::B64 | types::I64 | types::R64 => (true, 8), + types::F32 => (false, 4), + types::F64 => (false, 8), + _ => unimplemented!("store_stack({})", ty), + }; + + let mem = SyntheticAmode::from(mem); + + if is_int { + Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) + } else { + let sse_op = match size { + 4 => SseOpcode::Movss, + 8 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self::I { + Inst::gen_move(to_reg, from_reg, ty) + } + + /// Generate an integer-extend operation. 
+ fn gen_extend( + to_reg: Writable, + from_reg: Reg, + is_signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Self::I { + let ext_mode = match from_bits { + 1 | 8 => ExtMode::BQ, + 16 => ExtMode::WQ, + 32 => ExtMode::LQ, + _ => panic!("Bad extension: {} bits to {} bits", from_bits, to_bits), + }; + if is_signed { + Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg, None) + } else { + Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg, None) + } + } + + fn gen_ret() -> Self::I { + Inst::Ret + } + + fn gen_epilogue_placeholder() -> Self::I { + Inst::EpiloguePlaceholder + } + + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]> { + let mut ret = SmallVec::new(); + if from_reg != into_reg.to_reg() { + ret.push(Inst::gen_move(into_reg, from_reg, I64)); + } + ret.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::imm(imm), + into_reg, + )); + ret + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> { + smallvec![ + Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg), + Inst::TrapIf { + // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp. + cc: CC::NBE, + srcloc: SourceLoc::default(), + trap_code: TrapCode::StackOverflow, + }, + ] + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Self::I { + let mem: SyntheticAmode = mem.into(); + Inst::lea(mem, into_reg) + } + + fn get_stacklimit_reg() -> Reg { + // As per comment on trait definition, we must return a caller-save + // register here. + regs::r10() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I { + assert_eq!(ty, I64); // only ever used for I64s. + let simm32 = offset as u32; + let mem = Amode::imm_reg(simm32, base); + Inst::mov64_m_r(mem, into_reg, None) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I { + assert_eq!(ty, I64); // only ever used for I64s. + let simm32 = offset as u32; + let mem = Amode::imm_reg(simm32, base); + Inst::mov_r_m(/* bytes = */ 8, from_reg, mem, None) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]> { + let (alu_op, amount) = if amount >= 0 { + (AluRmiROpcode::Add, amount) + } else { + (AluRmiROpcode::Sub, -amount) + }; + + let amount = amount as u32; + + smallvec![Inst::alu_rmi_r( + true, + alu_op, + RegMemImm::imm(amount), + Writable::from_reg(regs::rsp()), + )] + } + + fn gen_nominal_sp_adj(offset: i32) -> Self::I { + Inst::VirtualSPOffsetAdj { + offset: offset as i64, + } + } - /// The settings controlling this function's compilation. - flags: settings::Flags, + fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> { + let r_rsp = regs::rsp(); + let r_rbp = regs::rbp(); + let w_rbp = Writable::from_reg(r_rbp); + let mut insts = SmallVec::new(); + // RSP before the call will be 0 % 16. So here, it is 8 % 16. + insts.push(Inst::push64(RegMemImm::reg(r_rbp))); + // RSP is now 0 % 16 + insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); + insts + } + + fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + insts.push(Inst::mov_r_r( + true, + regs::rbp(), + Writable::from_reg(regs::rsp()), + )); + insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); + insts + } + + fn gen_clobber_save( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> (u64, SmallVec<[Self::I; 16]>) { + let mut insts = SmallVec::new(); + // Find all clobbered registers that are callee-save. 
These are only I64 + // registers (all XMM registers are caller-save) so we can compute the + // total size of the needed stack space easily. + let clobbered = get_callee_saves(&call_conv, clobbers); + let stack_size = 8 * clobbered.len() as u32; + // Align to 16 bytes. + let stack_size = (stack_size + 15) & !15; + // Adjust the stack pointer downward with one `sub rsp, IMM` + // instruction. + if stack_size > 0 { + insts.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Sub, + RegMemImm::imm(stack_size), + Writable::from_reg(regs::rsp()), + )); + } + // Store each clobbered register in order at offsets from RSP. + let mut cur_offset = 0; + for reg in &clobbered { + let r_reg = reg.to_reg(); + match r_reg.get_class() { + RegClass::I64 => { + insts.push(Inst::mov_r_m( + /* bytes = */ 8, + r_reg.to_reg(), + Amode::imm_reg(cur_offset, regs::rsp()), + None, + )); + cur_offset += 8; + } + // No XMM regs are callee-save, so we do not need to implement + // this. + _ => unimplemented!(), + } + } + + (stack_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> SmallVec<[Self::I; 16]> { + let mut insts = SmallVec::new(); + + let clobbered = get_callee_saves(&call_conv, clobbers); + let stack_size = 8 * clobbered.len() as u32; + let stack_size = (stack_size + 15) & !15; + + // Restore regs by loading from offsets of RSP. + let mut cur_offset = 0; + for reg in &clobbered { + let rreg = reg.to_reg(); + match rreg.get_class() { + RegClass::I64 => { + insts.push(Inst::mov64_m_r( + Amode::imm_reg(cur_offset, regs::rsp()), + Writable::from_reg(rreg.to_reg()), + None, + )); + cur_offset += 8; + } + _ => unimplemented!(), + } + } + // Adjust RSP back upward. + if stack_size > 0 { + insts.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::imm(stack_size), + Writable::from_reg(regs::rsp()), + )); + } + + insts + } + + /// Generate a call instruction/sequence. + fn gen_call( + dest: &CallDest, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: ir::Opcode, + tmp: Writable, + ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> { + let mut insts = SmallVec::new(); + match dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => { + insts.push(( + InstIsSafepoint::Yes, + Inst::call_known(name.clone(), uses, defs, loc, opcode), + )); + } + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(( + InstIsSafepoint::No, + Inst::LoadExtName { + dst: tmp, + name: Box::new(name.clone()), + offset: 0, + srcloc: loc, + }, + )); + insts.push(( + InstIsSafepoint::Yes, + Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, loc, opcode), + )); + } + &CallDest::Reg(reg) => { + insts.push(( + InstIsSafepoint::Yes, + Inst::call_unknown(RegMem::reg(reg), uses, defs, loc, opcode), + )); + } + } + insts + } + + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + // We allocate in terms of 8-byte slots. + match (rc, ty) { + (RegClass::I64, _) => 1, + (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, + (RegClass::V128, _) => 2, + _ => panic!("Unexpected register class!"), + } + } + + fn get_virtual_sp_offset_from_state(s: &::State) -> i64 { + s.virtual_sp_offset + } + + fn get_nominal_sp_to_fp(s: &::State) -> i64 { + s.nominal_sp_to_fp + } + + fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + let mut caller_saved = vec![ + // Systemv calling convention: + // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). 
+ Writable::from_reg(regs::rsi()), + Writable::from_reg(regs::rdi()), + Writable::from_reg(regs::rax()), + Writable::from_reg(regs::rcx()), + Writable::from_reg(regs::rdx()), + Writable::from_reg(regs::r8()), + Writable::from_reg(regs::r9()), + Writable::from_reg(regs::r10()), + Writable::from_reg(regs::r11()), + // - XMM: all the registers! + Writable::from_reg(regs::xmm0()), + Writable::from_reg(regs::xmm1()), + Writable::from_reg(regs::xmm2()), + Writable::from_reg(regs::xmm3()), + Writable::from_reg(regs::xmm4()), + Writable::from_reg(regs::xmm5()), + Writable::from_reg(regs::xmm6()), + Writable::from_reg(regs::xmm7()), + Writable::from_reg(regs::xmm8()), + Writable::from_reg(regs::xmm9()), + Writable::from_reg(regs::xmm10()), + Writable::from_reg(regs::xmm11()), + Writable::from_reg(regs::xmm12()), + Writable::from_reg(regs::xmm13()), + Writable::from_reg(regs::xmm14()), + Writable::from_reg(regs::xmm15()), + ]; + + if call_conv.extends_baldrdash() { + caller_saved.push(Writable::from_reg(regs::r12())); + caller_saved.push(Writable::from_reg(regs::r13())); + // Not r14; implicitly preserved in the entry. + caller_saved.push(Writable::from_reg(regs::r15())); + caller_saved.push(Writable::from_reg(regs::rbx())); + } + + caller_saved + } +} + +impl From for SyntheticAmode { + fn from(amode: StackAMode) -> Self { + match amode { + StackAMode::FPOffset(off, _ty) => { + let off = i32::try_from(off).expect("Offset in FPOffset is greater than 2GB"); + let simm32 = off as u32; + SyntheticAmode::Real(Amode::ImmReg { + simm32, + base: regs::rbp(), + }) + } + StackAMode::NominalSPOffset(off, _ty) => { + let off = + i32::try_from(off).expect("Offset in NominalSPOffset is greater than 2GB"); + let simm32 = off as u32; + SyntheticAmode::nominal_sp_offset(simm32) + } + StackAMode::SPOffset(off, _ty) => { + let off = i32::try_from(off).expect("Offset in SPOffset is greater than 2GB"); + let simm32 = off as u32; + SyntheticAmode::Real(Amode::ImmReg { + simm32, + base: regs::rsp(), + }) + } + } + } } fn in_int_reg(ty: types::Type) -> bool { @@ -202,1095 +731,26 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool { } } -fn get_callee_saves(call_conv: &CallConv, regs: Vec>) -> Vec> { - match call_conv { +fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec> { + let mut regs: Vec> = match call_conv { CallConv::BaldrdashSystemV => regs - .into_iter() + .iter() + .cloned() .filter(|r| is_callee_save_baldrdash(r.to_reg())) .collect(), CallConv::BaldrdashWindows => { todo!("baldrdash windows"); } CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs - .into_iter() + .iter() + .cloned() .filter(|r| is_callee_save_systemv(r.to_reg())) .collect(), CallConv::WindowsFastcall => todo!("windows fastcall"), CallConv::Probestack => todo!("probestack?"), - } -} - -impl X64ABIBody { - /// Create a new body ABI instance. - pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { - let sig = ABISig::from_func_sig(&f.signature)?; - - let call_conv = f.signature.call_conv; - debug_assert!( - call_conv == CallConv::SystemV || call_conv.extends_baldrdash(), - "unsupported or unimplemented calling convention {}", - call_conv - ); - - // Compute stackslot locations and total stackslot size. 
- let mut stack_offset: usize = 0; - let mut stack_slots = vec![]; - for (stackslot, data) in f.stack_slots.iter() { - let off = stack_offset; - stack_offset += data.size as usize; - stack_offset = (stack_offset + 7) & !7; - debug_assert_eq!(stackslot.as_u32() as usize, stack_slots.len()); - stack_slots.push(off); - } - - Ok(Self { - sig, - stack_slots, - stack_slots_size: stack_offset, - ret_area_ptr: None, - clobbered: Set::empty(), - num_spill_slots: None, - frame_size_bytes: None, - call_conv: f.signature.call_conv.clone(), - flags, - }) - } - - /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return - /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). - fn fp_to_arg_offset(&self) -> i64 { - if self.call_conv.extends_baldrdash() { - let num_words = self.flags.baldrdash_prologue_words() as i64; - debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); - num_words * 8 - } else { - 16 // frame pointer + return address. - } - } -} - -impl ABIBody for X64ABIBody { - type I = Inst; - - fn temp_needed(&self) -> bool { - self.sig.stack_ret_arg.is_some() - } - - fn init(&mut self, maybe_tmp: Option>) { - if self.sig.stack_ret_arg.is_some() { - assert!(maybe_tmp.is_some()); - self.ret_area_ptr = maybe_tmp; - } - } - - fn flags(&self) -> &settings::Flags { - &self.flags - } - - fn num_args(&self) -> usize { - self.sig.args.len() - } - fn num_retvals(&self) -> usize { - self.sig.rets.len() - } - fn num_stackslots(&self) -> usize { - self.stack_slots.len() - } - - fn liveins(&self) -> Set { - let mut set: Set = Set::empty(); - for arg in &self.sig.args { - if let &ABIArg::Reg(r, ..) = arg { - set.insert(r); - } - } - set - } - - fn liveouts(&self) -> Set { - let mut set: Set = Set::empty(); - for ret in &self.sig.rets { - if let &ABIArg::Reg(r, ..) 
= ret { - set.insert(r); - } - } - set - } - - fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable) -> Inst { - match &self.sig.args[idx] { - ABIArg::Reg(from_reg, ty, _) => Inst::gen_move(to_reg, from_reg.to_reg(), *ty), - &ABIArg::Stack(off, ty, _) => { - assert!( - self.fp_to_arg_offset() + off <= u32::max_value() as i64, - "large offset nyi" - ); - load_stack( - Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()), - to_reg, - ty, - ) - } - } - } - - fn gen_retval_area_setup(&self) -> Option { - if let Some(i) = self.sig.stack_ret_arg { - let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); - trace!( - "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", - inst, - self.ret_area_ptr.unwrap().to_reg() - ); - Some(inst) - } else { - trace!("gen_retval_area_setup: not needed"); - None - } - } - - fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable) -> Vec { - let mut ret = Vec::new(); - match &self.sig.rets[idx] { - &ABIArg::Reg(r, ty, ext) => { - let from_bits = ty.bits() as u8; - let ext_mode = match from_bits { - 1 | 8 => Some(ExtMode::BQ), - 16 => Some(ExtMode::WQ), - 32 => Some(ExtMode::LQ), - 64 | 128 => None, - _ => unreachable!(), - }; - - let dest_reg = Writable::from_reg(r.to_reg()); - match (ext, ext_mode) { - (ArgumentExtension::Uext, Some(ext_mode)) => { - ret.push(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - dest_reg, - /* infallible load */ None, - )); - } - (ArgumentExtension::Sext, Some(ext_mode)) => { - ret.push(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - dest_reg, - /* infallible load */ None, - )); - } - _ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)), - }; - } - - &ABIArg::Stack(off, ty, ext) => { - let from_bits = ty.bits() as u8; - let ext_mode = match from_bits { - 1 | 8 => Some(ExtMode::BQ), - 16 => Some(ExtMode::WQ), - 32 => Some(ExtMode::LQ), - 64 => None, - _ => unreachable!(), - }; - - // Trash the from_reg; it should be its last use. 
- match (ext, ext_mode) { - (ArgumentExtension::Uext, Some(ext_mode)) => { - ret.push(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - from_reg, - /* infallible load */ None, - )); - } - (ArgumentExtension::Sext, Some(ext_mode)) => { - ret.push(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - from_reg, - /* infallible load */ None, - )); - } - _ => {} - }; - - assert!( - off < u32::max_value() as i64, - "large stack return offset nyi" - ); - - let mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg()); - ret.push(store_stack(mem, from_reg.to_reg(), ty)) - } - } - - ret - } - - fn gen_ret(&self) -> Inst { - Inst::ret() - } - - fn gen_epilogue_placeholder(&self) -> Inst { - Inst::epilogue_placeholder() - } - - fn set_num_spillslots(&mut self, slots: usize) { - self.num_spill_slots = Some(slots); - } - - fn set_clobbered(&mut self, clobbered: Set>) { - self.clobbered = clobbered; - } - - fn stackslot_addr(&self, slot: StackSlot, offset: u32, dst: Writable) -> Inst { - let stack_off = self.stack_slots[slot.as_u32() as usize] as i64; - let sp_off: i64 = stack_off + (offset as i64); - Inst::lea(SyntheticAmode::nominal_sp_offset(sp_off as u32), dst) - } - - fn load_stackslot( - &self, - _slot: StackSlot, - _offset: u32, - _ty: Type, - _into_reg: Writable, - ) -> Inst { - unimplemented!("load_stackslot") - } - - fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst { - unimplemented!("store_stackslot") - } - - fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stack_slots_size as i64 + spill_off; - debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); - trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - load_stack( - SyntheticAmode::nominal_sp_offset(sp_off as u32), - into_reg, - ty, - ) - } - - fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stack_slots_size as i64 + spill_off; - debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); - trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - store_stack( - SyntheticAmode::nominal_sp_offset(sp_off as u32), - from_reg, - ty, - ) - } - - fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap { - assert!(state.virtual_sp_offset >= 0); - trace!( - "spillslots_to_stack_map: slots = {:?}, state = {:?}", - slots, - state - ); - let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32; - let map_words = (map_size + 7) / 8; - let mut bits = std::iter::repeat(false) - .take(map_words as usize) - .collect::>(); - - let first_spillslot_word = (self.stack_slots_size + state.virtual_sp_offset as usize) / 8; - for &slot in slots { - let slot = slot.get() as usize; - bits[first_spillslot_word + slot] = true; - } - - StackMap::from_slice(&bits[..]) - } - - fn gen_prologue(&mut self) -> Vec { - let r_rsp = regs::rsp(); - - let mut insts = vec![]; - - // Baldrdash generates its own prologue sequence, so we don't have to. 
- if !self.call_conv.extends_baldrdash() { - let r_rbp = regs::rbp(); - let w_rbp = Writable::from_reg(r_rbp); - - // The "traditional" pre-preamble - // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::push64(RegMemImm::reg(r_rbp))); - // RSP is now 0 % 16 - insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); - } - - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - let callee_saved_used: usize = clobbered - .iter() - .map(|reg| match reg.to_reg().get_class() { - RegClass::I64 => 8, - _ => todo!(), - }) - .sum(); - - let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap(); - if self.call_conv.extends_baldrdash() { - // Baldrdash expects the stack to take at least the number of words set in - // baldrdash_prologue_words; count them here. - debug_assert!( - !self.flags.enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8; - } - - // Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body. - let padding = (16 - ((total_stacksize + callee_saved_used) % 16)) & 15; - let frame_size = total_stacksize + padding; - debug_assert!( - frame_size <= u32::max_value() as usize, - "gen_prologue(x86): total_stacksize >= 2G" - ); - debug_assert_eq!((frame_size + callee_saved_used) % 16, 0, "misaligned stack"); - - if !self.call_conv.extends_baldrdash() { - // Explicitly allocate the frame. - let w_rsp = Writable::from_reg(r_rsp); - if frame_size > 0 { - insts.push(Inst::alu_rmi_r( - true, - AluRmiROpcode::Sub, - RegMemImm::imm(frame_size as u32), - w_rsp, - )); - } - } - - // Save callee saved registers that we trash. Keep track of how much space we've used, so - // as to know what we have to do to get the base of the spill area 0 % 16. - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - for reg in clobbered { - let r_reg = reg.to_reg(); - match r_reg.get_class() { - RegClass::I64 => { - insts.push(Inst::push64(RegMemImm::reg(r_reg.to_reg()))); - } - _ => unimplemented!(), - } - } - - if callee_saved_used > 0 { - insts.push(Inst::VirtualSPOffsetAdj { - offset: callee_saved_used as i64, - }); - } - - // Stash this value. We'll need it for the epilogue. - debug_assert!(self.frame_size_bytes.is_none()); - self.frame_size_bytes = Some(frame_size); - - insts - } - - fn gen_epilogue(&self) -> Vec { - let mut insts = vec![]; - - // Undo what we did in the prologue. - - // Restore regs. - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - for wreg in clobbered.into_iter().rev() { - let rreg = wreg.to_reg(); - match rreg.get_class() { - RegClass::I64 => { - // TODO: make these conversion sequences less cumbersome. - insts.push(Inst::pop64(Writable::from_reg(rreg.to_reg()))); - } - _ => unimplemented!(), - } - } - - // No need to adjust the virtual sp offset here: - // - this would create issues when there's a return in the middle of a function, - // - and nothing in this sequence may try to access stack slots from the nominal SP. - - // Clear the spill area and the 16-alignment padding below it. - if !self.call_conv.extends_baldrdash() { - let frame_size = self.frame_size_bytes.unwrap(); - if frame_size > 0 { - let r_rsp = regs::rsp(); - let w_rsp = Writable::from_reg(r_rsp); - insts.push(Inst::alu_rmi_r( - true, - AluRmiROpcode::Add, - RegMemImm::imm(frame_size as u32), - w_rsp, - )); - } - } - - // Baldrdash generates its own preamble. 
- if !self.call_conv.extends_baldrdash() { - // Undo the "traditional" pre-preamble - // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); - insts.push(Inst::ret()); - } - - insts - } - - fn frame_size(&self) -> u32 { - self.frame_size_bytes - .expect("frame size not computed before prologue generation") as u32 - } - - fn stack_args_size(&self) -> u32 { - unimplemented!("I need to be computed!") - } - - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, - (RegClass::V128, _) => 2, - _ => panic!("Unexpected register class!"), - } - } - - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); - self.store_spillslot(to_slot, ty, from_reg.to_reg()) - } - - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); - self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) - } -} - -/// Return a type either from an optional type hint, or if not, from the default -/// type associated with the given register's class. This is used to generate -/// loads/spills appropriately given the type of value loaded/stored (which may -/// be narrower than the spillslot). We usually have the type because the -/// regalloc usually provides the vreg being spilled/reloaded, and we know every -/// vreg's type. However, the regalloc *can* request a spill/reload without an -/// associated vreg when needed to satisfy a safepoint (which requires all -/// ref-typed values, even those in real registers in the original vcode, to be -/// in spillslots). -fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { - match (ty, r.get_class()) { - // If the type is provided - (Some(t), _) => t, - // If no type is provided, this should be a register spill for a - // safepoint, so we only expect I64 (integer) registers. - (None, RegClass::I64) => types::I64, - _ => panic!("Unexpected register class!"), - } -} - -fn get_caller_saves(call_conv: CallConv) -> Vec> { - let mut caller_saved = Vec::new(); - - // Systemv calling convention: - // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). - caller_saved.push(Writable::from_reg(regs::rsi())); - caller_saved.push(Writable::from_reg(regs::rdi())); - caller_saved.push(Writable::from_reg(regs::rax())); - caller_saved.push(Writable::from_reg(regs::rcx())); - caller_saved.push(Writable::from_reg(regs::rdx())); - caller_saved.push(Writable::from_reg(regs::r8())); - caller_saved.push(Writable::from_reg(regs::r9())); - caller_saved.push(Writable::from_reg(regs::r10())); - caller_saved.push(Writable::from_reg(regs::r11())); - - if call_conv.extends_baldrdash() { - caller_saved.push(Writable::from_reg(regs::r12())); - caller_saved.push(Writable::from_reg(regs::r13())); - // Not r14; implicitly preserved in the entry. - caller_saved.push(Writable::from_reg(regs::r15())); - caller_saved.push(Writable::from_reg(regs::rbx())); - } - - // - XMM: all the registers! 
- caller_saved.push(Writable::from_reg(regs::xmm0())); - caller_saved.push(Writable::from_reg(regs::xmm1())); - caller_saved.push(Writable::from_reg(regs::xmm2())); - caller_saved.push(Writable::from_reg(regs::xmm3())); - caller_saved.push(Writable::from_reg(regs::xmm4())); - caller_saved.push(Writable::from_reg(regs::xmm5())); - caller_saved.push(Writable::from_reg(regs::xmm6())); - caller_saved.push(Writable::from_reg(regs::xmm7())); - caller_saved.push(Writable::from_reg(regs::xmm8())); - caller_saved.push(Writable::from_reg(regs::xmm9())); - caller_saved.push(Writable::from_reg(regs::xmm10())); - caller_saved.push(Writable::from_reg(regs::xmm11())); - caller_saved.push(Writable::from_reg(regs::xmm12())); - caller_saved.push(Writable::from_reg(regs::xmm13())); - caller_saved.push(Writable::from_reg(regs::xmm14())); - caller_saved.push(Writable::from_reg(regs::xmm15())); - - caller_saved -} - -fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { - // Compute uses: all arg regs. - let mut uses = Vec::new(); - for arg in &sig.args { - match arg { - &ABIArg::Reg(reg, ..) => uses.push(reg.to_reg()), - _ => {} - } - } - - // Compute defs: all retval regs, and all caller-save (clobbered) regs. - let mut defs = get_caller_saves(sig.call_conv); - for ret in &sig.rets { - match ret { - &ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())), - _ => {} - } - } - - (uses, defs) -} - -/// Try to fill a Baldrdash register, returning it if it was found. -fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { - if call_conv.extends_baldrdash() { - match ¶m.purpose { - &ir::ArgumentPurpose::VMContext => { - // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg( - regs::r14().to_real_reg(), - types::I64, - param.extension, - )) - } - &ir::ArgumentPurpose::SignatureId => { - // This is SpiderMonkey's `WasmTableCallSigReg`. - Some(ABIArg::Reg( - regs::r10().to_real_reg(), - types::I64, - param.extension, - )) - } - _ => None, - } - } else { - None - } -} - -/// Are we computing information about arguments or return values? Much of the -/// handling is factored out into common routines; this enum allows us to -/// distinguish which case we're handling. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ArgsOrRets { - Args, - Rets, -} - -/// Process a list of parameters or return values and allocate them to X-regs, -/// V-regs, and stack slots. -/// -/// Returns the list of argument locations, the stack-space used (rounded up -/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the -/// index of the extra synthetic arg that was added. -fn compute_arg_locs( - call_conv: CallConv, - params: &[ir::AbiParam], - args_or_rets: ArgsOrRets, - add_ret_area_ptr: bool, -) -> CodegenResult<(Vec, i64, Option)> { - let is_baldrdash = call_conv.extends_baldrdash(); - - let mut next_gpr = 0; - let mut next_vreg = 0; - let mut next_stack: u64 = 0; - let mut ret = vec![]; - - for i in 0..params.len() { - // Process returns backward, according to the SpiderMonkey ABI (which we - // adopt internally if `is_baldrdash` is set). - let param = match (args_or_rets, is_baldrdash) { - (ArgsOrRets::Args, _) => ¶ms[i], - (ArgsOrRets::Rets, false) => ¶ms[i], - (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], - }; - - // Validate "purpose". 
- match ¶m.purpose { - &ir::ArgumentPurpose::VMContext - | &ir::ArgumentPurpose::Normal - | &ir::ArgumentPurpose::StackLimit - | &ir::ArgumentPurpose::SignatureId => {} - _ => panic!( - "Unsupported argument purpose {:?} in signature: {:?}", - param.purpose, params - ), - } - - let intreg = in_int_reg(param.value_type); - let vecreg = in_vec_reg(param.value_type); - debug_assert!(intreg || vecreg); - debug_assert!(!(intreg && vecreg)); - - let (next_reg, candidate) = if intreg { - let candidate = match args_or_rets { - ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), - ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i), - }; - debug_assert!(candidate - .map(|r| r.get_class() == RegClass::I64) - .unwrap_or(true)); - (&mut next_gpr, candidate) - } else { - let candidate = match args_or_rets { - ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), - ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i), - }; - debug_assert!(candidate - .map(|r| r.get_class() == RegClass::V128) - .unwrap_or(true)); - (&mut next_vreg, candidate) - }; - - if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { - assert!(intreg); - ret.push(param); - } else if let Some(reg) = candidate { - ret.push(ABIArg::Reg( - reg.to_real_reg(), - param.value_type, - param.extension, - )); - *next_reg += 1; - } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte - // stack alignment happens separately after all args.) - let size = (param.value_type.bits() / 8) as u64; - let size = std::cmp::max(size, 8); - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack( - next_stack as i64, - param.value_type, - param.extension, - )); - next_stack += size; - } - } - - if args_or_rets == ArgsOrRets::Rets && is_baldrdash { - ret.reverse(); - } - - let extra_arg = if add_ret_area_ptr { - debug_assert!(args_or_rets == ArgsOrRets::Args); - if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { - ret.push(ABIArg::Reg( - reg.to_real_reg(), - types::I64, - ir::ArgumentExtension::None, - )); - } else { - ret.push(ABIArg::Stack( - next_stack as i64, - types::I64, - ir::ArgumentExtension::None, - )); - next_stack += 8; - } - Some(ret.len() - 1) - } else { - None - }; - - next_stack = (next_stack + 15) & !15; - - // To avoid overflow issues, limit the arg/return size to something reasonable. - if next_stack > STACK_ARG_RET_SIZE_LIMIT { - return Err(CodegenError::ImplLimitExceeded); - } - - Ok((ret, next_stack as i64, extra_arg)) -} - -impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> CodegenResult { - // Compute args and retvals from signature. Handle retvals first, - // because we may need to add a return-area arg to the args. 
- let (rets, stack_ret_space, _) = compute_arg_locs( - sig.call_conv, - &sig.returns, - ArgsOrRets::Rets, - /* extra ret-area ptr = */ false, - )?; - let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( - sig.call_conv, - &sig.params, - ArgsOrRets::Args, - need_stack_return_area, - )?; - - trace!( - "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", - sig, - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg - ); - - Ok(ABISig { - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg, - call_conv: sig.call_conv, - }) - } -} - -enum CallDest { - ExtName(ir::ExternalName, RelocDistance), - Reg(Reg), -} - -fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { - if amount == 0 { - return; - } - - let (alu_op, sp_adjustment) = if is_sub { - (AluRmiROpcode::Sub, amount as i64) - } else { - (AluRmiROpcode::Add, -(amount as i64)) - }; - - ctx.emit(Inst::VirtualSPOffsetAdj { - offset: sp_adjustment, - }); - - if amount <= u32::max_value() as u64 { - ctx.emit(Inst::alu_rmi_r( - true, - alu_op, - RegMemImm::imm(amount as u32), - Writable::from_reg(regs::rsp()), - )); - } else { - // TODO will require a scratch register. - unimplemented!("adjust stack with large offset"); - } -} - -fn load_stack(mem: impl Into, into_reg: Writable, ty: Type) -> Inst { - let (is_int, ext_mode) = match ty { - types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), - types::B16 | types::I16 => (true, Some(ExtMode::WQ)), - types::B32 | types::I32 => (true, Some(ExtMode::LQ)), - types::B64 | types::I64 | types::R64 => (true, None), - types::F32 | types::F64 => (false, None), - _ => panic!("load_stack({})", ty), - }; - - let mem = mem.into(); - - if is_int { - match ext_mode { - Some(ext_mode) => Inst::movsx_rm_r( - ext_mode, - RegMem::mem(mem), - into_reg, - /* infallible load */ None, - ), - None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), - } - } else { - let sse_op = match ty { - types::F32 => SseOpcode::Movss, - types::F64 => SseOpcode::Movsd, - _ => unreachable!(), - }; - Inst::xmm_mov( - sse_op, - RegMem::mem(mem), - into_reg, - None, /* infallible */ - ) - } -} - -fn store_stack(mem: impl Into, from_reg: Reg, ty: Type) -> Inst { - let (is_int, size) = match ty { - types::B1 | types::B8 | types::I8 => (true, 1), - types::B16 | types::I16 => (true, 2), - types::B32 | types::I32 => (true, 4), - types::B64 | types::I64 | types::R64 => (true, 8), - types::F32 => (false, 4), - types::F64 => (false, 8), - _ => unimplemented!("store_stack({})", ty), }; - let mem = mem.into(); - if is_int { - Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) - } else { - let sse_op = match size { - 4 => SseOpcode::Movss, - 8 => SseOpcode::Movsd, - _ => unreachable!(), - }; - Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) - } -} - -/// X64 ABI object for a function call. -pub struct X64ABICall { - sig: ABISig, - uses: Vec, - defs: Vec>, - dest: CallDest, - loc: ir::SourceLoc, - opcode: ir::Opcode, -} - -impl X64ABICall { - /// Create a callsite ABI object for a call directly to the specified function. 
- pub fn from_func( - sig: &ir::Signature, - extname: &ir::ExternalName, - dist: RelocDistance, - loc: ir::SourceLoc, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(Self { - sig, - uses, - defs, - dest: CallDest::ExtName(extname.clone(), dist), - loc, - opcode: ir::Opcode::Call, - }) - } - - /// Create a callsite ABI object for a call to a function pointer with the - /// given signature. - pub fn from_ptr( - sig: &ir::Signature, - ptr: Reg, - loc: ir::SourceLoc, - opcode: ir::Opcode, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(Self { - sig, - uses, - defs, - dest: CallDest::Reg(ptr), - loc, - opcode, - }) - } -} - -impl ABICall for X64ABICall { - type I = Inst; - - fn num_args(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - self.sig.args.len() - 1 - } else { - self.sig.args.len() - } - } - - fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ true) - } - - fn emit_stack_post_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ false) - } - - fn emit_copy_reg_to_arg>( - &self, - ctx: &mut C, - idx: usize, - from_reg: Reg, - ) { - match &self.sig.args[idx] { - &ABIArg::Reg(reg, ty, ext) if ext != ir::ArgumentExtension::None && ty.bits() < 64 => { - assert_eq!(RegClass::I64, reg.get_class()); - let dest_reg = Writable::from_reg(reg.to_reg()); - let ext_mode = match ty.bits() { - 1 | 8 => ExtMode::BQ, - 16 => ExtMode::WQ, - 32 => ExtMode::LQ, - _ => unreachable!(), - }; - match ext { - ir::ArgumentExtension::Uext => { - ctx.emit(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - ir::ArgumentExtension::Sext => { - ctx.emit(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - _ => unreachable!(), - }; - } - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move( - Writable::from_reg(reg.to_reg()), - from_reg, - ty, - )), - &ABIArg::Stack(off, ty, ext) => { - if ext != ir::ArgumentExtension::None && ty.bits() < 64 { - assert_eq!(RegClass::I64, from_reg.get_class()); - let dest_reg = Writable::from_reg(from_reg); - let ext_mode = match ty.bits() { - 1 | 8 => ExtMode::BQ, - 16 => ExtMode::WQ, - 32 => ExtMode::LQ, - _ => unreachable!(), - }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. 
- match ext { - ir::ArgumentExtension::Uext => { - ctx.emit(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - ir::ArgumentExtension::Sext => { - ctx.emit(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - _ => unreachable!(), - }; - } - - debug_assert!(off <= u32::max_value() as i64); - debug_assert!(off >= 0); - ctx.emit(store_stack( - Amode::imm_reg(off as u32, regs::rsp()), - from_reg, - ty, - )) - } - } - } - - fn emit_copy_retval_to_reg>( - &self, - ctx: &mut C, - idx: usize, - into_reg: Writable, - ) { - match &self.sig.rets[idx] { - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)), - &ABIArg::Stack(off, ty, _) => { - let ret_area_base = self.sig.stack_arg_space; - let sp_offset = off + ret_area_base; - // TODO handle offsets bigger than u32::max - debug_assert!(sp_offset >= 0); - debug_assert!(sp_offset <= u32::max_value() as i64); - ctx.emit(load_stack( - Amode::imm_reg(sp_offset as u32, regs::rsp()), - into_reg, - ty, - )); - } - } - } - - fn emit_call>(&mut self, ctx: &mut C) { - let (uses, defs) = ( - mem::replace(&mut self.uses, Default::default()), - mem::replace(&mut self.defs, Default::default()), - ); - - if let Some(i) = self.sig.stack_ret_arg { - let dst = ctx.alloc_tmp(RegClass::I64, types::I64); - let ret_area_base = self.sig.stack_arg_space; - debug_assert!( - ret_area_base <= u32::max_value() as i64, - "large offset for ret area NYI" - ); - ctx.emit(Inst::lea( - Amode::imm_reg(ret_area_base as u32, regs::rsp()), - dst, - )); - self.emit_copy_reg_to_arg(ctx, i, dst.to_reg()); - } - - match &self.dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint( - Inst::call_known(name.clone(), uses, defs, self.loc, self.opcode), - ), - &CallDest::ExtName(ref name, RelocDistance::Far) => { - let tmp = ctx.alloc_tmp(RegClass::I64, types::I64); - ctx.emit(Inst::LoadExtName { - dst: tmp, - name: Box::new(name.clone()), - offset: 0, - srcloc: self.loc, - }); - ctx.emit_safepoint(Inst::call_unknown( - RegMem::reg(tmp.to_reg()), - uses, - defs, - self.loc, - self.opcode, - )); - } - &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::call_unknown( - RegMem::reg(reg), - uses, - defs, - self.loc, - self.opcode, - )), - } - } + // Sort registers for deterministic code output. We can do an unstable sort because the + // registers will be unique (there are no dups). + regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index b90bc7916bd4..e79ab5f7722a 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -119,6 +119,7 @@ use crate::{ir, isa}; use alloc::vec::Vec; use log::{debug, trace}; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; +use std::convert::TryFrom; use std::marker::PhantomData; use std::mem; @@ -142,6 +143,16 @@ pub enum ArgsOrRets { Rets, } +/// Is an instruction returned by an ABI machine-specific backend a safepoint, +/// or not? +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum InstIsSafepoint { + /// The instruction is a safepoint. + Yes, + /// The instruction is not a safepoint. + No, +} + /// Abstract location for a machine-specific ABI impl to translate into the /// appropriate addressing mode. 
#[derive(Clone, Copy, Debug)] @@ -207,13 +218,15 @@ pub trait ABIMachineImpl { fn gen_epilogue_placeholder() -> Self::I; /// Generate an add-with-immediate. Note that even if this uses a scratch - /// register, the sequence must still be correct if the given source or dest - /// is the register returned by `get_fixed_tmp_reg()`; hence, for machines - /// that may need a scratch register to synthesize an arbitrary constant, - /// the machine backend should reserve *another* fixed temp register for - /// this purpose. (E.g., on AArch64, x16 is the ordinary fixed tmp, and x17 - /// is the secondary fixed tmp used to implement this.) - fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Self::I; 4]>; + /// register, it must satisfy two requirements: + /// + /// - The add-imm sequence must only clobber caller-save registers, because + /// it will be placed in the prologue before the clobbered callee-save + /// registers are saved. + /// + /// - The add-imm sequence must work correctly when `from_reg` and/or + /// `into_reg` are the register returned by `get_stacklimit_reg()`. + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]>; /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if /// the stack pointer is less than the given limit register (assuming the @@ -224,21 +237,30 @@ pub trait ABIMachineImpl { /// SP-based offset). fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I; - /// Get a fixed (not used by regalloc) temp. This is needed for certain - /// sequences generated after the register allocator has already run. - fn get_fixed_tmp_reg() -> Reg; + /// Get a fixed register to use to compute a stack limit. This is needed for + /// certain sequences generated after the register allocator has already + /// run. This must satisfy two requirements: + /// + /// - It must be a caller-save register, because it will be clobbered in the + /// prologue before the clobbered callee-save registers are saved. + /// + /// - It must be safe to pass as an argument and/or destination to + /// `gen_add_imm()`. This is relevant when an addition with a large + /// immediate needs its own temporary; it cannot use the same fixed + /// temporary as this one. + fn get_stacklimit_reg() -> Reg; /// Generate a store to the given [base+offset] address. - fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Self::I; + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I; /// Generate a load from the given [base+offset] address. - fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Self::I; + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I; /// Adjust the stack pointer up or down. - fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Self::I; 2]>; + fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]>; /// Generate a meta-instruction that adjusts the nominal SP offset. - fn gen_nominal_sp_adj(amount: i64) -> Self::I; + fn gen_nominal_sp_adj(amount: i32) -> Self::I; /// Generate the usual frame-setup sequence for this architecture: e.g., /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on @@ -272,18 +294,20 @@ pub trait ABIMachineImpl { clobbers: &Set>, ) -> SmallVec<[Self::I; 16]>; - /// Generate a call instruction/sequence. + /// Generate a call instruction/sequence. This method is provided one + /// temporary register to use to synthesize the called address, if needed. 
     fn gen_call(
         dest: &CallDest,
         uses: Vec<Reg>,
         defs: Vec<Writable<Reg>>,
         loc: SourceLoc,
         opcode: ir::Opcode,
-    ) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]>;
+        tmp: Writable<Reg>,
+    ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
 
     /// Get the number of spillslots required for the given register-class and
     /// type.
-    fn get_spillslot_size(rc: RegClass, ty: Type) -> u32;
+    fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32;
 
     /// Get the current virtual-SP offset from an instruction-emission state.
     fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;
@@ -506,8 +530,7 @@ impl<M: ABIMachineImpl> ABIBodyImpl<M> {
             // `scratch`. If our stack size doesn't fit into an immediate this
             // means we need a second scratch register for loading the stack size
             // into a register.
-            let scratch = Writable::from_reg(M::get_fixed_tmp_reg());
-            let stack_size = u64::from(stack_size);
+            let scratch = Writable::from_reg(M::get_stacklimit_reg());
             insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter());
             insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
         }
@@ -563,7 +586,7 @@ fn generate_gv<M: ABIMachineImpl>(
             readonly: _,
         } => {
             let base = generate_gv::<M>(f, abi, base, insts);
-            let into_reg = Writable::from_reg(M::get_fixed_tmp_reg());
+            let into_reg = Writable::from_reg(M::get_stacklimit_reg());
             insts.push(M::gen_load_base_offset(into_reg, base, offset.into(), I64));
             return into_reg.to_reg();
         }
@@ -676,6 +699,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
             }
             &ABIArg::Stack(off, mut ty, ext) => {
                 let from_bits = ty_bits(ty) as u8;
+                let off = i32::try_from(off).expect("Argument stack offset greater than 2GB");
                 // Trash the from_reg; it should be its last use.
                 match (ext, from_bits) {
                     (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => {
@@ -864,7 +888,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
 
         if total_sp_adjust > 0 {
             // sub sp, sp, #total_stacksize
-            let adj = total_sp_adjust as i64;
+            let adj = total_sp_adjust as i32;
             insts.extend(M::gen_sp_reg_adjust(-adj));
         }
 
@@ -873,7 +897,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
         insts.extend(clobber_insts);
 
         if clobber_size > 0 {
-            insts.push(M::gen_nominal_sp_adj(clobber_size as i64));
+            insts.push(M::gen_nominal_sp_adj(clobber_size as i32));
         }
 
         self.total_frame_size = Some(total_stacksize);
@@ -911,7 +935,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
     }
 
     fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
-        M::get_spillslot_size(rc, ty)
+        M::get_number_of_spillslots_for_value(rc, ty)
    }
 
     fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I {
@@ -1024,13 +1048,12 @@ impl<M: ABIMachineImpl> ABICallImpl<M> {
 fn adjust_stack_and_nominal_sp<M: ABIMachineImpl, C: LowerCtx<I = M::I>>(
     ctx: &mut C,
-    off: u64,
+    off: i32,
     is_sub: bool,
 ) {
     if off == 0 {
         return;
     }
-    let off = off as i64;
     let amt = if is_sub { -off } else { off };
     for inst in M::gen_sp_reg_adjust(amt) {
         ctx.emit(inst);
     }
@@ -1051,12 +1074,12 @@ impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
 
     fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
         let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
-        adjust_stack_and_nominal_sp::<M, C>(ctx, off as u64, /* is_sub = */ true)
+        adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ true)
     }
 
     fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
         let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
-        adjust_stack_and_nominal_sp::<M, C>(ctx, off as u64, /* is_sub = */ false)
+        adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ false)
     }
 
     fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
@@ -1152,13 +1175,13 @@ impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
             ));
             self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
         }
+        let tmp = ctx.alloc_tmp(RegClass::I64, I64);
         for (is_safepoint, inst) in
-            M::gen_call(&self.dest, uses, defs, self.loc, self.opcode).into_iter()
+            M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter()
         {
-            if is_safepoint {
-                ctx.emit_safepoint(inst);
-            } else {
-                ctx.emit(inst);
+            match is_safepoint {
+                InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
+                InstIsSafepoint::No => ctx.emit(inst),
             }
         }
     }
diff --git a/cranelift/filetests/filetests/vcode/aarch64/call.clif b/cranelift/filetests/filetests/vcode/aarch64/call.clif
index cded47dc5b3a..ad14eca992a5 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/call.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/call.clif
@@ -11,8 +11,8 @@ block0(v0: i64):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, 8 ; b 12 ; data
-; nextln: blr x16
+; nextln: ldr x1, 8 ; b 12 ; data
+; nextln: blr x1
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -28,8 +28,8 @@ block0(v0: i32):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: mov w0, w0
-; nextln: ldr x16, 8 ; b 12 ; data
-; nextln: blr x16
+; nextln: ldr x1, 8 ; b 12 ; data
+; nextln: blr x1
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -57,8 +57,8 @@ block0(v0: i32):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: sxtw x0, w0
-; nextln: ldr x16, 8 ; b 12 ; data
-; nextln: blr x16
+; nextln: ldr x1, 8 ; b 12 ; data
+; nextln: blr x1
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -99,8 +99,8 @@ block0(v0: i8):
 ; nextln: movz x7, #42
 ; nextln: sxtb x8, w8
 ; nextln: stur x8, [sp]
-; nextln: ldr x16, 8 ; b 12 ; data
-; nextln: blr x16
+; nextln: ldr x8, 8 ; b 12 ; data
+; nextln: blr x8
 ; nextln: add sp, sp, #16
 ; nextln: virtual_sp_offset_adjust -16
 ; nextln: mov sp, fp
diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif
index 2458516cfc59..97234a7da0d1 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif
@@ -83,12 +83,12 @@ block3(v7: r64, v8: r64):
 ; nextln: mov x19, x0
 ; nextln: mov x20, x1
 ; nextln: mov x0, x19
-; nextln: ldr x16, 8 ; b 12 ; data
+; nextln: ldr x1, 8 ; b 12 ; data
 ; nextln: stur x0, [sp, #24]
 ; nextln: stur x19, [sp, #32]
 ; nextln: stur x20, [sp, #40]
 ; nextln: (safepoint: slots [S0, S1, S2]
-; nextln: blr x16
+; nextln: blr x1
 ; nextln: ldur x19, [sp, #32]
 ; nextln: ldur x20, [sp, #40]
 ; nextln: add x1, sp, #16
diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
index 8ad03936694b..63584805fc81 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
@@ -44,8 +44,8 @@ block0(v0: i64):
 ; nextln: mov fp, sp
 ; nextln: subs xzr, sp, x0
 ; nextln: b.hs 8 ; udf
-; nextln: ldr x16
-; nextln: blr x16
+; nextln: ldr x0
+; nextln: blr x0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -67,8 +67,8 @@ block0(v0: i64):
 ; nextln: ldur x16, [x16, #4]
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8 ; udf
-; nextln: ldr x16
-; nextln: blr x16
+; nextln: ldr x0
+; nextln: blr x0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
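
The `get_stacklimit_reg()` / `gen_add_imm()` contract documented above is easiest to see in the prologue stack-limit check that consumes it: the frame size is added to the stack-limit value in the fixed scratch register, and a trap is emitted if SP falls below the result. The sketch below illustrates that composition only; `MiniMachine`, `Reg`, `Inst`, and `Demo` are hypothetical stand-ins, not Cranelift's actual API.

// Hypothetical stand-in types for illustration; not Cranelift's real ones.
#[derive(Clone, Copy, Debug)]
enum Reg {
    /// Fixed caller-save register reserved for the stack-limit computation.
    StackLimitTmp,
    /// Any other (allocatable) register.
    Other(u8),
}

#[derive(Debug)]
enum Inst {
    /// dst := src + imm
    AddImm { dst: Reg, src: Reg, imm: u32 },
    /// Trap with a stack-overflow code if SP is below `limit`.
    TrapIfSpBelow { limit: Reg },
}

trait MiniMachine {
    /// Must be caller-save and safe to use as either operand of `gen_add_imm`.
    fn get_stacklimit_reg() -> Reg;
    /// Add-with-immediate; may only clobber caller-save registers.
    fn gen_add_imm(dst: Reg, src: Reg, imm: u32) -> Vec<Inst>;
}

struct Demo;

impl MiniMachine for Demo {
    fn get_stacklimit_reg() -> Reg {
        Reg::StackLimitTmp
    }
    fn gen_add_imm(dst: Reg, src: Reg, imm: u32) -> Vec<Inst> {
        vec![Inst::AddImm { dst, src, imm }]
    }
}

/// Prologue-time check: trap if SP would drop below `stack_limit + stack_size`.
fn gen_stack_check<M: MiniMachine>(stack_limit: Reg, stack_size: u32) -> Vec<Inst> {
    let scratch = M::get_stacklimit_reg();
    // `scratch` is both the destination of the add and the register the trap
    // reads, so gen_add_imm must tolerate that aliasing.
    let mut insts = M::gen_add_imm(scratch, stack_limit, stack_size);
    insts.push(Inst::TrapIfSpBelow { limit: scratch });
    insts
}

fn main() {
    for inst in gen_stack_check::<Demo>(Reg::Other(0), 0x10_000) {
        println!("{:?}", inst);
    }
}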
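On the caller side, the `(InstIsSafepoint, Inst)` pairs returned by `gen_call()` are meant to be routed to two different emission paths: only the instruction that is an actual safepoint (the call itself) goes through the safepoint-aware emitter, while a preparatory load of the callee address is emitted normally. A minimal sketch, again with hypothetical stand-in types rather than the real `LowerCtx` interface:

// Hypothetical stand-ins for illustration only.
#[derive(Clone, Copy, Debug)]
enum InstIsSafepoint {
    Yes,
    No,
}

#[derive(Debug)]
enum Inst {
    /// Materialize an external symbol's address into a temp register.
    LoadExtName { tmp: u8 },
    /// Indirect call through a register.
    CallInd { target: u8 },
}

#[derive(Default)]
struct Ctx {
    emitted: Vec<String>,
}

impl Ctx {
    fn emit(&mut self, inst: Inst) {
        self.emitted.push(format!("{:?}", inst));
    }
    fn emit_safepoint(&mut self, inst: Inst) {
        // A real context would also record live reference-typed values here.
        self.emitted.push(format!("safepoint: {:?}", inst));
    }
}

fn emit_call_sequence(ctx: &mut Ctx, seq: Vec<(InstIsSafepoint, Inst)>) {
    for (is_safepoint, inst) in seq {
        match is_safepoint {
            InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
            InstIsSafepoint::No => ctx.emit(inst),
        }
    }
}

fn main() {
    let mut ctx = Ctx::default();
    // A "far" call: load the callee address into a temp (not a safepoint),
    // then the indirect call itself (a safepoint).
    emit_call_sequence(
        &mut ctx,
        vec![
            (InstIsSafepoint::No, Inst::LoadExtName { tmp: 1 }),
            (InstIsSafepoint::Yes, Inst::CallInd { target: 1 }),
        ],
    );
    for line in &ctx.emitted {
        println!("{}", line);
    }
}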