From 4ff05108e927ffede7f45d8be22888790644cc03 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 28 Jun 2023 13:52:18 -0700 Subject: [PATCH] Cranelift: Refactor `return_call[_indirect]` lowering Commons up some code paths and sets the stage for other architectures. This should have fewer calls back and forth between architecture specific and independent bits of code, which I have found hard to keep track of. Now, lowering tail calls is done in architecture specific code that can call out to architecture independent helpers as needed. Before it was architecture independent code that would call architecture specific hooks that would call architecture independent helpers. Too much stuff split across too many layers. This new approach removes at least one layer of indirection and unnecessarily confusing abstraction. --- cranelift/codegen/src/isa/aarch64/abi.rs | 13 -- cranelift/codegen/src/isa/riscv64/abi.rs | 13 -- cranelift/codegen/src/isa/s390x/abi.rs | 13 -- cranelift/codegen/src/isa/x64/abi.rs | 133 +++++++++++++------- cranelift/codegen/src/isa/x64/lower.rs | 12 ++ cranelift/codegen/src/isa/x64/lower/isle.rs | 114 +---------------- cranelift/codegen/src/machinst/abi.rs | 93 +++++++------- 7 files changed, 147 insertions(+), 244 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index b573cb6706ba..9959409a244b 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1046,19 +1046,6 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } - fn gen_return_call( - _callee: CallDest, - _new_stack_arg_size: u32, - _old_stack_arg_size: u32, - _ret_addr: Option, - _fp: Reg, - _tmp: Writable, - _tmp2: Writable, - _uses: abi::CallArgList, - ) -> SmallVec<[Self::I; 2]> { - todo!(); - } - fn gen_memcpy Writable>( call_conv: isa::CallConv, dst: Reg, diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 766e0289c389..4fbaa4be0c48 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -565,19 +565,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { insts } - fn gen_return_call( - _callee: CallDest, - _new_stack_arg_size: u32, - _old_stack_arg_size: u32, - _ret_addr: Option, - _fp: Reg, - _tmp: Writable, - _tmp2: Writable, - _uses: abi::CallArgList, - ) -> SmallVec<[Self::I; 2]> { - todo!(); - } - fn gen_memcpy Writable>( call_conv: isa::CallConv, dst: Reg, diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index f020d7a7cb88..8c0fd25f1bd7 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -748,19 +748,6 @@ impl ABIMachineSpec for S390xMachineDeps { unreachable!(); } - fn gen_return_call( - _callee: CallDest, - _new_stack_arg_size: u32, - _old_stack_arg_size: u32, - _ret_addr: Option, - _fp: Reg, - _tmp: Writable, - _tmp2: Writable, - _uses: abi::CallArgList, - ) -> SmallVec<[Self::I; 2]> { - todo!(); - } - fn gen_memcpy Writable>( _call_conv: isa::CallConv, _dst: Reg, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 05e4848fc5e1..61cd28fbe0e0 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use regalloc2::{PRegSet, VReg}; +use regalloc2::{PReg, PRegSet, VReg}; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -713,52 +713,6 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } - fn gen_return_call( - callee: CallDest, - new_stack_arg_size: u32, - old_stack_arg_size: u32, - ret_addr: Option, - fp: Reg, - tmp: Writable, - tmp2: Writable, - uses: abi::CallArgList, - ) -> SmallVec<[Self::I; 2]> { - let ret_addr = ret_addr.map(|r| Gpr::new(r).unwrap()); - let fp = Gpr::new(fp).unwrap(); - let tmp = WritableGpr::from_writable_reg(tmp).unwrap(); - let info = Box::new(ReturnCallInfo { - new_stack_arg_size, - old_stack_arg_size, - ret_addr, - fp, - tmp, - uses, - }); - match callee { - CallDest::ExtName(callee, RelocDistance::Near) => { - smallvec![Inst::ReturnCallKnown { callee, info }] - } - CallDest::ExtName(callee, RelocDistance::Far) => { - smallvec![ - Inst::LoadExtName { - dst: tmp2, - name: Box::new(callee.clone()), - offset: 0, - distance: RelocDistance::Far, - }, - Inst::ReturnCallUnknown { - callee: tmp2.into(), - info, - } - ] - } - CallDest::Reg(callee) => smallvec![Inst::ReturnCallUnknown { - callee: callee.into(), - info, - }], - } - } - fn gen_memcpy Writable>( call_conv: isa::CallConv, dst: Reg, @@ -886,6 +840,91 @@ impl ABIMachineSpec for X64ABIMachineSpec { } } +impl X64CallSite { + pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { + // Allocate additional stack space for the new stack frame. We will + // build it in the newly allocated space, but then copy it over our + // current frame at the last moment. + let new_stack_arg_size = self.emit_allocate_tail_call_frame(ctx); + let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs()); + + // Make a copy of the frame pointer, since we use it when copying down + // the new stack frame. + let fp = ctx.temp_writable_gpr(); + let rbp = PReg::from(regs::rbp().to_real_reg().unwrap()); + ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp }); + + // Load the return address, because copying our new stack frame + // over our current stack frame might overwrite it, and we'll need to + // place it in the correct location after we do that copy. + // + // But we only need to actually move the return address if the size of + // stack arguments changes. + let ret_addr = if new_stack_arg_size != old_stack_arg_size { + let ret_addr = ctx.temp_writable_gpr(); + ctx.emit(Inst::Mov64MR { + src: SyntheticAmode::Real(Amode::ImmReg { + simm32: 8, + base: *fp.to_reg(), + flags: MemFlags::trusted(), + }), + dst: ret_addr, + }); + Some(ret_addr.to_reg()) + } else { + None + }; + + // Put all arguments in registers and stack slots (within that newly + // allocated stack space). + self.emit_args(ctx, args); + if let Some(i) = ctx.sigs()[self.sig()].stack_ret_arg() { + let ret_area_ptr = ctx.abi().ret_area_ptr().expect( + "if the tail callee has a return pointer, then the tail caller \ + must as well", + ); + for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) { + ctx.emit(inst); + } + } + + // Finally, emit the macro instruction to copy the new stack frame over + // our current one and do the actual tail call! + + let dest = self.dest().clone(); + let info = Box::new(ReturnCallInfo { + new_stack_arg_size, + old_stack_arg_size, + ret_addr, + fp: fp.to_reg(), + tmp: ctx.temp_writable_gpr(), + uses: self.take_uses(), + }); + match dest { + CallDest::ExtName(callee, RelocDistance::Near) => { + ctx.emit(Inst::ReturnCallKnown { callee, info }); + } + CallDest::ExtName(callee, RelocDistance::Far) => { + let tmp2 = ctx.temp_writable_gpr(); + ctx.emit(Inst::LoadExtName { + dst: tmp2.to_writable_reg(), + name: Box::new(callee), + offset: 0, + distance: RelocDistance::Far, + }); + ctx.emit(Inst::ReturnCallUnknown { + callee: tmp2.to_writable_reg().into(), + info, + }); + } + CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallUnknown { + callee: callee.into(), + info, + }), + } + } +} + impl From for SyntheticAmode { fn from(amode: StackAMode) -> Self { // We enforce a 128 MB stack-frame size limit above, so these diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 9e74d2719b8a..b13d291573da 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -19,6 +19,18 @@ use target_lexicon::Triple; //============================================================================= // Helpers for instruction lowering. +impl Lower<'_, Inst> { + #[inline] + pub fn temp_writable_gpr(&mut self) -> WritableGpr { + WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap() + } + + #[inline] + pub fn temp_writable_xmm(&mut self) -> WritableXmm { + WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap() + } +} + fn is_int_or_ref_ty(ty: Type) -> bool { match ty { types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true, diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 78a2a4f581c1..3890400bfb61 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -95,7 +95,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { let callee = self.put_in_reg(callee); - let mut call_site = X64CallSite::from_ptr( + let call_site = X64CallSite::from_ptr( self.lower_ctx.sigs(), callee_sig, callee, @@ -103,58 +103,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { caller_conv, self.backend.flags().clone(), ); - - // Allocate additional stack space for the new stack frame. We will - // build it in the newly allocated space, but then copy it over our - // current frame at the last moment. - let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx); - let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs()); - - // Make a copy of the frame pointer, since we use it when copying down - // the new stack frame. - let fp = self.temp_writable_gpr(); - let rbp = self.preg_rbp(); - self.lower_ctx - .emit(MInst::MovFromPReg { src: rbp, dst: fp }); - - // Load the return address, because copying our new stack frame - // over our current stack frame might overwrite it, and we'll need to - // place it in the correct location after we do that copy. - // - // But we only need to actually move the return address if the size of - // stack arguments changes. - let ret_addr = if new_stack_arg_size != old_stack_arg_size { - let ret_addr = self.temp_writable_gpr(); - self.lower_ctx.emit(MInst::Mov64MR { - src: SyntheticAmode::Real(Amode::ImmReg { - simm32: 8, - base: fp.to_reg().to_reg(), - flags: MemFlags::trusted(), - }), - dst: ret_addr, - }); - Some(ret_addr) - } else { - None - }; - - // Put all arguments in registers and stack slots (within that newly - // allocated stack space). - self.gen_call_common_args(&mut call_site, args); - - // Finally, emit the macro instruction to copy the new stack frame over - // our current one and do the actual tail call! - let tmp = self.temp_writable_gpr(); - let tmp2 = self.temp_writable_gpr(); - call_site.emit_return_call( - self.lower_ctx, - new_stack_arg_size, - old_stack_arg_size, - ret_addr.map(|r| r.to_reg().to_reg()), - fp.to_reg().to_reg(), - tmp.to_writable_reg(), - tmp2.to_writable_reg(), - ); + call_site.emit_return_call(self.lower_ctx, args); InstOutput::new() } @@ -173,7 +122,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { "Can only do `return_call`s from within a `tail` calling convention function" ); - let mut call_site = X64CallSite::from_func( + let call_site = X64CallSite::from_func( self.lower_ctx.sigs(), callee_sig, &callee, @@ -181,58 +130,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { caller_conv, self.backend.flags().clone(), ); - - // Allocate additional stack space for the new stack frame. We will - // build it in the newly allocated space, but then copy it over our - // current frame at the last moment. - let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx); - let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs()); - - // Make a copy of the frame pointer, since we use it when copying down - // the new stack frame. - let fp = self.temp_writable_gpr(); - let rbp = self.preg_rbp(); - self.lower_ctx - .emit(MInst::MovFromPReg { src: rbp, dst: fp }); - - // Load the return address, because copying our new stack frame - // over our current stack frame might overwrite it, and we'll need to - // place it in the correct location after we do that copy. - // - // But we only need to actually move the return address if the size of - // stack arguments changes. - let ret_addr = if new_stack_arg_size != old_stack_arg_size { - let ret_addr = self.temp_writable_gpr(); - self.lower_ctx.emit(MInst::Mov64MR { - src: SyntheticAmode::Real(Amode::ImmReg { - simm32: 8, - base: fp.to_reg().to_reg(), - flags: MemFlags::trusted(), - }), - dst: ret_addr, - }); - Some(ret_addr) - } else { - None - }; - - // Put all arguments in registers and stack slots (within that newly - // allocated stack space). - self.gen_call_common_args(&mut call_site, args); - - // Finally, emit the macro instruction to copy the new stack frame over - // our current one and do the actual tail call! - let tmp = self.temp_writable_gpr(); - let tmp2 = self.temp_writable_gpr(); - call_site.emit_return_call( - self.lower_ctx, - new_stack_arg_size, - old_stack_arg_size, - ret_addr.map(|r| r.to_reg().to_reg()), - fp.to_reg().to_reg(), - tmp.to_writable_reg(), - tmp2.to_writable_reg(), - ); + call_site.emit_return_call(self.lower_ctx, args); InstOutput::new() } @@ -599,12 +497,12 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { #[inline] fn temp_writable_gpr(&mut self) -> WritableGpr { - Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap()) + self.lower_ctx.temp_writable_gpr() } #[inline] fn temp_writable_xmm(&mut self) -> WritableXmm { - Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap()) + self.lower_ctx.temp_writable_xmm() } #[inline] diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 70b258dd54d9..1697c700c6a4 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -600,17 +600,6 @@ pub trait ABIMachineSpec { callee_pop_size: u32, ) -> SmallVec<[Self::I; 2]>; - fn gen_return_call( - callee: CallDest, - new_stack_arg_size: u32, - old_stack_arg_size: u32, - ret_addr: Option, - fp: Reg, - tmp: Writable, - tmp2: Writable, - uses: abi::CallArgList, - ) -> SmallVec<[Self::I; 2]>; - /// Generate a memcpy invocation. Used to set up struct /// args. Takes `src`, `dst` as read-only inputs and passes a temporary /// allocator. @@ -722,6 +711,11 @@ impl SigData { pub fn call_conv(&self) -> isa::CallConv { self.call_conv } + + /// The index of the stack-return-value-area argument, if any. + pub fn stack_ret_arg(&self) -> Option { + self.stack_ret_arg + } } /// A (mostly) deduplicated set of ABI signatures. @@ -1260,6 +1254,11 @@ impl Callee { insts.extend(M::gen_add_imm(self.call_conv, scratch, stack_limit, stack_size).into_iter()); insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg())); } + + /// Get the register holding the return-area pointer, if any. + pub(crate) fn ret_area_ptr(&self) -> Option> { + self.ret_area_ptr + } } /// Generates the instructions necessary for the `gv` to be materialized into a @@ -2175,6 +2174,18 @@ impl CallSite { _mach: PhantomData, } } + + pub(crate) fn sig(&self) -> Sig { + self.sig + } + + pub(crate) fn dest(&self) -> &CallDest { + &self.dest + } + + pub(crate) fn take_uses(self) -> CallArgList { + self.uses + } } fn adjust_stack_and_nominal_sp(ctx: &mut Lower, amount: i32) { @@ -2448,6 +2459,27 @@ impl CallSite { insts } + /// Call `gen_arg` for each non-hidden argument and emit all instructions + /// generated. + pub fn emit_args(&mut self, ctx: &mut Lower, (inputs, off): isle::ValueSlice) { + let num_args = self.num_args(ctx.sigs()); + assert_eq!(inputs.len(&ctx.dfg().value_lists) - off, num_args); + + let mut arg_value_regs: SmallVec<[_; 16]> = smallvec![]; + for i in 0..num_args { + let input = inputs.get(off + i, &ctx.dfg().value_lists).unwrap(); + arg_value_regs.push(ctx.put_value_in_regs(input)); + } + for (i, arg_regs) in arg_value_regs.iter().enumerate() { + self.emit_copy_regs_to_buffer(ctx, i, *arg_regs); + } + for (i, value_regs) in arg_value_regs.iter().enumerate() { + for inst in self.gen_arg(ctx, i, *value_regs) { + ctx.emit(inst); + } + } + } + /// Define a return value after the call returns. pub fn gen_retval( &mut self, @@ -2558,45 +2590,6 @@ impl CallSite { ctx.emit(inst); } } - - /// Emit a tail call sequence. - /// - /// The returned instruction should have a proper use-set (arg registers are - /// uses) according to the argument registers this function signature in - /// this ABI. - pub fn emit_return_call( - mut self, - ctx: &mut Lower, - new_stack_arg_size: u32, - old_stack_arg_size: u32, - ret_addr: Option, - fp: Reg, - tmp: Writable, - tmp2: Writable, - ) { - if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg { - let ret_area_ptr = ctx.abi().ret_area_ptr.expect( - "if the tail callee has a return pointer, then the tail caller \ - must as well", - ); - for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) { - ctx.emit(inst); - } - } - - for inst in M::gen_return_call( - self.dest, - new_stack_arg_size, - old_stack_arg_size, - ret_addr, - fp, - tmp, - tmp2, - self.uses, - ) { - ctx.emit(inst); - } - } } #[cfg(test)]