Skip to content

Commit

Permalink
Cranelift: Get tail calls working on aarch64
Browse files Browse the repository at this point in the history
Co-Authored-By: Jamey Sharp <[email protected]>
  • Loading branch information
fitzgen and jameysharp committed Jul 13, 2023
1 parent 73405a4 commit 6d00449
Show file tree
Hide file tree
Showing 11 changed files with 1,070 additions and 159 deletions.
36 changes: 36 additions & 0 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,42 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
}

impl AArch64CallSite {
    /// Lowers a tail call for this call site.
    ///
    /// The outgoing arguments are first materialized through
    /// `emit_temporary_tail_call_frame`, which reports the new and old
    /// stack-argument sizes. Those sizes, together with the call's opcode and
    /// register uses, are boxed into a `ReturnCallInfo` and attached to the
    /// pseudo-instruction chosen by the kind of call destination:
    ///
    /// * near external name: `Inst::ReturnCall` directly on the name;
    /// * far external name: the name is first loaded into a fresh temporary
    ///   with `Inst::LoadExtName`, then `Inst::ReturnCallInd` on that register;
    /// * register: `Inst::ReturnCallInd` on the given register.
    pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
        let (new_stack_arg_size, old_stack_arg_size) =
            self.emit_temporary_tail_call_frame(ctx, args);

        // Gather everything we need out of `self` before building the
        // instruction.
        let target = self.dest().clone();
        let opcode = self.opcode();
        let uses = self.take_uses();
        let info = Box::new(ReturnCallInfo {
            uses,
            opcode,
            old_stack_arg_size,
            new_stack_arg_size,
        });

        // Pick the terminating pseudo-instruction; only the far-name case
        // needs an extra instruction emitted ahead of it.
        let tail_call = match target {
            CallDest::ExtName(callee, RelocDistance::Near) => Inst::ReturnCall { callee, info },
            CallDest::ExtName(name, RelocDistance::Far) => {
                let addr = ctx.alloc_tmp(types::I64).only_reg().unwrap();
                ctx.emit(Inst::LoadExtName {
                    rd: addr,
                    name: Box::new(name),
                    offset: 0,
                });
                Inst::ReturnCallInd {
                    callee: addr.to_reg(),
                    info,
                }
            }
            CallDest::Reg(callee) => Inst::ReturnCallInd { callee, info },
        };
        ctx.emit(tail_call);
    }
}

fn compute_arg_locs_tail<'a, I>(
params: I,
add_ret_area_ptr: bool,
Expand Down
11 changes: 11 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,16 @@
(CallInd
(info BoxCallIndInfo))

;; A return-call macro instruction.
(ReturnCall
(callee ExternalName)
(info BoxReturnCallInfo))

;; An indirect return-call macro instruction.
(ReturnCallInd
(callee Reg)
(info BoxReturnCallInfo))

;; A pseudo-instruction that captures register arguments in vregs.
(Args
(args VecArgPair))
Expand Down Expand Up @@ -1030,6 +1040,7 @@

(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type BoxReturnCallInfo (primitive BoxReturnCallInfo))
(type CondBrKind (primitive CondBrKind))
(type BranchTarget (primitive BranchTarget))
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
Expand Down
211 changes: 209 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use cranelift_control::ControlPlane;
use regalloc2::Allocation;

use crate::binemit::{Reloc, StackMap};
use crate::ir::{types::*, RelSourceLoc};
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::ir::{self, types::*, LibCall, MemFlags, RelSourceLoc, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, Reg, RegClass, Writable};
use crate::trace;
Expand Down Expand Up @@ -3205,6 +3204,57 @@ impl MachInstEmit for Inst {
state.virtual_sp_offset
);
}
&Inst::ReturnCall {
ref callee,
ref info,
} => {
emit_return_call_common_sequence(
&mut allocs,
sink,
emit_info,
state,
info.new_stack_arg_size,
info.old_stack_arg_size,
&info.uses,
);

// Note: this is not `Inst::Jump { .. }.emit(..)` because we
// have different metadata in this case: we don't have a label
// for the target, but rather a function relocation.
sink.add_reloc(Reloc::Arm64Call, callee, 0);
sink.put4(enc_jump26(0b000101, 0));
sink.add_call_site(ir::Opcode::ReturnCall);

// `emit_return_call_common_sequence` emits an island if
// necessary, so we can safely disable the worst-case-size check
// in this case.
start_off = sink.cur_offset();
}
&Inst::ReturnCallInd { callee, ref info } => {
let callee = allocs.next(callee);

emit_return_call_common_sequence(
&mut allocs,
sink,
emit_info,
state,
info.new_stack_arg_size,
info.old_stack_arg_size,
&info.uses,
);

Inst::IndirectBr {
rn: callee,
targets: vec![],
}
.emit(&[], sink, emit_info, state);
sink.add_call_site(ir::Opcode::ReturnCallIndirect);

// `emit_return_call_common_sequence` emits an island if
// necessary, so we can safely disable the worst-case-size check
// in this case.
start_off = sink.cur_offset();
}
&Inst::CondBr {
taken,
not_taken,
Expand Down Expand Up @@ -3712,3 +3762,160 @@ impl MachInstEmit for Inst {
self.print_with_state(state, &mut allocs)
}
}

fn emit_return_call_common_sequence(
allocs: &mut AllocationConsumer<'_>,
sink: &mut MachBuffer<Inst>,
emit_info: &EmitInfo,
state: &mut EmitState,
new_stack_arg_size: u32,
old_stack_arg_size: u32,
uses: &CallArgList,
) {
for u in uses {
let _ = allocs.next(u.vreg);
}

// We are emitting a dynamic number of instructions and might need an
// island. We emit four instructions regardless of how many stack arguments
// we have, and then two instructions per word of stack argument space.
let new_stack_words = new_stack_arg_size / 8;
let insts = 4 + 2 * new_stack_words;
let size_of_inst = 4;
let space_needed = insts * size_of_inst;
if sink.island_needed(space_needed) {
let jump_around_label = sink.get_label();
let jmp = Inst::Jump {
dest: BranchTarget::Label(jump_around_label),
};
jmp.emit(&[], sink, emit_info, state);
sink.emit_island(space_needed + 4, &mut state.ctrl_plane);
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
}

// Copy the new frame on top of our current frame.
//
// The current stack layout is the following:
//
// | ... |
// +---------------------+
// | ... |
// | stack arguments |
// | ... |
// current | return address |
// frame | old FP | <-- FP
// | ... |
// | old stack slots |
// | ... |
// +---------------------+
// | ... |
// new | new stack arguments |
// frame | ... | <-- SP
// +---------------------+
//
// We need to restore the old FP, restore the return address from the stack
// to the link register, copy the new stack arguments over the old stack
// arguments, adjust SP to point to the new stack arguments, and then jump
// to the callee (which will push the old FP and RA again). Note that the
// actual jump happens outside this helper function.

assert_eq!(
new_stack_arg_size % 8,
0,
"size of new stack arguments must be 8-byte aligned"
);

// The delta from our frame pointer to the (eventual) stack pointer value
// when we jump to the tail callee. This is the difference in size of stack
// arguments as well as accounting for the two words we pushed onto the
// stack upon entry to this function (the return address and old frame
// pointer).
let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16;

let tmp1 = regs::writable_spilltmp_reg();
let tmp2 = regs::writable_tmp2_reg();

// Restore the return address to the link register, and load the old FP into
// a temporary register.
//
// We can't put the old FP into the FP register until after we copy the
// stack arguments into place, since that uses address modes that are
// relative to our current FP.
//
// Note that the FP is saved in the function prologue for all non-leaf
// functions, even when `preserve_frame_pointers=false`. Note also that
// `return_call` instructions make it so that a function is considered
// non-leaf. Therefore we always have an FP to restore here.
Inst::LoadP64 {
rt: tmp1,
rt2: writable_link_reg(),
mem: PairAMode::SignedOffset(
regs::fp_reg(),
SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
),
flags: MemFlags::trusted(),
}
.emit(&[], sink, emit_info, state);

// Copy the new stack arguments over the old stack arguments.
for i in (0..new_stack_arg_size / 8).rev() {
// Load the `i`th new stack argument word from the temporary stack
// space.
Inst::ULoad64 {
rd: tmp2,
mem: AMode::SPOffset {
off: i64::from(i * 8),
ty: types::I64,
},
flags: ir::MemFlags::trusted(),
}
.emit(&[], sink, emit_info, state);

// Store it to its final destination on the stack, overwriting our
// current frame.
Inst::Store64 {
rd: tmp2.to_reg(),
mem: AMode::FPOffset {
off: fp_to_callee_sp + i64::from(i * 8),
ty: types::I64,
},
flags: ir::MemFlags::trusted(),
}
.emit(&[], sink, emit_info, state);
}

// Initialize the SP for the tail callee, deallocating the temporary stack
// argument space and our current frame at the same time.
let (off, alu_op) = if let Ok(off) = u64::try_from(fp_to_callee_sp) {
(off, ALUOp::Add)
} else {
let abs = fp_to_callee_sp.abs();
let off = u64::try_from(abs).unwrap();
(off, ALUOp::Sub)
};
Inst::AluRRImm12 {
alu_op,
size: OperandSize::Size64,
rd: regs::writable_stack_reg(),
rn: regs::fp_reg(),
imm12: Imm12::maybe_from_u64(off).unwrap(),
}
.emit(&[], sink, emit_info, state);

// Move the old FP value from the temporary into the FP register.
Inst::AluRRImm12 {
alu_op: ALUOp::Sub,
size: OperandSize::Size64,
rd: regs::writable_fp_reg(),
rn: tmp1.to_reg(),
imm12: Imm12::maybe_from_u64(0).unwrap(),
}
.emit(&[], sink, emit_info, state);

state.virtual_sp_offset -= i64::from(new_stack_arg_size);
trace!(
"return_call[_ind] adjusts virtual sp offset by {} -> {}",
new_stack_arg_size,
state.virtual_sp_offset
);
}
59 changes: 59 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ pub struct CallIndInfo {
pub callee_pop_size: u32,
}

/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
///
/// The `ReturnCall` and `ReturnCallInd` instruction variants carry this
/// payload in a `Box`.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo {
/// Arguments to the call instruction. Each entry pairs a virtual register
/// with the physical register it is constrained to (a fixed-register use
/// for the register allocator).
pub uses: CallArgList,
/// Instruction opcode, as recorded from the call site when the tail call
/// is lowered.
pub opcode: Opcode,
/// The size, in bytes, of the current/old stack frame's stack arguments.
pub old_stack_arg_size: u32,
/// The size, in bytes, of the new stack frame's stack arguments. This is
/// necessary for copying the frame over our current frame. It must already
/// be allocated on the stack, and emission asserts that it is a multiple
/// of 8.
pub new_stack_arg_size: u32,
}

/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
Expand Down Expand Up @@ -873,6 +889,20 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
}
collector.reg_clobbers(info.clobbers);
}
&Inst::ReturnCall {
ref info,
callee: _,
} => {
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
}
&Inst::ReturnCallInd { ref info, callee } => {
collector.reg_use(callee);
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
}
&Inst::CondBr { ref kind, .. } => match kind {
CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
collector.reg_use(*rt);
Expand Down Expand Up @@ -1013,6 +1043,7 @@ impl MachInst for Inst {
fn is_term(&self) -> MachTerminator {
match self {
&Inst::Ret { .. } | &Inst::AuthenticatedRet { .. } => MachTerminator::Ret,
&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
&Inst::Jump { .. } => MachTerminator::Uncond,
&Inst::CondBr { .. } => MachTerminator::Cond,
&Inst::IndirectBr { .. } => MachTerminator::Indirect,
Expand Down Expand Up @@ -2522,6 +2553,34 @@ impl Inst {
let rn = pretty_print_reg(info.rn, allocs);
format!("blr {}", rn)
}
&Inst::ReturnCall {
ref callee,
ref info,
} => {
let mut s = format!(
"return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}",
info.old_stack_arg_size, info.new_stack_arg_size
);
for ret in &info.uses {
let preg = pretty_print_reg(ret.preg, &mut empty_allocs);
let vreg = pretty_print_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&Inst::ReturnCallInd { callee, ref info } => {
let callee = pretty_print_reg(callee, allocs);
let mut s = format!(
"return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}",
info.old_stack_arg_size, info.new_stack_arg_size
);
for ret in &info.uses {
let preg = pretty_print_reg(ret.preg, &mut empty_allocs);
let vreg = pretty_print_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&Inst::Args { ref args } => {
let mut s = "args".to_string();
for arg in args {
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2268,6 +2268,14 @@
(rule (lower (return args))
(lower_return args))

;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;

;; Direct tail call: `func_ref_data` destructures the function reference into
;; its signature reference, external name and `dist` (presumably the
;; relocation distance -- confirm), which are forwarded together with the
;; arguments to `gen_return_call`.
(rule (lower (return_call (func_ref_data sig_ref extname dist) args))
(gen_return_call sig_ref extname dist args))

;; Indirect tail call: the signature reference, callee value and arguments are
;; forwarded unchanged to `gen_return_call_indirect`.
(rule (lower (return_call_indirect sig_ref callee args))
(gen_return_call_indirect sig_ref callee args))

;;;; Rules for loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower
Expand Down
Loading

0 comments on commit 6d00449

Please sign in to comment.