Cranelift: Refactor return_call[_indirect] lowering #6666

Merged · 1 commit · Jun 29, 2023
13 changes: 0 additions & 13 deletions cranelift/codegen/src/isa/aarch64/abi.rs
@@ -1046,19 +1046,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn gen_return_call(
_callee: CallDest,
_new_stack_arg_size: u32,
_old_stack_arg_size: u32,
_ret_addr: Option<Reg>,
_fp: Reg,
_tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
_uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
todo!();
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
13 changes: 0 additions & 13 deletions cranelift/codegen/src/isa/riscv64/abi.rs
@@ -565,19 +565,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
insts
}

fn gen_return_call(
_callee: CallDest,
_new_stack_arg_size: u32,
_old_stack_arg_size: u32,
_ret_addr: Option<Reg>,
_fp: Reg,
_tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
_uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
todo!();
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
13 changes: 0 additions & 13 deletions cranelift/codegen/src/isa/s390x/abi.rs
@@ -748,19 +748,6 @@ impl ABIMachineSpec for S390xMachineDeps {
unreachable!();
}

fn gen_return_call(
_callee: CallDest,
_new_stack_arg_size: u32,
_old_stack_arg_size: u32,
_ret_addr: Option<Reg>,
_fp: Reg,
_tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
_uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
todo!();
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
_call_conv: isa::CallConv,
_dst: Reg,
133 changes: 86 additions & 47 deletions cranelift/codegen/src/isa/x64/abi.rs
@@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use regalloc2::{PRegSet, VReg};
use regalloc2::{PReg, PRegSet, VReg};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;

@@ -713,52 +713,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}

fn gen_return_call(
callee: CallDest,
new_stack_arg_size: u32,
old_stack_arg_size: u32,
ret_addr: Option<Reg>,
fp: Reg,
tmp: Writable<Reg>,
tmp2: Writable<Reg>,
uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
let ret_addr = ret_addr.map(|r| Gpr::new(r).unwrap());
let fp = Gpr::new(fp).unwrap();
let tmp = WritableGpr::from_writable_reg(tmp).unwrap();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp,
tmp,
uses,
});
match callee {
CallDest::ExtName(callee, RelocDistance::Near) => {
smallvec![Inst::ReturnCallKnown { callee, info }]
}
CallDest::ExtName(callee, RelocDistance::Far) => {
smallvec![
Inst::LoadExtName {
dst: tmp2,
name: Box::new(callee.clone()),
offset: 0,
distance: RelocDistance::Far,
},
Inst::ReturnCallUnknown {
callee: tmp2.into(),
info,
}
]
}
CallDest::Reg(callee) => smallvec![Inst::ReturnCallUnknown {
callee: callee.into(),
info,
}],
}
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
@@ -886,6 +840,91 @@
}
}

impl X64CallSite {
pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = self.emit_allocate_tail_call_frame(ctx);
let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs());

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = ctx.temp_writable_gpr();
let rbp = PReg::from(regs::rbp().to_real_reg().unwrap());
ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = ctx.temp_writable_gpr();
ctx.emit(Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: *fp.to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr.to_reg())
} else {
None
};

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.emit_args(ctx, args);
if let Some(i) = ctx.sigs()[self.sig()].stack_ret_arg() {
let ret_area_ptr = ctx.abi().ret_area_ptr().expect(
"if the tail callee has a return pointer, then the tail caller \
must as well",
);
for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) {
ctx.emit(inst);
}
}

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!

let dest = self.dest().clone();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp: fp.to_reg(),
tmp: ctx.temp_writable_gpr(),
uses: self.take_uses(),
});
match dest {
CallDest::ExtName(callee, RelocDistance::Near) => {
ctx.emit(Inst::ReturnCallKnown { callee, info });
}
CallDest::ExtName(callee, RelocDistance::Far) => {
let tmp2 = ctx.temp_writable_gpr();
ctx.emit(Inst::LoadExtName {
dst: tmp2.to_writable_reg(),
name: Box::new(callee),
offset: 0,
distance: RelocDistance::Far,
});
ctx.emit(Inst::ReturnCallUnknown {
callee: tmp2.to_writable_reg().into(),
info,
});
}
CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallUnknown {
callee: callee.into(),
info,
}),
}
}
}

impl From<StackAMode> for SyntheticAmode {
fn from(amode: StackAMode) -> Self {
// We enforce a 128 MB stack-frame size limit above, so these
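A quick standalone sketch of the two frame-layout facts the new `emit_return_call` above relies on: with frame pointers enabled, the x64 prologue (`push rbp; mov rbp, rsp`) leaves the return address one slot above the frame pointer, at `[fp + 8]`, and that return address only has to be carried across the frame copy when the stack-argument area changes size. This is plain Rust with illustrative values; `return_address_must_move` is a hypothetical helper mirroring the `new_stack_arg_size != old_stack_arg_size` check, not Cranelift API.

```rust
/// Hypothetical helper: the return address must be reloaded and
/// re-stored only when the copied frame shifts, i.e. when the callee's
/// stack-argument area differs in size from the caller's.
fn return_address_must_move(new_stack_arg_size: u32, old_stack_arg_size: u32) -> bool {
    new_stack_arg_size != old_stack_arg_size
}

fn main() {
    // Frame layout under frame pointers: [fp] holds the caller's saved
    // rbp, and [fp + 8] holds the return address pushed by `call`.
    let fp: u64 = 0x7fff_ffff_d000; // illustrative frame-pointer value
    let ret_addr_slot = fp + 8;
    assert_eq!(ret_addr_slot - fp, 8);

    // Same-size argument areas: the return address already sits in the
    // right slot, so no temporary register is needed.
    assert!(!return_address_must_move(16, 16));

    // Growing (or shrinking) the argument area moves the frame, so the
    // return address must be carried across the copy.
    assert!(return_address_must_move(32, 16));

    println!("return address slot: {ret_addr_slot:#x}");
}
```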
12 changes: 12 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.rs
@@ -19,6 +19,18 @@ use target_lexicon::Triple;
//=============================================================================
// Helpers for instruction lowering.

impl Lower<'_, Inst> {
#[inline]
pub fn temp_writable_gpr(&mut self) -> WritableGpr {
WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap()
}

#[inline]
pub fn temp_writable_xmm(&mut self) -> WritableXmm {
WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap()
}
}

fn is_int_or_ref_ty(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
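The two new helpers above `unwrap` the result of `only_reg()`, which is safe because on a 64-bit target like x64 an `I64` or `F64` temporary occupies a single machine register. A toy model of that invariant, using stand-in types rather than Cranelift's real `ValueRegs`:

```rust
// Stand-in types for illustration only, not Cranelift's.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Reg(u8);

enum ValueRegs {
    One(Reg),       // single-register value, e.g. I64 or F64 on x64
    Two(Reg, Reg),  // multi-register value, e.g. I128 on a 64-bit target
}

impl ValueRegs {
    // `only_reg` succeeds only in the single-register case, which is
    // why the helpers can unwrap it for I64/F64 temporaries.
    fn only_reg(&self) -> Option<Reg> {
        match self {
            ValueRegs::One(r) => Some(*r),
            ValueRegs::Two(..) => None,
        }
    }
}

fn main() {
    assert_eq!(ValueRegs::One(Reg(3)).only_reg(), Some(Reg(3)));
    assert_eq!(ValueRegs::Two(Reg(0), Reg(1)).only_reg(), None);
    println!("only_reg checks passed");
}
```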
114 changes: 6 additions & 108 deletions cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -95,66 +95,15 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {

let callee = self.put_in_reg(callee);

let mut call_site = X64CallSite::from_ptr(
let call_site = X64CallSite::from_ptr(
self.lower_ctx.sigs(),
callee_sig,
callee,
Opcode::ReturnCallIndirect,
caller_conv,
self.backend.flags().clone(),
);

// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = self.temp_writable_gpr();
let rbp = self.preg_rbp();
self.lower_ctx
.emit(MInst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = self.temp_writable_gpr();
self.lower_ctx.emit(MInst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: fp.to_reg().to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr)
} else {
None
};

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.gen_call_common_args(&mut call_site, args);

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
let tmp = self.temp_writable_gpr();
let tmp2 = self.temp_writable_gpr();
call_site.emit_return_call(
self.lower_ctx,
new_stack_arg_size,
old_stack_arg_size,
ret_addr.map(|r| r.to_reg().to_reg()),
fp.to_reg().to_reg(),
tmp.to_writable_reg(),
tmp2.to_writable_reg(),
);
call_site.emit_return_call(self.lower_ctx, args);

InstOutput::new()
}
@@ -173,66 +122,15 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
"Can only do `return_call`s from within a `tail` calling convention function"
);

let mut call_site = X64CallSite::from_func(
let call_site = X64CallSite::from_func(
self.lower_ctx.sigs(),
callee_sig,
&callee,
distance,
caller_conv,
self.backend.flags().clone(),
);

// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = self.temp_writable_gpr();
let rbp = self.preg_rbp();
self.lower_ctx
.emit(MInst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = self.temp_writable_gpr();
self.lower_ctx.emit(MInst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: fp.to_reg().to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr)
} else {
None
};

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.gen_call_common_args(&mut call_site, args);

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
let tmp = self.temp_writable_gpr();
let tmp2 = self.temp_writable_gpr();
call_site.emit_return_call(
self.lower_ctx,
new_stack_arg_size,
old_stack_arg_size,
ret_addr.map(|r| r.to_reg().to_reg()),
fp.to_reg().to_reg(),
tmp.to_writable_reg(),
tmp2.to_writable_reg(),
);
call_site.emit_return_call(self.lower_ctx, args);

InstOutput::new()
}
@@ -599,12 +497,12 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {

#[inline]
fn temp_writable_gpr(&mut self) -> WritableGpr {
Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
self.lower_ctx.temp_writable_gpr()
}

#[inline]
fn temp_writable_xmm(&mut self) -> WritableXmm {
Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
self.lower_ctx.temp_writable_xmm()
}

#[inline]
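Net effect of the two isle.rs hunks: the tail-call sequence that was duplicated between `return_call_indirect` and `return_call` now lives in one place, `X64CallSite::emit_return_call`, and both lowering callbacks shrink to a single call. A schematic of that extract-method shape, with hypothetical stand-in types and placeholder strings rather than real instructions:

```rust
// Hypothetical stand-in for X64CallSite; illustration only.
struct CallSite {
    dest: &'static str,
}

impl CallSite {
    // Stands in for `X64CallSite::emit_return_call`: every step of the
    // tail-call sequence now lives here exactly once.
    fn emit_return_call(self, insts: &mut Vec<String>) {
        insts.push("allocate tail-call frame".to_string());
        insts.push("save fp; reload return address if frame size changes".to_string());
        insts.push(format!("copy new frame over old and jump to {}", self.dest));
    }
}

fn main() {
    let mut insts = Vec::new();
    // Direct (known symbol) and indirect (register) calls both reduce
    // to the same one-liner at the two lowering call sites.
    CallSite { dest: "known symbol" }.emit_return_call(&mut insts);
    CallSite { dest: "callee register" }.emit_return_call(&mut insts);
    assert_eq!(insts.len(), 6);
    for inst in &insts {
        println!("{inst}");
    }
}
```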