Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Use Vector RegClass for Vectors #6366

Merged
merged 6 commits into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn get_number_of_spillslots_for_value(rc: RegClass, vector_size: u32) -> u32 {
fn get_number_of_spillslots_for_value(
rc: RegClass,
vector_size: u32,
_isa_flags: &Self::F,
) -> u32 {
assert_eq!(vector_size % 8, 0);
// We allocate in terms of 8-byte slots.
match rc {
Expand Down
169 changes: 130 additions & 39 deletions cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,40 @@ pub struct Riscv64MachineDeps;

impl IsaFlags for RiscvFlags {}

impl RiscvFlags {
    /// Returns the minimum vector register size implied by the enabled ISA
    /// flags, or `0` when none of the vector-related flags is set.
    ///
    /// Candidates are listed from the largest `zvl*b` flag down to the
    /// smallest, so the first enabled entry is also the largest one. The
    /// selected size is then capped at 1024.
    ///
    /// NOTE(review): the sizes mirror the numeric suffix of the `zvl*b`
    /// flags, while the cap is described in bytes -- confirm which unit the
    /// callers of this function expect.
    pub(crate) fn min_vec_reg_size(&self) -> u64 {
        // Due to a limitation in regalloc2, we can't support types
        // larger than 1024 bytes. So limit that here.
        const MAX_SUPPORTED_SIZE: u64 = 1024;

        let candidates: [(bool, u64); 13] = [
            (self.has_zvl65536b(), 65536),
            (self.has_zvl32768b(), 32768),
            (self.has_zvl16384b(), 16384),
            (self.has_zvl8192b(), 8192),
            (self.has_zvl4096b(), 4096),
            (self.has_zvl2048b(), 2048),
            (self.has_zvl1024b(), 1024),
            (self.has_zvl512b(), 512),
            (self.has_zvl256b(), 256),
            // In order to claim the Application Profile V extension, a minimum
            // register size of 128 is required. i.e. V implies Zvl128b.
            (self.has_v(), 128),
            (self.has_zvl128b(), 128),
            (self.has_zvl64b(), 64),
            (self.has_zvl32b(), 32),
        ];

        // Pick the first (i.e. largest) enabled candidate and clamp it; fall
        // back to 0 when nothing is enabled.
        candidates
            .into_iter()
            .find(|&(enabled, _)| enabled)
            .map_or(0, |(_, size)| size.min(MAX_SUPPORTED_SIZE))
    }
}

impl ABIMachineSpec for Riscv64MachineDeps {
type I = Inst;
type F = RiscvFlags;
Expand Down Expand Up @@ -415,9 +449,9 @@ impl ABIMachineSpec for Riscv64MachineDeps {
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let ty = match r_reg.class() {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
RegClass::Vector => unreachable!(),
RegClass::Int => I64,
RegClass::Float => F64,
RegClass::Vector => unimplemented!("Vector Clobber Saves"),
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
Expand Down Expand Up @@ -462,9 +496,9 @@ impl ABIMachineSpec for Riscv64MachineDeps {
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
let ty = match rreg.class() {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
RegClass::Vector => unreachable!(),
RegClass::Int => I64,
RegClass::Float => F64,
RegClass::Vector => unimplemented!("Vector Clobber Restores"),
};
insts.push(Self::gen_load_stack(
StackAMode::SPOffset(-cur_offset, ty),
Expand Down Expand Up @@ -572,12 +606,16 @@ impl ABIMachineSpec for Riscv64MachineDeps {
insts
}

fn get_number_of_spillslots_for_value(rc: RegClass, _target_vector_bytes: u32) -> u32 {
fn get_number_of_spillslots_for_value(
rc: RegClass,
_target_vector_bytes: u32,
isa_flags: &RiscvFlags,
) -> u32 {
// We allocate in terms of 8-byte slots.
match rc {
RegClass::Int => 1,
RegClass::Float => 1,
RegClass::Vector => unreachable!(),
RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32,
}
}

Expand All @@ -592,20 +630,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
}

fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
let mut v = PRegSet::empty();
for (k, need_save) in CALLER_SAVE_X_REG.iter().enumerate() {
if !*need_save {
continue;
}
v.add(px_reg(k));
}
for (k, need_save) in CALLER_SAVE_F_REG.iter().enumerate() {
if !*need_save {
continue;
}
v.add(pf_reg(k));
}
v
CLOBBERS
}

fn get_clobbered_callee_saves(
Expand Down Expand Up @@ -652,24 +677,12 @@ impl ABIMachineSpec for Riscv64MachineDeps {
}
}

const CALLER_SAVE_X_REG: [bool; 32] = [
false, true, false, false, false, true, true, true, // 0-7
false, false, true, true, true, true, true, true, // 8-15
true, true, false, false, false, false, false, false, // 16-23
false, false, false, false, true, true, true, true, // 24-31
];
const CALLEE_SAVE_X_REG: [bool; 32] = [
false, false, true, false, false, false, false, false, // 0-7
true, true, false, false, false, false, false, false, // 8-15
false, false, true, true, true, true, true, true, // 16-23
true, true, true, true, false, false, false, false, // 24-31
];
const CALLER_SAVE_F_REG: [bool; 32] = [
true, true, true, true, true, true, true, true, // 0-7
false, true, true, true, true, true, true, true, // 8-15
true, true, false, false, false, false, false, false, // 16-23
false, false, false, false, true, true, true, true, // 24-31
];
const CALLEE_SAVE_F_REG: [bool; 32] = [
false, false, false, false, false, false, false, false, // 0-7
true, false, false, false, false, false, false, false, // 8-15
Expand All @@ -680,10 +693,11 @@ const CALLEE_SAVE_F_REG: [bool; 32] = [
/// This should be the registers that must be saved by callee.
#[inline]
fn is_reg_saved_in_prologue(_conv: CallConv, reg: RealReg) -> bool {
if reg.class() == RegClass::Int {
CALLEE_SAVE_X_REG[reg.hw_enc() as usize]
} else {
CALLEE_SAVE_F_REG[reg.hw_enc() as usize]
match reg.class() {
RegClass::Int => CALLEE_SAVE_X_REG[reg.hw_enc() as usize],
RegClass::Float => CALLEE_SAVE_F_REG[reg.hw_enc() as usize],
// All vector registers are caller saved.
RegClass::Vector => false,
}
}

Expand All @@ -697,12 +711,89 @@ fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
RegClass::Float => {
clobbered_size += 8;
}
RegClass::Vector => unreachable!(),
RegClass::Vector => unimplemented!("Vector Size Clobbered"),
}
}
align_to(clobbered_size, 16)
}

const fn clobbers() -> PRegSet {
PRegSet::empty()
.with(px_reg(1))
.with(px_reg(5))
.with(px_reg(6))
.with(px_reg(7))
.with(px_reg(10))
.with(px_reg(11))
.with(px_reg(12))
.with(px_reg(13))
.with(px_reg(14))
.with(px_reg(15))
.with(px_reg(16))
.with(px_reg(17))
.with(px_reg(28))
.with(px_reg(29))
.with(px_reg(30))
.with(px_reg(31))
// F Regs
.with(pf_reg(0))
.with(pf_reg(1))
.with(pf_reg(2))
.with(pf_reg(3))
.with(pf_reg(4))
.with(pf_reg(5))
.with(pf_reg(6))
.with(pf_reg(7))
.with(pf_reg(9))
.with(pf_reg(10))
.with(pf_reg(11))
.with(pf_reg(12))
.with(pf_reg(13))
.with(pf_reg(14))
.with(pf_reg(15))
.with(pf_reg(16))
.with(pf_reg(17))
.with(pf_reg(28))
.with(pf_reg(29))
.with(pf_reg(30))
.with(pf_reg(31))
// V Regs - All vector regs get clobbered
.with(pv_reg(0))
.with(pv_reg(1))
.with(pv_reg(2))
.with(pv_reg(3))
.with(pv_reg(4))
.with(pv_reg(5))
.with(pv_reg(6))
.with(pv_reg(7))
.with(pv_reg(8))
.with(pv_reg(9))
.with(pv_reg(10))
.with(pv_reg(11))
.with(pv_reg(12))
.with(pv_reg(13))
.with(pv_reg(14))
.with(pv_reg(15))
.with(pv_reg(16))
.with(pv_reg(17))
.with(pv_reg(18))
.with(pv_reg(19))
.with(pv_reg(20))
.with(pv_reg(21))
.with(pv_reg(22))
.with(pv_reg(23))
.with(pv_reg(24))
.with(pv_reg(25))
.with(pv_reg(26))
.with(pv_reg(27))
.with(pv_reg(28))
.with(pv_reg(29))
.with(pv_reg(30))
.with(pv_reg(31))
}

const CLOBBERS: PRegSet = clobbers();

impl Riscv64MachineDeps {
fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
insts.reserve(probe_count as usize);
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,8 @@ impl MachInstEmit for Inst {
}

&Inst::Mov { rd, rm, ty } => {
debug_assert_ne!(rd.to_reg().class(), RegClass::Vector);
debug_assert_ne!(rm.class(), RegClass::Vector);
if rd.to_reg() != rm {
let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);
Expand Down
37 changes: 11 additions & 26 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use super::lower::isle::generated_code::{VecAMode, VecElementWidth};
use crate::binemit::{Addend, CodeOffset, Reloc};
pub use crate::ir::condcodes::IntCC;
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, R32, R64};
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64};

pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
use crate::isa::{CallConv, FunctionAlignment};
Expand Down Expand Up @@ -667,7 +667,7 @@ impl MachInst for Inst {
match rc {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
regalloc2::RegClass::Vector => unreachable!(),
regalloc2::RegClass::Vector => I8X16,
}
}

Expand Down Expand Up @@ -770,7 +770,7 @@ impl MachInst for Inst {
let idx = (ty.bytes().ilog2() - 1) as usize;
let ty = &SIMD_TYPES[idx][..];

Ok((&[RegClass::Float], ty))
Ok((&[RegClass::Vector], ty))
}
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
Expand Down Expand Up @@ -830,24 +830,13 @@ pub fn reg_name(reg: Reg) -> String {
28..=31 => format!("ft{}", real.hw_enc() - 20),
_ => unreachable!(),
},
RegClass::Vector => unreachable!(),
RegClass::Vector => format!("v{}", real.hw_enc()),
},
None => {
format!("{:?}", reg)
}
}
}
pub fn vec_reg_name(reg: Reg) -> String {
match reg.to_real_reg() {
Some(real) => {
assert_eq!(real.class(), RegClass::Float);
format!("v{}", real.hw_enc())
}
None => {
format!("{:?}", reg)
}
}
}

impl Inst {
fn print_with_state(
Expand All @@ -859,10 +848,6 @@ impl Inst {
let reg = allocs.next(reg);
reg_name(reg)
};
let format_vec_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String {
let reg = allocs.next(reg);
vec_reg_name(reg)
};

let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String {
match amode {
Expand Down Expand Up @@ -1568,9 +1553,9 @@ impl Inst {
vs2,
ref vstate,
} => {
let vs1_s = format_vec_reg(vs1, allocs);
let vs2_s = format_vec_reg(vs2, allocs);
let vd_s = format_vec_reg(vd.to_reg(), allocs);
let vs1_s = format_reg(vs1, allocs);
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);

// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
// This is noted in Section 10.1 of the RISC-V Vector spec.
Expand All @@ -1583,8 +1568,8 @@ impl Inst {
vs2,
ref vstate,
} => {
let vs2_s = format_vec_reg(vs2, allocs);
let vd_s = format_vec_reg(vd.to_reg(), allocs);
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);

format!("{} {},{},{} {}", op, vd_s, vs2_s, imm, vstate)
}
Expand All @@ -1601,7 +1586,7 @@ impl Inst {
..
} => {
let base = format_vec_amode(from, allocs);
let vd = format_vec_reg(to.to_reg(), allocs);
let vd = format_reg(to.to_reg(), allocs);
format!("vl{}.v {},{} {}", eew, vd, base, vstate)
}
Inst::VecStore {
Expand All @@ -1612,7 +1597,7 @@ impl Inst {
..
} => {
let dst = format_vec_amode(to, allocs);
let vs3 = format_vec_reg(*from, allocs);
let vs3 = format_reg(*from, allocs);
format!("vs{}.v {},{} {}", eew, vs3, dst, vstate)
}
}
Expand Down
Loading