From edb29113e30e5ed1af291afa70cdab1d0ac05d7f Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Wed, 7 Feb 2024 13:46:07 +0000 Subject: [PATCH] New codegen backend: Start emitting code. This makes a start at emitting X86_64 code from the JIT IR. Obviously this is non-functional at this point (it's currently not even called from the JIT pipeline), but should serve as something we can iterate upon and at least unit test in isolation. Many things missing: - Trace input handling. - Correct allocation sizes. - Stackmaps - Debugger support. - Loads more testing. --- ykrt/Cargo.toml | 3 + ykrt/src/compile/jitc_yk/codegen/abs_stack.rs | 81 ++++ ykrt/src/compile/jitc_yk/codegen/mod.rs | 50 +++ .../compile/jitc_yk/codegen/reg_alloc/mod.rs | 68 ++++ .../jitc_yk/codegen/reg_alloc/spill_alloc.rs | 142 +++++++ ykrt/src/compile/jitc_yk/codegen/x86_64.rs | 372 ++++++++++++++++++ ykrt/src/compile/jitc_yk/jit_ir.rs | 45 ++- ykrt/src/compile/jitc_yk/mod.rs | 1 + 8 files changed, 761 insertions(+), 1 deletion(-) create mode 100644 ykrt/src/compile/jitc_yk/codegen/abs_stack.rs create mode 100644 ykrt/src/compile/jitc_yk/codegen/mod.rs create mode 100644 ykrt/src/compile/jitc_yk/codegen/reg_alloc/mod.rs create mode 100644 ykrt/src/compile/jitc_yk/codegen/reg_alloc/spill_alloc.rs create mode 100644 ykrt/src/compile/jitc_yk/codegen/x86_64.rs diff --git a/ykrt/Cargo.toml b/ykrt/Cargo.toml index d6ed27033..0eaafb7cf 100644 --- a/ykrt/Cargo.toml +++ b/ykrt/Cargo.toml @@ -23,6 +23,8 @@ yktracec = { path = "../yktracec" } static_assertions = "1.1.0" typed-index-collections = "3.1.0" thiserror = "1.0.56" +dynasmrt = "2.0.0" +iced-x86 = { version = "1.21.0", features = ["decoder", "std"] } [dependencies.llvm-sys] # note: using a git version to get llvm linkage features in llvm-sys (not in a @@ -46,4 +48,5 @@ yk_jitstate_debug = [] yk_testing = [] [dev-dependencies] +fm = "0.2.2" num-traits = "0.2.16" diff --git a/ykrt/src/compile/jitc_yk/codegen/abs_stack.rs b/ykrt/src/compile/jitc_yk/codegen/abs_stack.rs new file mode 100644 index 000000000..891909187 --- /dev/null +++ b/ykrt/src/compile/jitc_yk/codegen/abs_stack.rs @@ -0,0 +1,81 @@ +//! The abstract stack. + +/// This data structure keeps track of an abstract stack pointer for a JIT frame during code +/// generation. The abstract stack pointer is zero-based, so the stack pointer value also serves as +/// the size of the stack. +/// +/// The implementation is platform agnostic: as the abstract stack gets bigger, the abstract stack +/// pointer grows upwards, even on architectures where the stack grows downwards. +#[derive(Debug, Default)] +pub(crate) struct AbstractStack(usize); + +impl AbstractStack { + /// Aligns the abstract stack pointer to the specified number of bytes. + /// + /// Returns the newly aligned stack pointer. + pub(crate) fn align(&mut self, to: usize) -> usize { + let rem = self.0 % to; + if rem != 0 { + self.0 += to - rem; + } + self.0 + } + + /// Makes the stack bigger by `nbytes` bytes. + /// + /// Returns the new stack pointer. + pub(crate) fn grow(&mut self, nbytes: usize) -> usize { + self.0 += nbytes; + self.0 + } + + /// Returns the stack pointer value. 
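+    ///
+    /// A small usage sketch (mirroring the unit tests below); growing and then aligning the
+    /// stack only ever increases the value reported here:
+    ///
+    /// ```ignore
+    /// let mut s = AbstractStack::default();
+    /// assert_eq!(s.grow(8), 8);    // stack is now 8 bytes.
+    /// assert_eq!(s.align(16), 16); // padded up to the next 16-byte boundary.
+    /// assert_eq!(s.size(), 16);
+    /// ```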
+ pub(crate) fn size(&self) -> usize { + self.0 + } +} + +#[cfg(test)] +mod tests { + use super::AbstractStack; + + #[test] + fn grow() { + let mut s = AbstractStack::default(); + assert_eq!(s.grow(8), 8); + assert_eq!(s.grow(8), 16); + assert_eq!(s.grow(1), 17); + assert_eq!(s.grow(0), 17); + assert_eq!(s.grow(1000), 1017); + } + + #[test] + fn align() { + let mut s = AbstractStack::default(); + for i in 1..100 { + assert_eq!(s.align(i), 0); + assert_eq!(s.align(i), 0); + } + for i in 1..100 { + s.grow(1); + assert_eq!(s.align(1), i); + assert_eq!(s.align(1), i); + } + assert_eq!(s.align(8), 104); + for i in 105..205 { + assert_eq!(s.align(i), i); + assert_eq!(s.align(i), i); + } + assert_eq!(s.align(12345678), 12345678); + assert_eq!(s.align(12345678), 12345678); + } + + #[test] + fn size() { + let mut s = AbstractStack::default(); + for i in 1..100 { + s.grow(1); + assert_eq!(s.size(), i); + } + } +} diff --git a/ykrt/src/compile/jitc_yk/codegen/mod.rs b/ykrt/src/compile/jitc_yk/codegen/mod.rs new file mode 100644 index 000000000..bd2aa6119 --- /dev/null +++ b/ykrt/src/compile/jitc_yk/codegen/mod.rs @@ -0,0 +1,50 @@ +//! The JIT's Code Generator. + +// FIXME: eventually delete. +#![allow(dead_code)] + +use super::{jit_ir, CompilationError}; +use reg_alloc::RegisterAllocator; + +mod abs_stack; +mod reg_alloc; +mod x86_64; + +/// A trait that defines access to JIT compiled code. +pub(crate) trait CodeGenOutput { + /// Disassemble the code-genned trace into a string. + #[cfg(any(debug_assertions, test))] + fn disassemble(&self) -> String; +} + +/// All code generators conform to this contract. +trait CodeGen<'a> { + /// Instantiate a code generator for the specified JIT module. + fn new( + jit_mod: &'a jit_ir::Module, + ra: &'a mut dyn RegisterAllocator, + ) -> Result + where + Self: Sized; + + /// Perform code generation. + fn codegen(self) -> Result, CompilationError>; +} + +#[cfg(test)] +mod tests { + use super::CodeGenOutput; + use fm::FMatcher; + + /// Test helper to use `fm` to match a disassembled trace. + pub(crate) fn match_asm(cgo: Box, pattern: &str) { + let dis = cgo.disassemble(); + match FMatcher::new(pattern).unwrap().matches(&dis) { + Ok(()) => (), + Err(e) => panic!( + "\n!!! Emitted code didn't match !!!\n\n{}\nFull asm:\n{}\n", + e, dis + ), + } + } +} diff --git a/ykrt/src/compile/jitc_yk/codegen/reg_alloc/mod.rs b/ykrt/src/compile/jitc_yk/codegen/reg_alloc/mod.rs new file mode 100644 index 000000000..26dbf78d8 --- /dev/null +++ b/ykrt/src/compile/jitc_yk/codegen/reg_alloc/mod.rs @@ -0,0 +1,68 @@ +//! Register allocation. +//! +//! This module: +//! - describes the generic interface to register allocators. +//! - contains concrete implementations of register allocators. + +use super::{super::jit_ir, abs_stack::AbstractStack}; + +mod spill_alloc; +#[cfg(test)] +pub(crate) use spill_alloc::SpillAllocator; + +/// Describes a local variable allocation. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub(crate) enum LocalAlloc { + /// The local variable is on the stack. + Stack { + /// The offset (from the base pointer) of the allocation. + /// + /// This is independent of which direction the stack grows. In other words, for + /// architectures where the stack grows downwards, you'd subtract this from the base + /// pointer to find the address of the allocation. + /// + /// OPT: consider addressing relative to the stack pointer, thus freeing up the base + /// pointer for general purpose use. + frame_off: usize, + }, + /// The local variable is in a register. 
+ /// + /// FIXME: unimplemented. + Register, +} + +impl LocalAlloc { + /// Create a [Self::Stack] allocation. + pub(crate) fn new_stack(frame_off: usize) -> Self { + Self::Stack { frame_off } + } +} + +/// Indicates the direction of stack growth. +pub(crate) enum StackDirection { + GrowsUp, + GrowsDown, +} + +/// The API to regsiter allocators. +/// +/// Register allocators are responsible for assigning storage for local variables. +pub(crate) trait RegisterAllocator { + /// Creates a register allocator for a stack growing in the specified direction. + fn new(stack_dir: StackDirection) -> Self + where + Self: Sized; + + /// Allocates `size` bytes storage space for the local variable defined by the instruction with + /// index `local`. + fn allocate( + &mut self, + local: jit_ir::InstrIdx, + size: usize, + stack: &mut AbstractStack, + ) -> LocalAlloc; + + /// Return the allocation for the value computed by the instruction at the specified + /// instruction index. + fn allocation<'a>(&'a self, idx: jit_ir::InstrIdx) -> &'a LocalAlloc; +} diff --git a/ykrt/src/compile/jitc_yk/codegen/reg_alloc/spill_alloc.rs b/ykrt/src/compile/jitc_yk/codegen/reg_alloc/spill_alloc.rs new file mode 100644 index 000000000..5653169ba --- /dev/null +++ b/ykrt/src/compile/jitc_yk/codegen/reg_alloc/spill_alloc.rs @@ -0,0 +1,142 @@ +//! The spill allocator. +//! +//! This is a register allocator that always allocates to the stack, so in fact it's not much of a +//! register allocator at all. + +use super::{ + super::{abs_stack::AbstractStack, jit_ir}, + LocalAlloc, RegisterAllocator, StackDirection, +}; +use typed_index_collections::TiVec; + +pub(crate) struct SpillAllocator { + allocs: TiVec, + stack_dir: StackDirection, +} + +impl RegisterAllocator for SpillAllocator { + fn new(stack_dir: StackDirection) -> SpillAllocator { + Self { + allocs: Default::default(), + stack_dir, + } + } + + fn allocate( + &mut self, + local: jit_ir::InstrIdx, + size: usize, + stack: &mut AbstractStack, + ) -> LocalAlloc { + // Under the current design, there can't be gaps in [self.allocs] and local variable + // allocations happen sequentially. So the local we are currently allocating should be the + // next unallocated index. + debug_assert!(jit_ir::InstrIdx::new(self.allocs.len()).unwrap() == local); + + // Align the stack to the size of the allocation. + // + // FIXME: perhaps we should align to the largest alignment of the constituent fields? + // To do this we need to first finish proper type sizes. + let post_align_off = stack.align(size); + + // Make space for the allocation. + let post_grow_off = stack.grow(size); + + // If the stack grows up, then the allocation's offset is the stack height *before* we've + // made space on the stack, otherwise it's the stack height *after*. 
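+        //
+        // Worked example (matching the `grow_down`/`grow_up` tests below): for the first,
+        // 8-byte allocation on an empty stack, `post_align_off` is 0 and `post_grow_off` is 8,
+        // so a stack that grows up records offset 0, while one that grows down records offset
+        // 8 (i.e. the slot occupies the 8 bytes below the base pointer).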
+ let alloc_off = match self.stack_dir { + StackDirection::GrowsUp => post_align_off, + StackDirection::GrowsDown => post_grow_off, + }; + + let alloc = LocalAlloc::new_stack(alloc_off); + self.allocs.push(alloc); + alloc + } + + fn allocation<'a>(&'a self, idx: jit_ir::InstrIdx) -> &'a LocalAlloc { + &self.allocs[idx] + } +} + +#[cfg(test)] +mod tests { + use crate::compile::jitc_yk::{ + codegen::{ + abs_stack::AbstractStack, + reg_alloc::{LocalAlloc, RegisterAllocator, SpillAllocator, StackDirection}, + }, + jit_ir::InstrIdx, + }; + + #[test] + fn grow_down() { + let mut stack = AbstractStack::default(); + let mut sa = SpillAllocator::new(StackDirection::GrowsDown); + + let idx = InstrIdx::new(0).unwrap(); + sa.allocate(idx, 8, &mut stack); + debug_assert_eq!(stack.size(), 8); + debug_assert_eq!(sa.allocation(idx), &LocalAlloc::Stack { frame_off: 8 }); + + let idx = InstrIdx::new(1).unwrap(); + sa.allocate(idx, 1, &mut stack); + debug_assert_eq!(stack.size(), 9); + debug_assert_eq!(sa.allocation(idx), &LocalAlloc::Stack { frame_off: 9 }); + } + + #[test] + fn grow_up() { + let mut stack = AbstractStack::default(); + let mut sa = SpillAllocator::new(StackDirection::GrowsUp); + + let idx = InstrIdx::new(0).unwrap(); + sa.allocate(idx, 8, &mut stack); + debug_assert_eq!(stack.size(), 8); + debug_assert_eq!(sa.allocation(idx), &LocalAlloc::Stack { frame_off: 0 }); + + let idx = InstrIdx::new(1).unwrap(); + sa.allocate(idx, 1, &mut stack); + debug_assert_eq!(stack.size(), 9); + debug_assert_eq!(sa.allocation(idx), &LocalAlloc::Stack { frame_off: 8 }); + } + + #[cfg(debug_assertions)] + #[should_panic] + #[test] + fn allocate_out_of_order() { + let mut stack = AbstractStack::default(); + let mut sa = SpillAllocator::new(StackDirection::GrowsUp); + // panics because the backing store for local allocations are a "unsparse vector" and Local + // 0 hasn't been allocated yet. + sa.allocate(InstrIdx::new(1).unwrap(), 1, &mut stack); + } + + #[test] + fn compose_alloc_and_align_down() { + let mut stack = AbstractStack::default(); + let mut sa = SpillAllocator::new(StackDirection::GrowsDown); + + sa.allocate(InstrIdx::new(0).unwrap(), 8, &mut stack); + stack.align(32); + + let idx = InstrIdx::new(1).unwrap(); + sa.allocate(idx, 1, &mut stack); + debug_assert_eq!(stack.size(), 33); + debug_assert_eq!(sa.allocation(idx), &LocalAlloc::Stack { frame_off: 33 }); + } + + #[test] + fn compose_alloc_and_align_up() { + let mut stack = AbstractStack::default(); + let mut sa = SpillAllocator::new(StackDirection::GrowsUp); + + sa.allocate(InstrIdx::new(0).unwrap(), 8, &mut stack); + stack.align(32); + + let idx = InstrIdx::new(1).unwrap(); + sa.allocate(idx, 1, &mut stack); + debug_assert_eq!(stack.size(), 33); + debug_assert_eq!(sa.allocation(idx), &LocalAlloc::Stack { frame_off: 32 }); + } +} diff --git a/ykrt/src/compile/jitc_yk/codegen/x86_64.rs b/ykrt/src/compile/jitc_yk/codegen/x86_64.rs new file mode 100644 index 000000000..2fb00df33 --- /dev/null +++ b/ykrt/src/compile/jitc_yk/codegen/x86_64.rs @@ -0,0 +1,372 @@ +//! The X86_64 JIT Code Generator. 
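+//!
+//! Traces are emitted as functions with a conventional frame: the prologue pushes `rbp`,
+//! makes `rbp` the frame base and reserves space for spilled locals, so the generated code
+//! has roughly this shape (the frame size is patched in after codegen, once it is known):
+//!
+//! ```text
+//! push rbp
+//! mov rbp, rsp
+//! sub rsp, <frame size>
+//! ; ...code for each JIT IR instruction...
+//! ```
+//!
+//! There is no epilogue: the only way out of a trace is deoptimisation.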
+ +use super::{ + super::{ + jit_ir::{self, InstrIdx, Operand}, + CompilationError, + }, + abs_stack::AbstractStack, + reg_alloc::{LocalAlloc, RegisterAllocator, StackDirection}, + CodeGen, CodeGenOutput, +}; +use dynasmrt::{dynasm, x64::Rq, AssemblyOffset, DynasmApi, ExecutableBuffer, Register}; +#[cfg(any(debug_assertions, test))] +use iced_x86; +#[cfg(any(debug_assertions, test))] +use std::{cell::Cell, collections::HashMap, slice}; + +/// Argument registers as defined by the X86_64 SysV ABI. +static ARG_REGS: [Rq; 6] = [Rq::RDI, Rq::RSI, Rq::RDX, Rq::RCX, Rq::R8, Rq::R9]; + +/// The size of a 64-bit register in bytes. +static REG64_SIZE: usize = 8; + +/// Work registers, i.e. the registers we use temproarily (where possible) for operands to, and +/// results of, intermediate computations. +/// +/// We choose callee-save registers so that we don't have to worry about storing/restoring them +/// when we do a function call to external code. +static WR0: Rq = Rq::R12; +static WR1: Rq = Rq::R13; +static WR2: Rq = Rq::R14; + +/// The X86_64 SysV ABI requires a 16-byte aligned stack prior to any call. +const SYSV_CALL_STACK_ALIGN: usize = 16; + +/// On X86_64 the stack grows down. +const STACK_DIRECTION: StackDirection = StackDirection::GrowsDown; + +/// The X86_64 code generator. +pub(super) struct X64CodeGen<'a> { + jit_mod: &'a jit_ir::Module, + asm: dynasmrt::x64::Assembler, + /// Abstract stack pointer, as a relative offset from `RBP`. The higher this number, the larger + /// the JITted code's stack. That means that even on a host where the stack grows down, this + /// value grows up. + stack: AbstractStack, + /// Register allocator. + ra: &'a mut dyn RegisterAllocator, + /// Comments used by the trace printer for debugging and testing only. + /// + /// Each assembly offset can have zero or more comment lines. + #[cfg(any(debug_assertions, test))] + comments: Cell>>, +} + +impl<'a> CodeGen<'a> for X64CodeGen<'a> { + fn new( + jit_mod: &'a jit_ir::Module, + ra: &'a mut dyn RegisterAllocator, + ) -> Result, CompilationError> { + let asm = dynasmrt::x64::Assembler::new() + .map_err(|e| CompilationError::Unrecoverable(e.to_string()))?; + Ok(Self { + jit_mod, + asm, + stack: Default::default(), + ra, + #[cfg(any(debug_assertions, test))] + comments: Cell::new(HashMap::new()), + }) + } + + fn codegen(mut self) -> Result, CompilationError> { + let alloc_off = self.emit_prologue(); + + // FIXME: we'd like to be able to assemble code backwards as this would simplify register + // allocation and side-step the need to patch up the prolog after the fact. dynasmrs + // doesn't support this, but it's on their roadmap: + // https://github.com/CensoredUsername/dynasm-rs/issues/48 + for (idx, inst) in self.jit_mod.instrs().iter().enumerate() { + self.codegen_inst(jit_ir::InstrIdx::new(idx)?, inst); + } + + // Now we know the size of the stack frame (i.e. self.asp), patch the allocation with the + // correct amount. + self.patch_frame_allocation(alloc_off); + + self.asm + .commit() + .map_err(|e| CompilationError::Unrecoverable(e.to_string()))?; + + let buf = self + .asm + .finalize() + .map_err(|_| CompilationError::Unrecoverable("failed to finalize assembler".into()))?; + + #[cfg(not(any(debug_assertions, test)))] + return Ok(Box::new(X64CodeGenOutput { buf })); + #[cfg(any(debug_assertions, test))] + { + let comments = self.comments.take(); + return Ok(Box::new(X64CodeGenOutput { buf, comments })); + } + } +} + +impl<'a> X64CodeGen<'a> { + /// Codegen an instruction. 
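+    ///
+    /// For example, an 8-byte `Load` of local `%0` is currently lowered (as checked by
+    /// `simple_codegen` below) to something like:
+    ///
+    /// ```text
+    /// mov r12, [rbp-0x08]  ; fetch the operand from its stack slot.
+    /// mov r12, [r12]       ; dereference it.
+    /// mov [rbp-0x10], r12  ; spill the result into a freshly allocated slot.
+    /// ```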
+ fn codegen_inst(&mut self, instr_idx: jit_ir::InstrIdx, inst: &jit_ir::Instruction) { + #[cfg(any(debug_assertions, test))] + self.comment(self.asm.offset(), inst.to_string()); + match inst { + jit_ir::Instruction::LoadArg(i) => self.codegen_loadarg_instr(instr_idx, &i), + jit_ir::Instruction::Load(i) => self.codegen_load_instr(instr_idx, &i), + _ => todo!(), + } + } + + /// Add a comment to the trace, for use when disassembling its native code. + #[cfg(any(debug_assertions, test))] + fn comment(&mut self, off: AssemblyOffset, line: String) { + self.comments.get_mut().entry(off.0).or_default().push(line); + } + + /// Emit the prologue of the JITted code. + /// + /// The JITted code is a function, so it has to stash the old stack poninter, open a new frame + /// and allocate space for local variables etc. + /// + /// Note that there is no correspoinding `emit_epilogue()`. This is because the only way out of + /// JITted code is via deoptimisation, which will rewrite the whole stack anyway. + /// + /// Returns the offset at which to patch up the stack allocation later. + fn emit_prologue(&mut self) -> AssemblyOffset { + #[cfg(any(debug_assertions, test))] + self.comment(self.asm.offset(), "prologue".to_owned()); + + // Start a frame for the JITted code. + dynasm!(self.asm + ; push rbp + ; mov rbp, rsp + ); + + // Emit a dummy frame allocation instruction that initially allocates 0 bytes, but will be + // patched later when we know how big the frame needs to be. + let alloc_off = self.asm.offset(); + dynasm!(self.asm + ; sub rsp, DWORD 0 + ); + + // FIXME: load/allocate trace inputs here. + + alloc_off + } + + fn patch_frame_allocation(&mut self, asm_off: AssemblyOffset) { + // The stack should be 16-byte aligned after allocation. This ensures that calls in the + // trace also get a 16-byte aligned stack, as per the SysV ABI. + self.stack.align(SYSV_CALL_STACK_ALIGN); + + match i32::try_from(self.stack.size()) { + Ok(asp) => { + let mut patchup = self.asm.alter_uncommitted(); + patchup.goto(asm_off); + dynasm!(patchup + // The size of this instruction must be the exactly the same as the dummy + // allocation instruction that was emitted during `emit_prologue()`. + ; sub rsp, DWORD asp + ); + } + Err(_) => { + // If we get here, then the frame was so big that the dummy instruction we had + // planned to patch isn't big enough to encode the desired allocation size. Cross + // this bridge if/when we get to it. + todo!(); + } + } + } + + /// Load a local variable out of its stack slot into the specified register. + fn load_local(&mut self, reg: Rq, local: InstrIdx) { + match self.ra.allocation(local) { + LocalAlloc::Stack { frame_off } => { + match i32::try_from(*frame_off) { + Ok(foff) => { + // We use `movzx` where possible to avoid partial register stalls. 
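+                    //
+                    // For instance, a 1-byte local at `[rbp-0x9]` is loaded as
+                    // `movzx r12, BYTE [rbp-0x9]`, which zero-extends into the full 64-bit
+                    // register instead of leaving its upper bits stale.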
+ match local.instr(self.jit_mod).def_abi_size() { + 1 => dynasm!(self.asm; movzx Rq(reg.code()), BYTE [rbp - foff]), + 2 => dynasm!(self.asm; movzx Rq(reg.code()), WORD [rbp - foff]), + 4 => dynasm!(self.asm; mov Rd(reg.code()), [rbp - foff]), + 8 => dynasm!(self.asm; mov Rq(reg.code()), [rbp - foff]), + _ => todo!(), + } + } + Err(_) => todo!(), + } + } + LocalAlloc::Register => todo!(), + } + } + + fn store_local(&mut self, l: &LocalAlloc, reg: Rq, size: usize) { + match l { + LocalAlloc::Stack { frame_off } => match i32::try_from(*frame_off) { + Ok(off) => match size { + 8 => dynasm!(self.asm ; mov [rbp - off], Rq(reg.code())), + 4 => dynasm!(self.asm ; mov [rbp - off], Rd(reg.code())), + 2 => dynasm!(self.asm ; mov [rbp - off], Rw(reg.code())), + 1 => dynasm!(self.asm ; mov [rbp - off], Rb(reg.code())), + _ => todo!("{}", size), + }, + Err(_) => todo!("{}", size), + }, + LocalAlloc::Register => todo!(), + } + } + + fn reg_into_new_local(&mut self, local: InstrIdx, reg: Rq) { + let l = self.ra.allocate( + local, + local.instr(self.jit_mod).def_abi_size(), + &mut self.stack, + ); + self.store_local(&l, reg, local.instr(self.jit_mod).def_abi_size()) + } + + fn codegen_loadarg_instr( + &mut self, + inst_idx: jit_ir::InstrIdx, + _inst: &jit_ir::LoadArgInstruction, + ) { + // FIXME: LoadArg instructions are not yet specified. This hack is just to satisfy data + // flow dependencies during code generation. + dynasm!(self.asm ; mov Rq(WR0.code()), 0); + self.reg_into_new_local(inst_idx, WR0); + } + + fn codegen_load_instr(&mut self, inst_idx: jit_ir::InstrIdx, inst: &jit_ir::LoadInstruction) { + { + self.operand_into_reg(WR0, &inst.operand()); + } + let size = inst_idx.instr(self.jit_mod).def_abi_size(); + debug_assert!(size <= REG64_SIZE); + match size { + 8 => dynasm!(self.asm ; mov Rq(WR0.code()), [Rq(WR0.code())]), + 4 => dynasm!(self.asm ; mov Rd(WR0.code()), [Rq(WR0.code())]), + 2 => dynasm!(self.asm ; movzx Rd(WR0.code()), WORD [Rq(WR0.code())]), + 1 => dynasm!(self.asm ; movzx Rq(WR0.code()), BYTE [Rq(WR0.code())]), + _ => todo!("{}", size), + }; + self.reg_into_new_local(inst_idx, WR0); + } + + fn const_u64_into_reg(&mut self, reg: Rq, cv: u64) { + dynasm!(self.asm + ; mov Rq(reg.code()), QWORD cv as i64 // `as` intentional. + ) + } + + /// Load an operand into a register. + fn operand_into_reg(&mut self, reg: Rq, op: &Operand) { + match op { + Operand::Local(li) => self.load_local(reg, *li), + _ => todo!("{}", op), + } + } +} + +pub(super) struct X64CodeGenOutput { + /// The executable code itself. + buf: ExecutableBuffer, + /// Comments to be shown when printing the compiled trace using `AsmPrinter`. + /// + /// Maps a byte offset in the native JITted code to a collection of line comments to show when + /// disassembling the trace. + /// + /// Used for testing and debugging. + #[cfg(any(debug_assertions, test))] + comments: HashMap>, +} + +impl CodeGenOutput for X64CodeGenOutput { + #[cfg(any(debug_assertions, test))] + fn disassemble(&self) -> String { + AsmPrinter::new(&self.buf, &self.comments).to_string() + } +} + +/// Disassembles emitted code for testing and debugging purposes. +#[cfg(any(debug_assertions, test))] +struct AsmPrinter<'a> { + buf: &'a ExecutableBuffer, + comments: &'a HashMap>, +} + +#[cfg(any(debug_assertions, test))] +impl<'a> AsmPrinter<'a> { + fn new(buf: &'a ExecutableBuffer, comments: &'a HashMap>) -> Self { + Self { buf, comments } + } + + /// Returns the disassembled trace. 
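+    ///
+    /// Comments recorded at a given offset are printed above the instruction at that offset,
+    /// so the output looks roughly like this (addresses are illustrative only):
+    ///
+    /// ```text
+    /// --- Begin jit-asm ---
+    /// ; prologue
+    /// 7f6b325a1000 00000000: push rbp
+    /// 7f6b325a1001 00000001: mov rbp,rsp
+    /// ...
+    /// --- End jit-asm ---
+    /// ```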
+ fn to_string(&self) -> String { + let mut out = Vec::new(); + out.push("--- Begin jit-asm ---".to_string()); + let len = self.buf.len(); + let bptr = self.buf.ptr(AssemblyOffset(0)); + let code = unsafe { slice::from_raw_parts(bptr, len) }; + // `as` is safe as it casts from a raw pointer to a pointer-sized integer. + let mut dis = + iced_x86::Decoder::with_ip(64, code, u64::try_from(bptr as usize).unwrap(), 0); + let mut remain = len; + while remain != 0 { + let off = len - remain; + if let Some(lines) = self.comments.get(&off) { + for line in lines { + out.push(format!("; {line}")); + } + } + let inst = dis.decode(); + out.push(format!("{:08x} {:08x}: {}", inst.ip(), off, inst)); + remain -= inst.len(); + } + out.push("--- End jit-asm ---".into()); + out.join("\n") + } +} + +#[cfg(test)] +mod tests { + use super::{CodeGen, X64CodeGen, STACK_DIRECTION}; + use crate::compile::jitc_yk::{ + aot_ir, + codegen::{ + reg_alloc::{RegisterAllocator, SpillAllocator}, + tests::match_asm, + }, + jit_ir, + }; + + #[test] + fn simple_codegen() { + let mut aot_mod = aot_ir::Module::default(); + aot_mod.push_type(aot_ir::Type::Ptr); + + let mut jit_mod = jit_ir::Module::new("test".into()); + jit_mod.push(jit_ir::LoadArgInstruction::new().into()); + jit_mod.push( + jit_ir::LoadInstruction::new( + jit_ir::Operand::Local(jit_ir::InstrIdx::new(0).unwrap()), + jit_ir::TypeIdx::new(0).unwrap(), + ) + .into(), + ); + let patt_lines = [ + "...", + "; Load %0", + "... 00000019: mov r12,[rbp-8]", + "... 00000020: mov r12,[r12]", + "... 00000025: mov [rbp-10h],r12", + "--- End jit-asm ---", + ]; + let mut ra = SpillAllocator::new(STACK_DIRECTION); + match_asm( + X64CodeGen::new(&jit_mod, &mut ra) + .unwrap() + .codegen() + .unwrap(), + &patt_lines.join("\n"), + ); + } +} diff --git a/ykrt/src/compile/jitc_yk/jit_ir.rs b/ykrt/src/compile/jitc_yk/jit_ir.rs index acc793b87..836cedd91 100644 --- a/ykrt/src/compile/jitc_yk/jit_ir.rs +++ b/ykrt/src/compile/jitc_yk/jit_ir.rs @@ -63,6 +63,13 @@ fn index_overflow(typ: &str) -> CompilationError { macro_rules! index_24bit { ($struct:ident) => { impl $struct { + #[cfg(test)] + pub(crate) fn new(v: usize) -> Result { + U24::from_usize(v) + .ok_or(index_overflow(stringify!($struct))) + .map(|u| Self(u)) + } + /// Convert an AOT index to a reduced-size JIT index (if possible). pub(crate) fn from_aot(aot_idx: aot_ir::$struct) -> Result<$struct, CompilationError> { U24::from_usize(usize::from(aot_idx)) @@ -92,6 +99,18 @@ macro_rules! index_16bit { self.0.into() } } + + // impl From for $struct { + // fn from(idx: usize) -> Self { + // Self(idx) + // } + // } + + impl From<$struct> for usize { + fn from(s: $struct) -> usize { + s.0.into() + } + } }; } @@ -142,6 +161,13 @@ index_24bit!(GlobalIdx); pub(crate) struct InstrIdx(u16); index_16bit!(InstrIdx); +impl InstrIdx { + /// Return a reference to the instruction indentified by `self` in `jit_mod`. + pub(crate) fn instr<'a>(&'a self, jit_mod: &'a Module) -> &Instruction { + jit_mod.instr(*self) + } +} + /// The packed representation of an instruction operand. /// /// # Encoding @@ -236,6 +262,12 @@ impl Instruction { Self::StoreGlobal(..) => false, } } + + /// Returns the size (in bytes) of the value that this instruction generates. + pub(crate) fn def_abi_size(&self) -> usize { + debug_assert!(self.is_def()); + 8 // FIXME + } } impl fmt::Display for Instruction { @@ -285,6 +317,7 @@ pub struct LoadInstruction { } impl LoadInstruction { + // FIXME: why do we need to provide a type index? Can't we get that from the operand? 
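+    // For now the call sites pass both, e.g. (as in the x86_64 codegen tests) a load through
+    // the pointer produced by instruction %0:
+    //
+    //     LoadInstruction::new(Operand::Local(InstrIdx::new(0).unwrap()), TypeIdx::new(0).unwrap())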
pub(crate) fn new(op: Operand, ty_idx: TypeIdx) -> LoadInstruction { LoadInstruction { op: PackedOperand::new(&op), @@ -547,7 +580,7 @@ pub(crate) struct Module { /// The name of the module and the eventual symbol name for the JITted code. name: String, /// The IR trace as a linear sequence of instructions. - instrs: Vec, + instrs: Vec, // FIXME: this should be a TiVec. /// The extra argument table. /// /// Used when a [CallInstruction]'s arguments don't fit inline. @@ -571,6 +604,11 @@ impl Module { } } + /// Return the instruction at the specified index. + pub(crate) fn instr(&self, idx: InstrIdx) -> &Instruction { + &self.instrs[usize::try_from(idx).unwrap()] + } + /// Push an instruction to the end of the [Module]. pub(crate) fn push(&mut self, instr: Instruction) { self.instrs.push(instr); @@ -586,6 +624,11 @@ impl Module { eprintln!("{}", self); } + /// Returns a reference to the instruction stream. + pub(crate) fn instrs(&self) -> &Vec { + &self.instrs + } + /// Push a slice of extra arguments into the extra arg table. fn push_extra_args(&mut self, ops: &[Operand]) -> Result { let idx = self.extra_args.len(); diff --git a/ykrt/src/compile/jitc_yk/mod.rs b/ykrt/src/compile/jitc_yk/mod.rs index c864411bb..ff3cf6210 100644 --- a/ykrt/src/compile/jitc_yk/mod.rs +++ b/ykrt/src/compile/jitc_yk/mod.rs @@ -51,6 +51,7 @@ static PHASES_TO_PRINT: LazyLock> = LazyLock::new(|| { }); pub mod aot_ir; +mod codegen; pub mod jit_ir; mod trace_builder;
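
As a sketch of how these pieces are meant to compose once the backend is wired into the JIT
pipeline (that wiring is not part of this patch, and `SpillAllocator`/`X64CodeGen` are currently
only reachable from the codegen module and its tests), the flow mirrors the `simple_codegen`
test above: build a `jit_ir::Module`, pick a register allocator, run codegen, and (in debug or
test builds) disassemble the result.

    // Hypothetical driver, assuming these items are later made visible to the pipeline.
    let mut ra = SpillAllocator::new(STACK_DIRECTION);
    let cgo = X64CodeGen::new(&jit_mod, &mut ra)?.codegen()?;
    #[cfg(any(debug_assertions, test))]
    eprintln!("{}", cgo.disassemble());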