From bbe372fb472c15ce07728161a4f167f449d757f3 Mon Sep 17 00:00:00 2001 From: Willi Ye Date: Sat, 1 Feb 2025 16:59:48 +0100 Subject: [PATCH] Dedicated jit memory for arm7 --- src/core/emu.rs | 2 +- src/core/memory/mem.rs | 5 +- src/jit/assembler/block_asm.rs | 59 ++----------- src/jit/assembler/block_inst.rs | 75 ++++------------ src/jit/jit_asm.rs | 13 ++- src/jit/jit_memory.rs | 152 +++++++++++++++----------------- src/main.rs | 1 - 7 files changed, 108 insertions(+), 199 deletions(-) diff --git a/src/core/emu.rs b/src/core/emu.rs index aaf74998..d275bbf8 100644 --- a/src/core/emu.rs +++ b/src/core/emu.rs @@ -339,7 +339,7 @@ impl Emu { pub fn new(cartridge_io: CartridgeIo, fps: Arc, key_map: Arc, touch_points: Arc, sound_sampler: Arc, settings: Settings) -> Self { Emu { common: UnsafeCell::new(Common::new(cartridge_io, fps, key_map)), - mem: UnsafeCell::new(Memory::new(touch_points, sound_sampler)), + mem: UnsafeCell::new(Memory::new(&settings, touch_points, sound_sampler)), arm7_hle: UnsafeCell::new(Arm7Hle::new()), settings, } diff --git a/src/core/memory/mem.rs b/src/core/memory/mem.rs index c40e5eab..694ad311 100644 --- a/src/core/memory/mem.rs +++ b/src/core/memory/mem.rs @@ -15,6 +15,7 @@ use crate::core::CpuType::ARM9; use crate::jit::jit_memory::JitMemory; use crate::logging::debug_println; use crate::mmap::Shm; +use crate::settings::Settings; use crate::utils::Convert; use crate::{utils, DEBUG_LOG}; use std::hint::unreachable_unchecked; @@ -744,7 +745,7 @@ impl MemoryMultipleMemsetIo, sound_sampler: Arc) -> Self { + pub fn new(settings: &Settings, touch_points: Arc, sound_sampler: Arc) -> Self { Memory { shm: Shm::new("physical", regions::TOTAL_MEM_SIZE as usize).unwrap(), wram: Wram::new(), @@ -754,7 +755,7 @@ impl Memory { palettes: Palettes::new(), vram: Vram::default(), oam: Oam::new(), - jit: JitMemory::new(), + jit: JitMemory::new(settings), breakout_imm: false, mmu_arm9: MmuArm9::new(), mmu_arm7: MmuArm7::new(), diff --git a/src/jit/assembler/block_asm.rs b/src/jit/assembler/block_asm.rs index ce70f5e4..eea9827c 100644 --- a/src/jit/assembler/block_asm.rs +++ b/src/jit/assembler/block_asm.rs @@ -664,7 +664,7 @@ impl BlockAsm { self.tmp_regs.func_call_reg } }; - self.insert_inst(Call::reg( + self.insert_inst(Call::new( reg, [ args[0].map(|_| BlockReg::Fixed(Reg::R0)), @@ -676,48 +676,6 @@ impl BlockAsm { )); } - pub fn call_common(&mut self, offset: usize) { - self.call_common_internal(offset, None::, None::, None::, None::, true) - } - - pub fn call1_common(&mut self, offset: usize, arg0: impl Into) { - self.call_common_internal(offset, Some(arg0.into()), None::, None::, None::, true) - } - - pub fn call2_common(&mut self, offset: usize, arg0: impl Into, arg1: impl Into) { - self.call_common_internal(offset, Some(arg0.into()), Some(arg1.into()), None::, None::, true) - } - - pub fn call3_common(&mut self, offset: usize, arg0: impl Into, arg1: impl Into, arg2: impl Into) { - self.call_common_internal(offset, Some(arg0.into()), Some(arg1.into()), Some(arg2.into()), None::, true) - } - - pub fn call4_common(&mut self, offset: usize, arg0: impl Into, arg1: impl Into, arg2: impl Into, arg3: impl Into) { - self.call_common_internal(offset, Some(arg0.into()), Some(arg1.into()), Some(arg2.into()), Some(arg3.into()), true) - } - - fn call_common_internal( - &mut self, - offset: usize, - arg0: Option>, - arg1: Option>, - arg2: Option>, - arg3: Option>, - has_return: bool, - ) { - let args = self.handle_call_args(arg0, arg1, arg2, arg3); - self.insert_inst(Call::offset( - offset, - [ - args[0].map(|_| BlockReg::Fixed(Reg::R0)), - args[1].map(|_| BlockReg::Fixed(Reg::R1)), - args[2].map(|_| BlockReg::Fixed(Reg::R2)), - args[3].map(|_| BlockReg::Fixed(Reg::R3)), - ], - has_return, - )); - } - pub fn bkpt(&mut self, id: u16) { self.insert_inst(Bkpt(id)); } @@ -1123,7 +1081,7 @@ impl BlockAsm { self.buf.opcodes.len() } - pub fn finalize(&mut self, jit_mem_offset: usize) -> &Vec { + pub fn finalize(&mut self) -> &Vec { // Used to determine what regs to push and pop for prologue and epilogue let mut used_host_regs = if unlikely(self.is_common_fun) { self.buf.reg_allocator.dirty_regs & ALLOCATION_REGS @@ -1149,15 +1107,10 @@ impl BlockAsm { continue; } - let diff = if encoding.is_call_common() { - let opcode_index = (jit_mem_offset >> 2) + index; - let branch_to = u32::from(encoding.index()) >> 2; - branch_to as i32 - opcode_index as i32 - } else { - let block_index = u32::from(encoding.index()); - let branch_to = self.buf.block_opcode_offsets[block_index as usize]; - branch_to as i32 - index as i32 - }; + let block_index = u32::from(encoding.index()); + let branch_to = self.buf.block_opcode_offsets[block_index as usize]; + let diff = branch_to as i32 - index as i32; + if diff == 1 && !encoding.has_return() { self.buf.opcodes[index] = AluShiftImm::mov_al(Reg::R0, Reg::R0); } else { diff --git a/src/jit/assembler/block_inst.rs b/src/jit/assembler/block_inst.rs index 62436862..bc5a25e8 100644 --- a/src/jit/assembler/block_inst.rs +++ b/src/jit/assembler/block_inst.rs @@ -187,19 +187,13 @@ pub struct GenericGuest { #[bitsize(32)] #[derive(FromBits)] pub struct BranchEncoding { - pub index: u26, + pub index: u27, pub has_return: bool, - pub is_call_common: bool, pub cond: u4, } -pub enum CallOp { - Reg(BlockReg), - Offset(usize), -} - pub struct Call { - op: CallOp, + reg: BlockReg, args: [Option; 4], pub has_return: bool, } @@ -1121,29 +1115,15 @@ impl Debug for GenericGuest { } impl Call { - pub fn reg(reg: BlockReg, args: [Option; 4], has_return: bool) -> Self { - Call { - op: CallOp::Reg(reg), - args, - has_return, - } - } - - pub fn offset(offset: usize, args: [Option; 4], has_return: bool) -> Self { - Call { - op: CallOp::Offset(offset), - args, - has_return, - } + pub fn new(reg: BlockReg, args: [Option; 4], has_return: bool) -> Self { + Call { reg, args, has_return } } } impl BlockInstTrait for Call { fn get_io(&self) -> (BlockRegSet, BlockRegSet) { let mut inputs = BlockRegSet::new(); - if let CallOp::Reg(reg) = self.op { - inputs += reg; - } + inputs += self.reg; for &arg in self.args.iter().flatten() { inputs += arg; } @@ -1162,47 +1142,26 @@ impl BlockInstTrait for Call { } fn replace_input_regs(&mut self, old: BlockReg, new: BlockReg) { - if let CallOp::Reg(reg) = &mut self.op { - replace_reg(reg, old, new) - } + replace_reg(&mut self.reg, old, new) } fn replace_output_regs(&mut self, _: BlockReg, _: BlockReg) {} - fn emit_opcode(&mut self, alloc: &BlockRegAllocator, opcodes: &mut Vec, opcode_index: usize, placeholders: &mut BlockAsmPlaceholders) { - match self.op { - CallOp::Reg(reg) => opcodes.push(if self.has_return { - Bx::blx(alloc.for_emit_input(reg), Cond::AL) - } else { - Bx::bx(alloc.for_emit_input(reg), Cond::AL) - }), - CallOp::Offset(offset) => { - // Encode common offset - // Branch offset can only be figured out later - opcodes.push(BranchEncoding::new(u26::new(offset as u32), self.has_return, true, u4::new(Cond::AL as u8)).into()); - placeholders.branch.push(opcode_index); - } - } + fn emit_opcode(&mut self, alloc: &BlockRegAllocator, opcodes: &mut Vec, _: usize, _: &mut BlockAsmPlaceholders) { + opcodes.push(if self.has_return { + Bx::blx(alloc.for_emit_input(self.reg), Cond::AL) + } else { + Bx::bx(alloc.for_emit_input(self.reg), Cond::AL) + }); } } impl Debug for Call { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self.op { - CallOp::Reg(reg) => { - if self.has_return { - write!(f, "Blx {reg:?} {:?}", self.args) - } else { - write!(f, "Bx {reg:?} {:?}", self.args) - } - } - CallOp::Offset(offset) => { - if self.has_return { - write!(f, "Bl {offset:x} {:?}", self.args) - } else { - write!(f, "B {offset:x} {:?}", self.args) - } - } + if self.has_return { + write!(f, "Blx {:?} {:?}", self.reg, self.args) + } else { + write!(f, "Bx {:?} {:?}", self.reg, self.args) } } } @@ -1235,7 +1194,7 @@ impl BlockInstTrait for Branch { fn emit_opcode(&mut self, _: &BlockRegAllocator, opcodes: &mut Vec, opcode_index: usize, placeholders: &mut BlockAsmPlaceholders) { // Encode label // Branch offset can only be figured out later - opcodes.push(BranchEncoding::new(u26::new(self.block_index as u32), false, false, u4::new(Cond::AL as u8)).into()); + opcodes.push(BranchEncoding::new(u27::new(self.block_index as u32), false, u4::new(Cond::AL as u8)).into()); placeholders.branch.push(opcode_index); } } diff --git a/src/jit/jit_asm.rs b/src/jit/jit_asm.rs index a7105f45..bdada574 100644 --- a/src/jit/jit_asm.rs +++ b/src/jit/jit_asm.rs @@ -281,7 +281,13 @@ fn emit_code_block_internal(asm: &mut JitAsm, guest_pc: let is_unreturnable_branch = !inst_info.out_regs.is_reserved(Reg::LR) && is_uncond_branch; asm.jit_buf.insts.push(inst_info); - if is_unreturnable_branch || uncond_branch_count == 4 { + if is_unreturnable_branch + || uncond_branch_count + == match CPU { + ARM9 => 4, + ARM7 => 1, + } + { break; } pc_offset += pc_step; @@ -325,9 +331,8 @@ fn emit_code_block_internal(asm: &mut JitAsm, guest_pc: block_asm.epilogue(); - let opcodes_len = block_asm.emit_opcodes(guest_pc); - let next_jit_entry = get_jit!(asm.emu).get_next_entry(opcodes_len); - let opcodes = block_asm.finalize(next_jit_entry); + block_asm.emit_opcodes(guest_pc); + let opcodes = block_asm.finalize(); if IS_DEBUG && unsafe { BLOCK_LOG } { for &opcode in opcodes { println!("0x{opcode:x},"); diff --git a/src/jit/jit_memory.rs b/src/jit/jit_memory.rs index d97e5276..9ea0d692 100644 --- a/src/jit/jit_memory.rs +++ b/src/jit/jit_memory.rs @@ -13,6 +13,7 @@ use crate::jit::reg::Reg; use crate::jit::{Cond, MemoryAmount}; use crate::logging::debug_println; use crate::mmap::{flush_icache, Mmap, PAGE_SHIFT, PAGE_SIZE}; +use crate::settings::Settings; use crate::utils; use crate::utils::{HeapMem, HeapMemU8}; use std::collections::VecDeque; @@ -24,6 +25,8 @@ use CpuType::{ARM7, ARM9}; const JIT_MEMORY_SIZE: usize = 24 * 1024 * 1024; pub const JIT_LIVE_RANGE_PAGE_SIZE_SHIFT: u32 = 10; const JIT_LIVE_RANGE_PAGE_SIZE: u32 = 1 << JIT_LIVE_RANGE_PAGE_SIZE_SHIFT; +const JIT_ARM9_MEMORY_SIZE: usize = 20 * 1024 * 1024; +const JIT_ARM7_MEMORY_SIZE: usize = JIT_MEMORY_SIZE - JIT_ARM9_MEMORY_SIZE; #[derive(Copy, Clone)] pub struct JitEntry(pub *const extern "C" fn()); @@ -88,7 +91,6 @@ pub struct JitLiveRanges { #[cfg(target_os = "linux")] struct JitPerfMapRecord { - common_records: Vec<(usize, usize, String)>, perf_map_path: std::path::PathBuf, perf_map: std::fs::File, } @@ -98,18 +100,11 @@ impl JitPerfMapRecord { fn new() -> Self { let perf_map_path = std::path::PathBuf::from(format!("/tmp/perf-{}.map", std::process::id())); JitPerfMapRecord { - common_records: Vec::new(), perf_map_path: perf_map_path.clone(), perf_map: std::fs::File::create(perf_map_path).unwrap(), } } - fn record_common(&mut self, jit_start: usize, jit_size: usize, name: impl AsRef) { - self.common_records.push((jit_start, jit_size, name.as_ref().to_string())); - use std::io::Write; - writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {}", name.as_ref()).unwrap(); - } - fn record(&mut self, jit_start: usize, jit_size: usize, guest_pc: u32, cpu_type: CpuType) { use std::io::Write; writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {cpu_type:?}_{guest_pc:x}").unwrap(); @@ -117,10 +112,6 @@ impl JitPerfMapRecord { fn reset(&mut self) { self.perf_map = std::fs::File::create(&self.perf_map_path).unwrap(); - for (jit_start, jit_size, name) in &self.common_records { - use std::io::Write; - writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {name}").unwrap(); - } } } @@ -140,107 +131,117 @@ impl JitPerfMapRecord { fn reset(&mut self) {} } +struct JitMemoryMetadata { + size: usize, + start: usize, + end: usize, + max_end: usize, + jit_funcs: VecDeque<(usize, u16, u16)>, +} + +impl JitMemoryMetadata { + fn new(size: usize, start: usize, end: usize) -> Self { + JitMemoryMetadata { + size, + start, + end, + max_end: end, + jit_funcs: VecDeque::new(), + } + } +} + pub struct JitMemory { mem: Mmap, - mem_common_end: usize, - mem_start: usize, - mem_end: usize, - jit_funcs: VecDeque<(usize, u16, u16)>, + arm9_data: JitMemoryMetadata, + arm7_data: JitMemoryMetadata, jit_entries: JitEntries, jit_live_ranges: JitLiveRanges, pub jit_memory_map: JitMemoryMap, jit_perf_map_record: JitPerfMapRecord, - pub arm7_hle: bool, } impl JitMemory { - pub fn new() -> Self { + pub fn new(settings: &Settings) -> Self { let jit_entries = JitEntries::new(); let jit_live_ranges = JitLiveRanges::default(); let jit_memory_map = JitMemoryMap::new(&jit_entries, &jit_live_ranges); JitMemory { mem: Mmap::executable("jit", JIT_MEMORY_SIZE).unwrap(), - mem_common_end: 0, - mem_start: 0, - mem_end: JIT_MEMORY_SIZE, - jit_funcs: VecDeque::new(), + arm9_data: if settings.arm7_hle() { + JitMemoryMetadata::new(JIT_MEMORY_SIZE, 0, JIT_MEMORY_SIZE) + } else { + JitMemoryMetadata::new(JIT_ARM9_MEMORY_SIZE, 0, JIT_ARM9_MEMORY_SIZE) + }, + arm7_data: if settings.arm7_hle() { + JitMemoryMetadata::new(0, 0, 0) + } else { + JitMemoryMetadata::new(JIT_ARM7_MEMORY_SIZE, JIT_ARM9_MEMORY_SIZE, JIT_MEMORY_SIZE) + }, jit_entries, jit_live_ranges, jit_memory_map, jit_perf_map_record: JitPerfMapRecord::new(), - arm7_hle: false, } } - fn reset_blocks(&mut self) { + fn get_jit_data(&mut self, cpu_type: CpuType) -> &mut JitMemoryMetadata { + match cpu_type { + ARM9 => &mut self.arm9_data, + ARM7 => &mut self.arm7_data, + } + } + + fn reset_blocks(&mut self, cpu_type: CpuType) { self.jit_perf_map_record.reset(); - let (jit_entry, addr_offset_start, addr_offset_end) = self.jit_funcs.pop_front().unwrap(); + let jit_data = self.get_jit_data(cpu_type); + + let (jit_entry, addr_offset_start, addr_offset_end) = jit_data.jit_funcs.pop_front().unwrap(); let jit_entry = jit_entry as *mut JitEntry; unsafe { *jit_entry = DEFAULT_JIT_ENTRY }; let freed_start = addr_offset_start; let mut freed_end = addr_offset_end; - while (freed_end - freed_start) < (JIT_MEMORY_SIZE / 6 / PAGE_SIZE) as u16 { - let (jit_entry, _, addr_offset_end) = self.jit_funcs.front().unwrap(); + while (freed_end - freed_start) < (jit_data.size / 4 / PAGE_SIZE) as u16 { + let (jit_entry, _, addr_offset_end) = jit_data.jit_funcs.front().unwrap(); if *addr_offset_end < freed_start { break; } let jit_entry = *jit_entry as *mut JitEntry; unsafe { *jit_entry = DEFAULT_JIT_ENTRY }; freed_end = *addr_offset_end; - self.jit_funcs.pop_front().unwrap(); + jit_data.jit_funcs.pop_front().unwrap(); } - self.mem_start = (freed_start as usize) << PAGE_SHIFT; - self.mem_end = (freed_end as usize) << PAGE_SHIFT; + jit_data.start = (freed_start as usize) << PAGE_SHIFT; + jit_data.end = (freed_end as usize) << PAGE_SHIFT; - debug_println!("Jit memory reset from {:x} - {:x}", self.mem_start, self.mem_end); + debug_println!("{cpu_type:?} Jit memory reset from {:x} - {:x}", jit_data.start, jit_data.end); } - fn allocate_block(&mut self, required_size: usize) -> (usize, bool) { + fn allocate_block(&mut self, required_size: usize, cpu_type: CpuType) -> (usize, bool) { let mut flushed = false; - if self.mem_start + required_size > self.mem_end { - self.reset_blocks(); - assert!(self.mem_start + required_size <= self.mem_end); + let jit_data = self.get_jit_data(cpu_type); + if jit_data.start + required_size > jit_data.end { + if jit_data.start + required_size > jit_data.max_end { + let (_, _, last_addr_end) = jit_data.jit_funcs.back_mut().unwrap(); + *last_addr_end = (jit_data.max_end >> PAGE_SHIFT) as u16; + } + self.reset_blocks(cpu_type); + let jit_data = self.get_jit_data(cpu_type); + assert!(jit_data.start + required_size <= jit_data.end); flushed = true; } - let addr = self.mem_start; - self.mem_start += required_size; + let jit_data = self.get_jit_data(cpu_type); + let addr = jit_data.start; + jit_data.start += required_size; (addr, flushed) } - pub fn get_start_entry(&self) -> usize { - self.mem.as_ptr() as _ - } - - pub fn get_next_entry(&self, opcodes_len: usize) -> usize { - let aligned_size = utils::align_up(opcodes_len << 2, PAGE_SIZE); - if self.mem_start + aligned_size > self.mem_end { - self.mem_common_end - } else { - self.mem_start - } - } - - pub fn insert_common_fun_block(&mut self, opcodes: &[u32], name: impl AsRef) -> *const extern "C" fn() { + fn insert(&mut self, opcodes: &[u32], cpu_type: CpuType) -> (usize, usize, bool) { let aligned_size = utils::align_up(size_of_val(opcodes), PAGE_SIZE); - let mem_start = self.mem_start; - - utils::write_to_mem_slice(&mut self.mem, mem_start, opcodes); - unsafe { flush_icache(self.mem.as_ptr().add(mem_start), aligned_size) }; - - self.mem_start += aligned_size; - self.mem_common_end = self.mem_start; - - let jit_entry_addr = mem_start + self.mem.as_ptr() as usize; - self.jit_perf_map_record.record_common(jit_entry_addr, aligned_size, name); - jit_entry_addr as _ - } - - fn insert(&mut self, opcodes: &[u32]) -> (usize, usize, bool) { - let aligned_size = utils::align_up(size_of_val(opcodes), PAGE_SIZE); - let (allocated_offset_addr, flushed) = self.allocate_block(aligned_size); + let (allocated_offset_addr, flushed) = self.allocate_block(aligned_size, cpu_type); utils::write_to_mem_slice(&mut self.mem, allocated_offset_addr, opcodes); unsafe { flush_icache(self.mem.as_ptr().add(allocated_offset_addr), aligned_size) }; @@ -260,7 +261,7 @@ impl JitMemory { }}; ($entries:expr, $live_ranges:expr) => {{ - let (allocated_offset_addr, aligned_size, flushed) = self.insert(opcodes); + let (allocated_offset_addr, aligned_size, flushed) = self.insert(opcodes, cpu_type); let jit_entry_addr = (allocated_offset_addr + self.mem.as_ptr() as usize) as *const extern "C" fn(); @@ -269,7 +270,8 @@ impl JitMemory { $entries[entries_index] = JitEntry(jit_entry_addr); assert_eq!(ptr::addr_of!($entries[entries_index]), self.jit_memory_map.get_jit_entry(guest_pc), "jit memory mapping {guest_pc:x}"); - self.jit_funcs.push_back((ptr::addr_of!($entries[entries_index]) as usize, (allocated_offset_addr >> PAGE_SHIFT) as u16, ((allocated_offset_addr + aligned_size) >> PAGE_SHIFT) as u16)); + let entry_addr = ptr::addr_of!($entries[entries_index]) as usize; + self.get_jit_data(cpu_type).jit_funcs.push_back((entry_addr, (allocated_offset_addr >> PAGE_SHIFT) as u16, ((allocated_offset_addr + aligned_size) >> PAGE_SHIFT) as u16)); // >> 3 for u8 (each bit represents a page) let live_ranges_index = ((guest_pc >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) >> 3) as usize; @@ -278,16 +280,6 @@ impl JitMemory { $live_ranges[live_ranges_index] |= 1 << live_ranges_bit; assert_eq!(ptr::addr_of!($live_ranges[live_ranges_index]), self.jit_memory_map.get_live_range(guest_pc), "jit live ranges mapping {guest_pc:x}"); - let per = (self.mem_start * 100) as f32 / JIT_MEMORY_SIZE as f32; - debug_println!( - "Insert new jit ({:x}) block with size {} at {:x}, {}% allocated with guest pc {:x}", - self.mem.as_ptr() as usize, - aligned_size, - allocated_offset_addr, - per, - guest_pc - ); - self.jit_perf_map_record.record(jit_entry_addr as usize, aligned_size, guest_pc, cpu_type); (jit_entry_addr, flushed) @@ -341,7 +333,7 @@ impl JitMemory { } pub fn invalidate_wram(&mut self) { - if !self.arm7_hle { + if self.arm7_data.size != 0 { for live_range in self.jit_live_ranges.shared_wram_arm7.deref() { if *live_range != 0 { self.jit_entries.shared_wram_arm7.fill(DEFAULT_JIT_ENTRY); diff --git a/src/main.rs b/src/main.rs index f7565204..edffdcdd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -196,7 +196,6 @@ fn run_cpu( if emu.settings.arm7_hle() { common.ipc.use_hle(); common.gpu.arm7_hle = true; - get_jit_mut!(emu).arm7_hle = true; execute_jit::(&mut emu_unsafe); } else { execute_jit::(&mut emu_unsafe);