From 380f488a7b30e32575be1d8ea4a3e3c95c13cf5c Mon Sep 17 00:00:00 2001 From: Willi Ye Date: Fri, 31 Jan 2025 17:34:31 +0100 Subject: [PATCH] Don't exit guest context when handling interrupts --- src/core/hle/bios.rs | 6 +- src/core/memory/mem.rs | 36 +++++++++--- src/jit/assembler/block_asm.rs | 4 +- src/jit/emitter/emit.rs | 22 ++++---- src/jit/emitter/emit_branch.rs | 2 + src/jit/emitter/thumb/emit_thumb.rs | 8 ++- src/jit/inst_branch_handler.rs | 86 ++++++++++++++++++++--------- src/jit/jit_asm.rs | 72 +++++++++++++++++------- src/jit/jit_asm_common_funs.rs | 47 +++++++++------- 9 files changed, 192 insertions(+), 91 deletions(-) diff --git a/src/core/hle/bios.rs b/src/core/hle/bios.rs index 59594788..91f33086 100644 --- a/src/core/hle/bios.rs +++ b/src/core/hle/bios.rs @@ -23,14 +23,14 @@ pub fn interrupt(emu: &mut Emu) { debug_println!("{CPU:?} interrupt"); let regs = get_regs_mut!(emu, CPU); - let mut cpsr = Cpsr::from(get_regs!(emu, CPU).cpsr); + let mut cpsr = Cpsr::from(regs.cpsr); cpsr.set_irq_disable(true); cpsr.set_thumb(false); cpsr.set_mode(u5::new(0x12)); regs.set_cpsr::(u32::from(cpsr), emu); - let is_thumb = (get_regs!(emu, CPU).pc & 1) == 1; - let mut spsr = Cpsr::from(get_regs!(emu, CPU).spsr); + let is_thumb = (regs.pc & 1) == 1; + let mut spsr = Cpsr::from(regs.spsr); spsr.set_thumb(is_thumb); regs.spsr = u32::from(spsr); diff --git a/src/core/memory/mem.rs b/src/core/memory/mem.rs index fb53c5bb..c40e5eab 100644 --- a/src/core/memory/mem.rs +++ b/src/core/memory/mem.rs @@ -15,8 +15,8 @@ use crate::core::CpuType::ARM9; use crate::jit::jit_memory::JitMemory; use crate::logging::debug_println; use crate::mmap::Shm; -use crate::utils; use crate::utils::Convert; +use crate::{utils, DEBUG_LOG}; use std::hint::unreachable_unchecked; use std::intrinsics::unlikely; use std::marker::PhantomData; @@ -819,10 +819,15 @@ impl Memory { let shm_offset = self.get_shm_offset::(aligned_addr) as u32; if shm_offset != 0 { utils::read_from_mem_slice(&self.shm, shm_offset, slice); - return; + } else { + MemoryMultipleSliceIo::::read(aligned_addr, slice, emu); } - MemoryMultipleSliceIo::::read(aligned_addr, slice, emu); + if DEBUG_LOG { + for (i, &value) in slice.iter().enumerate() { + debug_println!("{CPU:?} slice memory read at {:x} with value {:x}", aligned_addr as usize + i * size_of::(), value.into()); + } + } } pub fn read_fixed_slice(&mut self, addr: u32, emu: &mut Emu, slice: &mut [T]) { @@ -833,10 +838,15 @@ impl Memory { let shm_offset = self.get_shm_offset::(aligned_addr) as u32; if shm_offset != 0 { slice.fill(utils::read_from_mem(&self.shm, shm_offset)); - return; + } else { + MemoryFixedSliceIo::::read(aligned_addr, slice, emu); } - MemoryFixedSliceIo::::read(aligned_addr, slice, emu); + if DEBUG_LOG { + for &mut value in slice { + debug_println!("{CPU:?} fixed slice memory read at {:x} with value {:x}", aligned_addr as usize, value.into()); + } + } } pub fn write(&mut self, addr: u32, value: T, emu: &mut Emu) { @@ -862,9 +872,14 @@ impl Memory { } pub fn write_multiple_slice(&mut self, addr: u32, emu: &mut Emu, slice: &[T]) { - debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", slice.len()); + debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", size_of_val(slice)); let aligned_addr = addr & !(size_of::() as u32 - 1); let aligned_addr = aligned_addr & 0x0FFFFFFF; + if DEBUG_LOG { + for (i, &value) in slice.iter().enumerate() { + debug_println!("{CPU:?} slice memory write at {:x} with value {:x}", aligned_addr as usize + i * 
size_of::(), value.into()); + } + } let shm_offset = self.get_shm_offset::(aligned_addr) as u32; if shm_offset != 0 { @@ -876,9 +891,14 @@ impl Memory { } pub fn write_fixed_slice(&mut self, addr: u32, emu: &mut Emu, slice: &[T]) { - debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", slice.len()); + debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", size_of_val(slice)); let aligned_addr = addr & !(size_of::() as u32 - 1); let aligned_addr = aligned_addr & 0x0FFFFFFF; + if DEBUG_LOG { + for &value in slice { + debug_println!("{CPU:?} fixed slice memory write at {:x} with value {:x}", aligned_addr, value.into()); + } + } let shm_offset = self.get_shm_offset::(aligned_addr) as u32; if shm_offset != 0 { @@ -890,7 +910,7 @@ impl Memory { } pub fn write_multiple_memset(&mut self, addr: u32, value: T, size: usize, emu: &mut Emu) { - debug_println!("{CPU:?} multiple memset memory write at {addr:x} with size {size}"); + debug_println!("{CPU:?} multiple memset memory write at {addr:x} with size {}", size_of::() * size); let aligned_addr = addr & !(size_of::() as u32 - 1); let aligned_addr = aligned_addr & 0x0FFFFFFF; diff --git a/src/jit/assembler/block_asm.rs b/src/jit/assembler/block_asm.rs index f5c6fb4e..ce70f5e4 100644 --- a/src/jit/assembler/block_asm.rs +++ b/src/jit/assembler/block_asm.rs @@ -798,7 +798,7 @@ impl BlockAsm { for &i in block_indices { let basic_block = &mut self.cache.basic_blocks[i]; let sum_guest_regs_input_dirty = basic_block.guest_regs_input_dirty + guest_regs_dirty; - if sum_guest_regs_input_dirty != basic_block.guest_regs_input_dirty || !basic_block.guest_regs_resolved { + if !basic_block.guest_regs_resolved || sum_guest_regs_input_dirty != basic_block.guest_regs_input_dirty { self.buf.reachable_blocks.insert(i); basic_block.guest_regs_resolved = true; basic_block.guest_regs_input_dirty = sum_guest_regs_input_dirty; @@ -818,7 +818,7 @@ impl BlockAsm { let basic_block = unsafe { self.cache.basic_blocks.get_unchecked_mut(i) }; let sum_required_outputs = *basic_block.get_required_outputs() + required_outputs; - if sum_required_outputs != basic_block.get_required_outputs() || !basic_block.io_resolved { + if !basic_block.io_resolved || sum_required_outputs != basic_block.get_required_outputs() { basic_block.io_resolved = true; basic_block.set_required_outputs(sum_required_outputs); basic_block.init_resolve_io(self.buf); diff --git a/src/jit/emitter/emit.rs b/src/jit/emitter/emit.rs index 91f3527d..80f463d2 100644 --- a/src/jit/emitter/emit.rs +++ b/src/jit/emitter/emit.rs @@ -61,14 +61,9 @@ impl JitAsm<'_, CPU> { } if (op.is_mov() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::LR) && !self.jit_buf.current_inst().out_regs.is_reserved(Reg::CPSR)) - || (op.is_multiple_mem_transfer() && *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() == Reg::SP && !op.mem_transfer_user()) + || (op.is_multiple_mem_transfer() && *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() == Reg::SP) || (op.is_single_mem_transfer() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::SP)) { - let guest_pc_reg = block_asm.new_reg(); - block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4); - self.emit_branch_return_stack_common(block_asm, guest_pc_reg); - block_asm.free_reg(guest_pc_reg); - } else { if op.mem_transfer_user() { block_asm.call(register_restore_spsr:: as *const ()); if CPU == ARM7 { @@ -76,8 +71,15 @@ impl JitAsm<'_, CPU> { } } - 
self.emit_branch_out_metadata(block_asm); - block_asm.epilogue(); + let guest_pc_reg = block_asm.new_reg(); + block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4); + self.emit_branch_return_stack_common(block_asm, guest_pc_reg); + block_asm.free_reg(guest_pc_reg); + } else { + let guest_pc_reg = block_asm.new_reg(); + block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4); + self.emit_branch_reg_common(block_asm, guest_pc_reg, false, false); + block_asm.free_reg(guest_pc_reg); } } @@ -126,9 +128,9 @@ impl JitAsm<'_, CPU> { if set_idle_loop { let idle_loop_reg = block_asm.new_reg(); - block_asm.load_u8(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_return_stack_ptr_offset() as u32); + block_asm.load_u8(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_in_interrupt_return_stack_ptr_offset() as u32); block_asm.orr(idle_loop_reg, idle_loop_reg, 0x80); - block_asm.store_u16(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_return_stack_ptr_offset() as u32); + block_asm.store_u8(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_in_interrupt_return_stack_ptr_offset() as u32); block_asm.free_reg(idle_loop_reg); } diff --git a/src/jit/emitter/emit_branch.rs b/src/jit/emitter/emit_branch.rs index fd457d76..3d4799a4 100644 --- a/src/jit/emitter/emit_branch.rs +++ b/src/jit/emitter/emit_branch.rs @@ -79,6 +79,8 @@ impl JitAsm<'_, CPU> { } pub fn emit_branch_label_common(&mut self, block_asm: &mut BlockAsm, target_pc: u32, cond: Cond) { + let target_pc = align_guest_pc(target_pc) | (target_pc & 1); + match Self::analyze_branch_label::(&self.jit_buf.insts, self.jit_buf.current_index, cond, self.jit_buf.current_pc, target_pc) { JitBranchInfo::Local(target_index) => { let target_pre_cycle_count_sum = self.jit_buf.insts_cycle_counts[target_index] - self.jit_buf.insts[target_index].cycle as u16; diff --git a/src/jit/emitter/thumb/emit_thumb.rs b/src/jit/emitter/thumb/emit_thumb.rs index be43c66e..0a2e9f7d 100644 --- a/src/jit/emitter/thumb/emit_thumb.rs +++ b/src/jit/emitter/thumb/emit_thumb.rs @@ -7,7 +7,7 @@ use crate::jit::jit_asm::JitAsm; use crate::jit::op::Op; use crate::jit::reg::Reg; -impl<'a, const CPU: CpuType> JitAsm<'a, CPU> { +impl JitAsm<'_, CPU> { pub fn emit_thumb(&mut self, block_asm: &mut BlockAsm) { block_asm.guest_pc(self.jit_buf.current_pc); @@ -87,8 +87,10 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> { self.emit_branch_return_stack_common(block_asm, guest_pc_reg); block_asm.free_reg(guest_pc_reg); } else { - self.emit_branch_out_metadata(block_asm); - block_asm.epilogue(); + let guest_pc_reg = block_asm.new_reg(); + block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4); + self.emit_branch_reg_common(block_asm, guest_pc_reg, false, true); + block_asm.free_reg(guest_pc_reg); } } } diff --git a/src/jit/inst_branch_handler.rs b/src/jit/inst_branch_handler.rs index d4a6f618..f0151334 100644 --- a/src/jit/inst_branch_handler.rs +++ b/src/jit/inst_branch_handler.rs @@ -6,13 +6,14 @@ use crate::jit::jit_asm_common_funs::{exit_guest_context, get_max_loop_cycle_cou use crate::jit::jit_memory::JitEntry; use crate::logging::debug_println; use crate::{get_jit_asm_ptr, CURRENT_RUNNING_CPU, DEBUG_LOG, IS_DEBUG}; +use std::arch::naked_asm; use std::cmp::min; use std::intrinsics::{breakpoint, likely, unlikely}; use std::mem; -pub extern "C" fn run_scheduler(asm: *mut JitAsm, current_pc: u32) { +pub extern 
"C" fn run_scheduler(asm: *mut JitAsm<{ ARM9 }>, current_pc: u32) { let asm = unsafe { asm.as_mut_unchecked() }; - debug_println!("{CPU:?} run scheduler at {current_pc:x} target pc {:x}", get_regs!(asm.emu, CPU).pc); + debug_println!("{ARM9:?} run scheduler at {current_pc:x} target pc {:x}", get_regs!(asm.emu, ARM9).pc); let cycles = if ARM7_HLE { (asm.runtime_data.accumulated_cycles + 1) >> 1 @@ -39,13 +40,50 @@ pub extern "C" fn run_scheduler(asm: * get_common_mut!(asm.emu).gpu.gpu_3d_regs.run_cmds(cm.get_cycles(), asm.emu); } +#[naked] +unsafe extern "C" fn call_interrupt(entry: *const fn(), interrupt_sp_ptr: *mut usize) { + #[rustfmt::skip] + naked_asm!( + "push {{r4-r12,lr}}", + "str sp, [r1]", + "blx r0", + "pop {{r4-r12,pc}}", + ); +} + +#[inline(always)] +fn check_stack_depth(asm: &mut JitAsm<{ ARM9 }>, current_pc: u32) { + let sp_depth_size = asm.runtime_data.get_sp_depth_size(); + if unlikely(sp_depth_size >= MAX_STACK_DEPTH_SIZE) { + if IS_DEBUG { + asm.runtime_data.set_branch_out_pc(current_pc); + } + if DEBUG_LOG { + JitAsmCommonFuns::<{ ARM9 }>::debug_stack_depth_too_big(sp_depth_size, current_pc); + } + unsafe { exit_guest_context!(asm) }; + } +} + +pub extern "C" fn handle_interrupt(asm: *mut JitAsm<{ ARM9 }>, target_pc: u32, current_pc: u32) { + let asm = unsafe { asm.as_mut_unchecked() }; + check_stack_depth(asm, current_pc); + + let lr = align_guest_pc(target_pc) | (target_pc & 1); + let regs = get_regs!(asm.emu, ARM9); + + asm.runtime_data.pre_cycle_count_sum = 0; + asm.runtime_data.set_in_interrupt(true); + asm.runtime_data.interrupt_lr = lr; + get_regs_mut!(asm.emu, ARM9).set_thumb(regs.pc & 1 == 1); + let jit_entry = get_jit!(asm.emu).get_jit_start_addr(align_guest_pc(regs.pc)); + unsafe { call_interrupt(jit_entry as _, &mut asm.runtime_data.interrupt_sp) }; + asm.runtime_data.set_in_interrupt(false); +} + fn flush_cycles(asm: &mut JitAsm, total_cycles: u16, current_pc: u32) { asm.runtime_data.accumulated_cycles += total_cycles + 2 - asm.runtime_data.pre_cycle_count_sum; - debug_println!( - "{CPU:?} flush cycles {} at {current_pc:x} sp size {}", - asm.runtime_data.accumulated_cycles, - asm.runtime_data.get_sp_depth_size() - ); + debug_println!("{CPU:?} flush cycles {} at {current_pc:x}", asm.runtime_data.accumulated_cycles,); } fn check_scheduler(asm: &mut JitAsm, current_pc: u32) { @@ -54,18 +92,13 @@ fn check_scheduler(asm: &mut JitAsm, current_pc: u32) { ARM9 => { let pc_og = get_regs!(asm.emu, ARM9).pc; if asm.emu.settings.arm7_hle() { - run_scheduler::(asm as _, current_pc); + run_scheduler::(unsafe { mem::transmute(asm as *mut JitAsm) }, current_pc); } else { - run_scheduler::(asm as _, current_pc); + run_scheduler::(unsafe { mem::transmute(asm as *mut JitAsm) }, current_pc); } - // Handle interrupts if get_regs!(asm.emu, ARM9).pc != pc_og { - debug_println!("{CPU:?} exit guest flush cycles"); - if IS_DEBUG { - asm.runtime_data.set_branch_out_pc(current_pc); - } - unsafe { exit_guest_context!(asm) }; + handle_interrupt(unsafe { mem::transmute(asm as *mut JitAsm) }, pc_og, current_pc); } } ARM7 => { @@ -91,16 +124,7 @@ pub extern "C" fn pre_branch(asm: flush_cycles(asm, total_cycles, current_pc); if CPU == ARM9 && HAS_LR_RETURN { - let sp_depth_size = asm.runtime_data.get_sp_depth_size(); - if unlikely(sp_depth_size >= MAX_STACK_DEPTH_SIZE) { - if IS_DEBUG { - asm.runtime_data.set_branch_out_pc(current_pc); - } - if DEBUG_LOG { - JitAsmCommonFuns::::debug_stack_depth_too_big(sp_depth_size, current_pc); - } - unsafe { exit_guest_context!(asm) }; - } + 
check_stack_depth(unsafe { mem::transmute(asm as *mut JitAsm) }, current_pc); } check_scheduler(asm, current_pc); @@ -175,7 +199,17 @@ pub unsafe extern "C" fn branch_lr(total_cycles: u16, target } } else { if DEBUG_LOG { - JitAsmCommonFuns::::debug_branch_lr_failed(current_pc, target_pc); + JitAsmCommonFuns::::debug_branch_lr_failed(current_pc, target_pc, desired_lr); + } + if CPU == ARM9 && unlikely(asm.runtime_data.is_in_interrupt()) { + let sp_depth_size = asm.runtime_data.get_sp_depth_size(); + if likely(sp_depth_size < MAX_STACK_DEPTH_SIZE) { + asm.runtime_data.pre_cycle_count_sum = 0; + asm.runtime_data.push_return_stack(desired_lr); + unsafe { call_jit_fun(asm, target_pc) }; + } else { + JitAsmCommonFuns::::debug_stack_depth_too_big(sp_depth_size, current_pc); + } } exit_guest_context!(asm); } diff --git a/src/jit/jit_asm.rs b/src/jit/jit_asm.rs index 642e5f0d..44d1ea4f 100644 --- a/src/jit/jit_asm.rs +++ b/src/jit/jit_asm.rs @@ -59,8 +59,10 @@ pub struct JitRuntimeData { pub pre_cycle_count_sum: u16, pub accumulated_cycles: u16, pub host_sp: usize, - pub idle_loop_return_stack_ptr: u8, + pub idle_loop_in_interrupt_return_stack_ptr: u8, pub return_stack: [u32; RETURN_STACK_SIZE], + pub interrupt_sp: usize, + pub interrupt_lr: u32, #[cfg(debug_assertions)] branch_out_pc: u32, } @@ -71,8 +73,10 @@ impl JitRuntimeData { pre_cycle_count_sum: 0, accumulated_cycles: 0, host_sp: 0, - idle_loop_return_stack_ptr: 0, + idle_loop_in_interrupt_return_stack_ptr: 0, return_stack: [0; RETURN_STACK_SIZE], + interrupt_sp: 0, + interrupt_lr: 0, #[cfg(debug_assertions)] branch_out_pc: u32::MAX, }; @@ -122,8 +126,8 @@ impl JitRuntimeData { mem::offset_of!(JitRuntimeData, host_sp) } - pub const fn get_idle_loop_return_stack_ptr_offset() -> usize { - mem::offset_of!(JitRuntimeData, idle_loop_return_stack_ptr) + pub const fn get_idle_loop_in_interrupt_return_stack_ptr_offset() -> usize { + mem::offset_of!(JitRuntimeData, idle_loop_in_interrupt_return_stack_ptr) } pub const fn get_return_stack_offset() -> usize { @@ -131,15 +135,23 @@ impl JitRuntimeData { } pub fn is_idle_loop(&self) -> bool { - self.idle_loop_return_stack_ptr & 0x80 != 0 + self.idle_loop_in_interrupt_return_stack_ptr & 0x80 != 0 } pub fn clear_idle_loop(&mut self) { - self.idle_loop_return_stack_ptr &= !0x80; + self.idle_loop_in_interrupt_return_stack_ptr &= !0x80; + } + + pub fn is_in_interrupt(&self) -> bool { + self.idle_loop_in_interrupt_return_stack_ptr & 0x40 != 0 + } + + pub fn set_in_interrupt(&mut self, in_interrupt: bool) { + self.idle_loop_in_interrupt_return_stack_ptr = (self.idle_loop_in_interrupt_return_stack_ptr & !0x40) | ((in_interrupt as u8) << 6) } pub fn get_return_stack_ptr(&self) -> u8 { - self.idle_loop_return_stack_ptr & !0x80 + self.idle_loop_in_interrupt_return_stack_ptr & 0x3F } pub fn push_return_stack(&mut self, value: u32) { @@ -147,14 +159,14 @@ impl JitRuntimeData { unsafe { *self.return_stack.get_unchecked_mut(return_stack_ptr as usize) = value }; return_stack_ptr += 1; return_stack_ptr &= RETURN_STACK_SIZE as u8 - 1; - self.idle_loop_return_stack_ptr = (self.idle_loop_return_stack_ptr & 0x80) | return_stack_ptr; + self.idle_loop_in_interrupt_return_stack_ptr = (self.idle_loop_in_interrupt_return_stack_ptr & 0xC0) | return_stack_ptr; } pub fn pop_return_stack(&mut self) -> u32 { let mut return_stack_ptr = self.get_return_stack_ptr(); return_stack_ptr = return_stack_ptr.wrapping_sub(1); return_stack_ptr &= RETURN_STACK_SIZE as u8 - 1; - self.idle_loop_return_stack_ptr = (self.idle_loop_return_stack_ptr & 
0x80) | return_stack_ptr; + self.idle_loop_in_interrupt_return_stack_ptr = (self.idle_loop_in_interrupt_return_stack_ptr & 0xC0) | return_stack_ptr; unsafe { *self.return_stack.get_unchecked(return_stack_ptr as usize) } } @@ -165,7 +177,7 @@ impl JitRuntimeData { } pub fn clear_return_stack_ptr(&mut self) { - self.idle_loop_return_stack_ptr &= 0x80; + self.idle_loop_in_interrupt_return_stack_ptr &= 0xC0; self.return_stack[RETURN_STACK_SIZE - 1] = 0; } } @@ -178,16 +190,38 @@ pub fn align_guest_pc(guest_pc: u32) -> u32 { pub extern "C" fn hle_bios_uninterrupt() { let asm = unsafe { get_jit_asm_ptr::().as_mut_unchecked() }; - if IS_DEBUG { - asm.runtime_data.set_branch_out_pc(get_regs!(asm.emu, CPU).pc); - } - asm.runtime_data.clear_return_stack_ptr(); + let regs = get_regs_mut!(asm.emu, CPU); + let current_pc = regs.pc; asm.runtime_data.accumulated_cycles += 3; bios::uninterrupt::(asm.emu); if unlikely(get_cpu_regs!(asm.emu, CPU).is_halted()) { + if IS_DEBUG { + asm.runtime_data.set_branch_out_pc(current_pc); + } unsafe { exit_guest_context!(asm) }; } else { - unsafe { call_jit_fun(asm, get_regs_mut!(asm.emu, CPU).pc) }; + match CPU { + ARM9 => { + if unlikely(asm.runtime_data.is_in_interrupt() && asm.runtime_data.interrupt_lr == regs.pc) { + regs.set_thumb(regs.pc & 1 == 1); + unsafe { + std::arch::asm!( + "mov sp, {}", + "pop {{r4-r12,pc}}", + in(reg) asm.runtime_data.interrupt_sp + ); + std::hint::unreachable_unchecked(); + } + } else { + asm.runtime_data.clear_return_stack_ptr(); + unsafe { call_jit_fun(asm, regs.pc) }; + } + } + ARM7 => { + asm.runtime_data.clear_return_stack_ptr(); + unsafe { call_jit_fun(asm, regs.pc) }; + } + } } } @@ -256,8 +290,8 @@ fn emit_code_block_internal(asm: &mut JitAsm, guest_pc: } let (jit_entry, flushed) = { - // println!("{CPU:?} {THUMB} emit code block {guest_pc:x}"); - // unsafe { BLOCK_LOG = guest_pc == 0x2004824 }; + // println!("{CPU:?} {thumb} emit code block {guest_pc:x}"); + // unsafe { BLOCK_LOG = guest_pc == 0x200675e }; let guest_regs_ptr = get_regs_mut!(asm.emu, CPU).get_reg_mut_ptr(); let host_sp_ptr = ptr::addr_of_mut!(asm.runtime_data.host_sp); @@ -348,11 +382,11 @@ fn execute_internal(guest_pc: u32) -> u16 { asm.runtime_data.pre_cycle_count_sum = 0; asm.runtime_data.accumulated_cycles = 0; asm.runtime_data.clear_return_stack_ptr(); - asm.runtime_data.idle_loop_return_stack_ptr = 0; + asm.runtime_data.idle_loop_in_interrupt_return_stack_ptr = 0; jit_entry }; - unsafe { call_jit_entry(jit_entry as _, ptr::addr_of_mut!(asm.runtime_data.host_sp)) }; + unsafe { call_jit_entry(jit_entry as _, &mut asm.runtime_data.host_sp) }; if IS_DEBUG { assert_ne!( diff --git a/src/jit/jit_asm_common_funs.rs b/src/jit/jit_asm_common_funs.rs index 3f2b433a..a56b9cc6 100644 --- a/src/jit/jit_asm_common_funs.rs +++ b/src/jit/jit_asm_common_funs.rs @@ -1,10 +1,24 @@ +macro_rules! 
exit_guest_context { + ($asm:expr) => {{ + // r4-r12,pc since we need an even amount of registers for 8 byte alignment, in case the compiler decides to use neon instructions + std::arch::asm!( + "mov sp, {}", + "pop {{r4-r12,pc}}", + in(reg) $asm.runtime_data.host_sp + ); + std::hint::unreachable_unchecked(); + }}; +} +pub(crate) use exit_guest_context; + use crate::core::emu::{get_jit, get_regs_mut}; use crate::core::CpuType; use crate::core::CpuType::{ARM7, ARM9}; use crate::jit::assembler::block_asm::BlockAsm; use crate::jit::assembler::{BlockLabel, BlockOperand, BlockReg}; -use crate::jit::inst_branch_handler::{branch_imm, branch_lr, branch_reg, pre_branch}; +use crate::jit::inst_branch_handler::{branch_imm, branch_lr, branch_reg, handle_interrupt, pre_branch}; use crate::jit::jit_asm::{JitAsm, JitRuntimeData}; +use crate::jit::jit_memory::JitEntry; use crate::jit::reg::Reg; use crate::jit::{inst_branch_handler, jit_memory_map, Cond, ShiftType}; use crate::{DEBUG_LOG, IS_DEBUG}; @@ -17,20 +31,6 @@ pub const fn get_max_loop_cycle_count() -> u32 { } } -macro_rules! exit_guest_context { - ($asm:expr) => {{ - // r4-r12,pc since we need an even amount of registers for 8 byte alignment, in case the compiler decides to use neon instructions - std::arch::asm!( - "mov sp, {}", - "pop {{r4-r12,pc}}", - in(reg) $asm.runtime_data.host_sp - ); - std::hint::unreachable_unchecked(); - }}; -} -use crate::jit::jit_memory::JitEntry; -pub(crate) use exit_guest_context; - pub struct JitAsmCommonFuns {} impl Default for JitAsmCommonFuns { @@ -175,6 +175,7 @@ impl JitAsmCommonFuns { ) { let continue_label = if add_continue_label { Some(block_asm.new_label()) } else { None }; let run_scheduler_label = block_asm.new_label(); + let handle_interrupt_label = block_asm.new_label(); let post_run_scheduler_label = block_asm.new_label(); let breakout_label = block_asm.new_label(); @@ -208,9 +209,9 @@ impl JitAsmCommonFuns { let pc_new_reg = block_asm.new_reg(); block_asm.load_u32(pc_og_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4); let func = if asm.emu.settings.arm7_hle() { - inst_branch_handler::run_scheduler:: as *const () + inst_branch_handler::run_scheduler:: as *const () } else { - inst_branch_handler::run_scheduler:: as *const () + inst_branch_handler::run_scheduler:: as *const () }; if IS_DEBUG { block_asm.call2(func, asm as *mut _ as u32, asm.jit_buf.current_pc); @@ -220,10 +221,16 @@ impl JitAsmCommonFuns { block_asm.load_u32(pc_new_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4); block_asm.cmp(pc_new_reg, pc_og_reg); - block_asm.branch(breakout_label, Cond::NE); + block_asm.branch(handle_interrupt_label, Cond::NE); block_asm.restore_reg(Reg::CPSR); block_asm.branch(post_run_scheduler_label, Cond::AL); + block_asm.label_unlikely(handle_interrupt_label); + block_asm.call3(handle_interrupt as *const (), asm as *mut _ as u32, pc_og_reg, asm.jit_buf.current_pc); + block_asm.restore_reg(Reg::CPSR); + block_asm.branch_fallthrough(post_run_scheduler_label, Cond::AL); + block_asm.branch(breakout_label, Cond::AL); + block_asm.free_reg(pc_new_reg); block_asm.free_reg(pc_og_reg); } @@ -321,8 +328,8 @@ impl JitAsmCommonFuns { println!("{CPU:?} branch lr from {current_pc:x} to {target_pc:x}") } - pub extern "C" fn debug_branch_lr_failed(current_pc: u32, target_pc: u32) { - println!("{CPU:?} failed to branch lr from {current_pc:x} to {target_pc:x}") + pub extern "C" fn debug_branch_lr_failed(current_pc: u32, target_pc: u32, desired_pc: u32) { + println!("{CPU:?} failed to branch lr 
from {current_pc:x} to {target_pc:x} desired: {desired_pc:x}") } pub extern "C" fn debug_return_stack_empty(current_pc: u32, target_pc: u32) {
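Reviewer note on the idle_loop_in_interrupt_return_stack_ptr rename above: the byte now packs three fields. Bit 7 marks an idle loop, bit 6 marks that execution is currently inside an interrupt handler (set by handle_interrupt, cleared when it returns), and the low six bits hold the return-stack pointer, which is why push_return_stack, pop_return_stack and clear_return_stack_ptr switch their masks from 0x80 to 0xC0. Below is a minimal standalone sketch of that packing; field and method names mirror the patch, but the RETURN_STACK_SIZE value is an assumption (the real constant, a power of two no larger than 64, is defined elsewhere in the crate).

    // Sketch of the packed status byte JitRuntimeData uses after this patch.
    // Bit 7 = idle loop, bit 6 = in interrupt, bits 0..=5 = return stack pointer.
    // RETURN_STACK_SIZE is an assumed power of two (<= 64) for illustration only.
    const RETURN_STACK_SIZE: usize = 32;

    struct PackedRuntimeData {
        idle_loop_in_interrupt_return_stack_ptr: u8,
        return_stack: [u32; RETURN_STACK_SIZE],
    }

    impl PackedRuntimeData {
        fn new() -> Self {
            Self { idle_loop_in_interrupt_return_stack_ptr: 0, return_stack: [0; RETURN_STACK_SIZE] }
        }

        fn is_idle_loop(&self) -> bool {
            self.idle_loop_in_interrupt_return_stack_ptr & 0x80 != 0
        }

        fn is_in_interrupt(&self) -> bool {
            self.idle_loop_in_interrupt_return_stack_ptr & 0x40 != 0
        }

        fn set_in_interrupt(&mut self, in_interrupt: bool) {
            self.idle_loop_in_interrupt_return_stack_ptr =
                (self.idle_loop_in_interrupt_return_stack_ptr & !0x40) | ((in_interrupt as u8) << 6);
        }

        fn get_return_stack_ptr(&self) -> u8 {
            // Mask out both flag bits so they never leak into the stack index.
            self.idle_loop_in_interrupt_return_stack_ptr & 0x3F
        }

        fn push_return_stack(&mut self, value: u32) {
            let mut ptr = self.get_return_stack_ptr();
            self.return_stack[ptr as usize] = value;
            ptr = (ptr + 1) & (RETURN_STACK_SIZE as u8 - 1);
            // Masking with 0xC0 keeps the idle-loop and in-interrupt flags while replacing the pointer.
            self.idle_loop_in_interrupt_return_stack_ptr =
                (self.idle_loop_in_interrupt_return_stack_ptr & 0xC0) | ptr;
        }

        fn pop_return_stack(&mut self) -> u32 {
            let ptr = self.get_return_stack_ptr().wrapping_sub(1) & (RETURN_STACK_SIZE as u8 - 1);
            self.idle_loop_in_interrupt_return_stack_ptr =
                (self.idle_loop_in_interrupt_return_stack_ptr & 0xC0) | ptr;
            self.return_stack[ptr as usize]
        }
    }

    fn main() {
        let mut data = PackedRuntimeData::new();
        data.set_in_interrupt(true);
        data.push_return_stack(0x0200_1234);
        assert!(data.is_in_interrupt());
        assert!(!data.is_idle_loop());
        assert_eq!(data.pop_return_stack(), 0x0200_1234);
        assert!(data.is_in_interrupt()); // flags survive stack pointer updates
        println!("status byte: {:#04x}", data.idle_loop_in_interrupt_return_stack_ptr);
    }

With this layout, get_return_stack_ptr masking with 0x3F keeps both flag bits out of the index, and clearing the stack (&= 0xC0) preserves the idle-loop and in-interrupt flags, which the new branch_lr and hle_bios_uninterrupt paths rely on to decide whether to unwind back to the interrupted guest code via the saved interrupt_sp instead of exiting guest context.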