Skip to content

Commit

Permalink
Don't exit guest context when handling interrupts
Browse files Browse the repository at this point in the history
  • Loading branch information
Grarak committed Feb 1, 2025
1 parent 4ba4186 commit 380f488
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 91 deletions.
6 changes: 3 additions & 3 deletions src/core/hle/bios.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ pub fn interrupt<const CPU: CpuType>(emu: &mut Emu) {
debug_println!("{CPU:?} interrupt");

let regs = get_regs_mut!(emu, CPU);
let mut cpsr = Cpsr::from(get_regs!(emu, CPU).cpsr);
let mut cpsr = Cpsr::from(regs.cpsr);
cpsr.set_irq_disable(true);
cpsr.set_thumb(false);
cpsr.set_mode(u5::new(0x12));
regs.set_cpsr::<true>(u32::from(cpsr), emu);

let is_thumb = (get_regs!(emu, CPU).pc & 1) == 1;
let mut spsr = Cpsr::from(get_regs!(emu, CPU).spsr);
let is_thumb = (regs.pc & 1) == 1;
let mut spsr = Cpsr::from(regs.spsr);
spsr.set_thumb(is_thumb);
regs.spsr = u32::from(spsr);

Expand Down
36 changes: 28 additions & 8 deletions src/core/memory/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ use crate::core::CpuType::ARM9;
use crate::jit::jit_memory::JitMemory;
use crate::logging::debug_println;
use crate::mmap::Shm;
use crate::utils;
use crate::utils::Convert;
use crate::{utils, DEBUG_LOG};
use std::hint::unreachable_unchecked;
use std::intrinsics::unlikely;
use std::marker::PhantomData;
Expand Down Expand Up @@ -819,10 +819,15 @@ impl Memory {
let shm_offset = self.get_shm_offset::<CPU, TCM, false>(aligned_addr) as u32;
if shm_offset != 0 {
utils::read_from_mem_slice(&self.shm, shm_offset, slice);
return;
} else {
MemoryMultipleSliceIo::<CPU, TCM, T>::read(aligned_addr, slice, emu);
}

MemoryMultipleSliceIo::<CPU, TCM, T>::read(aligned_addr, slice, emu);
if DEBUG_LOG {
for (i, &value) in slice.iter().enumerate() {
debug_println!("{CPU:?} slice memory read at {:x} with value {:x}", aligned_addr as usize + i * size_of::<T>(), value.into());
}
}
}

pub fn read_fixed_slice<const CPU: CpuType, const TCM: bool, T: Convert>(&mut self, addr: u32, emu: &mut Emu, slice: &mut [T]) {
Expand All @@ -833,10 +838,15 @@ impl Memory {
let shm_offset = self.get_shm_offset::<CPU, TCM, false>(aligned_addr) as u32;
if shm_offset != 0 {
slice.fill(utils::read_from_mem(&self.shm, shm_offset));
return;
} else {
MemoryFixedSliceIo::<CPU, TCM, T>::read(aligned_addr, slice, emu);
}

MemoryFixedSliceIo::<CPU, TCM, T>::read(aligned_addr, slice, emu);
if DEBUG_LOG {
for &mut value in slice {
debug_println!("{CPU:?} fixed slice memory read at {:x} with value {:x}", aligned_addr as usize, value.into());
}
}
}

pub fn write<const CPU: CpuType, T: Convert>(&mut self, addr: u32, value: T, emu: &mut Emu) {
Expand All @@ -862,9 +872,14 @@ impl Memory {
}

pub fn write_multiple_slice<const CPU: CpuType, const TCM: bool, T: Convert>(&mut self, addr: u32, emu: &mut Emu, slice: &[T]) {
debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", slice.len());
debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", size_of_val(slice));
let aligned_addr = addr & !(size_of::<T>() as u32 - 1);
let aligned_addr = aligned_addr & 0x0FFFFFFF;
if DEBUG_LOG {
for (i, &value) in slice.iter().enumerate() {
debug_println!("{CPU:?} slice memory write at {:x} with value {:x}", aligned_addr as usize + i * size_of::<T>(), value.into());
}
}

let shm_offset = self.get_shm_offset::<CPU, TCM, true>(aligned_addr) as u32;
if shm_offset != 0 {
Expand All @@ -876,9 +891,14 @@ impl Memory {
}

pub fn write_fixed_slice<const CPU: CpuType, const TCM: bool, T: Convert>(&mut self, addr: u32, emu: &mut Emu, slice: &[T]) {
debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", slice.len());
debug_println!("{CPU:?} fixed slice memory write at {addr:x} with size {}", size_of_val(slice));
let aligned_addr = addr & !(size_of::<T>() as u32 - 1);
let aligned_addr = aligned_addr & 0x0FFFFFFF;
if DEBUG_LOG {
for &value in slice {
debug_println!("{CPU:?} fixed slice memory write at {:x} with value {:x}", aligned_addr, value.into());
}
}

let shm_offset = self.get_shm_offset::<CPU, TCM, true>(aligned_addr) as u32;
if shm_offset != 0 {
Expand All @@ -890,7 +910,7 @@ impl Memory {
}

pub fn write_multiple_memset<const CPU: CpuType, const TCM: bool, T: Convert>(&mut self, addr: u32, value: T, size: usize, emu: &mut Emu) {
debug_println!("{CPU:?} multiple memset memory write at {addr:x} with size {size}");
debug_println!("{CPU:?} multiple memset memory write at {addr:x} with size {}", size_of::<T>() * size);
let aligned_addr = addr & !(size_of::<T>() as u32 - 1);
let aligned_addr = aligned_addr & 0x0FFFFFFF;

Expand Down
4 changes: 2 additions & 2 deletions src/jit/assembler/block_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ impl BlockAsm {
for &i in block_indices {
let basic_block = &mut self.cache.basic_blocks[i];
let sum_guest_regs_input_dirty = basic_block.guest_regs_input_dirty + guest_regs_dirty;
if sum_guest_regs_input_dirty != basic_block.guest_regs_input_dirty || !basic_block.guest_regs_resolved {
if !basic_block.guest_regs_resolved || sum_guest_regs_input_dirty != basic_block.guest_regs_input_dirty {
self.buf.reachable_blocks.insert(i);
basic_block.guest_regs_resolved = true;
basic_block.guest_regs_input_dirty = sum_guest_regs_input_dirty;
Expand All @@ -818,7 +818,7 @@ impl BlockAsm {

let basic_block = unsafe { self.cache.basic_blocks.get_unchecked_mut(i) };
let sum_required_outputs = *basic_block.get_required_outputs() + required_outputs;
if sum_required_outputs != basic_block.get_required_outputs() || !basic_block.io_resolved {
if !basic_block.io_resolved || sum_required_outputs != basic_block.get_required_outputs() {
basic_block.io_resolved = true;
basic_block.set_required_outputs(sum_required_outputs);
basic_block.init_resolve_io(self.buf);
Expand Down
22 changes: 12 additions & 10 deletions src/jit/emitter/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,25 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
}

if (op.is_mov() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::LR) && !self.jit_buf.current_inst().out_regs.is_reserved(Reg::CPSR))
|| (op.is_multiple_mem_transfer() && *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() == Reg::SP && !op.mem_transfer_user())
|| (op.is_multiple_mem_transfer() && *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() == Reg::SP)
|| (op.is_single_mem_transfer() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::SP))
{
let guest_pc_reg = block_asm.new_reg();
block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4);
self.emit_branch_return_stack_common(block_asm, guest_pc_reg);
block_asm.free_reg(guest_pc_reg);
} else {
if op.mem_transfer_user() {
block_asm.call(register_restore_spsr::<CPU> as *const ());
if CPU == ARM7 {
block_asm.call1(set_pc_arm_mode as *const (), get_regs_mut!(self.emu, CPU) as *mut _ as u32);
}
}

self.emit_branch_out_metadata(block_asm);
block_asm.epilogue();
let guest_pc_reg = block_asm.new_reg();
block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4);
self.emit_branch_return_stack_common(block_asm, guest_pc_reg);
block_asm.free_reg(guest_pc_reg);
} else {
let guest_pc_reg = block_asm.new_reg();
block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4);
self.emit_branch_reg_common(block_asm, guest_pc_reg, false, false);
block_asm.free_reg(guest_pc_reg);
}
}

Expand Down Expand Up @@ -126,9 +128,9 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {

if set_idle_loop {
let idle_loop_reg = block_asm.new_reg();
block_asm.load_u8(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_return_stack_ptr_offset() as u32);
block_asm.load_u8(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_in_interrupt_return_stack_ptr_offset() as u32);
block_asm.orr(idle_loop_reg, idle_loop_reg, 0x80);
block_asm.store_u16(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_return_stack_ptr_offset() as u32);
block_asm.store_u8(idle_loop_reg, runtime_data_addr_reg, JitRuntimeData::get_idle_loop_in_interrupt_return_stack_ptr_offset() as u32);
block_asm.free_reg(idle_loop_reg);
}

Expand Down
2 changes: 2 additions & 0 deletions src/jit/emitter/emit_branch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
}

pub fn emit_branch_label_common<const THUMB: bool>(&mut self, block_asm: &mut BlockAsm, target_pc: u32, cond: Cond) {
let target_pc = align_guest_pc(target_pc) | (target_pc & 1);

match Self::analyze_branch_label::<THUMB>(&self.jit_buf.insts, self.jit_buf.current_index, cond, self.jit_buf.current_pc, target_pc) {
JitBranchInfo::Local(target_index) => {
let target_pre_cycle_count_sum = self.jit_buf.insts_cycle_counts[target_index] - self.jit_buf.insts[target_index].cycle as u16;
Expand Down
8 changes: 5 additions & 3 deletions src/jit/emitter/thumb/emit_thumb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::jit::jit_asm::JitAsm;
use crate::jit::op::Op;
use crate::jit::reg::Reg;

impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
impl<const CPU: CpuType> JitAsm<'_, CPU> {
pub fn emit_thumb(&mut self, block_asm: &mut BlockAsm) {
block_asm.guest_pc(self.jit_buf.current_pc);

Expand Down Expand Up @@ -87,8 +87,10 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
self.emit_branch_return_stack_common(block_asm, guest_pc_reg);
block_asm.free_reg(guest_pc_reg);
} else {
self.emit_branch_out_metadata(block_asm);
block_asm.epilogue();
let guest_pc_reg = block_asm.new_reg();
block_asm.load_u32(guest_pc_reg, block_asm.tmp_regs.thread_regs_addr_reg, Reg::PC as u32 * 4);
self.emit_branch_reg_common(block_asm, guest_pc_reg, false, true);
block_asm.free_reg(guest_pc_reg);
}
}
}
Expand Down
86 changes: 60 additions & 26 deletions src/jit/inst_branch_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ use crate::jit::jit_asm_common_funs::{exit_guest_context, get_max_loop_cycle_cou
use crate::jit::jit_memory::JitEntry;
use crate::logging::debug_println;
use crate::{get_jit_asm_ptr, CURRENT_RUNNING_CPU, DEBUG_LOG, IS_DEBUG};
use std::arch::naked_asm;
use std::cmp::min;
use std::intrinsics::{breakpoint, likely, unlikely};
use std::mem;

pub extern "C" fn run_scheduler<const CPU: CpuType, const ARM7_HLE: bool>(asm: *mut JitAsm<CPU>, current_pc: u32) {
pub extern "C" fn run_scheduler<const ARM7_HLE: bool>(asm: *mut JitAsm<{ ARM9 }>, current_pc: u32) {
let asm = unsafe { asm.as_mut_unchecked() };
debug_println!("{CPU:?} run scheduler at {current_pc:x} target pc {:x}", get_regs!(asm.emu, CPU).pc);
debug_println!("{ARM9:?} run scheduler at {current_pc:x} target pc {:x}", get_regs!(asm.emu, ARM9).pc);

let cycles = if ARM7_HLE {
(asm.runtime_data.accumulated_cycles + 1) >> 1
Expand All @@ -39,13 +40,50 @@ pub extern "C" fn run_scheduler<const CPU: CpuType, const ARM7_HLE: bool>(asm: *
get_common_mut!(asm.emu).gpu.gpu_3d_regs.run_cmds(cm.get_cycles(), asm.emu);
}

/// Trampoline that enters a JIT-compiled guest block while recording the
/// host stack pointer, so interrupt handling can run nested inside guest
/// context instead of exiting it.
///
/// `entry` (r0) is the JIT entry point to branch to; `interrupt_sp_ptr`
/// (r1) receives the host SP captured after the callee-saved registers
/// have been pushed.
///
/// # Safety
/// `entry` must point to valid, callable JIT code that returns via `lr`,
/// and `interrupt_sp_ptr` must be valid for a `usize`-sized store for the
/// duration of the call.
#[naked]
unsafe extern "C" fn call_interrupt(entry: *const fn(), interrupt_sp_ptr: *mut usize) {
    #[rustfmt::skip]
    naked_asm!(
        // Preserve the AAPCS callee-saved registers plus the return address.
        "push {{r4-r12,lr}}",
        // Publish the current host SP through r1 (= interrupt_sp_ptr) so the
        // runtime can unwind back to this frame if needed.
        "str sp, [r1]",
        // Call into the JIT block (r0 = entry); it returns here via lr.
        "blx r0",
        // Restore registers and return by popping the saved lr into pc.
        "pop {{r4-r12,pc}}",
    );
}

/// Bails out of guest execution when the emulated return-stack depth has
/// reached its cap; otherwise returns and lets the caller continue.
#[inline(always)]
fn check_stack_depth(asm: &mut JitAsm<{ ARM9 }>, current_pc: u32) {
    let depth = asm.runtime_data.get_sp_depth_size();
    // Fast path: plenty of room left, nothing to do.
    if likely(depth < MAX_STACK_DEPTH_SIZE) {
        return;
    }
    if IS_DEBUG {
        // Record where we abandoned the guest so diagnostics can report it.
        asm.runtime_data.set_branch_out_pc(current_pc);
    }
    if DEBUG_LOG {
        JitAsmCommonFuns::<{ ARM9 }>::debug_stack_depth_too_big(depth, current_pc);
    }
    // NOTE(review): exit_guest_context! presumably unwinds to the host frame
    // that entered the guest and does not return — confirm.
    unsafe { exit_guest_context!(asm) };
}

/// Runs the guest interrupt handler as a nested JIT call without leaving
/// guest context. `target_pc` is the PC the guest was about to execute when
/// the interrupt fired (i.e. where to resume afterwards); `current_pc` is
/// the PC of the block that triggered the scheduler run.
pub extern "C" fn handle_interrupt(asm: *mut JitAsm<{ ARM9 }>, target_pc: u32, current_pc: u32) {
    // SAFETY: caller guarantees `asm` points at the live JitAsm for this CPU.
    let asm = unsafe { asm.as_mut_unchecked() };
    // May exit guest context entirely if the return stack is already full.
    check_stack_depth(asm, current_pc);

    // Resume address: aligned PC with the Thumb bit of target_pc kept in bit 0.
    let lr = align_guest_pc(target_pc) | (target_pc & 1);
    let regs = get_regs!(asm.emu, ARM9);

    // Fresh cycle accounting for the nested handler run.
    asm.runtime_data.pre_cycle_count_sum = 0;
    asm.runtime_data.set_in_interrupt(true);
    asm.runtime_data.interrupt_lr = lr;
    // regs.pc now holds the handler entry set by the scheduler; bit 0 selects
    // Thumb state for it.
    get_regs_mut!(asm.emu, ARM9).set_thumb(regs.pc & 1 == 1);
    let jit_entry = get_jit!(asm.emu).get_jit_start_addr(align_guest_pc(regs.pc));
    // Enter the handler via the trampoline, saving the host SP so deeper code
    // can unwind back here (see call_interrupt).
    unsafe { call_interrupt(jit_entry as _, &mut asm.runtime_data.interrupt_sp) };
    asm.runtime_data.set_in_interrupt(false);
}

/// Folds the cycles spent in the just-executed block into the runtime's
/// accumulated total and logs the new count.
///
/// Defect fixed: the block contained two debug_println! calls logging the
/// same event (stale duplicate of the log line); only one is kept.
fn flush_cycles<const CPU: CpuType>(asm: &mut JitAsm<CPU>, total_cycles: u16, current_pc: u32) {
    // pre_cycle_count_sum is the portion of this block already accounted for.
    // NOTE(review): the +2 constant looks like fixed per-block overhead — confirm.
    asm.runtime_data.accumulated_cycles += total_cycles + 2 - asm.runtime_data.pre_cycle_count_sum;
    debug_println!("{CPU:?} flush cycles {} at {current_pc:x}", asm.runtime_data.accumulated_cycles);
}

fn check_scheduler<const CPU: CpuType>(asm: &mut JitAsm<CPU>, current_pc: u32) {
Expand All @@ -54,18 +92,13 @@ fn check_scheduler<const CPU: CpuType>(asm: &mut JitAsm<CPU>, current_pc: u32) {
ARM9 => {
let pc_og = get_regs!(asm.emu, ARM9).pc;
if asm.emu.settings.arm7_hle() {
run_scheduler::<CPU, true>(asm as _, current_pc);
run_scheduler::<true>(unsafe { mem::transmute(asm as *mut JitAsm<CPU>) }, current_pc);
} else {
run_scheduler::<CPU, false>(asm as _, current_pc);
run_scheduler::<false>(unsafe { mem::transmute(asm as *mut JitAsm<CPU>) }, current_pc);
}

// Handle interrupts
if get_regs!(asm.emu, ARM9).pc != pc_og {
debug_println!("{CPU:?} exit guest flush cycles");
if IS_DEBUG {
asm.runtime_data.set_branch_out_pc(current_pc);
}
unsafe { exit_guest_context!(asm) };
handle_interrupt(unsafe { mem::transmute(asm as *mut JitAsm<CPU>) }, pc_og, current_pc);
}
}
ARM7 => {
Expand All @@ -91,16 +124,7 @@ pub extern "C" fn pre_branch<const CPU: CpuType, const HAS_LR_RETURN: bool>(asm:
flush_cycles(asm, total_cycles, current_pc);

if CPU == ARM9 && HAS_LR_RETURN {
let sp_depth_size = asm.runtime_data.get_sp_depth_size();
if unlikely(sp_depth_size >= MAX_STACK_DEPTH_SIZE) {
if IS_DEBUG {
asm.runtime_data.set_branch_out_pc(current_pc);
}
if DEBUG_LOG {
JitAsmCommonFuns::<CPU>::debug_stack_depth_too_big(sp_depth_size, current_pc);
}
unsafe { exit_guest_context!(asm) };
}
check_stack_depth(unsafe { mem::transmute(asm as *mut JitAsm<CPU>) }, current_pc);
}

check_scheduler(asm, current_pc);
Expand Down Expand Up @@ -175,7 +199,17 @@ pub unsafe extern "C" fn branch_lr<const CPU: CpuType>(total_cycles: u16, target
}
} else {
if DEBUG_LOG {
JitAsmCommonFuns::<CPU>::debug_branch_lr_failed(current_pc, target_pc);
JitAsmCommonFuns::<CPU>::debug_branch_lr_failed(current_pc, target_pc, desired_lr);
}
if CPU == ARM9 && unlikely(asm.runtime_data.is_in_interrupt()) {
let sp_depth_size = asm.runtime_data.get_sp_depth_size();
if likely(sp_depth_size < MAX_STACK_DEPTH_SIZE) {
asm.runtime_data.pre_cycle_count_sum = 0;
asm.runtime_data.push_return_stack(desired_lr);
unsafe { call_jit_fun(asm, target_pc) };
} else {
JitAsmCommonFuns::<CPU>::debug_stack_depth_too_big(sp_depth_size, current_pc);
}
}
exit_guest_context!(asm);
}
Expand Down
Loading

0 comments on commit 380f488

Please sign in to comment.