From 98d1ba686255d540c2c352e7c5580265d7629f89 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 9 Jun 2024 22:30:51 +0000 Subject: [PATCH] sh2 drc, several bug fixes --- cpu/drc/emit_mips.c | 8 ++++++ cpu/drc/emit_x86.c | 3 ++- cpu/sh2/compiler.c | 47 +++++++++++++++++++++++++++--------- cpu/sh2/mame/sh2pico.c | 2 +- platform/libretro/libretro.c | 2 +- platform/linux/emu.c | 2 +- 6 files changed, 48 insertions(+), 16 deletions(-) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index d775fdd80..4e59f2b22 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1671,12 +1671,20 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) asm volatile( " rdhwr %2, $1;" " bal 0f;" // needed to allow for jr.hb: +#if _MIPS_SZPTR == 64 + "0: daddiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb +#else "0: addiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb +#endif " beqz %2, 3f;" "1: synci 0(%0);" " sltu %3, %0, %1;" +#if _MIPS_SZPTR == 64 + " daddu %0, %0, %2;" +#else " addu %0, %0, %2;" +#endif " bnez %3, 1b;" " sync;" diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 53d52385f..c55ed316b 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1365,7 +1365,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ emith_asr(rn, mh, 15); \ - emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ + emith_lsr(rm, mh, 31); \ + emith_addf_r_r(rn, rm); \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index b87f2b9b7..519d910b5 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1162,7 +1162,7 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave #endif } -static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave) +static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave, int is_new) { int i; @@ -1181,6 +1181,28 @@ static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave // mark memory for overwrite detection dr_mark_memory(1, bd, tcache_id, 0); bd->active = 1; + + // TODO find out why this is necessary, as it shouldn't + if (!is_new) { +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif + } } static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) @@ -2610,7 +2632,8 @@ static uptr split_address(uptr la, uptr mask, s32 *offs) #ifdef __arm__ // arm32 offset has an add/sub flag and an unsigned 8 bit value, which only // allows values of [-255...255]. the value -256 thus can't be used. - if (*offs + sign == 0) { + if (*offs < 0) { // TODO not working at all with negative offsets on ARM? + //if (*offs == -sign) { la -= sign; *offs += sign; } @@ -2631,7 +2654,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, s32 *offs) // is r constant and points to a memory region? if (! gconst_get(r, &a)) return -1; - poffs = dr_ctx_get_mem_ptr(sh2, a, &mask); + poffs = dr_ctx_get_mem_ptr(sh2, a + *offs, &mask); if (poffs == -1) return -1; @@ -3330,7 +3353,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (block) { dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm', base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); - dr_activate_block(block, tcache_id, sh2->is_slave); + dr_activate_block(block, tcache_id, sh2->is_slave, 0); emith_update_cache(); return block->entryp[0].tcache_ptr; } @@ -3979,6 +4002,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_r(sr, sr, tmp3); // T rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { // divide 64/32 @@ -4004,6 +4028,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } #endif break; @@ -4108,6 +4133,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_r(sr, sr, tmp3); // T rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { // divide 64/32 @@ -4138,6 +4164,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Ro[0]^M rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } else #endif { @@ -5223,7 +5250,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ring_alloc(&tcache_ring[tcache_id], tcache_ptr - block_entry_ptr); host_instructions_updated(block_entry_ptr, tcache_ptr, 1); - dr_activate_block(block, tcache_id, sh2->is_slave); + dr_activate_block(block, tcache_id, sh2->is_slave, 1); emith_update_cache(); do_host_disasm(tcache_id); @@ -5243,10 +5270,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) printf("~~~\n"); */ -#if (DRC_DEBUG) - fflush(stdout); -#endif - return block_entry_ptr; } @@ -5675,8 +5698,9 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free) a += rest, len -= rest; } while (len > 0); - if (!removed && len <= 4) { - dbg(2, "rm_blocks called @%08x, no work?", _a); + if (!removed) { + if (len <= 4) + dbg(2, "rm_blocks called @%08x, no work?", _a); return; } @@ -5984,7 +6008,6 @@ int sh2_drc_init(SH2 *sh2) // disasm the utils tcache_dsm_ptrs[0] = tcache; do_host_disasm(0); - fflush(stdout); #endif #if (DRC_DEBUG & 1) hash_collisions = 0; diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index 65f4757e8..2c2ea4066 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -1,7 +1,7 @@ #include "../sh2.h" #ifdef DRC_CMP -#include "../compiler.c" +#include "../compiler.h" #define BUSY_LOOP_HACKS 0 #else #define BUSY_LOOP_HACKS 1 diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 62c09103f..fa3f7e5ac 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -2542,7 +2542,7 @@ void retro_init(void) | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX | POPT_EN_32X|POPT_EN_PWM | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; -#ifdef __arm__ +#ifdef DRC_SH2 #ifdef _3DS if (ctr_svchack_successful) #endif diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 5d65ad5ee..7417ca098 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -36,7 +36,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__powerpc__) && !defined(__ppc__) && !defined(__PPC__) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(DRC_SH2) PicoIn.opt &= ~POPT_EN_DRC; #endif }