diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index e8fe1c46b642..1432d6b72017 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -248,9 +248,20 @@ void IRFrontend::Comp_Special3(MIPSOpcode op) { { u32 sourcemask = mask >> pos; u32 destmask = ~(sourcemask << pos); - ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); - if (pos != 0) { - ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); + + if (size != 32) { + // Need to use the sourcemask. + ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask)); + if (pos != 0) { + ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos); + } + } else { + // If the shl takes care of the sourcemask, don't need to and. + if (pos != 0) { + ir.Write(IROp::ShlImm, IRTEMP_0, rs, pos); + } else { + ir.Write(IROp::Mov, IRTEMP_0, rs); + } } ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask)); ir.Write(IROp::Or, rt, rt, IRTEMP_0); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 083b6acba333..6c686c85af81 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -121,6 +121,7 @@ static const IRMeta irMeta[] = { { IROp::FMovFromGPR, "FMovFromGPR", "FG" }, { IROp::FMovToGPR, "FMovToGPR", "GF" }, { IROp::OptFMovToGPRShr8, "OptFMovToGPRShr8", "GF" }, + { IROp::OptFCvtSWFromGPR, "OptFCvtSWFromGPR", "FG" }, { IROp::FpCondFromReg, "FpCondFromReg", "_G" }, { IROp::FpCondToReg, "FpCondToReg", "G" }, { IROp::FpCtrlFromReg, "FpCtrlFromReg", "_G" }, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 6cdd3b88bada..c7f1e30eccd1 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -138,6 +138,7 @@ enum class IROp : uint8_t { FCvtScaledSW, FMovFromGPR, + OptFCvtSWFromGPR, FMovToGPR, OptFMovToGPRShr8, diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 7fe2990ff474..4d46983c974a 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -997,6 +997,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) { case IROp::FMovFromGPR: memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); break; + case IROp::OptFCvtSWFromGPR: + mips->f[inst->dest] = (float)(int)mips->r[inst->src1]; + break; case IROp::FMovToGPR: memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); break; @@ -1007,6 +1010,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) { mips->r[inst->dest] = temp >> 8; break; } + case IROp::ExitToConst: return inst->constant; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 7f23846cea1e..f9a3294fad7a 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -229,6 +229,15 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions CONDITIONAL_DISABLE; bool logBlocks = false; + + bool letThroughHalves = false; + if (opts.optimizeForInterpreter) { + // If we're using the interpreter, which can handle these instructions directly, + // don't break "half" instructions up. + // Of course, we still want to combine if possible. + letThroughHalves = true; + } + for (int i = 0, n = (int)in.GetInstructions().size(); i < n; ++i) { const IRInst &inst = in.GetInstructions()[i]; @@ -305,6 +314,11 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions switch (inst.op) { case IROp::Load32Left: if (!combineOpposite(IROp::Load32Right, -3, IROp::Load32, -3)) { + if (letThroughHalves) { + out.Write(inst); + break; + } + addCommonProlog(); // dest &= (0x00ffffff >> shift) // Alternatively, could shift to a wall and back (but would require two shifts each way.) @@ -339,6 +353,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions case IROp::Load32Right: if (!combineOpposite(IROp::Load32Left, 3, IROp::Load32, 0)) { + if (letThroughHalves) { + out.Write(inst); + break; + } addCommonProlog(); // IRTEMP_LR_VALUE >>= shift out.Write(IROp::Shr, IRTEMP_LR_VALUE, IRTEMP_LR_VALUE, IRTEMP_LR_SHIFT); @@ -382,6 +400,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions case IROp::Store32Left: if (!combineOpposite(IROp::Store32Right, -3, IROp::Store32, -3)) { + if (letThroughHalves) { + out.Write(inst); + break; + } addCommonProlog(); // IRTEMP_LR_VALUE &= 0xffffff00 << shift out.WriteSetConstant(IRTEMP_LR_MASK, 0xffffff00); @@ -399,6 +421,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions case IROp::Store32Right: if (!combineOpposite(IROp::Store32Left, 3, IROp::Store32, 0)) { + if (letThroughHalves) { + out.Write(inst); + break; + } addCommonProlog(); // IRTEMP_LR_VALUE &= 0x00ffffff << (24 - shift) out.WriteSetConstant(IRTEMP_LR_MASK, 0x00ffffff); @@ -2174,13 +2200,23 @@ bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions case IROp::Store32: if (next.op == IROp::Load32 && next.constant == inst.constant && - next.dest == inst.src3 && + next.dest == inst.dest && next.src1 == inst.src1) { // The upcoming load is completely redundant. // Skip it. i++; } break; + case IROp::StoreVec4: + if (next.op == IROp::LoadVec4 && + next.constant == inst.constant && + next.dest == inst.dest && + next.src1 == inst.src1) { + // The upcoming load is completely redundant. These are common in Wipeout. + // Skip it. NOTE: It looks like vector load/stores uses different register assignments, but there's a union between dest and src3. + i++; + } + break; default: break; } @@ -2243,10 +2279,18 @@ bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions & inst.op = IROp::OptFMovToGPRShr8; i++; // Skip the next instruction. } - out.Write(inst); - } else { - out.Write(inst); } + out.Write(inst); + break; + case IROp::FMovFromGPR: + if (!last) { + IRInst next = in.GetInstructions()[i + 1]; + if (next.op == IROp::FCvtSW && next.src1 == inst.dest && next.dest == inst.dest) { + inst.op = IROp::OptFCvtSWFromGPR; + i++; // Skip the next + } + } + out.Write(inst); break; default: out.Write(inst);