diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp
index e8fe1c46b642..1432d6b72017 100644
--- a/Core/MIPS/IR/IRCompALU.cpp
+++ b/Core/MIPS/IR/IRCompALU.cpp
@@ -248,9 +248,20 @@ void IRFrontend::Comp_Special3(MIPSOpcode op) {
 	{
 		u32 sourcemask = mask >> pos;
 		u32 destmask = ~(sourcemask << pos);
-		ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask));
-		if (pos != 0) {
-			ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos);
+
+		if (size != 32) {
+			// Need to use the sourcemask.
+			ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(sourcemask));
+			if (pos != 0) {
+				ir.Write(IROp::ShlImm, IRTEMP_0, IRTEMP_0, pos);
+			}
+		} else {
+			// The shl already discards the bits the sourcemask would clear, so the AND is unnecessary.
+			if (pos != 0) {
+				ir.Write(IROp::ShlImm, IRTEMP_0, rs, pos);
+			} else {
+				ir.Write(IROp::Mov, IRTEMP_0, rs);
+			}
 		}
 		ir.Write(IROp::AndConst, rt, rt, ir.AddConstant(destmask));
 		ir.Write(IROp::Or, rt, rt, IRTEMP_0);
diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp
index 083b6acba333..6c686c85af81 100644
--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@@ -121,6 +121,7 @@ static const IRMeta irMeta[] = {
 	{ IROp::FMovFromGPR, "FMovFromGPR", "FG" },
 	{ IROp::FMovToGPR, "FMovToGPR", "GF" },
 	{ IROp::OptFMovToGPRShr8, "OptFMovToGPRShr8", "GF" },
+	{ IROp::OptFCvtSWFromGPR, "OptFCvtSWFromGPR", "FG" },
 	{ IROp::FpCondFromReg, "FpCondFromReg", "_G" },
 	{ IROp::FpCondToReg, "FpCondToReg", "G" },
 	{ IROp::FpCtrlFromReg, "FpCtrlFromReg", "_G" },
diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h
index 6cdd3b88bada..c7f1e30eccd1 100644
--- a/Core/MIPS/IR/IRInst.h
+++ b/Core/MIPS/IR/IRInst.h
@@ -138,6 +138,7 @@ enum class IROp : uint8_t {
 	FCvtScaledSW,
 
 	FMovFromGPR,
+	OptFCvtSWFromGPR,
 	FMovToGPR,
 	OptFMovToGPRShr8,
 
diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp
index 7fe2990ff474..4d46983c974a 100644
--- a/Core/MIPS/IR/IRInterpreter.cpp
+++ b/Core/MIPS/IR/IRInterpreter.cpp
@@ -997,6 +997,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
 		case IROp::FMovFromGPR:
 			memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4);
 			break;
+		case IROp::OptFCvtSWFromGPR:
+			mips->f[inst->dest] = (float)(int)mips->r[inst->src1];
+			break;
 		case IROp::FMovToGPR:
 			memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
 			break;
@@ -1007,6 +1010,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
 			mips->r[inst->dest] = temp >> 8;
 			break;
 		}
+
 		case IROp::ExitToConst:
 			return inst->constant;
 
diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp
index 7f23846cea1e..f9a3294fad7a 100644
--- a/Core/MIPS/IR/IRPassSimplify.cpp
+++ b/Core/MIPS/IR/IRPassSimplify.cpp
@@ -229,6 +229,15 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
 	CONDITIONAL_DISABLE;
 
 	bool logBlocks = false;
+
+	bool letThroughHalves = false;
+	if (opts.optimizeForInterpreter) {
+		// If we're using the interpreter, which can handle these instructions directly,
+		// don't break "half" instructions up.
+		// Of course, we still want to combine if possible.
+		letThroughHalves = true;
+	}
+
 	for (int i = 0, n = (int)in.GetInstructions().size(); i < n; ++i) {
 		const IRInst &inst = in.GetInstructions()[i];
 
@@ -305,6 +314,11 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
 		switch (inst.op) {
 		case IROp::Load32Left:
 			if (!combineOpposite(IROp::Load32Right, -3, IROp::Load32, -3)) {
+				if (letThroughHalves) {
+					out.Write(inst);
+					break;
+				}
+
 				addCommonProlog();
 				// dest &= (0x00ffffff >> shift)
 				// Alternatively, could shift to a wall and back (but would require two shifts each way.)
@@ -339,6 +353,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
 
 		case IROp::Load32Right:
 			if (!combineOpposite(IROp::Load32Left, 3, IROp::Load32, 0)) {
+				if (letThroughHalves) {
+					out.Write(inst);
+					break;
+				}
 				addCommonProlog();
 				// IRTEMP_LR_VALUE >>= shift
 				out.Write(IROp::Shr, IRTEMP_LR_VALUE, IRTEMP_LR_VALUE, IRTEMP_LR_SHIFT);
@@ -382,6 +400,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
 
 		case IROp::Store32Left:
 			if (!combineOpposite(IROp::Store32Right, -3, IROp::Store32, -3)) {
+				if (letThroughHalves) {
+					out.Write(inst);
+					break;
+				}
 				addCommonProlog();
 				// IRTEMP_LR_VALUE &= 0xffffff00 << shift
 				out.WriteSetConstant(IRTEMP_LR_MASK, 0xffffff00);
@@ -399,6 +421,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
 
 		case IROp::Store32Right:
 			if (!combineOpposite(IROp::Store32Left, 3, IROp::Store32, 0)) {
+				if (letThroughHalves) {
+					out.Write(inst);
+					break;
+				}
 				addCommonProlog();
 				// IRTEMP_LR_VALUE &= 0x00ffffff << (24 - shift)
 				out.WriteSetConstant(IRTEMP_LR_MASK, 0x00ffffff);
@@ -2174,13 +2200,23 @@ bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions
 		case IROp::Store32:
 			if (next.op == IROp::Load32 &&
 				next.constant == inst.constant &&
-				next.dest == inst.src3 &&
+				next.dest == inst.dest &&
 				next.src1 == inst.src1) {
 				// The upcoming load is completely redundant.
 				// Skip it.
 				i++;
 			}
 			break;
+		case IROp::StoreVec4:
+			if (next.op == IROp::LoadVec4 &&
+				next.constant == inst.constant &&
+				next.dest == inst.dest &&
+				next.src1 == inst.src1) {
+				// The upcoming load is completely redundant. These are common in Wipeout.
+				// Skip it. NOTE: Vector loads/stores appear to use different register fields, but dest and src3 share a union.
+				i++;
+			}
+			break;
 		default:
 			break;
 		}
@@ -2243,10 +2279,18 @@ bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions &
 					inst.op = IROp::OptFMovToGPRShr8;
 					i++;  // Skip the next instruction.
 				}
-				out.Write(inst);
-			} else {
-				out.Write(inst);
 			}
+			out.Write(inst);
+			break;
+		case IROp::FMovFromGPR:
+			if (!last) {
+				IRInst next = in.GetInstructions()[i + 1];
+				if (next.op == IROp::FCvtSW && next.src1 == inst.dest && next.dest == inst.dest) {
+					inst.op = IROp::OptFCvtSWFromGPR;
+					i++;  // Skip the next
+				}
+			}
+			out.Write(inst);
 			break;
 		default:
 			out.Write(inst);