From 1dbeca0618966f4c8156d81e5b25d40f9f8b89e0 Mon Sep 17 00:00:00 2001
From: Henrik Rydgard <hrydgard@gmail.com>
Date: Tue, 20 Dec 2016 13:42:54 +0100
Subject: [PATCH] Only support decoding UV to float in vertex formats.
 Supporting u8 and u16 was more complexity than it's worth.

---
 GPU/Common/DrawEngineCommon.cpp    |  2 +-
 GPU/Common/VertexDecoderArm.cpp    | 17 -------
 GPU/Common/VertexDecoderArm64.cpp  | 12 -----
 GPU/Common/VertexDecoderCommon.cpp | 81 +++---------------------------
 GPU/Common/VertexDecoderCommon.h   |  5 --
 GPU/Common/VertexDecoderX86.cpp    | 13 -----
 GPU/Directx9/DrawEngineDX9.cpp     |  3 --
 GPU/GLES/DrawEngineGLES.cpp        |  2 -
 GPU/Software/TransformUnit.cpp     | 12 ++---
 GPU/Vulkan/DrawEngineVulkan.cpp    |  7 +--
 Windows/GEDebugger/TabVertices.cpp |  4 +-
 11 files changed, 15 insertions(+), 143 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index 6090af0d9581..c25eca683966 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -27,7 +27,7 @@
 
 #define QUAD_INDICES_MAX 65536
 
-DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) {
+DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
 	quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
 	decJitCache_ = new VertexDecoderJitCache();
 }
diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp
index a67156f772be..76975aefd09c 100644
--- a/GPU/Common/VertexDecoderArm.cpp
+++ b/GPU/Common/VertexDecoderArm.cpp
@@ -119,8 +119,6 @@ static const JitLookup jitLookup[] = {
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
 
-	{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
-	{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
 	{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
 	{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
 
@@ -563,21 +561,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
 	Jit_ApplyWeights();
 }
 
-// Fill last two bytes with zeroes to align to 4 bytes. LDRH does it for us, handy.
-void VertexDecoderJitCache::Jit_TcU8() {
-	LDRB(tempReg1, srcReg, dec_->tcoff);
-	LDRB(tempReg2, srcReg, dec_->tcoff + 1);
-	ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 8));
-	STR(tempReg1, dstReg, dec_->decFmt.uvoff);
-}
-
-void VertexDecoderJitCache::Jit_TcU16() {
-	LDRH(tempReg1, srcReg, dec_->tcoff);
-	LDRH(tempReg2, srcReg, dec_->tcoff + 2);
-	ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16));
-	STR(tempReg1, dstReg, dec_->decFmt.uvoff);
-}
-
 void VertexDecoderJitCache::Jit_TcFloat() {
 	LDR(tempReg1, srcReg, dec_->tcoff);
 	LDR(tempReg2, srcReg, dec_->tcoff + 4);
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index 4ac1a734d231..e196da58f2eb 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -92,8 +92,6 @@ static const JitLookup jitLookup[] = {
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
 
-	{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
-	{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
 	{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
 	{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
 	{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
@@ -578,16 +576,6 @@ void VertexDecoderJitCache::Jit_Color5551() {
 	CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
 }
 
-void VertexDecoderJitCache::Jit_TcU8() {
-	LDURH(tempReg1, srcReg, dec_->tcoff);
-	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
-}
-
-void VertexDecoderJitCache::Jit_TcU16() {
-	LDUR(tempReg1, srcReg, dec_->tcoff);
-	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
-}
-
 void VertexDecoderJitCache::Jit_TcU16Through() {
 	LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
 	LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index 587cdc4ee005..80356e8cf503 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -264,14 +264,6 @@ void VertexDecoder::Step_WeightsFloatSkin() const
 	}
 }
 
-void VertexDecoder::Step_TcU8() const
-{
-	// u32 to write two bytes of zeroes for free.
-	u32 *uv = (u32*)(decoded_ + decFmt.uvoff);
-	const u16 *uvdata = (const u16*)(ptr_ + tcoff);
-	*uv = *uvdata;
-}
-
 void VertexDecoder::Step_TcU8ToFloat() const
 {
 	// u32 to write two bytes of zeroes for free.
@@ -281,14 +273,6 @@ void VertexDecoder::Step_TcU8ToFloat() const
 	uv[1] = uvdata[1] * (1.0f / 128.0f);
 }
 
-void VertexDecoder::Step_TcU16() const
-{
-	u32 *uv = (u32 *)(decoded_ + decFmt.uvoff);
-	// TODO: Fix big-endian without losing the optimization
-	const u32 *uvdata = (const u32*)(ptr_ + tcoff);
-	*uv = *uvdata;
-}
-
 void VertexDecoder::Step_TcU16ToFloat() const
 {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
@@ -903,13 +887,6 @@ static const StepFunction wtstep_skin[4] = {
 	&VertexDecoder::Step_WeightsFloatSkin,
 };
 
-static const StepFunction tcstep[4] = {
-	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16,
-	&VertexDecoder::Step_TcFloat,
-};
-
 static const StepFunction tcstepToFloat[4] = {
 	0,
 	&VertexDecoder::Step_TcU8ToFloat,
@@ -973,13 +950,6 @@ static const StepFunction tcstep_morph_remasterToFloat[4] = {
 	&VertexDecoder::Step_TcFloatMorph,
 };
 
-static const StepFunction tcstep_through[4] = {
-	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16Through,
-	&VertexDecoder::Step_TcFloatThrough,
-};
-
 static const StepFunction tcstep_throughToFloat[4] = {
 	0,
 	&VertexDecoder::Step_TcU8ToFloat,
@@ -987,14 +957,6 @@ static const StepFunction tcstep_throughToFloat[4] = {
 	&VertexDecoder::Step_TcFloatThrough,
 };
 
-// Some HD Remaster games double the u16 texture coordinates.
-static const StepFunction tcstep_remaster[4] = {
-	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16Double,
-	&VertexDecoder::Step_TcFloat,
-};
-
 static const StepFunction tcstep_remasterToFloat[4] = {
 	0,
 	&VertexDecoder::Step_TcU8ToFloat,
@@ -1002,13 +964,6 @@ static const StepFunction tcstep_remasterToFloat[4] = {
 	&VertexDecoder::Step_TcFloat,
 };
 
-static const StepFunction tcstep_through_remaster[4] = {
-	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16ThroughDouble,
-	&VertexDecoder::Step_TcFloatThrough,
-};
-
 static const StepFunction tcstep_through_remasterToFloat[4] = {
 	0,
 	&VertexDecoder::Step_TcU8ToFloat,
@@ -1180,34 +1135,14 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
 				steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc];
 			decFmt.uvfmt = DEC_FLOAT_2;
 		} else {
-			if (options.expandAllUVtoFloat) {
-				if (morphcount != 1 && !throughmode)
-					steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
-				else if (g_DoubleTextureCoordinates)
-					steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
-				else
-					steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
-				decFmt.uvfmt = DEC_FLOAT_2;
-			} else {
-				if (morphcount != 1 && !throughmode)
-					steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
-				else if (g_DoubleTextureCoordinates)
-					steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc];
-				else
-					steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
-
-				switch (tc) {
-				case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
-					decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
-					break;
-				case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
-					decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
-					break;
-				case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
-					decFmt.uvfmt = DEC_FLOAT_2;
-					break;
-				}
-			}
+			// We now always expand UV to float.
+			if (morphcount != 1 && !throughmode)
+				steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
+			else if (g_DoubleTextureCoordinates)
+				steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
+			else
+				steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
+			decFmt.uvfmt = DEC_FLOAT_2;
 		}
 
 		decFmt.uvoff = decOff;
diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h
index b7ec77df6366..f735fc80b03c 100644
--- a/GPU/Common/VertexDecoderCommon.h
+++ b/GPU/Common/VertexDecoderCommon.h
@@ -443,7 +443,6 @@ int TranslateNumBones(int bones);
 typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
 
 struct VertexDecoderOptions {
-	bool expandAllUVtoFloat;
 	bool expandAllWeightsToFloat;
 	bool expand8BitNormalsToFloat;
 };
@@ -477,8 +476,6 @@ class VertexDecoder {
 	void Step_WeightsU16Skin() const;
 	void Step_WeightsFloatSkin() const;
 
-	void Step_TcU8() const;
-	void Step_TcU16() const;
 	void Step_TcU8ToFloat() const;
 	void Step_TcU16ToFloat() const;
 	void Step_TcFloat() const;
@@ -633,9 +630,7 @@ class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
 	void Jit_WeightsU16Skin();
 	void Jit_WeightsFloatSkin();
 
-	void Jit_TcU8();
 	void Jit_TcU8ToFloat();
-	void Jit_TcU16();
 	void Jit_TcU16ToFloat();
 	void Jit_TcFloat();
 
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index 0ba4a81c7c0a..2127bbe27115 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -97,8 +97,6 @@ static const JitLookup jitLookup[] = {
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
 
-	{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
-	{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
 	{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
 	{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
 	{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
@@ -687,17 +685,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
 	}
 }
 
-// Fill last two bytes with zeroes to align to 4 bytes. MOVZX does it for us, handy.
-void VertexDecoderJitCache::Jit_TcU8() {
-	MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
-	MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
-}
-
-void VertexDecoderJitCache::Jit_TcU16() {
-	MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
-	MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
-}
-
 void VertexDecoderJitCache::Jit_TcU8ToFloat() {
 	Jit_AnyU8ToFloat(dec_->tcoff, 16);
 	MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3);
diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp
index 02756eeb76a9..ef3a29e31dcc 100644
--- a/GPU/Directx9/DrawEngineDX9.cpp
+++ b/GPU/Directx9/DrawEngineDX9.cpp
@@ -91,9 +91,6 @@ DrawEngineDX9::DrawEngineDX9()
 		dcid_(0),
 		fboTexNeedBind_(false),
 		fboTexBound_(false) {
-
-	memset(&decOptions_, 0, sizeof(decOptions_));
-	decOptions_.expandAllUVtoFloat = true;
 	decOptions_.expandAllWeightsToFloat = true;
 	decOptions_.expand8BitNormalsToFloat = true;
 
diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp
index 5911c0ca60a6..51a6c34fb108 100644
--- a/GPU/GLES/DrawEngineGLES.cpp
+++ b/GPU/GLES/DrawEngineGLES.cpp
@@ -130,8 +130,6 @@ DrawEngineGLES::DrawEngineGLES()
 		fboTexBound_(false) {
 	decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
 	bufferDecimationCounter_ = VERTEXCACHE_NAME_DECIMATION_INTERVAL;
-	memset(&decOptions_, 0, sizeof(decOptions_));
-	decOptions_.expandAllUVtoFloat = false;
 	// Allocate nicely aligned memory. Maybe graphics drivers will
 	// appreciate it.
 	// All this is a LOT of memory, need to see if we can cut down somehow.
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp
index 26d623e63470..c45bc37c0ec6 100644
--- a/GPU/Software/TransformUnit.cpp
+++ b/GPU/Software/TransformUnit.cpp
@@ -217,9 +217,7 @@ int TransformUnit::patchBufferSize_ = 0;
 
 void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) {
 	VertexDecoder vdecoder;
-	VertexDecoderOptions options;
-	memset(&options, 0, sizeof(options));
-	options.expandAllUVtoFloat = false;
+	VertexDecoderOptions options{};
 	vdecoder.SetVertexType(vertex_type, options);
 	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
 
@@ -310,9 +308,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type
 {
 	// TODO: Cache VertexDecoder objects
 	VertexDecoder vdecoder;
-	VertexDecoderOptions options;
-	memset(&options, 0, sizeof(options));
-	options.expandAllUVtoFloat = false;
+	VertexDecoderOptions options{};
 	vdecoder.SetVertexType(vertex_type, options);
 	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
 
@@ -569,9 +565,7 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVert
 	simpleVertices.resize(indexUpperBound + 1);
 
 	VertexDecoder vdecoder;
-	VertexDecoderOptions options;
-	memset(&options, 0, sizeof(options));
-	options.expandAllUVtoFloat = false;  // TODO: True should be fine here
+	VertexDecoderOptions options{};
 	vdecoder.SetVertexType(gstate.vertType, options);
 	DrawEngineCommon::NormalizeVertices((u8 *)(&simpleVertices[0]), (u8 *)(&temp_buffer[0]), Memory::GetPointer(gstate_c.vertexAddr), &vdecoder, indexLowerBound, indexUpperBound, gstate.vertType);
 
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index adf7ec470314..98feecbf376b 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -74,12 +74,9 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan)
 	fboTexNeedBind_(false),
 	fboTexBound_(false),
 	curFrame_(0),
-	nullTexture_(nullptr) {
+	nullTexture_(nullptr),
+	stats_{}  {
 
-	memset(&stats_, 0, sizeof(stats_));
-
-	memset(&decOptions_, 0, sizeof(decOptions_));
-	decOptions_.expandAllUVtoFloat = false;  // this may be a good idea though.
 	decOptions_.expandAllWeightsToFloat = false;
 	decOptions_.expand8BitNormalsToFloat = false;
 
diff --git a/Windows/GEDebugger/TabVertices.cpp b/Windows/GEDebugger/TabVertices.cpp
index ffc6a45fb4bc..c8569dfef1e4 100644
--- a/Windows/GEDebugger/TabVertices.cpp
+++ b/Windows/GEDebugger/TabVertices.cpp
@@ -288,9 +288,7 @@ int CtrlVertexList::GetRowCount() {
 	if (!gpuDebug->GetCurrentSimpleVertices(rowCount_, vertices, indices)) {
 		rowCount_ = 0;
 	}
-	VertexDecoderOptions options;
-	memset(&options, 0, sizeof(options));
-	options.expandAllUVtoFloat = false;
+	VertexDecoderOptions options{};
 	decoder->SetVertexType(state.vertType, options);
 	return rowCount_;
 }