From 1dbeca0618966f4c8156d81e5b25d40f9f8b89e0 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 20 Dec 2016 13:42:54 +0100 Subject: [PATCH] Only support decoding UV to float in vertex formats. Supporting u8 and 16 was more complexity than it's worth. --- GPU/Common/DrawEngineCommon.cpp | 2 +- GPU/Common/VertexDecoderArm.cpp | 17 ------- GPU/Common/VertexDecoderArm64.cpp | 12 ----- GPU/Common/VertexDecoderCommon.cpp | 81 +++--------------------------- GPU/Common/VertexDecoderCommon.h | 5 -- GPU/Common/VertexDecoderX86.cpp | 13 ----- GPU/Directx9/DrawEngineDX9.cpp | 3 -- GPU/GLES/DrawEngineGLES.cpp | 2 - GPU/Software/TransformUnit.cpp | 12 ++--- GPU/Vulkan/DrawEngineVulkan.cpp | 7 +-- Windows/GEDebugger/TabVertices.cpp | 4 +- 11 files changed, 15 insertions(+), 143 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 6090af0d9581..c25eca683966 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -27,7 +27,7 @@ #define QUAD_INDICES_MAX 65536 -DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) { +DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} { quadIndices_ = new u16[6 * QUAD_INDICES_MAX]; decJitCache_ = new VertexDecoderJitCache(); } diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp index a67156f772be..76975aefd09c 100644 --- a/GPU/Common/VertexDecoderArm.cpp +++ b/GPU/Common/VertexDecoderArm.cpp @@ -119,8 +119,6 @@ static const JitLookup jitLookup[] = { {&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin}, {&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin}, - {&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8}, - {&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16}, {&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat}, {&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double}, @@ -563,21 +561,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() { Jit_ApplyWeights(); } -// Fill last two bytes with zeroes to align to 4 bytes. LDRH does it for us, handy. -void VertexDecoderJitCache::Jit_TcU8() { - LDRB(tempReg1, srcReg, dec_->tcoff); - LDRB(tempReg2, srcReg, dec_->tcoff + 1); - ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 8)); - STR(tempReg1, dstReg, dec_->decFmt.uvoff); -} - -void VertexDecoderJitCache::Jit_TcU16() { - LDRH(tempReg1, srcReg, dec_->tcoff); - LDRH(tempReg2, srcReg, dec_->tcoff + 2); - ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16)); - STR(tempReg1, dstReg, dec_->decFmt.uvoff); -} - void VertexDecoderJitCache::Jit_TcFloat() { LDR(tempReg1, srcReg, dec_->tcoff); LDR(tempReg2, srcReg, dec_->tcoff + 4); diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index 4ac1a734d231..e196da58f2eb 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -92,8 +92,6 @@ static const JitLookup jitLookup[] = { {&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin}, {&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin}, - {&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8}, - {&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16}, {&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat}, {&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double}, {&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale}, @@ -578,16 +576,6 @@ void VertexDecoderJitCache::Jit_Color5551() { CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ); } -void VertexDecoderJitCache::Jit_TcU8() { - LDURH(tempReg1, srcReg, dec_->tcoff); - STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); -} - -void VertexDecoderJitCache::Jit_TcU16() { - LDUR(tempReg1, srcReg, dec_->tcoff); - STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff); -} - void VertexDecoderJitCache::Jit_TcU16Through() { LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff); LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2); diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index 587cdc4ee005..80356e8cf503 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -264,14 +264,6 @@ void VertexDecoder::Step_WeightsFloatSkin() const } } -void VertexDecoder::Step_TcU8() const -{ - // u32 to write two bytes of zeroes for free. - u32 *uv = (u32*)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - *uv = *uvdata; -} - void VertexDecoder::Step_TcU8ToFloat() const { // u32 to write two bytes of zeroes for free. @@ -281,14 +273,6 @@ void VertexDecoder::Step_TcU8ToFloat() const uv[1] = uvdata[1] * (1.0f / 128.0f); } -void VertexDecoder::Step_TcU16() const -{ - u32 *uv = (u32 *)(decoded_ + decFmt.uvoff); - // TODO: Fix big-endian without losing the optimization - const u32 *uvdata = (const u32*)(ptr_ + tcoff); - *uv = *uvdata; -} - void VertexDecoder::Step_TcU16ToFloat() const { float *uv = (float *)(decoded_ + decFmt.uvoff); @@ -903,13 +887,6 @@ static const StepFunction wtstep_skin[4] = { &VertexDecoder::Step_WeightsFloatSkin, }; -static const StepFunction tcstep[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16, - &VertexDecoder::Step_TcFloat, -}; - static const StepFunction tcstepToFloat[4] = { 0, &VertexDecoder::Step_TcU8ToFloat, @@ -973,13 +950,6 @@ static const StepFunction tcstep_morph_remasterToFloat[4] = { &VertexDecoder::Step_TcFloatMorph, }; -static const StepFunction tcstep_through[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16Through, - &VertexDecoder::Step_TcFloatThrough, -}; - static const StepFunction tcstep_throughToFloat[4] = { 0, &VertexDecoder::Step_TcU8ToFloat, @@ -987,14 +957,6 @@ static const StepFunction tcstep_throughToFloat[4] = { &VertexDecoder::Step_TcFloatThrough, }; -// Some HD Remaster games double the u16 texture coordinates. -static const StepFunction tcstep_remaster[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16Double, - &VertexDecoder::Step_TcFloat, -}; - static const StepFunction tcstep_remasterToFloat[4] = { 0, &VertexDecoder::Step_TcU8ToFloat, @@ -1002,13 +964,6 @@ static const StepFunction tcstep_remasterToFloat[4] = { &VertexDecoder::Step_TcFloat, }; -static const StepFunction tcstep_through_remaster[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16ThroughDouble, - &VertexDecoder::Step_TcFloatThrough, -}; - static const StepFunction tcstep_through_remasterToFloat[4] = { 0, &VertexDecoder::Step_TcU8ToFloat, @@ -1180,34 +1135,14 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc]; decFmt.uvfmt = DEC_FLOAT_2; } else { - if (options.expandAllUVtoFloat) { - if (morphcount != 1 && !throughmode) - steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc]; - else if (g_DoubleTextureCoordinates) - steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc]; - else - steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc]; - decFmt.uvfmt = DEC_FLOAT_2; - } else { - if (morphcount != 1 && !throughmode) - steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc]; - else if (g_DoubleTextureCoordinates) - steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc]; - else - steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc]; - - switch (tc) { - case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2; - break; - case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2; - break; - case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = DEC_FLOAT_2; - break; - } - } + // We now always expand UV to float. + if (morphcount != 1 && !throughmode) + steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc]; + else if (g_DoubleTextureCoordinates) + steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc]; + else + steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc]; + decFmt.uvfmt = DEC_FLOAT_2; } decFmt.uvoff = decOff; diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index b7ec77df6366..f735fc80b03c 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -443,7 +443,6 @@ int TranslateNumBones(int bones); typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count); struct VertexDecoderOptions { - bool expandAllUVtoFloat; bool expandAllWeightsToFloat; bool expand8BitNormalsToFloat; }; @@ -477,8 +476,6 @@ class VertexDecoder { void Step_WeightsU16Skin() const; void Step_WeightsFloatSkin() const; - void Step_TcU8() const; - void Step_TcU16() const; void Step_TcU8ToFloat() const; void Step_TcU16ToFloat() const; void Step_TcFloat() const; @@ -633,9 +630,7 @@ class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock { void Jit_WeightsU16Skin(); void Jit_WeightsFloatSkin(); - void Jit_TcU8(); void Jit_TcU8ToFloat(); - void Jit_TcU16(); void Jit_TcU16ToFloat(); void Jit_TcFloat(); diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp index 0ba4a81c7c0a..2127bbe27115 100644 --- a/GPU/Common/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -97,8 +97,6 @@ static const JitLookup jitLookup[] = { {&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin}, {&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin}, - {&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8}, - {&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16}, {&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat}, {&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat}, {&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat}, @@ -687,17 +685,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() { } } -// Fill last two bytes with zeroes to align to 4 bytes. MOVZX does it for us, handy. -void VertexDecoderJitCache::Jit_TcU8() { - MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff)); - MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); -} - -void VertexDecoderJitCache::Jit_TcU16() { - MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff)); - MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); -} - void VertexDecoderJitCache::Jit_TcU8ToFloat() { Jit_AnyU8ToFloat(dec_->tcoff, 16); MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3); diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 02756eeb76a9..ef3a29e31dcc 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -91,9 +91,6 @@ DrawEngineDX9::DrawEngineDX9() dcid_(0), fboTexNeedBind_(false), fboTexBound_(false) { - - memset(&decOptions_, 0, sizeof(decOptions_)); - decOptions_.expandAllUVtoFloat = true; decOptions_.expandAllWeightsToFloat = true; decOptions_.expand8BitNormalsToFloat = true; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 5911c0ca60a6..51a6c34fb108 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -130,8 +130,6 @@ DrawEngineGLES::DrawEngineGLES() fboTexBound_(false) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; bufferDecimationCounter_ = VERTEXCACHE_NAME_DECIMATION_INTERVAL; - memset(&decOptions_, 0, sizeof(decOptions_)); - decOptions_.expandAllUVtoFloat = false; // Allocate nicely aligned memory. Maybe graphics drivers will // appreciate it. // All this is a LOT of memory, need to see if we can cut down somehow. diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 26d623e63470..c45bc37c0ec6 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -217,9 +217,7 @@ int TransformUnit::patchBufferSize_ = 0; void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) { VertexDecoder vdecoder; - VertexDecoderOptions options; - memset(&options, 0, sizeof(options)); - options.expandAllUVtoFloat = false; + VertexDecoderOptions options{}; vdecoder.SetVertexType(vertex_type, options); const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); @@ -310,9 +308,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type { // TODO: Cache VertexDecoder objects VertexDecoder vdecoder; - VertexDecoderOptions options; - memset(&options, 0, sizeof(options)); - options.expandAllUVtoFloat = false; + VertexDecoderOptions options{}; vdecoder.SetVertexType(vertex_type, options); const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); @@ -569,9 +565,7 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vectorGetCurrentSimpleVertices(rowCount_, vertices, indices)) { rowCount_ = 0; } - VertexDecoderOptions options; - memset(&options, 0, sizeof(options)); - options.expandAllUVtoFloat = false; + VertexDecoderOptions options{}; decoder->SetVertexType(state.vertType, options); return rowCount_; }