Skip to content

Commit

Permalink
Only support decoding UV to float in vertex formats. Supporting u8 and 16 was more complexity than it's worth.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Dec 20, 2016
1 parent 9d7983e commit 1dbeca0
Show file tree
Hide file tree
Showing 11 changed files with 15 additions and 143 deletions.
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#define QUAD_INDICES_MAX 65536

DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) {
DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
decJitCache_ = new VertexDecoderJitCache();
}
Expand Down
17 changes: 0 additions & 17 deletions GPU/Common/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},

Expand Down Expand Up @@ -563,21 +561,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
Jit_ApplyWeights();
}

// Emits code that copies a u8 texcoord pair from the source vertex into the
// decoded vertex without converting it.
// The bytes at tcoff and tcoff + 1 are loaded separately, combined as
// first | (second << 8), and written with a single STR at decFmt.uvoff, so the
// two bytes following the pair are written too (padding the pair to 4 bytes).
// NOTE(review): the earlier comment credited LDRH for the zero fill, but this
// path uses two LDRB loads; presumably each load zero-extends - confirm.
void VertexDecoderJitCache::Jit_TcU8() {
// Load the two u8 components individually.
LDRB(tempReg1, srcReg, dec_->tcoff);
LDRB(tempReg2, srcReg, dec_->tcoff + 1);
// Pack: second component shifted left by 8, OR'd into the first.
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 8));
// Store the packed value at the decoded UV offset.
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}

// Emits code that copies a u16 texcoord pair from the source vertex into the
// decoded vertex without converting it.
// The halfwords at tcoff and tcoff + 2 are loaded separately, combined as
// first | (second << 16), and written with a single STR at decFmt.uvoff.
void VertexDecoderJitCache::Jit_TcU16() {
// Load the two u16 components individually.
LDRH(tempReg1, srcReg, dec_->tcoff);
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
// Pack: second component shifted left by 16, OR'd into the first.
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16));
// Store the packed pair at the decoded UV offset.
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}

void VertexDecoderJitCache::Jit_TcFloat() {
LDR(tempReg1, srcReg, dec_->tcoff);
LDR(tempReg2, srcReg, dec_->tcoff + 4);
Expand Down
12 changes: 0 additions & 12 deletions GPU/Common/VertexDecoderArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
Expand Down Expand Up @@ -578,16 +576,6 @@ void VertexDecoderJitCache::Jit_Color5551() {
CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}

// Emits code that copies a u8 texcoord pair from the source vertex into the
// decoded vertex without converting it.
// Both bytes are fetched with one halfword load (LDURH) at tcoff and then
// stored at decFmt.uvoff.
// NOTE(review): the store width depends on tempReg1's register class, which is
// not visible here; presumably a 32-bit store that also zeroes the two padding
// bytes - confirm.
void VertexDecoderJitCache::Jit_TcU8() {
LDURH(tempReg1, srcReg, dec_->tcoff);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}

// Emits code that copies a u16 texcoord pair from the source vertex into the
// decoded vertex without converting it.
// The pair is fetched with one LDUR at tcoff and stored with one STR at
// decFmt.uvoff.
// NOTE(review): presumably tempReg1 is a 32-bit register so exactly the two
// u16 components are moved - confirm against its declaration.
void VertexDecoderJitCache::Jit_TcU16() {
LDUR(tempReg1, srcReg, dec_->tcoff);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}

void VertexDecoderJitCache::Jit_TcU16Through() {
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
Expand Down
81 changes: 8 additions & 73 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,6 @@ void VertexDecoder::Step_WeightsFloatSkin() const
}
}

void VertexDecoder::Step_TcU8() const
{
// u32 to write two bytes of zeroes for free.
u32 *uv = (u32*)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16*)(ptr_ + tcoff);
*uv = *uvdata;
}

void VertexDecoder::Step_TcU8ToFloat() const
{
// Convert the u8 UV components to float, scaled by 1/128.
Expand All @@ -281,14 +273,6 @@ void VertexDecoder::Step_TcU8ToFloat() const
uv[1] = uvdata[1] * (1.0f / 128.0f);
}

void VertexDecoder::Step_TcU16() const
{
u32 *uv = (u32 *)(decoded_ + decFmt.uvoff);
// TODO: Fix big-endian without losing the optimization
const u32 *uvdata = (const u32*)(ptr_ + tcoff);
*uv = *uvdata;
}

void VertexDecoder::Step_TcU16ToFloat() const
{
float *uv = (float *)(decoded_ + decFmt.uvoff);
Expand Down Expand Up @@ -903,13 +887,6 @@ static const StepFunction wtstep_skin[4] = {
&VertexDecoder::Step_WeightsFloatSkin,
};

// Texcoord decode steps that keep the source UV format (no float expansion).
// Indexed by tc in SetVertexType; slot 0 is null and the remaining slots hold
// the u8, u16 and float steps in that order.
// Selected when UVs are not expanded to float, throughmode is off and
// g_DoubleTextureCoordinates is off.
static const StepFunction tcstep[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16,
&VertexDecoder::Step_TcFloat,
};

static const StepFunction tcstepToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
Expand Down Expand Up @@ -973,42 +950,20 @@ static const StepFunction tcstep_morph_remasterToFloat[4] = {
&VertexDecoder::Step_TcFloatMorph,
};

// Through-mode texcoord decode steps that keep the source UV format.
// Indexed by tc in SetVertexType; slot 0 is null. The u8 slot reuses the plain
// Step_TcU8, while u16 and float use the *Through variants.
// Selected when UVs are not expanded to float, throughmode is on and
// g_DoubleTextureCoordinates is off.
static const StepFunction tcstep_through[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16Through,
&VertexDecoder::Step_TcFloatThrough,
};

// Through-mode texcoord decode steps that produce float UVs.
// Indexed by tc in SetVertexType; slot 0 is null. Selected when throughmode is
// on and g_DoubleTextureCoordinates is off; the caller then sets
// decFmt.uvfmt to DEC_FLOAT_2.
static const StepFunction tcstep_throughToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
&VertexDecoder::Step_TcU16ThroughToFloat,
&VertexDecoder::Step_TcFloatThrough,
};

// Some HD Remaster games double the u16 texture coordinates.
// This table keeps the source UV format and differs from the plain table only
// in the u16 slot (Step_TcU16Double). Indexed by tc in SetVertexType; slot 0 is
// null. Selected when g_DoubleTextureCoordinates is on, throughmode is off and
// UVs are not expanded to float.
static const StepFunction tcstep_remaster[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16Double,
&VertexDecoder::Step_TcFloat,
};

// Float-output texcoord decode steps used when g_DoubleTextureCoordinates is
// on and throughmode is off. Indexed by tc in SetVertexType; slot 0 is null.
// Only the u16 slot uses a doubling step (Step_TcU16DoubleToFloat).
static const StepFunction tcstep_remasterToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
&VertexDecoder::Step_TcU16DoubleToFloat,
&VertexDecoder::Step_TcFloat,
};

// Through-mode texcoord decode steps that keep the source UV format, used when
// g_DoubleTextureCoordinates is on and UVs are not expanded to float.
// Indexed by tc in SetVertexType; slot 0 is null. Only the u16 slot uses a
// doubling step (Step_TcU16ThroughDouble).
static const StepFunction tcstep_through_remaster[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16ThroughDouble,
&VertexDecoder::Step_TcFloatThrough,
};

static const StepFunction tcstep_through_remasterToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
Expand Down Expand Up @@ -1180,34 +1135,14 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (options.expandAllUVtoFloat) {
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];

switch (tc) {
case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
break;
case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
break;
case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = DEC_FLOAT_2;
break;
}
}
// We now always expand UV to float.
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
decFmt.uvfmt = DEC_FLOAT_2;
}

decFmt.uvoff = decOff;
Expand Down
5 changes: 0 additions & 5 deletions GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,6 @@ int TranslateNumBones(int bones);
typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);

// Options passed to VertexDecoder::SetVertexType that control how vertex
// components are expanded in the decoded format.
struct VertexDecoderOptions {
// When true, SetVertexType picks the *ToFloat texcoord step tables and sets
// decFmt.uvfmt to DEC_FLOAT_2; when false the source UV format is kept.
bool expandAllUVtoFloat;
// Presumably expands bone weights to float in the decoded format - the code
// consuming this flag is not visible here; confirm.
bool expandAllWeightsToFloat;
// Presumably expands 8-bit normals to float in the decoded format - the code
// consuming this flag is not visible here; confirm.
bool expand8BitNormalsToFloat;
};
Expand Down Expand Up @@ -477,8 +476,6 @@ class VertexDecoder {
void Step_WeightsU16Skin() const;
void Step_WeightsFloatSkin() const;

void Step_TcU8() const;
void Step_TcU16() const;
void Step_TcU8ToFloat() const;
void Step_TcU16ToFloat() const;
void Step_TcFloat() const;
Expand Down Expand Up @@ -633,9 +630,7 @@ class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
void Jit_WeightsU16Skin();
void Jit_WeightsFloatSkin();

void Jit_TcU8();
void Jit_TcU8ToFloat();
void Jit_TcU16();
void Jit_TcU16ToFloat();
void Jit_TcFloat();

Expand Down
13 changes: 0 additions & 13 deletions GPU/Common/VertexDecoderX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
Expand Down Expand Up @@ -687,17 +685,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
}
}

// Emits code that copies a u8 texcoord pair from the source vertex into the
// decoded vertex without converting it.
// Fill last two bytes with zeroes to align to 4 bytes. MOVZX does it for us, handy.
void VertexDecoderJitCache::Jit_TcU8() {
// Load the 16-bit pair at tcoff, zero-extended into a 32-bit register.
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
// Store all 32 bits at the decoded UV offset.
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}

// Emits code that copies a u16 texcoord pair from the source vertex into the
// decoded vertex without converting it: one 32-bit load from tcoff followed by
// one 32-bit store to decFmt.uvoff.
void VertexDecoderJitCache::Jit_TcU16() {
MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}

void VertexDecoderJitCache::Jit_TcU8ToFloat() {
Jit_AnyU8ToFloat(dec_->tcoff, 16);
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3);
Expand Down
3 changes: 0 additions & 3 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ DrawEngineDX9::DrawEngineDX9()
dcid_(0),
fboTexNeedBind_(false),
fboTexBound_(false) {

memset(&decOptions_, 0, sizeof(decOptions_));
decOptions_.expandAllUVtoFloat = true;
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;

Expand Down
2 changes: 0 additions & 2 deletions GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,6 @@ DrawEngineGLES::DrawEngineGLES()
fboTexBound_(false) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
bufferDecimationCounter_ = VERTEXCACHE_NAME_DECIMATION_INTERVAL;
memset(&decOptions_, 0, sizeof(decOptions_));
decOptions_.expandAllUVtoFloat = false;
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
Expand Down
12 changes: 3 additions & 9 deletions GPU/Software/TransformUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,7 @@ int TransformUnit::patchBufferSize_ = 0;

void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) {
VertexDecoder vdecoder;
VertexDecoderOptions options;
memset(&options, 0, sizeof(options));
options.expandAllUVtoFloat = false;
VertexDecoderOptions options{};
vdecoder.SetVertexType(vertex_type, options);
const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();

Expand Down Expand Up @@ -310,9 +308,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type
{
// TODO: Cache VertexDecoder objects
VertexDecoder vdecoder;
VertexDecoderOptions options;
memset(&options, 0, sizeof(options));
options.expandAllUVtoFloat = false;
VertexDecoderOptions options{};
vdecoder.SetVertexType(vertex_type, options);
const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();

Expand Down Expand Up @@ -569,9 +565,7 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVert
simpleVertices.resize(indexUpperBound + 1);

VertexDecoder vdecoder;
VertexDecoderOptions options;
memset(&options, 0, sizeof(options));
options.expandAllUVtoFloat = false; // TODO: True should be fine here
VertexDecoderOptions options{};
vdecoder.SetVertexType(gstate.vertType, options);
DrawEngineCommon::NormalizeVertices((u8 *)(&simpleVertices[0]), (u8 *)(&temp_buffer[0]), Memory::GetPointer(gstate_c.vertexAddr), &vdecoder, indexLowerBound, indexUpperBound, gstate.vertType);

Expand Down
7 changes: 2 additions & 5 deletions GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,9 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan)
fboTexNeedBind_(false),
fboTexBound_(false),
curFrame_(0),
nullTexture_(nullptr) {
nullTexture_(nullptr),
stats_{} {

memset(&stats_, 0, sizeof(stats_));

memset(&decOptions_, 0, sizeof(decOptions_));
decOptions_.expandAllUVtoFloat = false; // this may be a good idea though.
decOptions_.expandAllWeightsToFloat = false;
decOptions_.expand8BitNormalsToFloat = false;

Expand Down
4 changes: 1 addition & 3 deletions Windows/GEDebugger/TabVertices.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,7 @@ int CtrlVertexList::GetRowCount() {
if (!gpuDebug->GetCurrentSimpleVertices(rowCount_, vertices, indices)) {
rowCount_ = 0;
}
VertexDecoderOptions options;
memset(&options, 0, sizeof(options));
options.expandAllUVtoFloat = false;
VertexDecoderOptions options{};
decoder->SetVertexType(state.vertType, options);
return rowCount_;
}
Expand Down

0 comments on commit 1dbeca0

Please sign in to comment.