Skip to content

Commit

Permalink
Merge pull request #9176 from hrydgard/always-prescale-uv
Browse files Browse the repository at this point in the history
Always prescale uv ("texcoord speedhack")
  • Loading branch information
hrydgard authored Dec 20, 2016
2 parents 80af358 + 0fa2f2c commit e9bea75
Show file tree
Hide file tree
Showing 32 changed files with 118 additions and 581 deletions.
1 change: 0 additions & 1 deletion Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,6 @@ static ConfigSetting debuggerSettings[] = {
};

static ConfigSetting speedHackSettings[] = {
ReportedConfigSetting("PrescaleUVCoords", &g_Config.bPrescaleUV, true, true, true),
ReportedConfigSetting("DisableAlphaTest", &g_Config.bDisableAlphaTest, false, true, true),

ConfigSetting(false),
Expand Down
9 changes: 1 addition & 8 deletions Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,7 @@ struct Config {
float fAnalogLimiterDeadzone;
// GLES backend-specific hacks. Not saved to the ini file, do not add checkboxes. Will be made into
// proper options when good enough.
// PrescaleUV:
// * Applies UV scale/offset when decoding verts. Get rid of some work in the vertex shader,
// saves a uniform upload and is a prerequisite for future optimized hybrid
// (SW skinning, HW transform) skinning.
// * Still has major problems so off by default - need to store tex scale/offset per DeferredDrawCall,
// which currently isn't done so if texscale/offset isn't static (like in Tekken 6) things go wrong.
bool bPrescaleUV;
bool bDisableAlphaTest; // Helps PowerVR immensely, breaks some graphics
bool bDisableAlphaTest; // Helps PowerVR performance immensely, breaks some graphics
// End GLES hacks.

// Use the hardware scaler to scale up the image to save fillrate. Similar to Windows' window size, really.
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#define QUAD_INDICES_MAX 65536

DrawEngineCommon::DrawEngineCommon() : dec_(nullptr) {
DrawEngineCommon::DrawEngineCommon() : dec_(nullptr), decOptions_{} {
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
decJitCache_ = new VertexDecoderJitCache();
}
Expand Down
6 changes: 0 additions & 6 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ std::string VertexShaderDesc(const ShaderID &id) {
}
if (id.Bits(VS_BIT_MATERIAL_UPDATE, 3)) desc << "MatUp:" << id.Bits(VS_BIT_MATERIAL_UPDATE, 3) << " ";
if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
if (id.Bits(VS_BIT_TEXCOORD_FMTSCALE, 2)) desc << "TCScale " << id.Bits(VS_BIT_TEXCOORD_FMTSCALE, 2) << " ";
if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";

// TODO: More...
Expand Down Expand Up @@ -119,11 +118,6 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)

id.SetBit(VS_BIT_NORM_REVERSE, gstate.areNormalsReversed());
id.SetBit(VS_BIT_HAS_TEXCOORD, hasTexcoord);
if (doTextureProjection && gstate.getUVProjMode() == GE_PROJMAP_UV) {
id.SetBits(VS_BIT_TEXCOORD_FMTSCALE, 2, (vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT); // two bits
} else {
id.SetBits(VS_BIT_TEXCOORD_FMTSCALE, 2, 3); // float - no scaling
}
}

id.SetBit(VS_BIT_FLATSHADE, doFlatShading);
Expand Down
1 change: 0 additions & 1 deletion GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ enum {
VS_BIT_LIGHT3_ENABLE = 55,
VS_BIT_LIGHTING_ENABLE = 56,
VS_BIT_WEIGHT_FMTSCALE = 57, // only two bits, 1 free after
VS_BIT_TEXCOORD_FMTSCALE = 60,
VS_BIT_FLATSHADE = 62, // 1 free after
};

Expand Down
14 changes: 3 additions & 11 deletions GPU/Common/SoftwareTransformCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,9 @@ void SoftwareTransform(

float uscale = 1.0f;
float vscale = 1.0f;
bool scaleUV = false;
if (throughmode) {
uscale /= gstate_c.curTextureWidth;
vscale /= gstate_c.curTextureHeight;
} else {
scaleUV = !g_Config.bPrescaleUV;
}

bool skinningEnabled = vertTypeIsSkinningEnabled(vertType);
Expand Down Expand Up @@ -317,14 +314,9 @@ void SoftwareTransform(
switch (gstate.getUVGenMode()) {
case GE_TEXMAP_TEXTURE_COORDS: // UV mapping
case GE_TEXMAP_UNKNOWN: // Seen in Riviera. Unsure of meaning, but this works.
// Texture scale/offset is only performed in this mode.
if (scaleUV) {
uv[0] = ruv[0]*gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = ruv[1]*gstate_c.uv.vScale + gstate_c.uv.vOff;
} else {
uv[0] = ruv[0];
uv[1] = ruv[1];
}
// We always prescale in the vertex decoder now.
uv[0] = ruv[0];
uv[1] = ruv[1];
uv[2] = 1.0f;
break;

Expand Down
12 changes: 6 additions & 6 deletions GPU/Common/SplineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,21 +864,21 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;

UVScale prevUVScale;
if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if ((origVertType & GE_VTYPE_TC_MASK) != 0) {
// We scaled during Normalize already so let's turn it off when drawing.
prevUVScale = gstate_c.uv;
gstate_c.uv.uScale = 1.0f;
gstate_c.uv.vScale = 1.0f;
gstate_c.uv.uOff = 0;
gstate_c.uv.vOff = 0;
gstate_c.uv.uOff = 0.0f;
gstate_c.uv.vOff = 0.0f;
}

int generatedBytesRead;
DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeWithIndex16, &generatedBytesRead);

DispatchFlush();

if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if ((origVertType & GE_VTYPE_TC_MASK) != 0) {
gstate_c.uv = prevUVScale;
}
}
Expand Down Expand Up @@ -979,7 +979,7 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;

UVScale prevUVScale;
if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if (origVertType & GE_VTYPE_TC_MASK) {
// We scaled during Normalize already so let's turn it off when drawing.
prevUVScale = gstate_c.uv;
gstate_c.uv.uScale = 1.0f;
Expand All @@ -993,7 +993,7 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi

DispatchFlush();

if (g_Config.bPrescaleUV && (origVertType & GE_VTYPE_TC_MASK) != 0) {
if (origVertType & GE_VTYPE_TC_MASK) {
gstate_c.uv = prevUVScale;
}
}
17 changes: 0 additions & 17 deletions GPU/Common/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},

Expand Down Expand Up @@ -563,21 +561,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
Jit_ApplyWeights();
}

// Emits code that copies a pair of 8-bit texcoords from the source vertex to the
// decoded vertex without converting them.
// The two components are fetched with separate LDRB byte loads (tcoff and tcoff + 1)
// and merged into the low 16 bits of tempReg1; no LDRH is used on this path.
// Byte loads zero-extend, so the word written by STR has its last two bytes set to
// zero, which pads the decoded texcoord out to 4 bytes.
void VertexDecoderJitCache::Jit_TcU8() {
LDRB(tempReg1, srcReg, dec_->tcoff);
LDRB(tempReg2, srcReg, dec_->tcoff + 1);
// tempReg1 |= tempReg2 << 8
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 8));
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}

// Emits code that copies a pair of 16-bit texcoords unchanged.
// Each component is fetched with its own LDRH halfword load (tcoff and tcoff + 2),
// the two are packed into one register, and a single STR writes them to uvoff.
void VertexDecoderJitCache::Jit_TcU16() {
LDRH(tempReg1, srcReg, dec_->tcoff);
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
// tempReg1 |= tempReg2 << 16
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16));
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}

void VertexDecoderJitCache::Jit_TcFloat() {
LDR(tempReg1, srcReg, dec_->tcoff);
LDR(tempReg2, srcReg, dec_->tcoff + 4);
Expand Down
12 changes: 0 additions & 12 deletions GPU/Common/VertexDecoderArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
Expand Down Expand Up @@ -578,16 +576,6 @@ void VertexDecoderJitCache::Jit_Color5551() {
CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}

// Emits code that copies a pair of 8-bit texcoords without converting them.
// A single LDURH (unscaled-offset halfword load) picks up both bytes at tcoff,
// and the STR that follows writes the register to uvoff.
// NOTE(review): presumably tempReg1 is a 32-bit register, so the load zero-extends
// and the store pads the decoded texcoord to 4 bytes - confirm against its definition.
void VertexDecoderJitCache::Jit_TcU8() {
LDURH(tempReg1, srcReg, dec_->tcoff);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}

// Emits code that copies a pair of 16-bit texcoords unchanged: one LDUR
// (unscaled-offset load) reads both components at tcoff and one STR writes them to uvoff.
// NOTE(review): presumably the unscaled form is used because tcoff is not guaranteed
// to be a multiple of the access size - confirm.
void VertexDecoderJitCache::Jit_TcU16() {
LDUR(tempReg1, srcReg, dec_->tcoff);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}

void VertexDecoderJitCache::Jit_TcU16Through() {
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
Expand Down
83 changes: 9 additions & 74 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,6 @@ void VertexDecoder::Step_WeightsFloatSkin() const
}
}

void VertexDecoder::Step_TcU8() const
{
// u32 to write two bytes of zeroes for free.
u32 *uv = (u32*)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16*)(ptr_ + tcoff);
*uv = *uvdata;
}

void VertexDecoder::Step_TcU8ToFloat() const
{
// u32 to write two bytes of zeroes for free.
Expand All @@ -281,14 +273,6 @@ void VertexDecoder::Step_TcU8ToFloat() const
uv[1] = uvdata[1] * (1.0f / 128.0f);
}

void VertexDecoder::Step_TcU16() const
{
u32 *uv = (u32 *)(decoded_ + decFmt.uvoff);
// TODO: Fix big-endian without losing the optimization
const u32 *uvdata = (const u32*)(ptr_ + tcoff);
*uv = *uvdata;
}

void VertexDecoder::Step_TcU16ToFloat() const
{
float *uv = (float *)(decoded_ + decFmt.uvoff);
Expand Down Expand Up @@ -903,13 +887,6 @@ static const StepFunction wtstep_skin[4] = {
&VertexDecoder::Step_WeightsFloatSkin,
};

// Texcoord decode steps for the plain (non-through) case, indexed by the vertex
// type's texcoord format (tc).
static const StepFunction tcstep[4] = {
	nullptr,                       // no decode step for index 0
	&VertexDecoder::Step_TcU8,     // 8-bit
	&VertexDecoder::Step_TcU16,    // 16-bit
	&VertexDecoder::Step_TcFloat,  // float
};

static const StepFunction tcstepToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
Expand Down Expand Up @@ -973,42 +950,20 @@ static const StepFunction tcstep_morph_remasterToFloat[4] = {
&VertexDecoder::Step_TcFloatMorph,
};

// Texcoord decode steps used in through mode, indexed by the vertex type's
// texcoord format (tc). The 8-bit slot shares the plain Step_TcU8 step.
static const StepFunction tcstep_through[4] = {
	nullptr,                              // no decode step for index 0
	&VertexDecoder::Step_TcU8,            // 8-bit
	&VertexDecoder::Step_TcU16Through,    // 16-bit
	&VertexDecoder::Step_TcFloatThrough,  // float
};

// Through-mode texcoord decode steps that output float UVs, indexed by the vertex
// type's texcoord format (tc).
static const StepFunction tcstep_throughToFloat[4] = {
	nullptr,                                   // no decode step for index 0
	&VertexDecoder::Step_TcU8ToFloat,          // 8-bit
	&VertexDecoder::Step_TcU16ThroughToFloat,  // 16-bit
	&VertexDecoder::Step_TcFloatThrough,       // float
};

// Some HD Remaster games double the u16 texture coordinates, so the 16-bit slot
// uses the "Double" step here. Indexed by the vertex type's texcoord format (tc).
static const StepFunction tcstep_remaster[4] = {
	nullptr,                          // no decode step for index 0
	&VertexDecoder::Step_TcU8,        // 8-bit
	&VertexDecoder::Step_TcU16Double, // 16-bit
	&VertexDecoder::Step_TcFloat,     // float
};

// Remaster (doubled u16 texcoord) decode steps that output float UVs, indexed by
// the vertex type's texcoord format (tc).
static const StepFunction tcstep_remasterToFloat[4] = {
	nullptr,                                  // no decode step for index 0
	&VertexDecoder::Step_TcU8ToFloat,         // 8-bit
	&VertexDecoder::Step_TcU16DoubleToFloat,  // 16-bit
	&VertexDecoder::Step_TcFloat,             // float
};

// Through-mode decode steps for the remaster (doubled u16 texcoord) case, indexed
// by the vertex type's texcoord format (tc).
static const StepFunction tcstep_through_remaster[4] = {
	nullptr,                                  // no decode step for index 0
	&VertexDecoder::Step_TcU8,                // 8-bit
	&VertexDecoder::Step_TcU16ThroughDouble,  // 16-bit
	&VertexDecoder::Step_TcFloatThrough,      // float
};

static const StepFunction tcstep_through_remasterToFloat[4] = {
0,
&VertexDecoder::Step_TcU8ToFloat,
Expand Down Expand Up @@ -1173,41 +1128,21 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
biggest = tcalign[tc];

// NOTE: Because we check getUVGenMode here, it must be included in the decoder ID!
if (g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN)) {
if (!throughmode && (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN)) {
if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale_remaster[tc] : tcstep_prescale_morph_remaster[tc];
else
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (options.expandAllUVtoFloat) {
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];

switch (tc) {
case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
break;
case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
break;
case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = DEC_FLOAT_2;
break;
}
}
// We now always expand UV to float.
if (morphcount != 1 && !throughmode)
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
else if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
decFmt.uvfmt = DEC_FLOAT_2;
}

decFmt.uvoff = decOff;
Expand Down
5 changes: 0 additions & 5 deletions GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,6 @@ int TranslateNumBones(int bones);
typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);

// Flags passed to VertexDecoder::SetVertexType that choose how compact input
// components are expanded in the decoded vertex format.
struct VertexDecoderOptions {
// When set, SetVertexType selects the "ToFloat" texcoord steps and reports the
// decoded UV format as DEC_FLOAT_2 instead of keeping the source format.
bool expandAllUVtoFloat;
// NOTE(review): presumably the equivalent switch for skinning weights - its use is not visible here.
bool expandAllWeightsToFloat;
// NOTE(review): presumably widens 8-bit normals to float - its use is not visible here.
bool expand8BitNormalsToFloat;
};
Expand Down Expand Up @@ -477,8 +476,6 @@ class VertexDecoder {
void Step_WeightsU16Skin() const;
void Step_WeightsFloatSkin() const;

void Step_TcU8() const;
void Step_TcU16() const;
void Step_TcU8ToFloat() const;
void Step_TcU16ToFloat() const;
void Step_TcFloat() const;
Expand Down Expand Up @@ -633,9 +630,7 @@ class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
void Jit_WeightsU16Skin();
void Jit_WeightsFloatSkin();

void Jit_TcU8();
void Jit_TcU8ToFloat();
void Jit_TcU16();
void Jit_TcU16ToFloat();
void Jit_TcFloat();

Expand Down
13 changes: 0 additions & 13 deletions GPU/Common/VertexDecoderX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},

{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
Expand Down Expand Up @@ -687,17 +685,6 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
}
}

// Emits code that copies a pair of 8-bit texcoords without converting them.
// Both bytes are read as one 16-bit value; MOVZX zero-extends it to 32 bits, so the
// 32-bit store fills the last two bytes with zeroes and pads the output to 4 bytes.
void VertexDecoderJitCache::Jit_TcU8() {
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}

// Emits code that copies a pair of 16-bit texcoords unchanged: one 32-bit load
// from tcoff into tempReg1, then one 32-bit store to uvoff.
void VertexDecoderJitCache::Jit_TcU16() {
MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}

void VertexDecoderJitCache::Jit_TcU8ToFloat() {
Jit_AnyU8ToFloat(dec_->tcoff, 16);
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3);
Expand Down
Loading

0 comments on commit e9bea75

Please sign in to comment.