Skip to content

Commit

Permalink
Merge pull request #16001 from unknownbrackets/softgpu-earlyz
Browse files Browse the repository at this point in the history
softgpu: Check depth test early on simple stencil
  • Loading branch information
hrydgard authored Sep 11, 2022
2 parents a2ca85b + 15d5fa4 commit d011768
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 23 deletions.
10 changes: 7 additions & 3 deletions GPU/Software/DrawPixel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,10 @@ static inline bool DepthTestPassed(GEComparison func, int x, int y, int stride,
}
}

// Externally linkable entry point for the depth comparison.
//
// DepthTestPassed() is declared `static inline` in this file, so it cannot be
// called from other translation units.  This wrapper forwards its arguments
// unchanged and returns the result, so code outside this file can perform the
// same depth test before the per-pixel function runs (the "early Z" path).
//
// Parameters:
//   func   - depth comparison function to apply.
//   x, y   - pixel coordinates, passed straight through to DepthTestPassed().
//            NOTE(review): presumably these are drawing (framebuffer)
//            coordinates rather than screen-space ones - confirm at callers.
//   stride - depth buffer stride, passed straight through.
//   z      - depth value to test.  The parameter is u16, so a caller passing
//            a wider type (e.g. int or double) gets an implicit conversion.
//
// Returns whatever DepthTestPassed() returns for the same arguments.
bool CheckDepthTestPassed(GEComparison func, int x, int y, int stride, u16 z) {
return DepthTestPassed(func, x, y, stride, z);
}

static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
// All of the operations here intentionally preserve alpha/stencil.
switch (op) {
Expand Down Expand Up @@ -400,7 +404,7 @@ template <bool clearMode, GEBufferFormat fbFormat>
void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) {
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);
// Depth range test - applied in clear mode, if not through mode.
if (pixelID.applyDepthRange)
if (pixelID.applyDepthRange && !pixelID.earlyZChecks)
if (z < pixelID.cached.minz || z > pixelID.cached.maxz)
return;

Expand Down Expand Up @@ -436,14 +440,14 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo
}

// Also apply depth at the same time. If disabled, same as passing.
if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
if (!pixelID.earlyZChecks && pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZFail(), stencil);
SetPixelStencil(fbFormat, pixelID.cached.framebufStride, targetWriteMask, x, y, stencil);
return;
}

stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZPass(), stencil);
} else {
} else if (!pixelID.earlyZChecks) {
if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
return;
}
Expand Down
2 changes: 2 additions & 0 deletions GPU/Software/DrawPixel.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ SingleFunc GetSingleFunc(const PixelFuncID &id);
void Init();
void Shutdown();

bool CheckDepthTestPassed(GEComparison func, int x, int y, int stride, u16 z);

bool DescribeCodePtr(const u8 *ptr, std::string &name);

struct PixelBlendState {
Expand Down
10 changes: 5 additions & 5 deletions GPU/Software/DrawPixelX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ RegCache::Reg PixelJitCache::GetColorOff(const PixelFuncID &id) {
if (!regCache_.Has(RegCache::GEN_COLOR_OFF)) {
Describe("GetColorOff");
if (id.useStandardStride && !id.dithering) {
bool loadDepthOff = id.depthWrite || id.DepthTestFunc() != GE_COMP_ALWAYS;
bool loadDepthOff = id.depthWrite || (id.DepthTestFunc() != GE_COMP_ALWAYS && !id.earlyZChecks);
X64Reg depthTemp = INVALID_REG;
X64Reg argYReg = regCache_.Find(RegCache::GEN_ARG_Y);
X64Reg argXReg = regCache_.Find(RegCache::GEN_ARG_X);
Expand Down Expand Up @@ -345,7 +345,7 @@ void PixelJitCache::WriteConstantPool(const PixelFuncID &id) {
}

bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
if (id.applyDepthRange) {
if (id.applyDepthRange && !id.earlyZChecks) {
Describe("ApplyDepthR");
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
X64Reg idReg = GetPixelID();
Expand All @@ -365,7 +365,7 @@ bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
// Since this is early on, try to free up the z reg if we don't need it anymore.
if (id.clearMode && !id.DepthClear())
regCache_.ForceRelease(RegCache::GEN_ARG_Z);
else if (!id.clearMode && !id.depthWrite && id.DepthTestFunc() == GE_COMP_ALWAYS)
else if (!id.clearMode && !id.depthWrite && (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks))
regCache_.ForceRelease(RegCache::GEN_ARG_Z);

return true;
Expand Down Expand Up @@ -721,7 +721,7 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
}

bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg stencilReg) {
if (id.DepthTestFunc() == GE_COMP_ALWAYS)
if (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks)
return true;

X64Reg depthOffReg = GetDepthOff(id);
Expand Down Expand Up @@ -964,7 +964,7 @@ bool PixelJitCache::Jit_WriteStencilOnly(const PixelFuncID &id, RegCache::Reg st
}

bool PixelJitCache::Jit_DepthTest(const PixelFuncID &id) {
if (id.DepthTestFunc() == GE_COMP_ALWAYS)
if (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks)
return true;

if (id.DepthTestFunc() == GE_COMP_NEVER) {
Expand Down
7 changes: 7 additions & 0 deletions GPU/Software/FuncId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,13 @@ void ComputePixelFuncID(PixelFuncID *id, bool throughMode) {

id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY;
id->applyFog = gstate.isFogEnabled() && !throughMode;

id->earlyZChecks = id->DepthTestFunc() != GE_COMP_ALWAYS;
if (id->stencilTest && id->earlyZChecks) {
// Can't do them early if stencil might need to write.
if (id->SFail() != GE_STENCILOP_KEEP || id->ZFail() != GE_STENCILOP_KEEP)
id->earlyZChecks = false;
}
}

// Cache some values for later convenience.
Expand Down
3 changes: 2 additions & 1 deletion GPU/Software/FuncId.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ struct PixelFuncID {
uint8_t sFail : 3;
uint8_t zFail : 3;
uint8_t zPass : 3;
// 60 bits, 4 free.
bool earlyZChecks : 1;
// 61 bits, 3 free.
};
};

Expand Down
95 changes: 81 additions & 14 deletions GPU/Software/Rasterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,11 @@ void DrawTriangleSlice(
const bool flatColor1 = flatColorAll || (v0.color1 == v1.color1 && v0.color1 == v2.color1);
const bool noFog = clearMode || !pixelID.applyFog || (v0.fogdepth >= 1.0f && v1.fogdepth >= 1.0f && v2.fogdepth >= 1.0f);

if (pixelID.applyDepthRange && flatZ) {
if (v0.screenpos.z < pixelID.cached.minz || v0.screenpos.z > pixelID.cached.maxz)
return;
}

#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
uint32_t bpp = pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
std::string tag = StringFromFormat("DisplayListT_%08x", state.listPC);
Expand Down Expand Up @@ -754,6 +759,32 @@ void DrawTriangleSlice(
if (AnyMask<useSSE4>(mask)) {
Vec4<float> wsum_recip = EdgeRecip(w0, w1, w2);

Vec4<int> z;
if (flatZ) {
z = Vec4<int>::AssignToAll(v2.screenpos.z);
} else {
// Z is interpolated pretty much directly.
Vec4<float> zfloats = w0.Cast<float>() * v0.screenpos.z + w1.Cast<float>() * v1.screenpos.z + w2.Cast<float>() * v2.screenpos.z;
z = (zfloats * wsum_recip).Cast<int>();
}

if (pixelID.earlyZChecks) {
for (int i = 0; i < 4; ++i) {
if (pixelID.applyDepthRange) {
if (z[i] < pixelID.cached.minz || z[i] > pixelID.cached.maxz)
mask[i] = -1;
}
if (mask[i] < 0)
continue;

int x = p.x + (i & 1);
int y = p.y + (i / 2);
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z[i])) {
mask[i] = -1;
}
}
}

// Color interpolation is not perspective corrected on the PSP.
Vec4<int> prim_color[4];
if (!flatColor0) {
Expand Down Expand Up @@ -816,15 +847,6 @@ void DrawTriangleSlice(
}
}

Vec4<int> z;
if (flatZ) {
z = Vec4<int>::AssignToAll(v2.screenpos.z);
} else {
// Z is interpolated pretty much directly.
Vec4<float> zfloats = w0.Cast<float>() * v0.screenpos.z + w1.Cast<float>() * v1.screenpos.z + w2.Cast<float>() * v2.screenpos.z;
z = (zfloats * wsum_recip).Cast<int>();
}

PROFILE_THIS_SCOPE("draw_tri_px");
DrawingCoords subp = p;
for (int i = 0; i < 4; ++i) {
Expand Down Expand Up @@ -947,6 +969,12 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &
Vec4<int> z = Vec4<int>::AssignToAll(v1.screenpos.z);
Vec3<int> sec_color = v1.color1;

if (state.pixelID.applyDepthRange) {
// We can bail early since the Z is flat.
if (v1.screenpos.z < state.pixelID.cached.minz || v1.screenpos.z > state.pixelID.cached.maxz)
return;
}

#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
std::string tag = StringFromFormat("DisplayListR_%08x", state.listPC);
Expand All @@ -972,6 +1000,19 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &
prim_color[i] = v1.color0;
}

if (state.pixelID.earlyZChecks) {
for (int i = 0; i < 4; ++i) {
if (mask[i] < 0)
continue;

int x = p.x + (i & 1);
int y = p.y + (i / 2);
if (!CheckDepthTestPassed(state.pixelID.DepthTestFunc(), x, y, state.pixelID.cached.depthbufStride, z[i])) {
mask[i] = -1;
}
}
}

if (state.enableTextures) {
Vec4<float> s, t;
s = Vec4<float>::AssignToAll(st.s()) + sto4;
Expand Down Expand Up @@ -1038,6 +1079,20 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta
auto &pixelID = state.pixelID;
auto &samplerID = state.samplerID;

DrawingCoords p = TransformUnit::ScreenToDrawing(pos);
u16 z = pos.z;

if (pixelID.earlyZChecks) {
if (pixelID.applyDepthRange) {
if (z < pixelID.cached.minz || z > pixelID.cached.maxz)
return;
}

if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) {
return;
}
}

if (state.enableTextures) {
float s = v0.texturecoords.s();
float t = v0.texturecoords.t();
Expand All @@ -1060,9 +1115,6 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta
if (!pixelID.clearMode)
prim_color += Vec4<int>(sec_color, 0);

DrawingCoords p = TransformUnit::ScreenToDrawing(pos);
u16 z = pos.z;

u8 fog = 255;
if (pixelID.applyFog) {
fog = ClampFogDepth(v0.fogdepth);
Expand Down Expand Up @@ -1302,7 +1354,23 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range
double z = a.z;
const int steps1 = steps == 0 ? 1 : steps;
for (int i = 0; i < steps; i++) {
if (x >= range.x1 && y >= range.y1 && x <= range.x2 && y <= range.y2) {
DrawingCoords p = TransformUnit::ScreenToDrawing(x, y);

bool maskOK = x >= range.x1 && y >= range.y1 && x <= range.x2 && y <= range.y2;
if (maskOK) {
if (pixelID.earlyZChecks) {
if (pixelID.applyDepthRange) {
if (z < pixelID.cached.minz || z > pixelID.cached.maxz)
maskOK = false;
}

if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
maskOK = false;
}
}
}

if (maskOK) {
// Interpolate between the two points.
Vec4<int> prim_color;
Vec3<int> sec_color;
Expand Down Expand Up @@ -1368,7 +1436,6 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range
prim_color += Vec4<int>(sec_color, 0);

PROFILE_THIS_SCOPE("draw_px");
DrawingCoords p = TransformUnit::ScreenToDrawing(x, y);
state.drawPixel(p.x, p.y, z, fog, ToVec4IntArg(prim_color), pixelID);

#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
Expand Down

0 comments on commit d011768

Please sign in to comment.