From 5315c404c55778b48ab7dbf41295ed3904d0ce54 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 12:13:39 -0700 Subject: [PATCH 01/17] GPU: Cull rectangles outside valid Z. Both TL and BR must be outside in the same direction to be culled when depth clamp is enabled. --- GPU/Common/SoftwareTransformCommon.cpp | 27 +++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index ccadf1f18817..296f4dd1975a 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -64,14 +64,8 @@ static void SwapUVs(TransformedVertex &a, TransformedVertex &b) { // Note: 0 is BR and 2 is TL. -static void RotateUV(TransformedVertex v[4], float flippedMatrix[16], bool flippedY) { - // Transform these two coordinates to figure out whether they're flipped or not. - Vec4f tl; - Vec3ByMatrix44(tl.AsArray(), v[2].pos, flippedMatrix); - - Vec4f br; - Vec3ByMatrix44(br.AsArray(), v[0].pos, flippedMatrix); - +static void RotateUV(TransformedVertex v[4], Vec4f tl, Vec4f br, bool flippedY) { + // We use the transformed tl/br coordinates to figure out whether they're flipped or not. float ySign = flippedY ? -1.0 : 1.0; const float invtlw = 1.0f / tl.w; @@ -629,10 +623,21 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy trans[3].u = transVtxTL.u; // That's the four corners. Now process UV rotation. - if (throughmode) + if (throughmode) { RotateUVThrough(trans); - else - RotateUV(trans, flippedMatrix, flippedY); + } else { + Vec4f tl; + Vec3ByMatrix44(tl.AsArray(), transVtxTL.pos, flippedMatrix); + Vec4f br; + Vec3ByMatrix44(br.AsArray(), transVtxBR.pos, flippedMatrix); + + // If both transformed verts are outside Z, cull this rectangle entirely. 
+ constexpr float outsideValue = 1.000030517578125f; + if (fabsf(tl.z) >= outsideValue && fabsf(br.z) >= outsideValue) + continue; + + RotateUV(trans, tl, br, flippedY); + } // Triangle: BR-TR-TL indsOut[0] = i * 2 + 0; From 4ac36cb8103f240a3c8790da20e54cf4989edc3b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 12:18:28 -0700 Subject: [PATCH 02/17] GPU: Cull rectangles more when depth clamp off. If any vert is outside Z, it's culled when not clamping/clipping. --- GPU/Common/SoftwareTransformCommon.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 296f4dd1975a..882ebc89c6bb 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -633,7 +633,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy // If both transformed verts are outside Z, cull this rectangle entirely. constexpr float outsideValue = 1.000030517578125f; - if (fabsf(tl.z) >= outsideValue && fabsf(br.z) >= outsideValue) + bool tlOutside = fabsf(tl.z / tl.w) >= outsideValue; + bool brOutside = fabsf(br.z / br.w) >= outsideValue; + if (tlOutside && brOutside) + continue; + if (!gstate.isDepthClampEnabled() && (tlOutside || brOutside)) continue; RotateUV(trans, tl, br, flippedY); From 6252241c0ff6b5c490877ef4af259082dfe440a9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 12:20:34 -0700 Subject: [PATCH 03/17] GPU: Verify throughmode for clears/rects. 
--- GPU/Common/SoftwareTransformCommon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 882ebc89c6bb..93a72e07da58 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -432,7 +432,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt // TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not. // TODO: Allow creating a depth clear and a color draw. bool reallyAClear = false; - if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear()) { + if (maxIndex > 1 && prim == GE_PRIM_RECTANGLES && gstate.isModeClear() && throughmode) { int scissorX2 = gstate.getScissorX2() + 1; int scissorY2 = gstate.getScissorY2() + 1; reallyAClear = IsReallyAClear(transformed, maxIndex, scissorX2, scissorY2); @@ -459,7 +459,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt } // Detect full screen "clears" that might not be so obvious, to set the safe size if possible. - if (!result->setSafeSize && prim == GE_PRIM_RECTANGLES && maxIndex == 2) { + if (!result->setSafeSize && prim == GE_PRIM_RECTANGLES && maxIndex == 2 && throughmode) { bool clearingColor = gstate.isModeClear() && (gstate.isClearModeColorMask() || gstate.isClearModeAlphaMask()); bool writingColor = gstate.getColorMask() != 0xFFFFFFFF; bool startsZeroX = transformed[0].x <= 0.0f && transformed[1].x > 0.0f && transformed[1].x > transformed[0].x; From 24011c3754a8c750a13ad4245823afef371d9459 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 16:54:25 -0700 Subject: [PATCH 04/17] GPU: Correct depth handling for guardband. This culls based on pre-viewport Z and avoids culling based on the clip range at negative Z. 
--- GPU/Common/ShaderUniforms.cpp | 40 ++++++---------------------- GPU/Common/VertexShaderGenerator.cpp | 28 +++++++++---------- GPU/Directx9/ShaderManagerDX9.cpp | 11 +++----- GPU/GLES/ShaderManagerGLES.cpp | 9 +------ 4 files changed, 26 insertions(+), 62 deletions(-) diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 8ca4bacff697..a8a02e78a465 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -43,29 +43,12 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale()); return (pspViewport * heightScale) - yOffset; }; - auto reverseViewportZ = [hasNegZ](float z) { - float vpZScale = gstate.getViewportZScale(); - float vpZCenter = gstate.getViewportZCenter(); - - float scale, center; - if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) { - // These are just the reverse of the formulas in GPUStateUtils. - float halfActualZRange = vpZScale * (1.0f / gstate_c.vpDepthScale); - float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange; - - // In accurate depth mode, we're comparing against a value scaled to (minz, maxz). - // And minz might be very negative, (e.g. if we're clamping in that direction.) - scale = halfActualZRange; - center = minz + halfActualZRange; - } else { - // In old-style depth mode, we're comparing against a value scaled to viewport. - // (and possibly incorrectly clipped against it.) - scale = vpZScale; - center = vpZCenter; + auto transformZ = [hasNegZ](float z) { + // Z culling ignores the viewport, so we just redo the projection matrix adjustments. + if (hasNegZ) { + return (z * gstate_c.vpDepthScale) + gstate_c.vpZOffset; } - - float realViewport = (z - center) * (1.0f / scale); - return hasNegZ ? 
realViewport : (realViewport * 0.5f + 0.5f); + return (z * gstate_c.vpDepthScale * 0.5f) + gstate_c.vpZOffset * 0.5f + 0.5f; }; auto sortPair = [](float a, float b) { return a > b ? std::make_pair(b, a) : std::make_pair(a, b); @@ -75,7 +58,7 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo // Any vertex outside this range (unless depth clamp enabled) is discarded. auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f)); auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f)); - auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f)); + auto z = sortPair(transformZ(-1.000030517578125f), transformZ(1.000030517578125f)); // Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard". float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f; @@ -243,18 +226,11 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView float viewZScale = halfActualZRange * 2.0f; // Account for the half pixel offset. 
float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f; - float viewZInvScale; - - if (viewZScale != 0.0) { - viewZInvScale = 1.0f / viewZScale; - } else { - viewZInvScale = 0.0; - } ub->depthRange[0] = viewZScale; ub->depthRange[1] = viewZCenter; - ub->depthRange[2] = viewZCenter; - ub->depthRange[3] = viewZInvScale; + ub->depthRange[2] = gstate_c.vpZOffset * 0.5f + 0.5f; + ub->depthRange[3] = 2.0f * (1.0f / gstate_c.vpDepthScale); } if (dirtyUniforms & DIRTY_CULLRANGE) { diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index f2d3152ec59d..19b6a104c0d6 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -332,10 +332,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } } - if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, "vec4 u_depthRange : register(c%i);\n", CONST_VS_DEPTHRANGE); - } if (!isModeThrough) { + WRITE(p, "vec4 u_depthRange : register(c%i);\n", CONST_VS_DEPTHRANGE); WRITE(p, "vec4 u_cullRangeMin : register(c%i);\n", CONST_VS_CULLRANGEMIN); WRITE(p, "vec4 u_cullRangeMax : register(c%i);\n", CONST_VS_CULLRANGEMAX); } @@ -517,15 +515,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag *uniformMask |= DIRTY_FOGCOEF; } - if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, "uniform highp vec4 u_depthRange;\n"); - *uniformMask |= DIRTY_DEPTHRANGE; - } - if (!isModeThrough) { + WRITE(p, "uniform highp vec4 u_depthRange;\n"); WRITE(p, "uniform highp vec4 u_cullRangeMin;\n"); WRITE(p, "uniform highp vec4 u_cullRangeMax;\n"); - *uniformMask |= DIRTY_CULLRANGE; + *uniformMask |= DIRTY_DEPTHRANGE | DIRTY_CULLRANGE; } WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs); @@ -554,7 +548,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " float z = v.z / v.w;\n"); WRITE(p, " z = z * 
u_depthRange.x + u_depthRange.y;\n"); WRITE(p, " z = floor(z);\n"); - WRITE(p, " z = (z - u_depthRange.z) * u_depthRange.w;\n"); + WRITE(p, " z = (z - u_depthRange.y) / u_depthRange.x;\n"); WRITE(p, " return vec4(v.x, v.y, z * v.w, v.w);\n"); WRITE(p, "}\n\n"); } @@ -1099,14 +1093,20 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (vertexRangeCulling) { WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); - // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. - WRITE(p, " if (u_cullRangeMin.w <= 0.0 || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n"); - const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y || projPos.z < u_cullRangeMin.z"; - const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y || projPos.z > u_cullRangeMax.z"; + WRITE(p, " float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); + // Vertex range culling doesn't happen when Z clips, note sign of w is important. + WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n"); + const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y"; + const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y"; WRITE(p, " if (%s || %s) {\n", outMin, outMax); WRITE(p, " outPos.xyzw = u_cullRangeMax.wwww;\n"); WRITE(p, " }\n"); WRITE(p, " }\n"); + WRITE(p, " if (u_cullRangeMin.w <= 0.0) {\n"); + WRITE(p, " if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n"); + WRITE(p, " outPos.xyzw = u_cullRangeMax.wwww;\n"); + WRITE(p, " }\n"); + WRITE(p, " }\n"); } // We've named the output gl_Position in HLSL as well. 
diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 90d9b325caed..9927460cd4c5 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -439,15 +439,10 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { float viewZScale = halfActualZRange * 2.0f; // Account for the half pixel offset. float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f; - float viewZInvScale; + float reverseScale = 2.0f * (1.0f / gstate_c.vpDepthScale); + float reverseTranslate = gstate_c.vpZOffset * 0.5f + 0.5f; - if (viewZScale != 0.0) { - viewZInvScale = 1.0f / viewZScale; - } else { - viewZInvScale = 0.0; - } - - float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; + float data[4] = { viewZScale, viewZCenter, reverseTranslate, reverseScale }; VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); } if (dirtyUniforms & DIRTY_CULLRANGE) { diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 44029e691ccc..bc086b01df64 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -476,14 +476,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu viewZCenter = vpZCenter; } - float viewZInvScale; - if (viewZScale != 0.0) { - viewZInvScale = 1.0f / viewZScale; - } else { - viewZInvScale = 0.0; - } - - float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; + float data[4] = { viewZScale, viewZCenter, gstate_c.vpZOffset, 1.0f / gstate_c.vpDepthScale }; SetFloatUniform4(render_, &u_depthRange, data); } if (dirty & DIRTY_CULLRANGE) { From 273b9a3dc161cf18629dfc8a4077e475803bb7da Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 16:57:52 -0700 Subject: [PATCH 05/17] Vulkan: Add negative Z clipping. 
--- Common/GPU/Vulkan/VulkanContext.cpp | 2 ++ GPU/Common/VertexShaderGenerator.cpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index c9c53b5503e4..9427a3d3baea 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -580,6 +580,8 @@ void VulkanContext::ChooseDevice(int physical_device) { deviceFeatures_.enabled.depthClamp = deviceFeatures_.available.depthClamp; deviceFeatures_.enabled.depthBounds = deviceFeatures_.available.depthBounds; deviceFeatures_.enabled.samplerAnisotropy = deviceFeatures_.available.samplerAnisotropy; + deviceFeatures_.enabled.shaderClipDistance = deviceFeatures_.available.shaderClipDistance; + deviceFeatures_.enabled.shaderCullDistance = deviceFeatures_.available.shaderCullDistance; // For easy wireframe mode, someday. deviceFeatures_.enabled.fillModeNonSolid = deviceFeatures_.available.fillModeNonSolid; diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 19b6a104c0d6..02c30c2dbefd 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -105,6 +105,8 @@ const char *boneWeightAttrInitHLSL[9] = { // to 0 and 65535 if a depth clamping/clipping flag is set (x/y clipping is performed only if depth // needs to be clamped.) // +// Additionally, depth is clipped to negative z based on vec.z (before viewport), at -1. +// // All this above is for full transform mode. // In through mode, the Z coordinate just goes straight through and there is no perspective division. // We simulate this of course with pretty much an identity matrix. Rounding Z becomes very easy. 
@@ -1107,6 +1109,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " outPos.xyzw = u_cullRangeMax.wwww;\n"); WRITE(p, " }\n"); WRITE(p, " }\n"); + + if (compat.shaderLanguage == GLSL_VULKAN) { + WRITE(p, " %sgl_ClipDistance[0] = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix); + } } // We've named the output gl_Position in HLSL as well. From 2271b41d077b6390506b1bcd7e5d0dbc3f3718ad Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 17:10:29 -0700 Subject: [PATCH 06/17] Vulkan: Use clip distance only if supported. --- GPU/Common/VertexShaderGenerator.cpp | 3 ++- GPU/GPUState.h | 2 +- GPU/Vulkan/GPU_Vulkan.cpp | 15 ++++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 02c30c2dbefd..d969a719c263 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1110,7 +1110,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " }\n"); WRITE(p, " }\n"); - if (compat.shaderLanguage == GLSL_VULKAN) { + if (compat.shaderLanguage == GLSL_VULKAN && gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE)) { + // TODO: Not rectangles... 
WRITE(p, " %sgl_ClipDistance[0] = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix); } } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 5b11b37b7317..5806fdab240a 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -482,7 +482,7 @@ enum { GPU_SUPPORTS_32BIT_INT_FSHADER = FLAG_BIT(15), GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), - // Free bit: 18, + GPU_SUPPORTS_CLIP_CULL_DISTANCE = FLAG_BIT(18), GPU_SUPPORTS_COPY_IMAGE = FLAG_BIT(19), GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21), diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 6b4c5e7d03d0..7af0794ae6e9 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -238,21 +238,26 @@ void GPU_Vulkan::CheckGPUFeatures() { features |= GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH; } - if (vulkan_->GetDeviceFeatures().enabled.wideLines) { + auto &enabledFeatures = vulkan_->GetDeviceFeatures().enabled; + if (enabledFeatures.wideLines) { features |= GPU_SUPPORTS_WIDE_LINES; } - if (vulkan_->GetDeviceFeatures().enabled.depthClamp) { + if (enabledFeatures.depthClamp) { features |= GPU_SUPPORTS_DEPTH_CLAMP; } - if (vulkan_->GetDeviceFeatures().enabled.dualSrcBlend) { + if (enabledFeatures.shaderClipDistance && enabledFeatures.shaderCullDistance) { + // Must support at least 8 if feature supported, so we're fine. 
+ features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; + } + if (enabledFeatures.dualSrcBlend) { if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) { features |= GPU_SUPPORTS_DUALSOURCE_BLEND; } } - if (vulkan_->GetDeviceFeatures().enabled.logicOp) { + if (enabledFeatures.logicOp) { features |= GPU_SUPPORTS_LOGIC_OP; } - if (vulkan_->GetDeviceFeatures().enabled.samplerAnisotropy) { + if (enabledFeatures.samplerAnisotropy) { features |= GPU_SUPPORTS_ANISOTROPY; } From 046a5c548b11afe31fa098f2f396b62665f2cc13 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 17:17:46 -0700 Subject: [PATCH 07/17] GLES: Check clip/cull distance support. Pretty limited on GLES3+. Also D3D11. Seems like doing it on D3D9 might be a bit tricky. --- Common/GPU/D3D11/thin3d_d3d11.cpp | 2 ++ Common/GPU/OpenGL/thin3d_gl.cpp | 1 + Common/GPU/Vulkan/thin3d_vulkan.cpp | 1 + Common/GPU/thin3d.h | 1 + GPU/D3D11/GPU_D3D11.cpp | 2 ++ GPU/GLES/GPU_GLES.cpp | 2 ++ 6 files changed, 9 insertions(+) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 114692228cbd..b927382c6288 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -245,6 +245,8 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de // Seems like a fair approximation... caps_.dualSourceBlend = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; caps_.depthClampSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; + // SV_ClipDistance# seems to be 10+. 
+ caps_.clipCullDistanceSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; caps_.depthRangeMinusOneToOne = false; caps_.framebufferBlitSupported = false; diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 1c7a19792d4f..83102c88989d 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -534,6 +534,7 @@ OpenGLContext::OpenGLContext() { caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object; caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported; caps_.depthClampSupported = gl_extensions.ARB_depth_clamp; + caps_.clipCullDistanceSupported = gl_extensions.EXT_clip_cull_distance || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 0)); // Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader): // This will induce a performance penalty on many architectures though so a blanket enable of this diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 2d74240b9a1f..8d996aea0c15 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -780,6 +780,7 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) caps_.multiViewport = vulkan->GetDeviceFeatures().enabled.multiViewport != 0; caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.dualSrcBlend != 0; caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.depthClamp != 0; + caps_.clipCullDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderClipDistance != 0 && vulkan->GetDeviceFeatures().enabled.shaderCullDistance != 0; caps_.framebufferBlitSupported = true; caps_.framebufferCopySupported = true; caps_.framebufferDepthBlitSupported = false; // Can be checked for. 
diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 023098405cc5..c7a4db531a23 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -520,6 +520,7 @@ struct DeviceCaps { bool dualSourceBlend; bool logicOpSupported; bool depthClampSupported; + bool clipCullDistanceSupported; bool framebufferCopySupported; bool framebufferBlitSupported; bool framebufferDepthCopySupported; diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index f973d49e5ef0..e01dd30b2351 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -128,6 +128,8 @@ void GPU_D3D11::CheckGPUFeatures() { features |= GPU_SUPPORTS_DUALSOURCE_BLEND; if (draw_->GetDeviceCaps().depthClampSupported) features |= GPU_SUPPORTS_DEPTH_CLAMP; + if (draw_->GetDeviceCaps().clipCullDistanceSupported) + features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; features |= GPU_SUPPORTS_COPY_IMAGE; features |= GPU_SUPPORTS_TEXTURE_FLOAT; features |= GPU_SUPPORTS_INSTANCE_RENDERING; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 18b11d57f5f2..fc28f4d9ed8e 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -228,6 +228,8 @@ void GPU_GLES::CheckGPUFeatures() { if (gl_extensions.GLES3) features |= GPU_SUPPORTS_DEPTH_TEXTURE; } + if (draw_->GetDeviceCaps().clipCullDistanceSupported) + features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; // If we already have a 16-bit depth buffer, we don't need to round. bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8; From 7d00b6ca90f2c1220cce8a6aca44b4ece9bd97fa Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 17:53:50 -0700 Subject: [PATCH 08/17] GLES: Enable/disable clip distance 0. 
--- Common/GPU/OpenGL/GLQueueRunner.cpp | 15 +++++++++++++++ Common/GPU/OpenGL/GLRenderManager.h | 4 +++- Common/GPU/OpenGL/thin3d_gl.cpp | 2 +- GPU/Common/VertexShaderGenerator.cpp | 5 ++++- GPU/GLES/DepalettizeShaderGLES.cpp | 2 +- GPU/GLES/DepthBufferGLES.cpp | 2 +- GPU/GLES/FramebufferManagerGLES.cpp | 2 +- GPU/GLES/ShaderManagerGLES.cpp | 19 ++++++++++++------- GPU/GLES/ShaderManagerGLES.h | 11 +++++++++-- GPU/GLES/StencilBufferGLES.cpp | 2 +- 10 files changed, 48 insertions(+), 16 deletions(-) diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index a2d66b9a8690..f3bd46dd4b03 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -17,6 +17,11 @@ #include "GLRenderManager.h" #include "DataFormatGL.h" +// These are the same value, alias for simplicity. +#if defined(GL_CLIP_DISTANCE0_EXT) && !defined(GL_CLIP_DISTANCE0) +#define GL_CLIP_DISTANCE0 GL_CLIP_DISTANCE0_EXT +#endif + static constexpr int TEXCACHE_NAME_CACHE_SIZE = 16; #if PPSSPP_PLATFORM(IOS) @@ -798,6 +803,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last int logicOp = -1; bool logicEnabled = false; #endif + bool clipDistance0Enabled = false; GLuint blendEqColor = (GLuint)-1; GLuint blendEqAlpha = (GLuint)-1; @@ -1106,6 +1112,13 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last { if (curProgram != c.program.program) { glUseProgram(c.program.program->program); + if (c.program.program->use_clip_distance0 != clipDistance0Enabled) { + if (c.program.program->use_clip_distance0) + glEnable(GL_CLIP_DISTANCE0); + else + glDisable(GL_CLIP_DISTANCE0); + clipDistance0Enabled = c.program.program->use_clip_distance0; + } curProgram = c.program.program; } CHECK_GL_ERROR_IF_DEBUG(); @@ -1340,6 +1353,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last glDisable(GL_COLOR_LOGIC_OP); } #endif + if (clipDistance0Enabled) + 
glDisable(GL_CLIP_DISTANCE0); if ((colorMask & 15) != 15) glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); CHECK_GL_ERROR_IF_DEBUG(); diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index b4c3378739aa..110704bab254 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -119,6 +119,7 @@ class GLRProgram { std::vector semantics_; std::vector queries_; std::vector initialize_; + bool use_clip_distance0 = false; struct UniformInfo { int loc_; @@ -422,13 +423,14 @@ class GLRenderManager { // not be an active render pass. GLRProgram *CreateProgram( std::vector shaders, std::vector semantics, std::vector queries, - std::vector initalizers, bool supportDualSource) { + std::vector initalizers, bool supportDualSource, bool useClipDistance0) { GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM }; _assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders)); step.create_program.program = new GLRProgram(); step.create_program.program->semantics_ = semantics; step.create_program.program->queries_ = queries; step.create_program.program->initialize_ = initalizers; + step.create_program.program->use_clip_distance0 = useClipDistance0; step.create_program.support_dual_source = supportDualSource; _assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders"); for (size_t i = 0; i < shaders.size(); i++) { diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 83102c88989d..ab4bb2416153 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -1163,7 +1163,7 @@ bool OpenGLPipeline::LinkShaders() { std::vector initialize; for (int i = 0; i < MAX_TEXTURE_SLOTS; ++i) initialize.push_back({ &samplerLocs_[i], 0, i }); - program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false); + program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false, false); return true; } diff --git 
a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index d969a719c263..4e90be2e537c 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -141,6 +141,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (gl_extensions.EXT_gpu_shader4) { gl_exts.push_back("#extension GL_EXT_gpu_shader4 : enable"); } + if (gl_extensions.EXT_clip_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { + gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable"); + } } ShaderWriter p(buffer, compat, ShaderStage::Vertex, gl_exts.data(), gl_exts.size()); @@ -1110,7 +1113,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " }\n"); WRITE(p, " }\n"); - if (compat.shaderLanguage == GLSL_VULKAN && gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE)) { + if (gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE) && (compat.shaderLanguage == GLSL_VULKAN || ShaderLanguageIsOpenGL(compat.shaderLanguage))) { // TODO: Not rectangles... 
WRITE(p, " %sgl_ClipDistance[0] = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix); } diff --git a/GPU/GLES/DepalettizeShaderGLES.cpp b/GPU/GLES/DepalettizeShaderGLES.cpp index 47ddfa19074a..fbf544f1cae1 100644 --- a/GPU/GLES/DepalettizeShaderGLES.cpp +++ b/GPU/GLES/DepalettizeShaderGLES.cpp @@ -183,7 +183,7 @@ DepalShader *DepalShaderCacheGLES::GetDepalettizeShader(uint32_t clutMode, GEBuf std::vector shaders{ vertexShader_, fragShader }; - GLRProgram *program = render_->CreateProgram(shaders, semantics, queries, initializer, false); + GLRProgram *program = render_->CreateProgram(shaders, semantics, queries, initializer, false, false); depal->program = program; depal->fragShader = fragShader; diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 7eae61caa690..d26271372120 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -116,7 +116,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int queries.push_back({ &u_depthDownloadTo8, "u_depthTo8" }); std::vector inits; inits.push_back({ &u_depthDownloadTex, 0, TEX_SLOT_PSP_TEXTURE }); - depthDownloadProgram_ = render_->CreateProgram(shaders, semantics, queries, inits, false); + depthDownloadProgram_ = render_->CreateProgram(shaders, semantics, queries, inits, false, false); for (auto iter : shaders) { render_->DeleteShader(iter); } diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index f28b4f7016f5..56672127d9b6 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -87,7 +87,7 @@ void FramebufferManagerGLES::CompileDraw2DProgram() { std::vector semantics; semantics.push_back({ 0, "a_position" }); semantics.push_back({ 1, "a_texcoord0" }); - draw2dprogram_ = render_->CreateProgram(shaders, semantics, queries, initializers, false); + draw2dprogram_ = render_->CreateProgram(shaders, semantics, queries, initializers, false, false); for (auto shader : shaders) 
render_->DeleteShader(shader); } diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index bc086b01df64..a25231c23256 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -51,10 +51,10 @@ using namespace Lin; -Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, uint32_t glShaderType, bool useHWTransform, uint32_t attrMask, uint64_t uniformMask) - : render_(render), failed_(false), useHWTransform_(useHWTransform), attrMask_(attrMask), uniformMask_(uniformMask) { +Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, const ShaderDescGLES &params) + : render_(render), useHWTransform_(params.useHWTransform), attrMask_(params.attrMask), uniformMask_(params.uniformMask) { PROFILE_THIS_SCOPE("shadercomp"); - isFragment_ = glShaderType == GL_FRAGMENT_SHADER; + isFragment_ = params.glShaderType == GL_FRAGMENT_SHADER; source_ = code; #ifdef SHADERLOG #ifdef _WIN32 @@ -63,7 +63,7 @@ Shader::Shader(GLRenderManager *render, const char *code, const std::string &des printf("%s\n", code); #endif #endif - shader = render->CreateShader(glShaderType, source_, desc); + shader = render->CreateShader(params.glShaderType, source_, desc); } Shader::~Shader() { @@ -182,7 +182,9 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, initialize.push_back({ &u_tess_weights_u, 0, 5 }); initialize.push_back({ &u_tess_weights_v, 0, 6 }); - program = render->CreateProgram(shaders, semantics, queries, initialize, gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND); + bool useDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0; + bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE); + program = render->CreateProgram(shaders, semantics, queries, initialize, useDualSource, useClip0); // The rest, use the "dirty" mechanism. 
dirtyUniforms = DIRTY_ALL_UNIFORMS; @@ -633,7 +635,8 @@ Shader *ShaderManagerGLES::CompileFragmentShader(FShaderID FSID) { return nullptr; } std::string desc = FragmentShaderDesc(FSID); - return new Shader(render_, codeBuffer_, desc, GL_FRAGMENT_SHADER, false, 0, uniformMask); + ShaderDescGLES params{ GL_FRAGMENT_SHADER, 0, uniformMask }; + return new Shader(render_, codeBuffer_, desc, params); } Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) { @@ -646,7 +649,9 @@ Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) { return nullptr; } std::string desc = VertexShaderDesc(VSID); - return new Shader(render_, codeBuffer_, desc, GL_VERTEX_SHADER, useHWTransform, attrMask, uniformMask); + ShaderDescGLES params{ GL_VERTEX_SHADER, attrMask, uniformMask }; + params.useHWTransform = useHWTransform; + return new Shader(render_, codeBuffer_, desc, params); } Shader *ShaderManagerGLES::ApplyVertexShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat, VShaderID *VSID) { diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index c49b6700c02d..24ef3e42f118 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -114,9 +114,16 @@ class LinkedShader { // Real public interface +struct ShaderDescGLES { + uint32_t glShaderType; + uint32_t attrMask; + uint64_t uniformMask; + bool useHWTransform; +}; + class Shader { public: - Shader(GLRenderManager *render, const char *code, const std::string &desc, uint32_t glShaderType, bool useHWTransform, uint32_t attrMask, uint64_t uniformMask); + Shader(GLRenderManager *render, const char *code, const std::string &desc, const ShaderDescGLES &params); ~Shader(); GLRShader *shader; @@ -131,7 +138,7 @@ class Shader { private: GLRenderManager *render_; std::string source_; - bool failed_; + bool failed_ = false; bool useHWTransform_; bool isFragment_; uint32_t attrMask_; // only used in vertex shaders diff --git a/GPU/GLES/StencilBufferGLES.cpp 
b/GPU/GLES/StencilBufferGLES.cpp index 480690b9301c..7ea535925f2e 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -147,7 +147,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, StencilUplo queries.push_back({ &u_stencilValue, "u_stencilValue" }); std::vector inits; inits.push_back({ &u_stencilUploadTex, 0, TEX_SLOT_PSP_TEXTURE }); - stencilUploadProgram_ = render_->CreateProgram(shaders, semantics, queries, inits, false); + stencilUploadProgram_ = render_->CreateProgram(shaders, semantics, queries, inits, false, false); for (auto iter : shaders) { render_->DeleteShader(iter); } From d2ff66a660cd4c7f0f8153f68a21043c138d19d2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 18:01:36 -0700 Subject: [PATCH 09/17] UI: Clear textures on Begin. On GLES, saw a texture bound to slot 1 when UI started to draw after an emu frame, which caused a crash because there was no sampler. Let's just explicitly flush. --- Common/UI/Context.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Common/UI/Context.cpp b/Common/UI/Context.cpp index dc3fbd11d89e..ff698bbc9764 100644 --- a/Common/UI/Context.cpp +++ b/Common/UI/Context.cpp @@ -56,7 +56,11 @@ void UIContext::BeginNoTex() { void UIContext::BeginPipeline(Draw::Pipeline *pipeline, Draw::SamplerState *samplerState) { _assert_(pipeline != nullptr); - draw_->BindSamplerStates(0, 1, &samplerState); + // Also clear out any other textures bound. + Draw::SamplerState *samplers[3]{ samplerState }; + draw_->BindSamplerStates(0, 3, samplers); + Draw::Texture *textures[2]{}; + draw_->BindTextures(1, 2, textures); RebindTexture(); UIBegin(pipeline); } From 1a603fedf59c1b146466d0037f4fb66791ed14d5 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 18:03:11 -0700 Subject: [PATCH 10/17] Vulkan: Cull verts fully outside depth. Following PSP rules of -1 to 1 pre-viewport Z. This also enables it for GLES/OpenGL. 
--- GPU/Common/VertexShaderGenerator.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 4e90be2e537c..ae554ac7c72e 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1116,6 +1116,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE) && (compat.shaderLanguage == GLSL_VULKAN || ShaderLanguageIsOpenGL(compat.shaderLanguage))) { // TODO: Not rectangles... WRITE(p, " %sgl_ClipDistance[0] = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix); + // Cull any triangle fully outside in the same direction when depth clamp enabled. + WRITE(p, " if (u_cullRangeMin.w > 0.0) {\n"); + WRITE(p, " %sgl_CullDistance[0] = projPos.z - u_cullRangeMin.z;\n", compat.vsOutPrefix); + WRITE(p, " %sgl_CullDistance[1] = u_cullRangeMax.z - projPos.z;\n", compat.vsOutPrefix); + WRITE(p, " }\n"); } } From 1e66a66ed7cf87c461454648fc32f2c18f00ac4f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 18:13:43 -0700 Subject: [PATCH 11/17] D3D11: Correct clearing samplers. --- Common/GPU/D3D11/thin3d_d3d11.cpp | 2 +- Common/GPU/D3D9/thin3d_d3d9.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index b927382c6288..e239fcc395a2 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -1347,7 +1347,7 @@ void D3D11DrawContext::BindSamplerStates(int start, int count, SamplerState **st _assert_(start + count <= ARRAY_SIZE(samplers)); for (int i = 0; i < count; i++) { D3D11SamplerState *samp = (D3D11SamplerState *)states[i]; - samplers[i] = samp->ss; + samplers[i] = samp ? 
samp->ss : nullptr; } context_->PSSetSamplers(start, count, samplers); } diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index 449b74b05c50..f277d6ae95cb 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -530,7 +530,8 @@ class D3D9Context : public DrawContext { _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = 0; i < count; ++i) { D3D9SamplerState *s = static_cast<D3D9SamplerState *>(states[i]); - s->Apply(device_, start + i); + if (s) + s->Apply(device_, start + i); } } void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override { From c6a52909f989a59cab76242ca48c9d34f357681a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 18:19:56 -0700 Subject: [PATCH 12/17] D3D11: Support vertex clip/cull planes. --- GPU/Common/VertexShaderGenerator.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index ae554ac7c72e..296c734626ba 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -400,6 +400,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " vec4 gl_Position : POSITION;\n"); } else { WRITE(p, " vec4 gl_Position : SV_Position;\n"); + if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE)) { + WRITE(p, " float gl_ClipDistance : SV_ClipDistance0;\n"); + WRITE(p, " float2 gl_CullDistance : SV_CullDistance0;\n"); + } } WRITE(p, "};\n"); } else { @@ -1113,13 +1117,16 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " }\n"); WRITE(p, " }\n"); - if (gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE) && (compat.shaderLanguage == GLSL_VULKAN || ShaderLanguageIsOpenGL(compat.shaderLanguage))) { + const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? 
"" : "[0]"; + const char *cull0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]"; + const char *cull1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]"; + if (gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE)) { // TODO: Not rectangles... - WRITE(p, " %sgl_ClipDistance[0] = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix); + WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, clip0); // Cull any triangle fully outside in the same direction when depth clamp enabled. WRITE(p, " if (u_cullRangeMin.w > 0.0) {\n"); - WRITE(p, " %sgl_CullDistance[0] = projPos.z - u_cullRangeMin.z;\n", compat.vsOutPrefix); - WRITE(p, " %sgl_CullDistance[1] = u_cullRangeMax.z - projPos.z;\n", compat.vsOutPrefix); + WRITE(p, " %sgl_CullDistance%s = projPos.z - u_cullRangeMin.z;\n", compat.vsOutPrefix, cull0); + WRITE(p, " %sgl_CullDistance%s = u_cullRangeMax.z - projPos.z;\n", compat.vsOutPrefix, cull1); WRITE(p, " }\n"); } } From 1c7cd67f6ddb7d8d90962ef52d13d695746c6d90 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 11 Sep 2021 19:08:02 -0700 Subject: [PATCH 13/17] iOS: Buildfix bad GLES headers. --- Common/GPU/OpenGL/GLQueueRunner.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index f3bd46dd4b03..e229f59c1167 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -20,6 +20,8 @@ // These are the same value, alias for simplicity. #if defined(GL_CLIP_DISTANCE0_EXT) && !defined(GL_CLIP_DISTANCE0) #define GL_CLIP_DISTANCE0 GL_CLIP_DISTANCE0_EXT +#elif !defined(GL_CLIP_DISTANCE0) +#define GL_CLIP_DISTANCE0 0x3000 #endif static constexpr int TEXCACHE_NAME_CACHE_SIZE = 16; From 5e6f54033eb937a665f98537e505c4e038910b1d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 19 Sep 2021 07:14:54 -0700 Subject: [PATCH 14/17] GPU: Split clip and cull caps. 
GL_ARB_cull_distance is needed, sometimes available on older GL. --- Common/GPU/D3D11/thin3d_d3d11.cpp | 3 ++- Common/GPU/OpenGL/thin3d_gl.cpp | 3 ++- Common/GPU/Vulkan/thin3d_vulkan.cpp | 3 ++- Common/GPU/thin3d.h | 3 ++- GPU/D3D11/GPU_D3D11.cpp | 2 +- GPU/GLES/GPU_GLES.cpp | 2 +- 6 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index e239fcc395a2..52192fce7471 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -246,7 +246,8 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de caps_.dualSourceBlend = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; caps_.depthClampSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; // SV_ClipDistance# seems to be 10+. - caps_.clipCullDistanceSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; + caps_.clipDistanceSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; + caps_.cullDistanceSupported = featureLevel_ >= D3D_FEATURE_LEVEL_10_0; caps_.depthRangeMinusOneToOne = false; caps_.framebufferBlitSupported = false; diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index ab4bb2416153..554f2cdd7e4a 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -534,7 +534,8 @@ OpenGLContext::OpenGLContext() { caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object; caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported; caps_.depthClampSupported = gl_extensions.ARB_depth_clamp; - caps_.clipCullDistanceSupported = gl_extensions.EXT_clip_cull_distance || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 0)); + caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 0)); + caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.ARB_cull_distance; // Interesting potential 
hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader): // This will induce a performance penalty on many architectures though so a blanket enable of this diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 8d996aea0c15..1dd083df6bc7 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -780,7 +780,8 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) caps_.multiViewport = vulkan->GetDeviceFeatures().enabled.multiViewport != 0; caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.dualSrcBlend != 0; caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.depthClamp != 0; - caps_.clipCullDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderClipDistance != 0 && vulkan->GetDeviceFeatures().enabled.shaderCullDistance != 0; + caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderClipDistance != 0; + caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.shaderCullDistance != 0; caps_.framebufferBlitSupported = true; caps_.framebufferCopySupported = true; caps_.framebufferDepthBlitSupported = false; // Can be checked for. 
diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index c7a4db531a23..a3f05385c40d 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -520,7 +520,8 @@ struct DeviceCaps { bool dualSourceBlend; bool logicOpSupported; bool depthClampSupported; - bool clipCullDistanceSupported; + bool clipDistanceSupported; + bool cullDistanceSupported; bool framebufferCopySupported; bool framebufferBlitSupported; bool framebufferDepthCopySupported; diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index e01dd30b2351..5d3f86467341 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -128,7 +128,7 @@ void GPU_D3D11::CheckGPUFeatures() { features |= GPU_SUPPORTS_DUALSOURCE_BLEND; if (draw_->GetDeviceCaps().depthClampSupported) features |= GPU_SUPPORTS_DEPTH_CLAMP; - if (draw_->GetDeviceCaps().clipCullDistanceSupported) + if (draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported) features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; features |= GPU_SUPPORTS_COPY_IMAGE; features |= GPU_SUPPORTS_TEXTURE_FLOAT; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index fc28f4d9ed8e..7e59e9e4654f 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -228,7 +228,7 @@ void GPU_GLES::CheckGPUFeatures() { if (gl_extensions.GLES3) features |= GPU_SUPPORTS_DEPTH_TEXTURE; } - if (draw_->GetDeviceCaps().clipCullDistanceSupported) + if (draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported) features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; // If we already have a 16-bit depth buffer, we don't need to round. From 7b00c4a57206cc5e3d250fd91970b176d94a83ee Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 19 Sep 2021 23:16:21 -0700 Subject: [PATCH 15/17] GPU: Move Z/W equal hack to bugs from supports. It's really a bug (might even ideally cap the version?), and we already have other bugs handled the same way. 
--- Common/GPU/Vulkan/thin3d_vulkan.cpp | 5 +++++ Common/GPU/thin3d.h | 1 + GPU/Common/VertexShaderGenerator.cpp | 5 +++-- GPU/GPUState.h | 2 +- GPU/Vulkan/GPU_Vulkan.cpp | 4 ---- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 1dd083df6bc7..508f36ea8380 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -818,6 +818,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool splitSubmit) } else if (caps_.vendor == GPUVendor::VENDOR_INTEL) { // Workaround for Intel driver bug. TODO: Re-enable after some driver version bugs_.Infest(Bugs::DUAL_SOURCE_BLENDING_BROKEN); + } else if (caps_.vendor == GPUVendor::VENDOR_ARM) { + // These GPUs (up to some certain hardware version?) have a bug where draws where gl_Position.w == .z + // corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case + // is detected. See: https://github.com/hrydgard/ppsspp/issues/11937 + bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH); } caps_.deviceID = deviceProps.deviceID; diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index a3f05385c40d..9d3264e0906a 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -317,6 +317,7 @@ class Bugs { BROKEN_NAN_IN_CONDITIONAL = 4, COLORWRITEMASK_BROKEN_WITH_DEPTHTEST = 5, BROKEN_FLAT_IN_SHADER = 6, + EQUAL_WZ_CORRUPTS_DEPTH = 7, }; protected: diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 296c734626ba..d2acccccfeba 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -159,6 +159,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag bool doShadeMapping = uvGenMode == GE_TEXMAP_ENVIRONMENT_MAP; bool flatBug = bugs.Has(Draw::Bugs::BROKEN_FLAT_IN_SHADER) && g_Config.bVendorBugChecksEnabled; + bool needsZWHack = bugs.Has(Draw::Bugs::EQUAL_WZ_CORRUPTS_DEPTH) 
&& g_Config.bVendorBugChecksEnabled; bool doFlatShading = id.Bit(VS_BIT_FLATSHADE) && !flatBug; bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM); @@ -1134,8 +1135,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag // We've named the output gl_Position in HLSL as well. WRITE(p, " %sgl_Position = outPos;\n", compat.vsOutPrefix); - if (gstate_c.Supports(GPU_NEEDS_Z_EQUAL_W_HACK)) { - // See comment in GPU_Vulkan.cpp. + if (needsZWHack) { + // See comment in thin3d_vulkan.cpp. WRITE(p, " if (%sgl_Position.z == %sgl_Position.w) %sgl_Position.z *= 0.999999;\n", compat.vsOutPrefix, compat.vsOutPrefix, compat.vsOutPrefix); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 5806fdab240a..84928b1d392d 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -492,7 +492,7 @@ enum { GPU_SUPPORTS_FRAMEBUFFER_BLIT = FLAG_BIT(26), GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH = FLAG_BIT(27), GPU_SUPPORTS_TEXTURE_NPOT = FLAG_BIT(28), - GPU_NEEDS_Z_EQUAL_W_HACK = FLAG_BIT(29), + // Free bit: 29 // Free bit: 30 GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), }; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 7af0794ae6e9..86b0369699e2 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -206,10 +206,6 @@ void GPU_Vulkan::CheckGPUFeatures() { if (!PSP_CoreParameter().compat.flags().DisableAccurateDepth || driverTooOld) { features |= GPU_SUPPORTS_ACCURATE_DEPTH; } - // These GPUs (up to some certain hardware version?) has a bug where draws where gl_Position.w == .z - // corrupt the depth buffer. This is easily worked around by simply scaling Z down a tiny bit when this case - // is detected. See: https://github.com/hrydgard/ppsspp/issues/11937 - features |= GPU_NEEDS_Z_EQUAL_W_HACK; break; } default: From 33598f2e75294d742e49dc15b98505705374e2cb Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 19 Sep 2021 23:27:30 -0700 Subject: [PATCH 16/17] GPU: Support clip and cull distances separately. 
Older GL devices, and it seems Apple devices, may not support cull. --- GPU/Common/VertexShaderGenerator.cpp | 8 ++++++-- GPU/D3D11/GPU_D3D11.cpp | 6 ++++-- GPU/GLES/GPU_GLES.cpp | 6 ++++-- GPU/GLES/ShaderManagerGLES.cpp | 2 +- GPU/GPUState.h | 6 +++--- GPU/Vulkan/GPU_Vulkan.cpp | 7 +++++-- 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index d2acccccfeba..44048163c443 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -401,8 +401,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " vec4 gl_Position : POSITION;\n"); } else { WRITE(p, " vec4 gl_Position : SV_Position;\n"); - if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE)) { + if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { WRITE(p, " float gl_ClipDistance : SV_ClipDistance0;\n"); + } + if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) { WRITE(p, " float2 gl_CullDistance : SV_CullDistance0;\n"); } } @@ -1121,9 +1123,11 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? "" : "[0]"; const char *cull0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]"; const char *cull1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]"; - if (gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE)) { + if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { // TODO: Not rectangles... WRITE(p, " %sgl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", compat.vsOutPrefix, clip0); + } + if (gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) { // Cull any triangle fully outside in the same direction when depth clamp enabled. 
WRITE(p, " if (u_cullRangeMin.w > 0.0) {\n"); WRITE(p, " %sgl_CullDistance%s = projPos.z - u_cullRangeMin.z;\n", compat.vsOutPrefix, cull0); diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index 5d3f86467341..c279315d5d7c 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -128,8 +128,10 @@ void GPU_D3D11::CheckGPUFeatures() { features |= GPU_SUPPORTS_DUALSOURCE_BLEND; if (draw_->GetDeviceCaps().depthClampSupported) features |= GPU_SUPPORTS_DEPTH_CLAMP; - if (draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported) - features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; + if (draw_->GetDeviceCaps().clipDistanceSupported) + features |= GPU_SUPPORTS_CLIP_DISTANCE; + if (draw_->GetDeviceCaps().cullDistanceSupported) + features |= GPU_SUPPORTS_CULL_DISTANCE; features |= GPU_SUPPORTS_COPY_IMAGE; features |= GPU_SUPPORTS_TEXTURE_FLOAT; features |= GPU_SUPPORTS_INSTANCE_RENDERING; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 7e59e9e4654f..8e055cdfa909 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -228,8 +228,10 @@ void GPU_GLES::CheckGPUFeatures() { if (gl_extensions.GLES3) features |= GPU_SUPPORTS_DEPTH_TEXTURE; } - if (draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported) - features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; + if (draw_->GetDeviceCaps().clipDistanceSupported) + features |= GPU_SUPPORTS_CLIP_DISTANCE; + if (draw_->GetDeviceCaps().cullDistanceSupported) + features |= GPU_SUPPORTS_CULL_DISTANCE; // If we already have a 16-bit depth buffer, we don't need to round. 
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index a25231c23256..f2c351470222 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -183,7 +183,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, initialize.push_back({ &u_tess_weights_v, 0, 6 }); bool useDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0; - bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_CULL_DISTANCE); + bool useClip0 = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); program = render->CreateProgram(shaders, semantics, queries, initialize, useDualSource, useClip0); // The rest, use the "dirty" mechanism. diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 84928b1d392d..af80811cb575 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -482,7 +482,7 @@ enum { GPU_SUPPORTS_32BIT_INT_FSHADER = FLAG_BIT(15), GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), - GPU_SUPPORTS_CLIP_CULL_DISTANCE = FLAG_BIT(18), + // Free bit: 18 GPU_SUPPORTS_COPY_IMAGE = FLAG_BIT(19), GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20), GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21), @@ -492,8 +492,8 @@ enum { GPU_SUPPORTS_FRAMEBUFFER_BLIT = FLAG_BIT(26), GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH = FLAG_BIT(27), GPU_SUPPORTS_TEXTURE_NPOT = FLAG_BIT(28), - // Free bit: 29 - // Free bit: 30 + GPU_SUPPORTS_CLIP_DISTANCE = FLAG_BIT(29), + GPU_SUPPORTS_CULL_DISTANCE = FLAG_BIT(30), GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31), }; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 86b0369699e2..5baf466f3669 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -241,9 +241,12 @@ void GPU_Vulkan::CheckGPUFeatures() { if (enabledFeatures.depthClamp) { features |= 
GPU_SUPPORTS_DEPTH_CLAMP; } - if (enabledFeatures.shaderClipDistance && enabledFeatures.shaderCullDistance) { + if (enabledFeatures.shaderClipDistance) { + features |= GPU_SUPPORTS_CLIP_DISTANCE; + } + if (enabledFeatures.shaderCullDistance) { // Must support at least 8 if feature supported, so we're fine. - features |= GPU_SUPPORTS_CLIP_CULL_DISTANCE; + features |= GPU_SUPPORTS_CULL_DISTANCE; } if (enabledFeatures.dualSrcBlend) { if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) { From 275baccc5bec6f811f0e411c1f29e6e6f939688c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 19 Sep 2021 23:32:05 -0700 Subject: [PATCH 17/17] GLES: Support GL_APPLE_clip_distance too. Seems modern Apple mobile chips only support clip. --- Common/GPU/OpenGL/GLFeatures.cpp | 1 + Common/GPU/OpenGL/GLFeatures.h | 3 +++ Common/GPU/OpenGL/thin3d_gl.cpp | 9 +++++++-- GPU/Common/VertexShaderGenerator.cpp | 3 +++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Common/GPU/OpenGL/GLFeatures.cpp b/Common/GPU/OpenGL/GLFeatures.cpp index ef78271525f9..6e15b3aa843a 100644 --- a/Common/GPU/OpenGL/GLFeatures.cpp +++ b/Common/GPU/OpenGL/GLFeatures.cpp @@ -385,6 +385,7 @@ void CheckGLExtensions() { gl_extensions.OES_texture_float = g_set_gl_extensions.count("GL_OES_texture_float") != 0; gl_extensions.EXT_buffer_storage = g_set_gl_extensions.count("GL_EXT_buffer_storage") != 0; gl_extensions.EXT_clip_cull_distance = g_set_gl_extensions.count("GL_EXT_clip_cull_distance") != 0; + gl_extensions.APPLE_clip_distance = g_set_gl_extensions.count("GL_APPLE_clip_distance") != 0; #if defined(__ANDROID__) // On Android, incredibly, this is not consistently non-zero! It does seem to have the same value though. 
diff --git a/Common/GPU/OpenGL/GLFeatures.h b/Common/GPU/OpenGL/GLFeatures.h index 6a8f15beb00d..8efd782a1463 100644 --- a/Common/GPU/OpenGL/GLFeatures.h +++ b/Common/GPU/OpenGL/GLFeatures.h @@ -94,6 +94,9 @@ struct GLExtensions { // ARM bool ARM_shader_framebuffer_fetch; + // APPLE + bool APPLE_clip_distance; + // EGL bool EGL_NV_system_time; bool EGL_NV_coverage_sample; diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 554f2cdd7e4a..a9ab5151d094 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -534,8 +534,13 @@ OpenGLContext::OpenGLContext() { caps_.framebufferBlitSupported = gl_extensions.NV_framebuffer_blit || gl_extensions.ARB_framebuffer_object; caps_.framebufferDepthBlitSupported = caps_.framebufferBlitSupported; caps_.depthClampSupported = gl_extensions.ARB_depth_clamp; - caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 0)); - caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.ARB_cull_distance; + if (gl_extensions.IsGLES) { + caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance; + caps_.cullDistanceSupported = gl_extensions.EXT_clip_cull_distance; + } else { + caps_.clipDistanceSupported = gl_extensions.VersionGEThan(3, 0); + caps_.cullDistanceSupported = gl_extensions.ARB_cull_distance; + } // Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader): // This will induce a performance penalty on many architectures though so a blanket enable of this diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 44048163c443..a4d8c68a632c 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -144,6 +144,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if 
(gl_extensions.EXT_clip_cull_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { gl_exts.push_back("#extension GL_EXT_clip_cull_distance : enable"); } + if (gl_extensions.APPLE_clip_distance && id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) { + gl_exts.push_back("#extension GL_APPLE_clip_distance : enable"); + } } ShaderWriter p(buffer, compat, ShaderStage::Vertex, gl_exts.data(), gl_exts.size());