From 9512bc61745f2ac65fa2c2153588ffa774db0452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 11 May 2023 12:04:29 +0200 Subject: [PATCH] Don't cache render target copies for shader blending, only cache copies for overlap Fixes #17451, while also keeping the Dante performance fix from #17032. Of course, it's possible that something else could slow down now... But hopefully not. This could also fix other problems. --- GPU/Common/FramebufferManagerCommon.cpp | 4 ++-- GPU/Common/FramebufferManagerCommon.h | 1 + GPU/D3D11/StateMappingD3D11.cpp | 2 +- GPU/Directx9/StateMappingDX9.cpp | 2 +- GPU/GLES/StateMappingGLES.cpp | 2 +- GPU/Vulkan/StateMappingVulkan.cpp | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index b55ad1bf39d2..9876952f3911 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1240,7 +1240,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF // Self-texturing, need a copy currently (some backends can potentially support it though). WARN_LOG_ONCE(selfTextureCopy, G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags); // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. - if (currentFramebufferCopy_) { + if (currentFramebufferCopy_ && (flags & BINDFBCOLOR_UNCACHED) == 0) { // We have a copy already that hasn't been invalidated, let's keep using it. draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::FB_COLOR_BIT, layer); return true; @@ -1258,7 +1258,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF // Only cache the copy if it wasn't a partial copy. // TODO: Improve on this. - if (!partial) { + if (!partial && (flags & BINDFBCOLOR_UNCACHED) == 0) { currentFramebufferCopy_ = renderCopy; } gpuStats.numCopiesForSelfTex++; diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index d0c83b4d6c11..0c23a7e41e9c 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -199,6 +199,7 @@ enum BindFramebufferColorFlags { BINDFBCOLOR_APPLY_TEX_OFFSET = 4, // Used when rendering to a temporary surface (e.g. not the current render target.) BINDFBCOLOR_FORCE_SELF = 8, + BINDFBCOLOR_UNCACHED = 16, }; enum DrawTextureFlags { diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 1d00bfb46d61..aa1ec6606600 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -158,7 +158,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState); if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, 0); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY | BINDFBCOLOR_UNCACHED, 0); // No sampler required, we do a plain Load in the pixel shader. fboTexBound_ = true; fboTexBindState = FBO_TEX_NONE; diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 5412cd9a2328..24866d2d97ca 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -138,7 +138,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) { if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, Draw::ALL_LAYERS); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY | BINDFBCOLOR_UNCACHED, Draw::ALL_LAYERS); // If we are rendering at a higher resolution, linear is probably best for the dest color. device_->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); device_->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index 98af8d11f324..ac4b20f3fe5c 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -157,7 +157,7 @@ void DrawEngineGLES::ApplyDrawState(int prim) { // fboTexNeedsBind_ won't be set if we can read directly from the target. if (fboTexBindState == FBO_TEX_COPY_BIND_TEX) { // Note that this is positions, not UVs, that we need the copy from. - framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY, 0); + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY | BINDFBCOLOR_UNCACHED, 0); // If we are rendering at a higher resolution, linear is probably best for the dest color. renderManager->SetTextureSampler(1, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_LINEAR, GL_LINEAR, 0.0f); fboTexBound_ = true; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index de6a292b831d..18bff95502dd 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -367,7 +367,7 @@ void DrawEngineVulkan::BindShaderBlendTex() { if (!gstate.isModeClear()) { if (fboTexBindState_ == FBO_TEX_COPY_BIND_TEX) { VirtualFramebuffer *curRenderVfb = framebufferManager_->GetCurrentRenderVFB(); - bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, curRenderVfb, BINDFBCOLOR_MAY_COPY, Draw::ALL_LAYERS); + bool bindResult = framebufferManager_->BindFramebufferAsColorTexture(1, curRenderVfb, BINDFBCOLOR_MAY_COPY | BINDFBCOLOR_UNCACHED, Draw::ALL_LAYERS); _dbg_assert_(bindResult); boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW); boundSecondaryIsInputAttachment_ = false;