diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 9d3862e3222a..6dd8c0c90ea8 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -20,9 +20,9 @@ #include "Common/Log.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" -#include "GPU/GLES/GLStateCache.h" #include "GPU/Common/DepalettizeShaderCommon.h" - +#include "GPU/Directx9/PixelShaderGeneratorDX9.h" +#include "GPU/GLES/GLStateCache.h" #define WRITE p+=sprintf @@ -48,6 +48,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang WRITE(p, "out vec4 fragColor0;\n"); WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "uniform vec2 u_offset;\n"); } // TODO: Add support for integer textures. Though it hardly matters. @@ -110,7 +111,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang WRITE(p, ";\n"); } - WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels); + WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * %f * u_offset.x + u_offset.y, 0.0));\n", 1.0 / texturePixels); WRITE(p, "}\n"); } @@ -239,17 +240,19 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa WRITE(p, "varying vec2 v_texcoord0;\n"); WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "uniform vec2 u_offset;\n"); WRITE(p, "void main() {\n"); WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); - WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); + WRITE(p, " float coord = ((%s * %f)%s) * u_offset.x + u_offset.y;\n", lookupMethod, index_multiplier, offset); WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n"); WRITE(p, "}\n"); } else if (lang == HLSL_DX9) { WRITE(p, "sampler tex: register(s0);\n"); WRITE(p, "sampler pal: register(s1);\n"); + WRITE(p, "float2 u_offset : 
register(c%i);\n", CONST_PS_DEPAL_OFFSET); WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n"); WRITE(p, " float4 index = tex2D(tex, v_texcoord0);\n"); - WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); + WRITE(p, " float coord = ((%s * %f)%s) * u_offset.x + u_offset.y;\n", lookupMethod, index_multiplier, offset); WRITE(p, " return tex2D(pal, float2(coord, 0.0)).bgra;\n"); WRITE(p, "}\n"); } @@ -270,4 +273,52 @@ void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguag } } +void GenerateIndexedShader(char *buffer, ShaderLanguage lang) { + char *p = buffer; + + if (lang == GLSL_140) { + if (gl_extensions.IsGLES) { + WRITE(p, "#version 100\n"); + WRITE(p, "precision mediump float;\n"); + } else { + WRITE(p, "#version 110\n"); + } + WRITE(p, "varying vec2 v_texcoord0;\n"); + WRITE(p, "uniform sampler2D tex;\n"); + WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "uniform vec2 u_offset;\n"); + WRITE(p, "void main() {\n"); + WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n"); + WRITE(p, " float coord = index.r * u_offset.x + u_offset.y;\n"); + WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n"); + WRITE(p, "}\n"); + } else if (lang == GLSL_300) { + if (gl_extensions.IsGLES) { + WRITE(p, "#version 300 es\n"); + WRITE(p, "precision mediump float;\n"); + } else { + WRITE(p, "#version 330\n"); + } + WRITE(p, "in vec2 v_texcoord0;\n"); + WRITE(p, "out vec4 fragColor0;\n"); + WRITE(p, "uniform sampler2D tex;\n"); + WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "uniform vec2 u_offset;\n"); + WRITE(p, "void main() {\n"); + WRITE(p, " vec4 index = texture(tex, v_texcoord0);\n"); + WRITE(p, " float coord = index.r * u_offset.x + u_offset.y;\n"); + WRITE(p, " fragColor0 = texture(pal, vec2(coord, 0.0));\n"); + WRITE(p, "}\n"); + } else if (lang == HLSL_DX9) { + WRITE(p, "sampler tex: register(s0);\n"); + WRITE(p, "sampler pal: register(s1);\n"); + WRITE(p, "float2 u_offset : 
register(c%i);\n", CONST_PS_DEPAL_OFFSET); + WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n"); + WRITE(p, " float4 index = tex2D(tex, v_texcoord0);\n"); + WRITE(p, " float coord = index.r * u_offset.x + u_offset.y;\n"); + WRITE(p, " return tex2D(pal, float2(coord, 0.0)).bgra;\n"); + WRITE(p, "}\n"); + } +} + #undef WRITE \ No newline at end of file diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index c49a65cdbf6b..8f95251c0d77 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -27,3 +27,4 @@ enum ShaderLanguage { }; void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language); +void GenerateIndexedShader(char *buffer, ShaderLanguage lang); diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 7fa99ff9df9b..0df49767c9a6 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -30,6 +30,8 @@ enum { FB_USAGE_RENDERTARGET = 2, FB_USAGE_TEXTURE = 4, FB_USAGE_CLUT = 8, + + FB_USAGE_KEEP = FB_USAGE_CLUT, }; enum { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 4e88b8d68c30..23a610782d65 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -321,7 +321,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { clutRenderAddress_ = 0xFFFFFFFF; if (Memory::IsValidAddress(clutAddr)) { - if (Memory::IsVRAMAddress(clutAddr)) { + if (Memory::IsVRAMAddress(clutAddr) && !g_Config.bDisableSlowFramebufEffects) { // Clear the uncached bit, etc. to match framebuffers. const u32 clutFramebufAddr = clutAddr & 0x3FFFFFFF; const u32 clutFramebufEnd = clutFramebufAddr + loadBytes; @@ -352,11 +352,16 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { // It's possible for a game to (successfully) access outside valid memory. 
u32 bytes = Memory::ValidSize(clutAddr, loadBytes); - if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) { - DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes); - Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); - if (bytes < loadBytes) { - memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); + if (clutRenderAddress_ != 0xFFFFFFFF) { + bool useIndexed = standardScaleFactor_ == 1; + + if (!useIndexed) { + DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes); + Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); + if (bytes < loadBytes) { + memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); + } + clutRenderAddress_ = 0xFFFFFFFF; } } else { #ifdef _M_SSE @@ -386,7 +391,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { } else { memset(clutBufRaw_, 0x00, loadBytes); } - // Reload the clut next time. + // Update the clut (translating colors if necessary) next time. clutLastFormat_ = 0xFFFFFFFF; clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes); } @@ -411,6 +416,115 @@ void TextureCacheCommon::UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *te DoUnswizzleTex16(texptr, dest, bxc, byc, destPitch); } +void *TextureCacheCommon::DecodeLevelToIndexed(GETextureFormat format, int level, int *bufwout) { + u32 texaddr = gstate.getTextureAddress(level); + bool swizzled = gstate.isTextureSwizzled(); + if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) { + // This means it's in a mirror, possibly a swizzled mirror. Let's report. + WARN_LOG_REPORT_ONCE(texmirror, G3D, "Decoding texture from VRAM mirror at %08x swizzle=%d", texaddr, swizzled ? 1 : 0); + if ((texaddr & 0x00200000) == 0x00200000) { + // Technically 2 and 6 are slightly different, but this is better than nothing probably. + swizzled = !swizzled; + } + // Note that (texaddr & 0x00600000) == 0x00600000 is very likely to be depth texturing. 
+ } + + int bufw = GetTextureBufw(level, texaddr, format); + if (bufwout) + *bufwout = bufw; + int w = gstate.getTextureWidth(level); + int h = gstate.getTextureHeight(level); + const u8 *texptr = Memory::GetPointer(texaddr); + + tmpTexBuf16.resize(std::max(bufw, w) * h); + tmpTexBuf32.resize(std::max(bufw, w) * h); + tmpTexBufRearrange.resize(std::max(bufw, w) * h); + + u8 *finalBuf = (u8 *)tmpTexBuf16.data(); + switch (format) { + case GE_TFMT_CLUT4: + { + const bool mipmapShareClut = gstate.isClutSharedForMipmaps(); + const int clutSharingOffset = mipmapShareClut ? 0 : level * 16; + + const u8 *indexed = texptr; + if (swizzled) { + UnswizzleFromMem(tmpTexBuf32.data(), bufw / 2, texptr, bufw, h, 0); + indexed = (const u8 *)tmpTexBuf32.data(); + } + + for (int i = 0; i < bufw * h; i += 2) { + u8 index = *indexed++; + finalBuf[i + 0] = gstate.transformClutIndex((index >> 0) & 0xf) + clutSharingOffset; + finalBuf[i + 1] = gstate.transformClutIndex((index >> 4) & 0xf) + clutSharingOffset; + } + } + break; + + case GE_TFMT_CLUT8: + { + const u8 *indexed = texptr; + if (swizzled) { + UnswizzleFromMem(tmpTexBuf32.data(), bufw, texptr, bufw, h, 1); + indexed = (const u8 *)tmpTexBuf32.data(); + } + + for (int i = 0; i < bufw * h; ++i) { + finalBuf[i] = gstate.transformClutIndex(*indexed++); + } + } + break; + + case GE_TFMT_CLUT16: + { + const u16_le *indexed = (const u16_le *)texptr; + if (swizzled) { + UnswizzleFromMem(tmpTexBuf32.data(), bufw * 2, texptr, bufw, h, 2); + indexed = (const u16_le *)tmpTexBuf32.data(); + } + + for (int i = 0; i < bufw * h; ++i) { + finalBuf[i] = gstate.transformClutIndex(*indexed++); + } + } + break; + + case GE_TFMT_CLUT32: + { + const u32_le *indexed = (const u32_le *)texptr; + if (swizzled) { + UnswizzleFromMem(tmpTexBuf32.data(), bufw * 4, texptr, bufw, h, 4); + indexed = (const u32_le *)tmpTexBuf32.data(); + } + + for (int i = 0; i < bufw * h; ++i) { + finalBuf[i] = gstate.transformClutIndex(*indexed++); + } + } + break; + + case 
GE_TFMT_4444: + case GE_TFMT_5551: + case GE_TFMT_5650: + case GE_TFMT_8888: + case GE_TFMT_DXT1: + case GE_TFMT_DXT3: + case GE_TFMT_DXT5: + default: + ERROR_LOG_REPORT(G3D, "Invalid indexed format %d", format); + return nullptr; + } + + // Technically, the index can actually be up to 512. This is pretty rare (getClutIndexStartPos.) + // Unfortunately, not all platforms support uploading > 8 bit values. + if (gstate.getClutPaletteFormat() != GE_CMODE_32BIT_ABGR8888 && (gstate.getClutIndexStartPos() & 0x100) != 0) { + ERROR_LOG_REPORT(G3D, "Unsupported indexed texture with CLUT indexes outside 0-255"); + } + + // TODO: Change to using an output and stride. + return finalBuf; +} + bool TextureCacheCommon::GetCurrentClutBuffer(GPUDebugBuffer &buffer) { const u32 bpp = gstate.getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 4 : 2; const u32 pixels = 1024 / bpp; diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 462296d34bc0..2f0ddeb5fe8a 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -82,6 +82,7 @@ class TextureCacheCommon { STATUS_TO_SCALE = 0x80, // Pending texture scaling in a later frame. STATUS_IS_SCALED = 0x100, // Has been scaled (can't be replaceImages'd.) STATUS_FREE_CHANGE = 0x200, // Allow one change before marking "frequent". + STATUS_INDEXED = 0x400, // Texture is R only for on-GPU CLUT processing. }; // Status, but int so we can zero initialize. 
@@ -153,6 +154,7 @@ class TextureCacheCommon { }; bool DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA = false); + void *DecodeLevelToIndexed(GETextureFormat format, int level, int *bufwout); void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel); bool ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw); diff --git a/GPU/Directx9/DepalettizeShaderDX9.cpp b/GPU/Directx9/DepalettizeShaderDX9.cpp index a85f6282dbf5..cc4c894faf57 100644 --- a/GPU/Directx9/DepalettizeShaderDX9.cpp +++ b/GPU/Directx9/DepalettizeShaderDX9.cpp @@ -151,7 +151,6 @@ LPDIRECT3DPIXELSHADER9 DepalShaderCacheDX9::GetDepalettizePixelShader(GEPaletteF } char *buffer = new char[2048]; - GenerateDepalShader(buffer, pixelFormat, HLSL_DX9); LPDIRECT3DPIXELSHADER9 pshader; @@ -172,4 +171,33 @@ LPDIRECT3DPIXELSHADER9 DepalShaderCacheDX9::GetDepalettizePixelShader(GEPaletteF return depal->pixelShader; } +LPDIRECT3DPIXELSHADER9 DepalShaderCacheDX9::GetIndexedPixelShader() { + if (indexedShader_.pixelShader != nullptr) { + if (indexedShader_.pixelShader == (LPDIRECT3DPIXELSHADER9)-1) { + // Previously failed. Don't try again. + return nullptr; + } + return indexedShader_.pixelShader; + } + + char *buffer = new char[2048]; + GenerateIndexedShader(buffer, HLSL_DX9); + + std::string errorMessage; + if (!CompilePixelShader(buffer, &indexedShader_.pixelShader, NULL, errorMessage)) { + ERROR_LOG(G3D, "Failed to compile depal pixel shader: %s\n\n%s", buffer, errorMessage.c_str()); + indexedShader_.pixelShader = nullptr; + } + + delete[] buffer; + + if (indexedShader_.pixelShader == nullptr) { + // So that we know not to try again next time. 
+ indexedShader_.pixelShader = (LPDIRECT3DPIXELSHADER9)-1; + return nullptr; + } + + return indexedShader_.pixelShader; +} + } // namespace \ No newline at end of file diff --git a/GPU/Directx9/DepalettizeShaderDX9.h b/GPU/Directx9/DepalettizeShaderDX9.h index 7cdcb9413fc4..92b3a9043d9e 100644 --- a/GPU/Directx9/DepalettizeShaderDX9.h +++ b/GPU/Directx9/DepalettizeShaderDX9.h @@ -26,6 +26,9 @@ namespace DX9 { class DepalShaderDX9 { public: + DepalShaderDX9() : pixelShader(nullptr) { + } + LPDIRECT3DPIXELSHADER9 pixelShader; }; @@ -45,6 +48,7 @@ class DepalShaderCacheDX9 { LPDIRECT3DPIXELSHADER9 GetDepalettizePixelShader(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat); LPDIRECT3DVERTEXSHADER9 GetDepalettizeVertexShader() { return vertexShader_; } LPDIRECT3DTEXTURE9 GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); + LPDIRECT3DPIXELSHADER9 GetIndexedPixelShader(); void Clear(); void Decimate(); @@ -54,6 +58,7 @@ class DepalShaderCacheDX9 { LPDIRECT3DVERTEXSHADER9 vertexShader_; std::map cache_; std::map texCache_; + DepalShaderDX9 indexedShader_; }; } // namespace \ No newline at end of file diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index b934d4bfd863..1f7478c24aa0 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -1224,7 +1224,7 @@ namespace DX9 { UpdateFramebufUsage(vfb); if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) { - if (age > FBO_OLD_AGE) { + if (age > FBO_OLD_AGE && (vfb->usageFlags & FB_USAGE_KEEP) == 0) { INFO_LOG(SCEGE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age); DestroyFramebuf(vfb); vfbs_.erase(vfbs_.begin() + i--); diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.h b/GPU/Directx9/PixelShaderGeneratorDX9.h index 44914dfd2adf..f7996cc51099 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.h +++ 
b/GPU/Directx9/PixelShaderGeneratorDX9.h @@ -39,4 +39,7 @@ bool GenerateFragmentShaderDX9(const ShaderID &id, char *buffer); // For stencil upload #define CONST_PS_STENCILVALUE 10 +// For depal +#define CONST_PS_DEPAL_OFFSET 11 + }; diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index c2fce5941199..7a99bd0f6672 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -532,6 +532,9 @@ void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase } inline u32 TextureCacheDX9::GetCurrentClutHash() { + // If we're using a rendered clut, always use the same cache entry. + if (clutRenderAddress_ != 0xFFFFFFFF) + return 0x1337C0DE; return clutHash_; } @@ -619,12 +622,16 @@ void TextureCacheDX9::ApplyTexture() { if (entry->framebuffer) { ApplyTextureFramebuffer(entry, entry->framebuffer); } else { - LPDIRECT3DTEXTURE9 texture = DxTex(entry); - if (texture != lastBoundTexture) { - pD3Ddevice->SetTexture(0, texture); - lastBoundTexture = texture; + if ((entry->status & TexCacheEntry::STATUS_INDEXED) != 0) { + ApplyIndexedTexture(nextTexture_); + } else { + LPDIRECT3DTEXTURE9 texture = DxTex(entry); + if (texture != lastBoundTexture) { + pD3Ddevice->SetTexture(0, texture); + lastBoundTexture = texture; + } + UpdateSamplingParams(*entry, false); } - UpdateSamplingParams(*entry, false); gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL; gstate_c.textureSimpleAlpha = entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN; @@ -754,6 +761,80 @@ class TextureShaderApplierDX9 { int renderH_; }; +void TextureCacheDX9::ApplyIndexedTexture(TexCacheEntry *entry) { + VirtualFramebuffer *clutVfb = nullptr; + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { + auto framebuffer = fbCache_[i]; + if (framebuffer->fb_address == clutRenderAddress_) { + clutVfb = framebuffer; + } + } + + if 
(!clutVfb) { + ERROR_LOG_REPORT(G3D, "Unable to apply indexed texture: vfb gone"); + return; + } + + LPDIRECT3DPIXELSHADER9 pshader = depalShaderCache_->GetIndexedPixelShader(); + if (!pshader) { + return; + } + + // TODO: Mipmaps are theoretically possible, but not implemented. + int w = 1 << ((entry->dim >> 0) & 0xf); + int h = 1 << ((entry->dim >> 8) & 0xf); + + FBO_DX9 *depalFBO = framebufferManager_->GetTempFBO(w, h, FBO_8888); + fbo_bind_as_render_target(depalFBO); + shaderManager_->DirtyLastShader(); + + // Positions are -1 -> 1, so our offsets should be -0.1 for a 10 wide texture. + float xoff = -0.5f / (w / 2); + float yoff = -0.5f / (h / 2); + + TextureShaderApplierDX9 shaderApply(pshader, w, h, w, h, xoff, yoff); + shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset, xoff, yoff); + shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader()); + + float render_offset = clutRenderOffset_ / (clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); + float texel_offset = (0.5f + render_offset) / (float)clutVfb->bufferWidth; + if (gstate.getClutPaletteFormat() != GE_CMODE_32BIT_ABGR8888 && (gstate.getClutIndexStartPos() & 0x100) != 0) { + // In this case, we truncated the index entries. Apply the offset here. + if (clutVfb->renderWidth > 256) { + texel_offset += 256.0f / (float)clutVfb->renderWidth; + } + } + + // We scale by the width of the CLUT - to map 0.0 -> 0, 1.0 -> 255. + // If the width is 256, 255 is right (see offset above.) We aim for the texel centers. 
+ float texel_mult = 255.0f / (float)clutVfb->bufferWidth; + + const float f[4] = { texel_mult, texel_offset, 0.0f, 0.0f }; + pD3Ddevice->SetPixelShaderConstantF(CONST_PS_DEPAL_OFFSET, f, 1); + + pD3Ddevice->SetTexture(1, fbo_get_color_texture(clutVfb->fbo_dx9)); + pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT); + pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + pD3Ddevice->SetSamplerState(1, D3DSAMP_MIPFILTER, D3DTEXF_NONE); + + pD3Ddevice->SetTexture(0, DxTex(entry)); + pD3Ddevice->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + pD3Ddevice->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + pD3Ddevice->SetSamplerState(0, D3DSAMP_MIPFILTER, D3DTEXF_NONE); + + shaderApply.Shade(); + + fbo_bind_color_as_texture(depalFBO, 0); + + framebufferManager_->RebindFramebuffer(); + SetFramebufferSamplingParams(w, h); + + pD3Ddevice->SetTexture(0, fbo_get_color_texture(depalFBO)); + + lastBoundTexture = INVALID_TEX; +} + void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { LPDIRECT3DPIXELSHADER9 pshader = nullptr; const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); @@ -762,7 +843,19 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame } if (pshader) { - LPDIRECT3DTEXTURE9 clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); + LPDIRECT3DTEXTURE9 clutTexture = nullptr; + + VirtualFramebuffer *clutVfb = nullptr; + for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { + auto clutFramebuffer = fbCache_[i]; + if (clutFramebuffer->fb_address == clutRenderAddress_) { + clutVfb = clutFramebuffer; + } + } + + if (!clutVfb) { + clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); + } FBO_DX9 *depalFBO = framebufferManager_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, FBO_8888); fbo_bind_as_render_target(depalFBO); @@ -775,7 +868,30 @@ void 
TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset, xoff, yoff); shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader()); - pD3Ddevice->SetTexture(1, clutTexture); + float texturePixels = 256.0f; + if (clutFormat != GE_CMODE_32BIT_ABGR8888) + texturePixels = 512.0f; + + if (clutVfb) { + float render_offset = clutRenderOffset_ / (clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); + + // Before this multiplier, (texturePixels - 1) would be near 1.0. + // If our buffer is actually 320 wide, we need to rescale that. + // There's already some accounting for centers. + float texel_mult = texturePixels / (float)clutVfb->bufferWidth; + float texel_offset = render_offset / (float)clutVfb->bufferWidth; + const float f[4] = { texel_mult, texel_offset, 0.0f, 0.0f }; + pD3Ddevice->SetPixelShaderConstantF(CONST_PS_DEPAL_OFFSET, f, 1); + } else { + const float f[4] = { 1.0f, 0.0f, 0.0f, 0.0f }; + pD3Ddevice->SetPixelShaderConstantF(CONST_PS_DEPAL_OFFSET, f, 1); + } + + if (clutVfb) { + pD3Ddevice->SetTexture(1, fbo_get_color_texture(clutVfb->fbo_dx9)); + } else { + pD3Ddevice->SetTexture(1, clutTexture); + } pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT); pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT); pD3Ddevice->SetSamplerState(1, D3DSAMP_MIPFILTER, D3DTEXF_NONE); @@ -888,7 +1004,7 @@ void TextureCacheDX9::SetTexture(bool force) { cluthash = 0; } u64 cachekey = TexCacheEntry::CacheKey(texaddr, format, dim, cluthash); - + int bufw = GetTextureBufw(0, texaddr, format); u8 maxLevel = gstate.getTextureMaxLevel(); @@ -900,7 +1016,7 @@ void TextureCacheDX9::SetTexture(bool force) { gstate_c.bgraTexture = true; gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; - + if (iter != cache.end()) { entry = &iter->second; // Validate the texture still 
matches the cache entry. @@ -910,10 +1026,6 @@ void TextureCacheDX9::SetTexture(bool force) { // Check for FBO - slow! if (entry->framebuffer) { if (match) { - if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) { - WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); - } - SetTextureFramebuffer(entry, entry->framebuffer); return; } else { @@ -936,6 +1048,16 @@ void TextureCacheDX9::SetTexture(bool force) { rehash = false; } + // Check the clut status. + if (match) { + bool nowUsingClutRender = clutRenderAddress_ != 0xFFFFFFFF && hasClut; + bool wasUsingClutRender = (entry->status & TexCacheEntry::STATUS_INDEXED) != 0; + if (nowUsingClutRender != wasUsingClutRender) { + match = false; + reason = "CLUT render status changed"; + } + } + if (match) { if (entry->lastFrame != gpuStats.numFlips) { u32 diff = gpuStats.numFlips - entry->lastFrame; @@ -1006,10 +1128,6 @@ void TextureCacheDX9::SetTexture(bool force) { TexCacheEntry entryNew = {0}; cache[cachekey] = entryNew; - if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) { - WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); - } - entry = &cache[cachekey]; if (g_Config.bTextureBackoffCache) { entry->status = TexCacheEntry::STATUS_HASHING; @@ -1243,6 +1361,16 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry, bool replaceImage scaleFactor = 1; } + if (clutRenderAddress_ != 0xFFFFFFFF && entry->cluthash != 0) { + entry->status |= TexCacheEntry::STATUS_INDEXED; + dstFmt = D3DFMT_L8; + // Can't scale an indexed texture (this means it uses a CLUT that was rendered.) + scaleFactor = 1; + } else { + // Clear in case it stopped being an indexed texture. 
+ entry->status &= ~TexCacheEntry::STATUS_INDEXED; + } + if (scaleFactor != 1) { if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) { entry->status |= TexCacheEntry::STATUS_TO_SCALE; @@ -1355,6 +1483,8 @@ u32 ToD3D9Format(ReplacedTextureFormat fmt) { } void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, bool replaceImages, int scaleFactor, u32 dstFmt) { + bool useIndexed = (entry.status & TexCacheEntry::STATUS_INDEXED) != 0; + int w = gstate.getTextureWidth(level); int h = gstate.getTextureHeight(level); @@ -1411,6 +1541,21 @@ void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &re } bool decSuccess = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false); + if (!useIndexed) { + decSuccess = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false); + } else { + pixelData = (u32 *)DecodeLevelToIndexed(GETextureFormat(entry.format), level, &bufw); + decSuccess = pixelData != nullptr; + + if (decSuccess) { + for (int y = 0; y < h; ++y) { + memcpy((u8 *)rect.pBits + rect.Pitch * y, (u8 *)pixelData + bufw * y, w); + } + } + pixelData = (u32 *)rect.pBits; + bpp = 1; + decPitch = bufw; + } if (!decSuccess) { memset(pixelData, 0, decPitch * h); } diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index 59729a939c36..54a277ccc12b 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -86,6 +86,7 @@ class TextureCacheDX9 : public TextureCacheCommon { void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple); bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) override; void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); + void ApplyIndexedTexture(TexCacheEntry *entry); void ApplyTextureFramebuffer(TexCacheEntry *entry, 
VirtualFramebuffer *framebuffer); bool CheckFullHash(TexCacheEntry *const entry, bool &doDelete); diff --git a/GPU/GLES/DepalettizeShader.cpp b/GPU/GLES/DepalettizeShader.cpp index d82191c83e7e..2963a4d5d468 100644 --- a/GPU/GLES/DepalettizeShader.cpp +++ b/GPU/GLES/DepalettizeShader.cpp @@ -193,25 +193,7 @@ void DepalShaderCache::Decimate() { } } -DepalShader *DepalShaderCache::GetDepalettizeShader(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat) { - u32 id = GenerateShaderID(clutFormat, pixelFormat); - - auto shader = cache_.find(id); - if (shader != cache_.end()) { - return shader->second; - } - - if (vertexShader_ == 0) { - if (!CreateVertexShader()) { - // The vertex shader failed, no need to bother trying the fragment. - return nullptr; - } - } - - char *buffer = new char[2048]; - - GenerateDepalShader(buffer, pixelFormat, useGL3_ ? GLSL_300 : GLSL_140); - +void DepalShaderCache::CreateFragShader(DepalShader *depal, char *buffer) { GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER); const char *buf = buffer; @@ -232,14 +214,14 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(GEPaletteFormat clutFormat, GLint u_tex = glGetUniformLocation(program, "tex"); GLint u_pal = glGetUniformLocation(program, "pal"); + GLint u_offset = glGetUniformLocation(program, "u_offset"); glUniform1i(u_tex, 0); glUniform1i(u_pal, 3); - DepalShader *depal = new DepalShader(); depal->program = program; depal->fragShader = fragShader; - cache_[id] = depal; + depal->u_offset = u_offset; GLint linkStatus = GL_FALSE; glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); @@ -267,7 +249,59 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(GEPaletteFormat clutFormat, depal->a_position = glGetAttribLocation(program, "a_position"); depal->a_texcoord0 = glGetAttribLocation(program, "a_texcoord0"); } +} + +DepalShader *DepalShaderCache::GetDepalettizeShader(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat) { + u32 id = GenerateShaderID(clutFormat, pixelFormat); + + 
auto shader = cache_.find(id); + if (shader != cache_.end()) { + return shader->second; + } + + if (vertexShader_ == 0) { + if (!CreateVertexShader()) { + // The vertex shader failed, no need to bother trying the fragment. + return nullptr; + } + } + + char *buffer = new char[2048]; + GenerateDepalShader(buffer, pixelFormat, useGL3_ ? GLSL_300 : GLSL_140); + + DepalShader *depal = new DepalShader(); + CreateFragShader(depal, buffer); + cache_[id] = depal; delete[] buffer; return depal->program ? depal : nullptr; } + +DepalShader *DepalShaderCache::GetIndexedShader() { + if (indexedShader_.program != 0) { + if (indexedShader_.program == -1) { + // Previously failed. Don't try again. + return nullptr; + } + return &indexedShader_; + } + + if (vertexShader_ == 0) { + if (!CreateVertexShader()) { + // The vertex shader failed, no need to bother trying the fragment. + return nullptr; + } + } + + char *buffer = new char[2048]; + GenerateIndexedShader(buffer, useGL3_ ? GLSL_300 : GLSL_140); + + CreateFragShader(&indexedShader_, buffer); + if (indexedShader_.program == 0) { + // So that we know not to try again next time. + indexedShader_.program = -1; + } + + delete[] buffer; + return indexedShader_.program != 0 && indexedShader_.program != -1 ? &indexedShader_ : nullptr; +} diff --git a/GPU/GLES/DepalettizeShader.h b/GPU/GLES/DepalettizeShader.h index 1916d43df40e..bd7c9d826b2a 100644 --- a/GPU/GLES/DepalettizeShader.h +++ b/GPU/GLES/DepalettizeShader.h @@ -23,10 +23,14 @@ class DepalShader { public: + DepalShader() : program(0), fragShader(0) { + } + GLuint program; GLuint fragShader; GLint a_position; GLint a_texcoord0; + GLint u_offset; }; class DepalTexture { @@ -44,17 +48,20 @@ class DepalShaderCache { // This also uploads the palette and binds the correct texture. 
DepalShader *GetDepalettizeShader(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat); GLuint GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); + DepalShader *GetIndexedShader(); void Clear(); void Decimate(); private: u32 GenerateShaderID(GEPaletteFormat clutFormat, GEBufferFormat pixelFormat); bool CreateVertexShader(); + void CreateFragShader(DepalShader *depal, char *buffer); bool useGL3_; bool vertexShaderFailed_; GLuint vertexShader_; std::map cache_; std::map texCache_; + DepalShader indexedShader_; }; diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 9161888dcd49..7a232f58c7fa 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1896,7 +1896,7 @@ void FramebufferManager::DecimateFBOs() { UpdateFramebufUsage(vfb); if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) { - if (age > FBO_OLD_AGE) { + if (age > FBO_OLD_AGE && (vfb->usageFlags & FB_USAGE_KEEP) == 0) { INFO_LOG(SCEGE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age); DestroyFramebuf(vfb); vfbs_.erase(vfbs_.begin() + i--); diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 560cfff6fe99..9eeb761ee717 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -19,6 +19,7 @@ #include #include "ext/xxhash.h" +#include "gfx/gl_common.h" #include "i18n/i18n.h" #include "math/math_util.h" #include "profiler/profiler.h" @@ -30,6 +31,7 @@ #include "Core/Reporting.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" +#include "GPU/Common/TextureDecoder.h" #include "GPU/GLES/GLStateCache.h" #include "GPU/GLES/TextureCache.h" #include "GPU/GLES/Framebuffer.h" @@ -558,6 +560,9 @@ void TextureCache::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, b } inline u32 TextureCache::GetCurrentClutHash() { + // If we're using a rendered clut, always use the same cache entry. 
+ if (clutRenderAddress_ != 0xFFFFFFFF) + return 0x1337C0DE; return clutHash_; } @@ -686,11 +691,15 @@ void TextureCache::ApplyTexture() { if (entry->framebuffer) { ApplyTextureFramebuffer(entry, entry->framebuffer); } else { - if (entry->textureName != lastBoundTexture) { - glBindTexture(GL_TEXTURE_2D, entry->textureName); - lastBoundTexture = entry->textureName; + if ((entry->status & TexCacheEntry::STATUS_INDEXED) != 0) { + ApplyIndexedTexture(nextTexture_); + } else { + if (entry->textureName != lastBoundTexture) { + glBindTexture(GL_TEXTURE_2D, entry->textureName); + lastBoundTexture = entry->textureName; + } + UpdateSamplingParams(*entry, false); } - UpdateSamplingParams(*entry, false); gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL; gstate_c.textureSimpleAlpha = entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN; @@ -833,6 +842,75 @@ class TextureShaderApplier { int renderH_; }; +void TextureCache::ApplyIndexedTexture(TexCacheEntry *entry) { + VirtualFramebuffer *clutVfb = nullptr; + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { + auto framebuffer = fbCache_[i]; + if (framebuffer->fb_address == clutRenderAddress_) { + clutVfb = framebuffer; + } + } + + if (!clutVfb) { + ERROR_LOG_REPORT(G3D, "Unable to apply indexed texture: vfb gone"); + return; + } + + DepalShader *shader = depalShaderCache_->GetIndexedShader(); + if (!shader) { + return; + } + + // TODO: Mipmaps are theoretically possible, but not implemented. 
+ int w = 1 << ((entry->dim >> 0) & 0xf); + int h = 1 << ((entry->dim >> 8) & 0xf); + + FBO *depalFBO = framebufferManager_->GetTempFBO(w, h, FBO_8888); + fbo_bind_as_render_target(depalFBO); + shaderManager_->DirtyLastShader(); + + TextureShaderApplier shaderApply(shader, w, h, w, h); + shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset); + shaderApply.Use(transformDraw_); + + if (shader->u_offset != -1) { + float render_offset = clutRenderOffset_ / (clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); + float texel_offset = (0.5f + render_offset) / (float)clutVfb->bufferWidth; + if (gstate.getClutPaletteFormat() != GE_CMODE_32BIT_ABGR8888 && (gstate.getClutIndexStartPos() & 0x100) != 0) { + // In this case, we truncated the index entries. Apply the offset here. + if (clutVfb->renderWidth > 256) { + texel_offset += 256.0f / (float)clutVfb->renderWidth; + } + } + + // We scale by the width of the CLUT - to map 0.0 -> 0, 1.0 -> 255. + // If the width is 256, 255 is right (see offset above.) We aim for the texel centers. 
+ float texel_mult = 255.0f / (float)clutVfb->bufferWidth; + + glUniform2f(shader->u_offset, texel_mult, texel_offset); + } + + glActiveTexture(GL_TEXTURE3); + fbo_bind_color_as_texture(clutVfb->fbo, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glActiveTexture(GL_TEXTURE0); + + glBindTexture(GL_TEXTURE_2D, entry->textureName); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + shaderApply.Shade(); + + fbo_bind_color_as_texture(depalFBO, 0); + + framebufferManager_->RebindFramebuffer(); + SetFramebufferSamplingParams(w, h); + + lastBoundTexture = INVALID_TEX; +} + void TextureCache::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { DepalShader *depal = nullptr; const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); @@ -840,7 +918,19 @@ void TextureCache::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuf depal = depalShaderCache_->GetDepalettizeShader(clutFormat, framebuffer->drawnFormat); } if (depal) { - GLuint clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); + GLuint clutTexture = 0; + + VirtualFramebuffer *clutVfb = nullptr; + for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { + auto clutFramebuffer = fbCache_[i]; + if (clutFramebuffer->fb_address == clutRenderAddress_) { + clutVfb = clutFramebuffer; + } + } + + if (!clutVfb) { + clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); + } FBO *depalFBO = framebufferManager_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, FBO_8888); fbo_bind_as_render_target(depalFBO); shaderManager_->DirtyLastShader(); @@ -849,8 +939,33 @@ void TextureCache::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuf shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, 
gstate_c.curTextureYOffset); shaderApply.Use(transformDraw_); + if (depal->u_offset != -1) { + float texturePixels = 256.0f; + if (clutFormat != GE_CMODE_32BIT_ABGR8888) + texturePixels = 512.0f; + + if (clutVfb) { + float render_offset = clutRenderOffset_ / (clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); + + // Before this multiplier, (texturePixels - 1) would be near 1.0. + // If our buffer is actually 320 wide, we need to rescale that. + // There's already some accounting for centers. + float texel_mult = texturePixels / (float)clutVfb->bufferWidth; + float texel_offset = render_offset / (float)clutVfb->bufferWidth; + glUniform2f(depal->u_offset, texel_mult, texel_offset); + } else { + glUniform2f(depal->u_offset, 1.0f, 0.0f); + } + } + glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D, clutTexture); + if (clutVfb) { + fbo_bind_color_as_texture(clutVfb->fbo, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } else { + glBindTexture(GL_TEXTURE_2D, clutTexture); + } glActiveTexture(GL_TEXTURE0); framebufferManager_->BindFramebufferColor(GL_TEXTURE0, gstate.getFrameBufRawAddress(), framebuffer, BINDFBCOLOR_SKIP_COPY); @@ -1010,10 +1125,6 @@ void TextureCache::SetTexture(bool force) { // Check for FBO - slow! if (entry->framebuffer) { if (match) { - if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) { - WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); - } - SetTextureFramebuffer(entry, entry->framebuffer); return; } else { @@ -1036,6 +1147,16 @@ void TextureCache::SetTexture(bool force) { rehash = false; } + // Check the clut status. 
+ if (match) { + bool nowUsingClutRender = clutRenderAddress_ != 0xFFFFFFFF && hasClut; + bool wasUsingClutRender = (entry->status & TexCacheEntry::STATUS_INDEXED) != 0; + if (nowUsingClutRender != wasUsingClutRender) { + match = false; + reason = "CLUT render status changed"; + } + } + if (match) { if (entry->lastFrame != gpuStats.numFlips) { u32 diff = gpuStats.numFlips - entry->lastFrame; @@ -1106,10 +1227,6 @@ void TextureCache::SetTexture(bool force) { TexCacheEntry entryNew = {0}; cache[cachekey] = entryNew; - if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) { - WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); - } - entry = &cache[cachekey]; if (g_Config.bTextureBackoffCache) { entry->status = TexCacheEntry::STATUS_HASHING; @@ -1347,6 +1464,16 @@ void TextureCache::BuildTexture(TexCacheEntry *const entry, bool replaceImages) scaleFactor = 1; } + if (clutRenderAddress_ != 0xFFFFFFFF && entry->cluthash != 0) { + entry->status |= TexCacheEntry::STATUS_INDEXED; + dstFmt = GL_UNSIGNED_BYTE; + // Can't scale an indexed texture (this means it uses a CLUT that was rendered.) + scaleFactor = 1; + } else { + // Clear in case it stopped being an indexed texture. + entry->status &= ~TexCacheEntry::STATUS_INDEXED; + } + if (scaleFactor != 1) { if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) { entry->status |= TexCacheEntry::STATUS_TO_SCALE; @@ -1377,6 +1504,9 @@ void TextureCache::BuildTexture(TexCacheEntry *const entry, bool replaceImages) ERROR_LOG(G3D, "Unknown dstfmt %i", (int)actualFmt); break; } + if ((entry->status & TexCacheEntry::STATUS_INDEXED) != 0) { + storageFmt = GL_R8; + } // TODO: This may cause bugs, since it hard-sets the texture w/h, and we might try to reuse it later with a different size. 
glTexStorage2D(GL_TEXTURE_2D, maxLevel + 1, storageFmt, w * scaleFactor, h * scaleFactor); // Make sure we don't use glTexImage2D after glTexStorage2D. @@ -1526,6 +1656,7 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &repla int h = gstate.getTextureHeight(level); bool useUnpack = false; bool useBGRA; + bool useIndexed = (entry.status & TexCacheEntry::STATUS_INDEXED) != 0; u32 *pixelData; // TODO: only do this once @@ -1550,8 +1681,13 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &repla GEPaletteFormat clutformat = gstate.getClutPaletteFormat(); int bufw; - void *finalBuf = DecodeTextureLevelOld(GETextureFormat(entry.format), clutformat, level, dstFmt, scaleFactor, &bufw); - if (finalBuf == NULL) { + void *finalBuf; + if (!useIndexed) { + finalBuf = DecodeTextureLevelOld(GETextureFormat(entry.format), clutformat, level, dstFmt, scaleFactor, &bufw); + } else { + finalBuf = DecodeLevelToIndexed(GETextureFormat(entry.format), level, &bufw); + } + if (finalBuf == nullptr) { return; } @@ -1562,14 +1698,14 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &repla } // Textures are always aligned to 16 bytes bufw, so this could safely be 4 always. - texByteAlign = dstFmt == GL_UNSIGNED_BYTE ? 4 : 2; - useBGRA = UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE; + texByteAlign = useIndexed ? 1 : (dstFmt == GL_UNSIGNED_BYTE ? 4 : 2); + useBGRA = UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE && !useIndexed; pixelData = (u32 *)finalBuf; - if (scaleFactor > 1) + if (scaleFactor > 1 && !useIndexed) scaler.Scale(pixelData, dstFmt, w, h, scaleFactor); - if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) { + if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0 && !useIndexed) { TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, useUnpack ? 
bufw : w, w, h); entry.SetAlphaStatus(alphaStatus, level); } else { @@ -1594,6 +1730,13 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &repla glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; + if (useIndexed) { + components = GL_LUMINANCE; + if (gl_extensions.GLES3 || gl_extensions.VersionGEThan(3, 0, 0)) { + // In the shader, we always access r. GL 3+ allows using GL_RED only. + components = GL_RED; + } + } GLuint components2 = components; if (useBGRA) { diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 94aea747ddb4..d2654594d1bb 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -102,6 +102,7 @@ class TextureCache : public TextureCacheCommon { void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple); bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0) override; void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); + void ApplyIndexedTexture(TexCacheEntry *entry); void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); bool CheckFullHash(TexCacheEntry *const entry, bool &doDelete);