From ba0d04a14275a1deb2e613974569a497591a8e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 22 Nov 2017 12:24:05 +0100 Subject: [PATCH 1/3] Vulkan: Implement depth texturing through depal. --- GPU/Common/DepalettizeShaderCommon.cpp | 23 ++++++++- GPU/Common/FramebufferManagerCommon.cpp | 26 +++++++---- GPU/Common/GPUStateUtils.cpp | 3 -- GPU/Common/TextureCacheCommon.cpp | 57 +++++++++++++---------- GPU/Common/TextureCacheCommon.h | 15 ++++-- GPU/GeDisasm.cpp | 2 +- GPU/Vulkan/DepalettizeShaderVulkan.cpp | 2 + GPU/Vulkan/ShaderManagerVulkan.cpp | 2 +- GPU/Vulkan/TextureCacheVulkan.cpp | 19 ++++++-- GPU/ge_constants.h | 1 + ext/native/thin3d/VulkanQueueRunner.cpp | 27 +++++++---- ext/native/thin3d/VulkanQueueRunner.h | 6 +-- ext/native/thin3d/VulkanRenderManager.cpp | 9 ++-- 13 files changed, 128 insertions(+), 64 deletions(-) diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index a29f4f49e9c2..871fea767cba 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -45,6 +45,14 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang WRITE(p, "layout(set = 0, binding = 1) uniform sampler2D pal;\n"); WRITE(p, "layout(location = 0) in vec2 v_texcoord0;\n"); WRITE(p, "layout(location = 0) out vec4 fragColor0;\n"); + + // Support for depth. + if (pixelFormat == GE_FORMAT_DEPTH16) { + WRITE(p, "layout (push_constant) uniform params {\n"); + WRITE(p, " float z_scale; float z_offset;\n"); + WRITE(p, "};\n"); + } + } else { if (gl_extensions.IsGLES) { WRITE(p, "#version 300 es\n"); @@ -63,9 +71,12 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang WRITE(p, "float4 main(in float2 v_texcoord0 : TEXCOORD0) : SV_Target {\n"); WRITE(p, " float4 color = tex.Sample(texSamp, v_texcoord0);\n"); } else { - // TODO: Add support for integer textures. Though it hardly matters. 
WRITE(p, "void main() {\n"); - WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n"); + if (pixelFormat == GE_FORMAT_DEPTH16) { + WRITE(p, " float color = texture(tex, v_texcoord0).r;\n"); + } else { + WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n"); + } } int mask = gstate.getClutIndexMask(); @@ -105,6 +116,11 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n"); WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n"); break; + case GE_FORMAT_DEPTH16: + // Remap depth buffer. + WRITE(p, " float depth = (color - z_offset) * z_scale;\n"); + WRITE(p, " int index = int(clamp(depth, 0.0, 65535.0));\n"); + break; default: break; } @@ -225,6 +241,9 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa formatOK = false; } break; + case GE_FORMAT_DEPTH16: + sprintf(lookupMethod, "index.r * (1.0 / 256.0)"); + break; default: break; } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 556e2519ee74..06d6b083284c 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -374,7 +374,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); NotifyRenderFramebufferCreated(vfb); - INFO_LOG(FRAMEBUF, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); + INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %i x %i x %i", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, vfb->format); vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed_ = gpuStats.numFlips; @@ -445,7 +445,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { - 
textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED); + // Notify the texture cache of both the color and depth buffers. + textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED, NOTIFY_FB_COLOR); + textureCache_->NotifyFramebuffer(v->z_address, v, NOTIFY_FB_DESTROYED, NOTIFY_FB_DEPTH); if (v->fbo) { v->fbo->Release(); v->fbo = nullptr; @@ -472,7 +474,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer DownloadFramebufferOnSwitch(currentRenderVfb_); } - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED); + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED, NOTIFY_FB_COLOR); + textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_CREATED, NOTIFY_FB_DEPTH); // Ugly... if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { @@ -486,7 +489,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) { if (vfbFormatChanged) { - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR); + textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH); if (vfb->drawnFormat != vfb->format) { ReformatFramebufferFrom(vfb, vfb->drawnFormat); } @@ -552,7 +556,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe } else { if (vfb->fbo) { // This should only happen very briefly when toggling useBufferedRendering_. 
- textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED); + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED, NOTIFY_FB_COLOR); + textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_DESTROYED, NOTIFY_FB_DEPTH); vfb->fbo->Release(); vfb->fbo = nullptr; } @@ -564,7 +569,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; } } - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR); + textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH); // ugly... is all this needed? if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { @@ -1164,9 +1170,9 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, if (dstBuffer && srcBuffer && !isMemset) { if (srcBuffer == dstBuffer) { - WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); + WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size); } else { - WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); + WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size); // Just do the blit! 
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); SetColorUpdated(dstBuffer, skipDrawReason); @@ -1177,7 +1183,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, if (isMemset) { gpuStats.numClears++; } - WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); + WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size); FlushBeforeCopy(); const u8 *srcBase = Memory::GetPointerUnchecked(src); DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); @@ -1330,9 +1336,9 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd vfb->drawnFormat = GE_FORMAT_8888; vfb->usageFlags = FB_USAGE_RENDERTARGET; SetColorUpdated(vfb, 0); - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED); char name[64]; snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address); + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED, NOTIFY_FB_COLOR); vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, (Draw::FBColorDepth)vfb->colorDepth, name }); vfbs_.push_back(vfb); diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 8d8ebba59ba7..2f629b1b8926 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -633,9 +633,6 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo float vpWidth = fabsf(gstate_c.vpWidth); float vpHeight = fabsf(gstate_c.vpHeight); - // We used to apply the viewport here via glstate, but there are limits which vary by driver. - // This may mean some games won't work, or at least won't work at higher render resolutions. - // So we apply it in the shader instead. 
float left = renderX + vpX0; float top = renderY + vpY0; float right = left + vpWidth; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 258776171950..2789d1ff29e3 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -389,7 +389,7 @@ void TextureCacheCommon::SetTexture(bool force) { return; } else { // Make sure we re-evaluate framebuffers. - DetachFramebuffer(entry, texaddr, entry->framebuffer); + DetachFramebuffer(entry, texaddr, entry->framebuffer, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR); reason = "detached framebuf"; match = false; } @@ -530,7 +530,7 @@ void TextureCacheCommon::SetTexture(bool force) { entry->framebuffer = nullptr; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - AttachFramebuffer(entry, framebuffer->fb_address, framebuffer); + AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, 0, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR); } // If we ended up with a framebuffer, attach it - no texture decoding needed. @@ -640,12 +640,12 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c entry->numFrames = 0; } -void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) { +void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel) { // Mask to ignore the Z memory mirrors if the address is in VRAM. // These checks are mainly to reduce scanning all textures. const u32 mirrorMask = 0x00600000; const u32 addr = Memory::IsVRAMAddress(address) ? (address & ~mirrorMask) : address; - const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2; + const u32 bpp = (framebuffer->format == GE_FORMAT_8888 && channel == NOTIFY_FB_COLOR) ? 
4 : 2; const u64 cacheKey = (u64)addr << 32; // If it has a clut, those are the low 32 bits, so it'll be inside this range. // Also, if it's a subsample of the buffer, it'll also be within the FBO. @@ -663,14 +663,14 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram fbCache_.push_back(framebuffer); } for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) { - AttachFramebuffer(it->second.get(), addr, framebuffer); + AttachFramebuffer(it->second.get(), addr, framebuffer, 0, channel); } // Let's assume anything in mirrors is fair game to check. for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) { const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL; // Let's still make sure it's in the cache range. if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) { - AttachFramebuffer(it->second.get(), addr, framebuffer); + AttachFramebuffer(it->second.get(), addr, framebuffer, 0, channel); } } break; @@ -685,12 +685,13 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram // We might erase, so move to the next one already (which won't become invalid.) 
++it; - DetachFramebuffer(cache_[cachekey].get(), addr, framebuffer); + DetachFramebuffer(cache_[cachekey].get(), addr, framebuffer, channel); } break; } } -void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo) { + +void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel) { const u64 cachekey = entry->CacheKey(); const bool hasInvalidFramebuffer = entry->framebuffer == nullptr || entry->invalidHint == -1; const bool hasOlderFramebuffer = entry->framebuffer != nullptr && entry->framebuffer->last_frame_render < framebuffer->last_frame_render; @@ -713,6 +714,9 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra entry->invalidHint = 0; entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; entry->maxLevel = 0; + if (channel == NOTIFY_FB_DEPTH) { + entry->status |= TexCacheEntry::STATUS_DEPTH; + } fbTexInfo_[cachekey] = fbInfo; framebuffer->last_frame_attached = gpuStats.numFlips; GPUDebug::NotifyTextureAttachment(entry->addr); @@ -721,7 +725,7 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra } } -void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo) { +void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel) { const u64 cachekey = entry->CacheKey(); if (entry->framebuffer == nullptr || entry->framebuffer == framebuffer) { @@ -733,12 +737,14 @@ void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualF entry->invalidHint = -1; entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; entry->maxLevel = 0; + if (channel == NOTIFY_FB_DEPTH) + entry->status |= 
TexCacheEntry::STATUS_DEPTH; fbTexInfo_[cachekey] = fbInfo; GPUDebug::NotifyTextureAttachment(entry->addr); } } -void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) { +void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel) { if (entry->framebuffer == framebuffer) { const u64 cachekey = entry->CacheKey(); cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); @@ -751,20 +757,20 @@ void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, Vi } } -bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset) { +bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) { static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32; AttachedFramebufferInfo fbInfo = { 0 }; const u32 mirrorMask = 0x00600000; u32 addr = address & 0x3FFFFFFF; - u32 texaddr = entry->addr + texaddrOffset; + u32 texaddr = (entry->addr + texaddrOffset) & ~mirrorMask; if (entry->addr & 0x04000000) { addr &= ~mirrorMask; texaddr &= ~mirrorMask; } const bool noOffset = texaddr == addr; - const bool exactMatch = noOffset && entry->format < 4; + const bool exactMatch = noOffset && entry->format < 4 && channel == NOTIFY_FB_COLOR; const u32 w = 1 << ((entry->dim >> 0) & 0xf); const u32 h = 1 << ((entry->dim >> 8) & 0xf); // 512 on a 272 framebuffer is sane, so let's be lenient. @@ -781,10 +787,10 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi // Let's avoid using it when we know the format is wrong. May be a video/etc. updating memory. // However, some games use a different format to clear the buffer. 
if (framebuffer->last_frame_attached + 1 < gpuStats.numFlips) { - DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); } } else { - AttachFramebufferValid(entry, framebuffer, fbInfo); + AttachFramebufferValid(entry, framebuffer, fbInfo, channel); return true; } } else { @@ -792,6 +798,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi if (!framebufferManager_->UseBufferedRendering()) return false; + // Check works for D16 too (???) const bool matchingClutFormat = (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); @@ -809,7 +816,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry->bufw, framebuffer->fb_stride); } else { // Assume any render-to-tex with different bufw + offset is a render from ram. - DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); return false; } } @@ -817,13 +824,13 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi // Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.) if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) { // This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride. - DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); return false; } if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) { // Can't be inside the framebuffer then, ram. Detach to be safe. 
- DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); return false; } @@ -831,7 +838,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi // TODO: Maybe we can reduce this check and find a better way above 0x04110000? if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) { WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height); - DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); return false; } @@ -841,13 +848,13 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi if (!noOffset) { WARN_LOG_REPORT_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset); } - AttachFramebufferValid(entry, framebuffer, fbInfo); + AttachFramebufferValid(entry, framebuffer, fbInfo, channel); entry->status |= TexCacheEntry::STATUS_DEPALETTIZE; // We'll validate it compiles later. 
return true; } else if (IsClutFormat((GETextureFormat)(entry->format)) || IsDXTFormat((GETextureFormat)(entry->format))) { WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format)); - DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); return false; } @@ -856,18 +863,18 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi if (framebuffer->format != entry->format) { WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address); - AttachFramebufferValid(entry, framebuffer, fbInfo); + AttachFramebufferValid(entry, framebuffer, fbInfo, channel); return true; } else { WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset); // If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect. 
- AttachFramebufferInvalid(entry, framebuffer, fbInfo); + AttachFramebufferInvalid(entry, framebuffer, fbInfo, channel); return true; } } else { WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address); - DetachFramebuffer(entry, address, framebuffer); + DetachFramebuffer(entry, address, framebuffer, channel); return false; } } @@ -944,7 +951,7 @@ bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) { bool success = false; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - if (AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, texaddrOffset)) { + if (AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, texaddrOffset, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR)) { success = true; } } diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index eca1e13836f1..4962856342fd 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -41,6 +41,11 @@ enum FramebufferNotification { NOTIFY_FB_DESTROYED, }; +enum FramebufferNotificationChannel { + NOTIFY_FB_COLOR = 0, + NOTIFY_FB_DEPTH = 1, +}; + // Changes more frequent than this will be considered "frequent" and prevent texture scaling. #define TEXCACHE_FRAME_CHANGE_FREQUENT 6 // Note: only used when hash backoff is disabled. @@ -200,7 +205,7 @@ class TextureCacheCommon { virtual void Clear(bool delete_them); // FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to. 
- void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg); + void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel); virtual void NotifyConfigChanged(); void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); @@ -251,10 +256,10 @@ class TextureCacheCommon { void UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key); // Used by D3D11 and Vulkan. void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode); - bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0); - void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo); - void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo); - void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); + bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel); + void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel); + void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel); + void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel); void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); diff --git a/GPU/GeDisasm.cpp b/GPU/GeDisasm.cpp index 4faaca6a43a6..fda2d778900f 100644 --- a/GPU/GeDisasm.cpp +++ b/GPU/GeDisasm.cpp @@ -429,7 +429,7 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer, int bufsize) { case GE_CMD_LOADCLUT: // This could be 
used to "dirty" textures with clut. if (data) - snprintf(buffer, bufsize, "Clut load: %06x", data); + snprintf(buffer, bufsize, "Clut load: %08x, %d bytes, %06x", gstate.getClutAddress(), (data & 0x3F) << 5, data & 0xFFFFC0); else snprintf(buffer, bufsize, "Clut load"); break; diff --git a/GPU/Vulkan/DepalettizeShaderVulkan.cpp b/GPU/Vulkan/DepalettizeShaderVulkan.cpp index 31041b010b1f..e1cc53fd098e 100644 --- a/GPU/Vulkan/DepalettizeShaderVulkan.cpp +++ b/GPU/Vulkan/DepalettizeShaderVulkan.cpp @@ -103,6 +103,7 @@ DepalShaderVulkan *DepalShaderCacheVulkan::GetDepalettizeShader(uint32_t clutMod std::string error; VkShaderModule fshader = CompileShaderModule(vulkan_, VK_SHADER_STAGE_FRAGMENT_BIT, buffer, &error); if (fshader == VK_NULL_HANDLE) { + INFO_LOG(G3D, "Source:\n%s\n\n", buffer); Crash(); delete[] buffer; return nullptr; @@ -111,6 +112,7 @@ DepalShaderVulkan *DepalShaderCacheVulkan::GetDepalettizeShader(uint32_t clutMod VkPipeline pipeline = vulkan2D_->GetPipeline(rp, vshader_, fshader); // Can delete the shader module now that the pipeline has been created. // Maybe don't even need to queue it.. + // "true" keeps the pipeline itself alive, forgetting the fshader. vulkan2D_->PurgeFragmentShader(fshader, true); vulkan_->Delete().QueueDeleteShaderModule(fshader); diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 515adb158db8..4cd2c98363f9 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -361,7 +361,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM // instantaneous. 
#define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 17 +#define CACHE_VERSION 18 struct VulkanCacheHeader { uint32_t magic; uint32_t version; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index d15f68988aa9..1dc8630082a3 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -558,8 +558,9 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr DepalShaderVulkan *depalShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; - bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer; bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS); + bool depth = (entry->status & TexCacheEntry::STATUS_DEPTH) != 0; + bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth; if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { if (useShaderDepal) { @@ -584,7 +585,7 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); return; } else { - depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat); + depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); drawEngine_->SetDepalTexture(VK_NULL_HANDLE); gstate_c.SetUseShaderDepal(false); } @@ -652,12 +653,24 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr VkBuffer pushed; uint32_t offset = push_->PushAligned(verts, sizeof(verts), 4, &pushed); - draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, Draw::FB_COLOR_BIT, 0); + draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? 
Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0); VkImageView fbo = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE0_IMAGEVIEW); VkDescriptorSet descSet = vulkan2D_->GetDescriptorSet(fbo, samplerNearest_, clutTexture->GetImageView(), samplerNearest_); VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); renderManager->BindPipeline(depalShader->pipeline); + + if (depth) { + DepthScaleFactors scaleFactors = GetDepthScaleFactors(); + struct DepthPushConstants { + float z_scale; + float z_offset; + }; + DepthPushConstants push; + push.z_scale = scaleFactors.scale; + push.z_offset = scaleFactors.offset; + renderManager->PushConstants(vulkan2D_->GetPipelineLayout(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(DepthPushConstants), &push); + } renderManager->SetScissor(VkRect2D{ {0, 0}, { framebuffer->renderWidth, framebuffer->renderHeight} }); renderManager->SetViewport(VkViewport{ 0.f, 0.f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.f, 1.f }); renderManager->Draw(vulkan2D_->GetPipelineLayout(), descSet, 0, nullptr, pushed, offset, 4); diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index fc7c2ca982a6..5facb4ad0523 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -282,6 +282,7 @@ enum GEBufferFormat GE_FORMAT_5551 = 1, GE_FORMAT_4444 = 2, GE_FORMAT_8888 = 3, + GE_FORMAT_DEPTH16 = 4, // Virtual format, just used to pass into Depal GE_FORMAT_INVALID = 0xFF, }; diff --git a/ext/native/thin3d/VulkanQueueRunner.cpp b/ext/native/thin3d/VulkanQueueRunner.cpp index f7825947db3e..0ede7acccc2d 100644 --- a/ext/native/thin3d/VulkanQueueRunner.cpp +++ b/ext/native/thin3d/VulkanQueueRunner.cpp @@ -2,6 +2,7 @@ #include "base/timeutil.h" #include "DataFormat.h" +#include "Common/Log.h" #include "VulkanQueueRunner.h" #include "VulkanRenderManager.h" @@ -12,7 +13,7 @@ void VulkanQueueRunner::CreateDeviceObjects() { 
InitBackbufferRenderPass(); framebufferRenderPass_ = GetRenderPass(VKRRenderPassAction::CLEAR, VKRRenderPassAction::CLEAR, VKRRenderPassAction::CLEAR, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); #if 0 // Just to check whether it makes sense to split some of these. drawidx is way bigger than the others... @@ -238,7 +239,7 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) { attachments[1].finalLayout = VK_IMAGE_LAYOUT_GENERAL; #else attachments[1].initialLayout = key.prevDepthLayout; - attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[1].finalLayout = key.finalDepthStencilLayout; #endif attachments[1].flags = 0; @@ -387,10 +388,13 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &st for (int j = 0; j < (int)steps.size(); j++) { if (steps[j]->stepType == VKRStepType::RENDER && - steps[j]->render.framebuffer && - steps[j]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) { - // Just leave it at color_optimal. - steps[j]->render.finalColorLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + steps[j]->render.framebuffer) { + if (steps[j]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) { + steps[j]->render.finalColorLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } + if (steps[j]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) { + steps[j]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } } } @@ -1043,6 +1047,8 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c return; } + // Write-after-write hazards. Fixed flicker in God of War on ARM (before we added another fix). 
+ // TODO: depth too if (step.render.framebuffer && step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { VkImageMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -1213,6 +1219,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c // The renderpass handles the layout transition. if (fb) { fb->color.layout = step.render.finalColorLayout; + fb->depth.layout = step.render.finalDepthStencilLayout; } } @@ -1225,6 +1232,7 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step int h; if (step.render.framebuffer) { _dbg_assert_(step.render.finalColorLayout != VK_IMAGE_LAYOUT_UNDEFINED); + _dbg_assert_(step.render.finalDepthStencilLayout != VK_IMAGE_LAYOUT_UNDEFINED); VKRFramebuffer *fb = step.render.framebuffer; framebuf = fb->framebuf; @@ -1249,7 +1257,9 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step renderPass = GetRenderPass( step.render.color, step.render.depth, step.render.stencil, - fb->color.layout, fb->depth.layout, step.render.finalColorLayout); + fb->color.layout, fb->depth.layout, + step.render.finalColorLayout, + step.render.finalDepthStencilLayout); // We now do any layout pretransitions as part of the render pass. fb->color.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; @@ -1685,6 +1695,7 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataForm } else if (srcFormat == Draw::DataFormat::B8G8R8A8_UNORM) { ConvertFromBGRA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat); } else if (srcFormat == destFormat) { + // Can just memcpy when it matches no matter the format! uint8_t *dst = pixels; const uint8_t *src = (const uint8_t *)mappedData; for (int y = 0; y < height; ++y) { @@ -1697,7 +1708,7 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataForm } else { // TODO: Maybe a depth conversion or something? 
ELOG("CopyReadbackBuffer: Unknown format"); - assert(false); + _assert_msg_(false, "CopyReadbackBuffer: Unknown src format %d", (int)srcFormat); } vkUnmapMemory(vulkan_->GetDevice(), readbackMemory_); } diff --git a/ext/native/thin3d/VulkanQueueRunner.h b/ext/native/thin3d/VulkanQueueRunner.h index 90210bf40df0..a89951af3beb 100644 --- a/ext/native/thin3d/VulkanQueueRunner.h +++ b/ext/native/thin3d/VulkanQueueRunner.h @@ -212,14 +212,14 @@ class VulkanQueueRunner { VkImageLayout prevColorLayout; VkImageLayout prevDepthLayout; VkImageLayout finalColorLayout; - // TODO: Also pre-transition depth, for copies etc. + VkImageLayout finalDepthStencilLayout; }; // Only call this from the render thread! Also ok during initialization (LoadCache). VkRenderPass GetRenderPass( VKRRenderPassAction colorLoadAction, VKRRenderPassAction depthLoadAction, VKRRenderPassAction stencilLoadAction, - VkImageLayout prevColorLayout, VkImageLayout prevDepthLayout, VkImageLayout finalColorLayout) { - RPKey key{ colorLoadAction, depthLoadAction, stencilLoadAction, prevColorLayout, prevDepthLayout, finalColorLayout }; + VkImageLayout prevColorLayout, VkImageLayout prevDepthLayout, VkImageLayout finalColorLayout, VkImageLayout finalDepthStencilLayout) { + RPKey key{ colorLoadAction, depthLoadAction, stencilLoadAction, prevColorLayout, prevDepthLayout, finalColorLayout, finalDepthStencilLayout }; return GetRenderPass(key); } diff --git a/ext/native/thin3d/VulkanRenderManager.cpp b/ext/native/thin3d/VulkanRenderManager.cpp index dcc5f6ccf766..0a48e277aa87 100644 --- a/ext/native/thin3d/VulkanRenderManager.cpp +++ b/ext/native/thin3d/VulkanRenderManager.cpp @@ -63,6 +63,8 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0); _dbg_assert_(res == VK_SUCCESS); + // Note that we don't view or at + VkImageAspectFlags viewAspects = color ? 
VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; VkImageAspectFlags aspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; @@ -70,7 +72,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int ivci.format = ici.format; ivci.image = img.image; ivci.viewType = VK_IMAGE_VIEW_TYPE_2D; - ivci.subresourceRange.aspectMask = aspects; + ivci.subresourceRange.aspectMask = viewAspects; ivci.subresourceRange.layerCount = 1; ivci.subresourceRange.levelCount = 1; res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.imageView); @@ -548,6 +550,7 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR step->render.numDraws = 0; step->render.numReads = 0; step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED; + step->render.finalDepthStencilLayout = !fb ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED; step->tag = tag; steps_.push_back(step); @@ -1016,10 +1019,10 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in curRenderStep_->preTransitions.back().fb == fb && curRenderStep_->preTransitions.back().targetLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { // We're done. - return fb->color.imageView; + return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.imageView; } else { curRenderStep_->preTransitions.push_back({ aspectBit, fb, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }); - return fb->color.imageView; + return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? 
fb->color.imageView : fb->depth.imageView; } } From 86355779d7b578d138afe40620f2c42713480d15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 9 Aug 2020 20:49:08 +0200 Subject: [PATCH 2/3] Remove partial comment --- ext/native/thin3d/VulkanRenderManager.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ext/native/thin3d/VulkanRenderManager.cpp b/ext/native/thin3d/VulkanRenderManager.cpp index 0a48e277aa87..385d8a580e8c 100644 --- a/ext/native/thin3d/VulkanRenderManager.cpp +++ b/ext/native/thin3d/VulkanRenderManager.cpp @@ -63,7 +63,6 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0); _dbg_assert_(res == VK_SUCCESS); - // Note that we don't view or at VkImageAspectFlags viewAspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; VkImageAspectFlags aspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); From 0aa2ceb372f01883d0a4eb82dc3476453935d994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 10 Aug 2020 09:16:28 +0200 Subject: [PATCH 3/3] Address feedback --- GPU/Common/DepalettizeShaderCommon.cpp | 11 ++++- GPU/Common/FramebufferManagerCommon.cpp | 6 +-- GPU/Common/TextureCacheCommon.cpp | 7 ++-- ext/native/thin3d/VulkanQueueRunner.cpp | 54 +++++++++++++++++-------- 4 files changed, 55 insertions(+), 23 deletions(-) diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 871fea767cba..3e9cd383abc9 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -21,6 +21,7 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderCommon.h" +#include "Common/StringUtils.h" #include "Common/Log.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" @@ -242,8 +243,16 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa } 
break; case GE_FORMAT_DEPTH16: - sprintf(lookupMethod, "index.r * (1.0 / 256.0)"); + { + // TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway. + if ((mask & (mask + 1)) == 0 && shift < 16) { + index_multiplier = 1.0f / (float)(1 << shift); + truncate_cpy(lookupMethod, "index.r"); + } else { + formatOK = false; + } break; + } default: break; } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 06d6b083284c..3a8e443ba5ed 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1170,9 +1170,9 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, if (dstBuffer && srcBuffer && !isMemset) { if (srcBuffer == dstBuffer) { - WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size); + WARN_LOG_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size); } else { - WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size); + WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size); // Just do the blit! 
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); SetColorUpdated(dstBuffer, skipDrawReason); @@ -1195,7 +1195,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); FlushBeforeCopy(); if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { - WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); + WARN_LOG_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated && !PSP_CoreParameter().compat.flags().DisableReadbacks) { ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH); srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 2789d1ff29e3..14f49ffa904d 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -764,7 +764,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi const u32 mirrorMask = 0x00600000; u32 addr = address & 0x3FFFFFFF; - u32 texaddr = (entry->addr + texaddrOffset) & ~mirrorMask; + u32 texaddr = entry->addr + texaddrOffset; if (entry->addr & 0x04000000) { addr &= ~mirrorMask; texaddr &= ~mirrorMask; @@ -800,8 +800,9 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi // Check works for D16 too (???) 
const bool matchingClutFormat = - (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || - (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); + (channel != NOTIFY_FB_COLOR && entry->format == GE_TFMT_CLUT16) || + (channel == NOTIFY_FB_COLOR && framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || + (channel == NOTIFY_FB_COLOR && framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); const bool clutFormat = IsClutFormat((GETextureFormat)(entry->format)); diff --git a/ext/native/thin3d/VulkanQueueRunner.cpp b/ext/native/thin3d/VulkanQueueRunner.cpp index 0ede7acccc2d..0130f901db6a 100644 --- a/ext/native/thin3d/VulkanQueueRunner.cpp +++ b/ext/native/thin3d/VulkanQueueRunner.cpp @@ -1047,22 +1047,44 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c return; } - // Write-after-write hazards. Fixed flicker in God of War on ARM (before we added another fix). - // TODO: depth too - if (step.render.framebuffer && step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { - VkImageMemoryBarrier barrier{}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier.subresourceRange.layerCount = 1; - barrier.subresourceRange.levelCount = 1; - barrier.image = step.render.framebuffer->color.image; - barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); 
+	// Write-after-write hazards. Fixed flicker in God of War on ARM (before we added another fix that removed these).
+	if (step.render.framebuffer) {
+		int n = 0;  // Number of entries of barriers[] filled in below (0 to 2).
+		VkPipelineStageFlags stage = 0;  // Union of the stages to synchronize; stays 0 when no barrier is needed.
+		VkImageMemoryBarrier barriers[2]{};
+		if (step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+			barriers[n].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+			barriers[n].oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+			barriers[n].newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;  // Same layout on both sides: no transition, only ordering.
+			barriers[n].subresourceRange.layerCount = 1;
+			barriers[n].subresourceRange.levelCount = 1;
+			barriers[n].image = step.render.framebuffer->color.image;
+			barriers[n].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+			barriers[n].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+			barriers[n].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+			barriers[n].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[n].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+			n++;
+		}
+		if (step.render.framebuffer->depth.layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {  // The layout render passes leave depth in by default.
+			barriers[n].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+			barriers[n].oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+			barriers[n].newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;  // Must equal oldLayout: this is not a layout transition.
+			barriers[n].subresourceRange.layerCount = 1;
+			barriers[n].subresourceRange.levelCount = 1;
+			barriers[n].image = step.render.framebuffer->depth.image;
+			barriers[n].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+			barriers[n].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+			barriers[n].subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;  // Depth images carry both aspects; COLOR_BIT is invalid here.
+			barriers[n].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[n].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			stage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+			n++;
+		}
+		if (stage) {
+			vkCmdPipelineBarrier(cmd, stage, stage, 0, 0, nullptr, 0, nullptr, n, barriers);
+		}
 	}
 
 	// This is supposed to bind a vulkan render pass to the command buffer.