Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove constraint that virtual framebuffers have to represent VRAM. #11553

Merged
merged 4 commits into from
Nov 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 28 additions & 30 deletions GPU/Common/FramebufferCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,24 +172,20 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G
}

VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) {
addr &= 0x3FFFFFFF;
VirtualFramebuffer *match = nullptr;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
if (MaskedEqual(v->fb_address, addr)) {
if (v->fb_address == addr) {
// Could check w too but whatever
if (match == nullptr || match->last_frame_render < v->last_frame_render) {
match = v;
}
}
}

return match;
}

// Returns true when addr1 and addr2 agree in every bit selected by the
// 0x03FFFFFF mask; the bits outside that mask are ignored by the comparison.
bool FramebufferManagerCommon::MaskedEqual(u32 addr1, u32 addr2) {
	const u32 compareMask = 0x03FFFFFF;
	// XOR leaves a 1 wherever the two addresses differ; keep only the compared bits.
	const u32 differingBits = (addr1 ^ addr2) & compareMask;
	return differingBits == 0;
}

// Computes the byte size of a virtual framebuffer as
// fb_stride * height * bytes-per-pixel. GE_FORMAT_8888 counts as 4 bytes per
// pixel; every other format counts as 2.
u32 FramebufferManagerCommon::FramebufferByteSize(const VirtualFramebuffer *vfb) const {
	const int bytesPerPixel = (vfb->format == GE_FORMAT_8888) ? 4 : 2;
	return vfb->fb_stride * vfb->height * bytesPerPixel;
}
Expand Down Expand Up @@ -253,11 +249,10 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma

if (viewport_width != region_width) {
// The majority of the time, these are equal. If not, let's check what we know.
const u32 fb_normalized_address = fb_address | 0x44000000;
u32 nearest_address = 0xFFFFFFFF;
for (size_t i = 0; i < vfbs_.size(); ++i) {
const u32 other_address = vfbs_[i]->fb_address | 0x44000000;
if (other_address > fb_normalized_address && other_address < nearest_address) {
const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF;
if (other_address > fb_address && other_address < nearest_address) {
nearest_address = other_address;
}
}
Expand All @@ -266,7 +261,7 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma
// This catches some cases where we can know this.
// Hmm. The problem is that we could only catch it for the first of two buffers...
const u32 bpp = fb_format == GE_FORMAT_8888 ? 4 : 2;
int avail_height = (nearest_address - fb_normalized_address) / (fb_stride * bpp);
int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
if (avail_height < drawing_height && avail_height == region_height) {
drawing_width = std::min(region_width, fb_stride);
drawing_height = avail_height;
Expand All @@ -282,11 +277,10 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma
}

void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
params->fb_addr = gstate.getFrameBufAddress();
params->fb_address = gstate.getFrameBufRawAddress();
params->fb_address = (gstate.getFrameBufRawAddress() & 0x3FFFFFFF) | 0x04000000; // GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
params->fb_stride = gstate.FrameBufStride();

params->z_address = gstate.getDepthBufRawAddress();
params->z_address = (gstate.getDepthBufRawAddress() & 0x3FFFFFFF) | 0x04000000;
params->z_stride = gstate.DepthBufStride();

params->fmt = gstate.FrameBufFormat();
Expand Down Expand Up @@ -440,9 +434,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
SetColorUpdated(vfb, skipDrawReason);

u32 byteSize = FramebufferByteSize(vfb);
u32 fb_address_mem = (params.fb_address & 0x3FFFFFFF) | 0x04000000;
if (Memory::IsVRAMAddress(fb_address_mem) && fb_address_mem + byteSize > framebufRangeEnd_) {
framebufRangeEnd_ = fb_address_mem + byteSize;
// FB heuristics always produce an address in VRAM (this is during rendering) so we don't need to poke in the 0x04000000 flag here.
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) {
framebufRangeEnd_ = params.fb_address + byteSize;
}

ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
Expand All @@ -456,8 +450,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
currentRenderVfb_ = vfb;

if (useBufferedRendering_ && !g_Config.bDisableSlowFramebufEffects) {
gpu->PerformMemoryUpload(fb_address_mem, byteSize);
NotifyStencilUpload(fb_address_mem, byteSize, true);
gpu->PerformMemoryUpload(params.fb_address, byteSize);
NotifyStencilUpload(params.fb_address, byteSize, true);
// TODO: Is it worth trying to upload the depth buffer?
}

Expand Down Expand Up @@ -680,7 +674,8 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
}

void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
addr &= ~0x40000000;
// Take off the uncached flag from the address. Not to be confused with the start of VRAM.
addr &= 0x3FFFFFFF;
// TODO: Could go through all FBOs, but probably not important?
// TODO: Could also check for inner changes, but video is most important.
bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
Expand All @@ -691,7 +686,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {

for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
if (vfb->fb_address == addr) {
FlushBeforeCopy();

if (useBufferedRendering_ && vfb->fbo) {
Expand All @@ -700,7 +695,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
fmt = displayFormat_;
}
DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height);
DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is inside a MaskedEqual() check which is probably dangerous and needs to be changed. I think MaskedEqual needs removing entirely, or it needs to check the VRAM bit and not mask it out (probably better to just ensure we never put anything in with kernel/cached bits.)

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing MaskedEqual entirely. I think this should be fine as-is now.

SetColorUpdated(vfb, gstate_c.skipDrawReason);
} else {
INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
Expand Down Expand Up @@ -866,11 +861,13 @@ void FramebufferManagerCommon::CopyDisplayToOutput() {

VirtualFramebuffer *vfb = GetVFBAt(displayFramebufPtr_);
if (!vfb) {
// Let's search for a framebuf within this range.
const u32 addr = (displayFramebufPtr_ & 0x03FFFFFF) | 0x04000000;
// Let's search for a framebuf within this range. Note that we also look for
// "framebuffers" sitting in RAM so we only take off the kernel and uncached bits of the address
// when comparing.
const u32 addr = displayFramebufPtr_ & 0x3FFFFFFF;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
const u32 v_addr = (v->fb_address & 0x03FFFFFF) | 0x04000000;
const u32 v_addr = v->fb_address & 0x3FFFFFFF;
const u32 v_size = FramebufferByteSize(v);
if (addr >= v_addr && addr < v_addr + v_size) {
const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2;
Expand Down Expand Up @@ -1241,7 +1238,8 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
continue;
}

const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF;
// We only remove the kernel and uncached bits when comparing.
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_size = FramebufferByteSize(vfb);
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
Expand Down Expand Up @@ -1352,7 +1350,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst

for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF;
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_size = FramebufferByteSize(vfb);
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
Expand Down Expand Up @@ -1915,7 +1913,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB

if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, format);
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, format);
return true;
}

Expand Down Expand Up @@ -1969,7 +1967,7 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32

if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
buffer = GPUDebugBuffer(Memory::GetPointer(z_address | 0x04000000), z_stride, 512, GPU_DBG_FORMAT_16BIT);
buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
return true;
}

Expand Down Expand Up @@ -2005,7 +2003,7 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G
if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
// TODO: Actually get the stencil.
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, GPU_DBG_FORMAT_8888);
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888);
return true;
}

Expand Down Expand Up @@ -2057,7 +2055,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int
return;
}

const u32 fb_address = (0x04000000) | vfb->fb_address;
const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;

Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format);
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
Expand Down
6 changes: 2 additions & 4 deletions GPU/Common/FramebufferCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ struct VirtualFramebuffer {
};

struct FramebufferHeuristicParams {
u32 fb_addr;
u32 fb_address;
int fb_stride;
u32 z_address;
Expand Down Expand Up @@ -243,10 +242,10 @@ class FramebufferManagerCommon {
size_t NumVFBs() const { return vfbs_.size(); }

u32 PrevDisplayFramebufAddr() {
return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0;
return prevDisplayFramebuf_ ? prevDisplayFramebuf_->fb_address : 0;
}
u32 DisplayFramebufAddr() {
return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0;
return displayFramebuf_ ? displayFramebuf_->fb_address : 0;
}

u32 DisplayFramebufStride() {
Expand Down Expand Up @@ -332,7 +331,6 @@ class FramebufferManagerCommon {

void EstimateDrawingSize(u32 fb_address, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int fb_stride, int &drawing_width, int &drawing_height);
u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const;
static bool MaskedEqual(u32 addr1, u32 addr2);

void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb);
void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged);
Expand Down
18 changes: 11 additions & 7 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,10 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c
}

void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
// Mask to ignore the Z memory mirrors if the address is in VRAM.
// These checks are mainly to reduce scanning all textures.
const u32 addr = (address | 0x04000000) & 0x3F9FFFFF;
const u32 mirrorMask = 0x00600000;
const u32 addr = Memory::IsVRAMAddress(address) ? (address & ~mirrorMask) : address;
const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
const u64 cacheKey = (u64)addr << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
Expand Down Expand Up @@ -749,10 +750,13 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi

AttachedFramebufferInfo fbInfo = { 0 };

const u64 mirrorMask = 0x00600000;
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
const u32 addr = (address | 0x04000000) & 0x3FFFFFFF & ~mirrorMask;
const u32 texaddr = ((entry->addr + texaddrOffset) & ~mirrorMask);
const u32 mirrorMask = 0x00600000;
u32 addr = address & 0x3FFFFFFF;
u32 texaddr = entry->addr + texaddrOffset;
if (entry->addr & 0x04000000) {
addr &= ~mirrorMask;
texaddr &= ~mirrorMask;
}
const bool noOffset = texaddr == addr;
const bool exactMatch = noOffset && entry->format < 4;
const u32 w = 1 << ((entry->dim >> 0) & 0xf);
Expand Down Expand Up @@ -990,7 +994,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
clutRenderOffset_ = MAX_CLUT_OFFSET;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
const u32 fb_address = framebuffer->fb_address | 0x04000000;
const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
u32 offset = clutFramebufAddr - fb_address;

Expand Down
2 changes: 1 addition & 1 deletion GPU/D3D11/FramebufferManagerD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ void FramebufferManagerD3D11::PackDepthbuffer(VirtualFramebuffer *vfb, int x, in
return;
}

const u32 z_address = (0x04000000) | vfb->z_address;
const u32 z_address = vfb->z_address;
// TODO
}

Expand Down
3 changes: 2 additions & 1 deletion GPU/D3D11/StencilBufferD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,15 @@ VS_OUT main(VS_IN In) {

// TODO : If SV_StencilRef is available (D3D11.3) then this can be done in a single pass.
bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
addr &= 0x3FFFFFFF;
if (!MayIntersectFramebuffer(addr)) {
return false;
}

VirtualFramebuffer *dstBuffer = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
if (vfb->fb_address == addr) {
dstBuffer = vfb;
}
}
Expand Down
10 changes: 5 additions & 5 deletions GPU/Directx9/FramebufferDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
return;
}

const u32 fb_address = (0x04000000) | vfb->fb_address;
const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;

// We always need to convert from the framebuffer native format.
Expand Down Expand Up @@ -627,7 +627,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
}

// We always read the depth buffer in 24_8 format.
const u32 z_address = (0x04000000) | vfb->z_address;
const u32 z_address = vfb->z_address;

DEBUG_LOG(FRAMEBUF, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address);

Expand Down Expand Up @@ -732,7 +732,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {

if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, fb_format);
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, fb_format);
return true;
}
LPDIRECT3DSURFACE9 renderTarget = vfb->fbo ? (LPDIRECT3DSURFACE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT | Draw::FB_SURFACE_BIT, 0) : nullptr;
Expand Down Expand Up @@ -809,7 +809,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {

if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
buffer = GPUDebugBuffer(Memory::GetPointer(z_address | 0x04000000), z_stride, 512, GPU_DBG_FORMAT_16BIT);
buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
return true;
}

Expand Down Expand Up @@ -847,7 +847,7 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {

if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
buffer = GPUDebugBuffer(Memory::GetPointer(vfb->z_address | 0x04000000), vfb->z_stride, 512, GPU_DBG_FORMAT_16BIT);
buffer = GPUDebugBuffer(Memory::GetPointer(vfb->z_address), vfb->z_stride, 512, GPU_DBG_FORMAT_16BIT);
return true;
}

Expand Down
3 changes: 2 additions & 1 deletion GPU/Directx9/StencilBufferDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ static const char *stencil_vs =
"}\n";

bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
addr &= 0x3FFFFFFF;
if (!MayIntersectFramebuffer(addr)) {
return false;
}

VirtualFramebuffer *dstBuffer = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
if (vfb->fb_address == addr) {
dstBuffer = vfb;
}
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ void DrawEngineGLES::DoFlush() {
gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
textureNeedsApply = true;
} else if (gstate.getTextureAddress(0) == ((gstate.getFrameBufRawAddress() | 0x04000000) & 0x3FFFFFFF)) {
// This catches the case of clearing a texture.
// This catches the case of clearing a texture. (#10957)
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}

Expand Down
3 changes: 2 additions & 1 deletion GPU/GLES/StencilBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,15 @@ static const char *stencil_vs =
"}\n";

bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
addr &= 0x3FFFFFFF;
if (!MayIntersectFramebuffer(addr)) {
return false;
}

VirtualFramebuffer *dstBuffer = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
if (vfb->fb_address == addr) {
dstBuffer = vfb;
}
}
Expand Down
3 changes: 2 additions & 1 deletion GPU/Vulkan/StencilBufferVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,15 @@ void main() {
// messing about with bitplane textures and the like. Or actually, maybe not... Let's start with
// the traditional approach.
bool FramebufferManagerVulkan::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
addr &= 0x3FFFFFFF;
if (!MayIntersectFramebuffer(addr)) {
return false;
}

VirtualFramebuffer *dstBuffer = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
if (vfb->fb_address == addr) {
dstBuffer = vfb;
}
}
Expand Down