-
Notifications
You must be signed in to change notification settings - Fork 2.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
D3D11 framebuffer readback #9321
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,9 +105,22 @@ FramebufferManagerD3D11::FramebufferManagerD3D11(Draw::DrawContext *draw) | |
vb.Usage = D3D11_USAGE_DYNAMIC; | ||
vb.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; | ||
device_->CreateBuffer(&vb, nullptr, &quadBuffer_); | ||
|
||
D3D11_TEXTURE2D_DESC packDesc{}; | ||
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; | ||
packDesc.BindFlags = 0; | ||
packDesc.Width = 512; // 512x512 is the maximum size of a framebuffer on the PSP. | ||
packDesc.Height = 512; | ||
packDesc.ArraySize = 1; | ||
packDesc.MipLevels = 1; | ||
packDesc.Usage = D3D11_USAGE_STAGING; | ||
packDesc.SampleDesc.Count = 1; | ||
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; | ||
device_->CreateTexture2D(&packDesc, nullptr, &packTexture_); | ||
} | ||
|
||
FramebufferManagerD3D11::~FramebufferManagerD3D11() { | ||
packTexture_->Release(); | ||
// Drawing cleanup | ||
if (quadVertexShader_) | ||
quadVertexShader_->Release(); | ||
|
@@ -635,7 +648,7 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid | |
return; | ||
} else { | ||
for (u32 y = 0; y < height; ++y) { | ||
ConvertBGRA8888ToRGBA8888(dst32, src32, width); | ||
memcpy(dst32, src32, width * 4); | ||
src32 += srcStride; | ||
dst32 += dstStride; | ||
} | ||
|
@@ -646,21 +659,21 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid | |
switch (format) { | ||
case GE_FORMAT_565: // BGR 565 | ||
for (u32 y = 0; y < height; ++y) { | ||
ConvertBGRA8888ToRGB565(dst16, src32, width); | ||
ConvertRGBA8888ToRGB565(dst16, src32, width); | ||
src32 += srcStride; | ||
dst16 += dstStride; | ||
} | ||
break; | ||
case GE_FORMAT_5551: // ABGR 1555 | ||
for (u32 y = 0; y < height; ++y) { | ||
ConvertBGRA8888ToRGBA5551(dst16, src32, width); | ||
ConvertRGBA8888ToRGBA5551(dst16, src32, width); | ||
src32 += srcStride; | ||
dst16 += dstStride; | ||
} | ||
break; | ||
case GE_FORMAT_4444: // ABGR 4444 | ||
for (u32 y = 0; y < height; ++y) { | ||
ConvertBGRA8888ToRGBA4444(dst16, src32, width); | ||
ConvertRGBA8888ToRGBA4444(dst16, src32, width); | ||
src32 += srcStride; | ||
dst16 += dstStride; | ||
} | ||
|
@@ -673,6 +686,9 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid | |
} | ||
} | ||
|
||
// This function takes an already correctly-sized framebuffer and packs it into RAM. | ||
// Does not need to account for scaling. | ||
// Color conversion is currently done on CPU but should be done on GPU. | ||
void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h) { | ||
if (!vfb->fbo) { | ||
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferD3D11_: vfb->fbo == 0"); | ||
|
@@ -686,85 +702,38 @@ void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int | |
// We always need to convert from the framebuffer native format. | ||
// Right now that's always 8888. | ||
DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address); | ||
ID3D11Texture2D *colorTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT, 0); | ||
|
||
/* | ||
LPDIRECT3DSURFACE9 renderTarget = (LPDIRECT3DSURFACE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT | Draw::FB_SURFACE_BIT, 0); | ||
D3DSURFACE_DESC desc; | ||
renderTarget->GetDesc(&desc); | ||
|
||
LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget, vfb); | ||
if (offscreen) { | ||
HRESULT hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen); | ||
if (SUCCEEDED(hr)) { | ||
D3DLOCKED_RECT locked; | ||
u32 widthFactor = vfb->renderWidth / vfb->bufferWidth; | ||
u32 heightFactor = vfb->renderHeight / vfb->bufferHeight; | ||
RECT rect = { (LONG)(x * widthFactor), (LONG)(y * heightFactor), (LONG)((x + w) * widthFactor), (LONG)((y + h) * heightFactor) }; | ||
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY); | ||
if (SUCCEEDED(hr)) { | ||
// TODO: Handle the other formats? We don't currently create them, I think. | ||
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp; | ||
// Pixel size always 4 here because we always request BGRA8888. | ||
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format); | ||
offscreen->UnlockRect(); | ||
} else { | ||
ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight); | ||
} | ||
} else { | ||
ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address); | ||
} | ||
D3D11_BOX srcBox{ 0, 0, 0, vfb->width, vfb->height, 1 }; | ||
context_->CopySubresourceRegion(packTexture_, 0, 0, 0, 0, colorTex, 0, &srcBox); | ||
|
||
// Ideally, we'd round robin between two packTexture_, and simply use the other one. Though if the game | ||
// does a once-off copy, that won't work at all. | ||
|
||
// BIG GPU STALL | ||
D3D11_MAPPED_SUBRESOURCE map; | ||
HRESULT result = context_->Map(packTexture_, 0, D3D11_MAP_READ, 0, &map); | ||
if (FAILED(result)) { | ||
return; | ||
} | ||
*/ | ||
|
||
// TODO: Handle the other formats? We don't currently create them, I think. | ||
const int srcByteOffset = y * map.RowPitch + x * 4; | ||
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp; | ||
// Pixel size always 4 here because we always request BGRA8888. | ||
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)map.pData, vfb->fb_stride, map.RowPitch/4, w, h, vfb->format); | ||
context_->Unmap(packTexture_, 0); | ||
} | ||
|
||
// Nobody calls this yet. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had a branch that used this, it improved some issues in Katamari or something, but I think it was slow and had accuracy problems with some GPUs. I was thinking of using it to detect accuracy problems with GPUs on startup, although it feels painful a bit. -[Unknown] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right. There are potential uses, some silly games like Burnout Dominator read the depth buffer using the CPU for lens flares, so it would be useful for that as well, though with a heavy speed hit (but we could double buffer it making lens flares one frame late). |
||
void FramebufferManagerD3D11::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) { | ||
if (!vfb->fbo) { | ||
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackDepthbuffer: vfb->fbo == 0"); | ||
return; | ||
} | ||
|
||
// We always read the depth buffer in 24_8 format. | ||
const u32 z_address = (0x04000000) | vfb->z_address; | ||
|
||
/* | ||
DEBUG_LOG(SCEGE, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address); | ||
|
||
LPDIRECT3DTEXTURE9 tex = (LPDIRECT3DTEXTURE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_DEPTH_BIT, 0); | ||
if (tex) { | ||
D3DSURFACE_DESC desc; | ||
D3DLOCKED_RECT locked; | ||
tex->GetLevelDesc(0, &desc); | ||
RECT rect = { 0, 0, (LONG)desc.Width, (LONG)desc.Height }; | ||
HRESULT hr = tex->LockRect(0, &locked, &rect, D3DLOCK_READONLY); | ||
|
||
if (SUCCEEDED(hr)) { | ||
const int dstByteOffset = y * vfb->fb_stride * sizeof(s16); | ||
const u32 *packed = (const u32 *)locked.pBits; | ||
u16 *depth = (u16 *)Memory::GetPointer(z_address); | ||
|
||
// TODO: Optimize. | ||
for (int yp = 0; yp < h; ++yp) { | ||
for (int xp = 0; xp < w; ++xp) { | ||
const int offset = (yp + y) & vfb->z_stride + x + xp; | ||
|
||
float scaled = FromScaledDepth((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f)); | ||
if (scaled <= 0.0f) { | ||
depth[offset] = 0; | ||
} else if (scaled >= 65535.0f) { | ||
depth[offset] = 65535; | ||
} else { | ||
depth[offset] = (int)scaled; | ||
} | ||
} | ||
} | ||
|
||
tex->UnlockRect(0); | ||
} else { | ||
ERROR_LOG_REPORT(G3D, "Unable to lock rect from depth %08x: %d,%d %dx%d of %dx%d", vfb->fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight); | ||
} | ||
} else { | ||
ERROR_LOG_REPORT(G3D, "Unable to download render target depth from %08x", vfb->fb_address); | ||
}*/ | ||
// TODO | ||
} | ||
|
||
void FramebufferManagerD3D11::EndFrame() { | ||
|
@@ -868,18 +837,114 @@ void FramebufferManagerD3D11::Resized() { | |
resized_ = true; | ||
} | ||
|
||
bool FramebufferManagerD3D11::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) { | ||
return false; | ||
// Lots of this code could be shared (like the downsampling). | ||
bool FramebufferManagerD3D11::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) { | ||
VirtualFramebuffer *vfb = currentRenderVfb_; | ||
if (!vfb) { | ||
vfb = GetVFBAt(fb_address); | ||
} | ||
|
||
if (!vfb) { | ||
// If there's no vfb and we're drawing there, must be memory? | ||
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, format); | ||
return true; | ||
} | ||
|
||
int w = vfb->renderWidth, h = vfb->renderHeight; | ||
Draw::Framebuffer *fboForRead = nullptr; | ||
if (vfb->fbo) { | ||
if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) { | ||
w = vfb->width * maxRes; | ||
h = vfb->height * maxRes; | ||
|
||
Draw::Framebuffer *tempFBO = GetTempFBO(w, h); | ||
VirtualFramebuffer tempVfb = *vfb; | ||
tempVfb.fbo = tempFBO; | ||
tempVfb.bufferWidth = vfb->width; | ||
tempVfb.bufferHeight = vfb->height; | ||
tempVfb.renderWidth = w; | ||
tempVfb.renderHeight = h; | ||
BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0); | ||
|
||
fboForRead = tempFBO; | ||
} else { | ||
fboForRead = vfb->fbo; | ||
} | ||
} | ||
|
||
buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true); | ||
|
||
ID3D11Texture2D *packTex; | ||
D3D11_TEXTURE2D_DESC packDesc{}; | ||
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; | ||
packDesc.BindFlags = 0; | ||
packDesc.Width = w; | ||
packDesc.Height = h; | ||
packDesc.ArraySize = 1; | ||
packDesc.MipLevels = 1; | ||
packDesc.Usage = D3D11_USAGE_STAGING; | ||
packDesc.SampleDesc.Count = 1; | ||
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; | ||
device_->CreateTexture2D(&packDesc, nullptr, &packTex); | ||
|
||
ID3D11Texture2D *nativeTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(fboForRead, Draw::FB_COLOR_BIT, 0); | ||
context_->CopyResource(packTex, nativeTex); | ||
|
||
D3D11_MAPPED_SUBRESOURCE map; | ||
context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map); | ||
|
||
for (int y = 0; y < h; y++) { | ||
uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4; | ||
const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y; | ||
memcpy(dest, src, 4 * w); | ||
} | ||
|
||
context_->Unmap(packTex, 0); | ||
packTex->Release(); | ||
return true; | ||
} | ||
|
||
bool FramebufferManagerD3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) { | ||
bool FramebufferManagerD3D11::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) { | ||
return false; | ||
} | ||
|
||
bool FramebufferManagerD3D11::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) { | ||
bool FramebufferManagerD3D11::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) { | ||
return false; | ||
} | ||
|
||
bool FramebufferManagerD3D11::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) { | ||
return false; | ||
} | ||
bool FramebufferManagerD3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) { | ||
ID3D11Texture2D *backbuffer = (ID3D11Texture2D *)draw_->GetNativeObject(Draw::NativeObject::BACKBUFFER_COLOR_TEX); | ||
D3D11_TEXTURE2D_DESC desc; | ||
backbuffer->GetDesc(&desc); | ||
int w = desc.Width; | ||
int h = desc.Height; | ||
buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true); | ||
|
||
ID3D11Texture2D *packTex; | ||
D3D11_TEXTURE2D_DESC packDesc{}; | ||
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; | ||
packDesc.BindFlags = 0; | ||
packDesc.Width = w; | ||
packDesc.Height = h; | ||
packDesc.ArraySize = 1; | ||
packDesc.MipLevels = 1; | ||
packDesc.Usage = D3D11_USAGE_STAGING; | ||
packDesc.SampleDesc.Count = 1; | ||
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; | ||
device_->CreateTexture2D(&packDesc, nullptr, &packTex); | ||
|
||
context_->CopyResource(packTex, backbuffer); | ||
|
||
D3D11_MAPPED_SUBRESOURCE map; | ||
context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map); | ||
|
||
for (int y = 0; y < h; y++) { | ||
uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4; | ||
const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y; | ||
memcpy(dest, src, 4 * w); | ||
} | ||
|
||
context_->Unmap(packTex, 0); | ||
packTex->Release(); | ||
return true; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is true. This is the maximum size that can be textured from, but I think drawn can be wider. See #4739. Also some games (I believe Silent Hill?) draw to one big 1024 wide surface, but draw on the left and right sides - see curRTOffsetX.
I do think 512 may be the max height. I've never seen wider than 1024, so that may be the max width.
Also, I don't know about HD remasters. Presumably they draw double the width and height or something?
-[Unknown]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, that's right. Probably should just make this adaptive..