Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

D3D11 framebuffer readback #9321

Merged
merged 3 commits into from
Feb 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#define WRITE p+=sprintf

// TODO: Add a compute shader path. Complete waste of time to set up a graphics state.

// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
char *p = buffer;
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/FramebufferCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
}
}

// 1:1 pixel-sized buffers; we resize buffers to these before we read them back.
VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb) {
// For now we'll keep these on the same struct as the ones that can get displayed
// (and blatantly copy work already done above while at it).
Expand Down
7 changes: 7 additions & 0 deletions GPU/Common/FramebufferCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ namespace Draw {
class DrawContext;
}

struct GPUDebugBuffer;
class TextureCacheCommon;
class ShaderManagerCommon;

Expand Down Expand Up @@ -264,6 +265,12 @@ class FramebufferManagerCommon {

Draw::Framebuffer *GetTempFBO(u16 w, u16 h, Draw::FBColorDepth depth = Draw::FBO_8888);

// Debug features
virtual bool GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) = 0;
virtual bool GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) = 0;
virtual bool GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) = 0;
virtual bool GetOutputFramebuffer(GPUDebugBuffer &buffer) = 0;

protected:
virtual void SetViewport2D(int x, int y, int w, int h) = 0;
void CalculatePostShaderUniforms(int bufferWidth, int bufferHeight, int renderWidth, int renderHeight, PostShaderUniforms *uniforms);
Expand Down
223 changes: 144 additions & 79 deletions GPU/D3D11/FramebufferManagerD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,22 @@ FramebufferManagerD3D11::FramebufferManagerD3D11(Draw::DrawContext *draw)
vb.Usage = D3D11_USAGE_DYNAMIC;
vb.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
device_->CreateBuffer(&vb, nullptr, &quadBuffer_);

D3D11_TEXTURE2D_DESC packDesc{};
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
packDesc.BindFlags = 0;
packDesc.Width = 512; // 512x512 is the maximum size of a framebuffer on the PSP.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is true. This is the maximum size that can be textured from, but I think the drawn area can be wider. See #4739. Also, some games (I believe Silent Hill?) draw to one big 1024-wide surface, but draw on the left and right sides - see curRTOffsetX.

I do think 512 may be the max height. I've never seen wider than 1024, so that may be the max width.

Also, I don't know about HD remasters. Presumably they draw double the width and height or something?

-[Unknown]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that's right. I should probably just make this adaptive.

packDesc.Height = 512;
packDesc.ArraySize = 1;
packDesc.MipLevels = 1;
packDesc.Usage = D3D11_USAGE_STAGING;
packDesc.SampleDesc.Count = 1;
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
device_->CreateTexture2D(&packDesc, nullptr, &packTexture_);
}

FramebufferManagerD3D11::~FramebufferManagerD3D11() {
packTexture_->Release();
// Drawing cleanup
if (quadVertexShader_)
quadVertexShader_->Release();
Expand Down Expand Up @@ -635,7 +648,7 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid
return;
} else {
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA8888(dst32, src32, width);
memcpy(dst32, src32, width * 4);
src32 += srcStride;
dst32 += dstStride;
}
Expand All @@ -646,21 +659,21 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid
switch (format) {
case GE_FORMAT_565: // BGR 565
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGB565(dst16, src32, width);
ConvertRGBA8888ToRGB565(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
}
break;
case GE_FORMAT_5551: // ABGR 1555
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA5551(dst16, src32, width);
ConvertRGBA8888ToRGBA5551(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
}
break;
case GE_FORMAT_4444: // ABGR 4444
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA4444(dst16, src32, width);
ConvertRGBA8888ToRGBA4444(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
}
Expand All @@ -673,6 +686,9 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid
}
}

// This function takes an already correctly-sized framebuffer and packs it into RAM.
// Does not need to account for scaling.
// Color conversion is currently done on CPU but should be done on GPU.
void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (!vfb->fbo) {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferD3D11_: vfb->fbo == 0");
Expand All @@ -686,85 +702,38 @@ void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int
// We always need to convert from the framebuffer native format.
// Right now that's always 8888.
DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address);
ID3D11Texture2D *colorTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT, 0);

/*
LPDIRECT3DSURFACE9 renderTarget = (LPDIRECT3DSURFACE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT | Draw::FB_SURFACE_BIT, 0);
D3DSURFACE_DESC desc;
renderTarget->GetDesc(&desc);

LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget, vfb);
if (offscreen) {
HRESULT hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen);
if (SUCCEEDED(hr)) {
D3DLOCKED_RECT locked;
u32 widthFactor = vfb->renderWidth / vfb->bufferWidth;
u32 heightFactor = vfb->renderHeight / vfb->bufferHeight;
RECT rect = { (LONG)(x * widthFactor), (LONG)(y * heightFactor), (LONG)((x + w) * widthFactor), (LONG)((y + h) * heightFactor) };
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
if (SUCCEEDED(hr)) {
// TODO: Handle the other formats? We don't currently create them, I think.
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
// Pixel size always 4 here because we always request BGRA8888.
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format);
offscreen->UnlockRect();
} else {
ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight);
}
} else {
ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address);
}
D3D11_BOX srcBox{ 0, 0, 0, vfb->width, vfb->height, 1 };
context_->CopySubresourceRegion(packTexture_, 0, 0, 0, 0, colorTex, 0, &srcBox);

// Ideally, we'd round robin between two packTexture_, and simply use the other one. Though if the game
// does a once-off copy, that won't work at all.

// BIG GPU STALL
D3D11_MAPPED_SUBRESOURCE map;
HRESULT result = context_->Map(packTexture_, 0, D3D11_MAP_READ, 0, &map);
if (FAILED(result)) {
return;
}
*/

// TODO: Handle the other formats? We don't currently create them, I think.
const int srcByteOffset = y * map.RowPitch + x * 4;
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
// Pixel size always 4 here because we always request BGRA8888.
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)map.pData, vfb->fb_stride, map.RowPitch/4, w, h, vfb->format);
context_->Unmap(packTexture_, 0);
}

// Nobody calls this yet.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a branch that used this; it improved some issues in Katamari or something, but I think it was slow and had accuracy problems with some GPUs. I was thinking of using it to detect accuracy problems with GPUs on startup, although it feels a bit painful.

-[Unknown]

Copy link
Owner Author

@hrydgard hrydgard Mar 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. There are potential uses, some silly games like Burnout Dominator read the depth buffer using the CPU for lens flares, so it would be useful for that as well, though with a heavy speed hit (but we could double buffer it making lens flares one frame late).

// Depth readback entry point. Currently a stub: it bails out (logging once) when the
// virtual framebuffer has no FBO, computes the destination address, and then does nothing,
// because the only implementation below is a disabled Direct3D 9 version.
// x, y, w and h are referenced only by that disabled code.
void FramebufferManagerD3D11::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (!vfb->fbo) {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackDepthbuffer: vfb->fbo == 0");
return;
}

// We always read the depth buffer in 24_8 format.
// z_address is only consumed by the disabled code below, so it is unused for now.
const u32 z_address = (0x04000000) | vfb->z_address;

// NOTE(review): before reviving the disabled block below, fix the offset computation:
// `(yp + y) & vfb->z_stride + x + xp` uses `&` where a multiply by the stride looks intended.
// dstByteOffset is also computed from fb_stride (not z_stride) and never used.
/*
DEBUG_LOG(SCEGE, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address);

LPDIRECT3DTEXTURE9 tex = (LPDIRECT3DTEXTURE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_DEPTH_BIT, 0);
if (tex) {
D3DSURFACE_DESC desc;
D3DLOCKED_RECT locked;
tex->GetLevelDesc(0, &desc);
RECT rect = { 0, 0, (LONG)desc.Width, (LONG)desc.Height };
HRESULT hr = tex->LockRect(0, &locked, &rect, D3DLOCK_READONLY);

if (SUCCEEDED(hr)) {
const int dstByteOffset = y * vfb->fb_stride * sizeof(s16);
const u32 *packed = (const u32 *)locked.pBits;
u16 *depth = (u16 *)Memory::GetPointer(z_address);

// TODO: Optimize.
for (int yp = 0; yp < h; ++yp) {
for (int xp = 0; xp < w; ++xp) {
const int offset = (yp + y) & vfb->z_stride + x + xp;

float scaled = FromScaledDepth((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f));
if (scaled <= 0.0f) {
depth[offset] = 0;
} else if (scaled >= 65535.0f) {
depth[offset] = 65535;
} else {
depth[offset] = (int)scaled;
}
}
}

tex->UnlockRect(0);
} else {
ERROR_LOG_REPORT(G3D, "Unable to lock rect from depth %08x: %d,%d %dx%d of %dx%d", vfb->fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight);
}
} else {
ERROR_LOG_REPORT(G3D, "Unable to download render target depth from %08x", vfb->fb_address);
}*/
// TODO
}

void FramebufferManagerD3D11::EndFrame() {
Expand Down Expand Up @@ -868,18 +837,114 @@ void FramebufferManagerD3D11::Resized() {
resized_ = true;
}

bool FramebufferManagerD3D11::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {
return false;
// Lots of this code could be shared (like the downsampling).
// Debug readback of a color framebuffer into `buffer` as 8888 pixels.
//
// Uses the current render framebuffer if there is one, otherwise looks one up at fb_address.
// If no virtual framebuffer is found, `buffer` is set up to wrap the memory at
// fb_address | 0x04000000 (using fb_stride and format) and true is returned.
//
// maxRes > 0 caps the readback at maxRes times the framebuffer's width/height; larger render
// targets are first blitted down into a temp FBO.
//
// Returns false when there is no GPU-side surface to read, or when the staging texture
// cannot be created or mapped. `buffer` is left untouched in those cases.
bool FramebufferManagerD3D11::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) {
	VirtualFramebuffer *vfb = currentRenderVfb_;
	if (!vfb) {
		vfb = GetVFBAt(fb_address);
	}

	if (!vfb) {
		// If there's no vfb and we're drawing there, must be memory?
		buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, format);
		return true;
	}

	if (!vfb->fbo) {
		// Nothing on the GPU to copy from; previously this fell through with a null FBO.
		return false;
	}

	int w = vfb->renderWidth, h = vfb->renderHeight;
	Draw::Framebuffer *fboForRead = vfb->fbo;
	if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) {
		// Too large for the requested resolution: downscale into a temp FBO first.
		w = vfb->width * maxRes;
		h = vfb->height * maxRes;

		Draw::Framebuffer *tempFBO = GetTempFBO(w, h);
		VirtualFramebuffer tempVfb = *vfb;
		tempVfb.fbo = tempFBO;
		tempVfb.bufferWidth = vfb->width;
		tempVfb.bufferHeight = vfb->height;
		tempVfb.renderWidth = w;
		tempVfb.renderHeight = h;
		BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0);

		fboForRead = tempFBO;
	}
	if (!fboForRead) {
		return false;
	}

	ID3D11Texture2D *nativeTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(fboForRead, Draw::FB_COLOR_BIT, 0);
	if (!nativeTex) {
		return false;
	}

	// CPU-readable staging copy, created on the fly since debug readbacks can be any size.
	// NOTE(review): CopyResource requires identical dimensions and a compatible format;
	// this assumes the FBO's color texture is w x h RGBA8 - confirm.
	D3D11_TEXTURE2D_DESC packDesc{};
	packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
	packDesc.BindFlags = 0;
	packDesc.Width = w;
	packDesc.Height = h;
	packDesc.ArraySize = 1;
	packDesc.MipLevels = 1;
	packDesc.Usage = D3D11_USAGE_STAGING;
	packDesc.SampleDesc.Count = 1;
	packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;

	ID3D11Texture2D *packTex = nullptr;
	if (FAILED(device_->CreateTexture2D(&packDesc, nullptr, &packTex)) || !packTex) {
		return false;
	}

	context_->CopyResource(packTex, nativeTex);

	// Mapping for read stalls until the GPU has finished the copy.
	D3D11_MAPPED_SUBRESOURCE map{};
	if (FAILED(context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map)) || !map.pData) {
		packTex->Release();
		return false;
	}

	buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true);

	// The mapped rows are RowPitch bytes apart; the debug buffer is tightly packed.
	for (int y = 0; y < h; y++) {
		uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4;
		const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y;
		memcpy(dest, src, 4 * w);
	}

	context_->Unmap(packTex, 0);
	packTex->Release();
	return true;
}

bool FramebufferManagerD3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
bool FramebufferManagerD3D11::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
return false;
}

bool FramebufferManagerD3D11::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
bool FramebufferManagerD3D11::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
return false;
}

bool FramebufferManagerD3D11::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
return false;
}
// Debug readback of the backbuffer color texture into `buffer` as 8888 pixels, at the
// backbuffer's own size.
//
// Returns false when the backbuffer texture is unavailable, or when the staging texture
// cannot be created or mapped. `buffer` is left untouched in those cases.
bool FramebufferManagerD3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
	ID3D11Texture2D *backbuffer = (ID3D11Texture2D *)draw_->GetNativeObject(Draw::NativeObject::BACKBUFFER_COLOR_TEX);
	if (!backbuffer) {
		return false;
	}

	D3D11_TEXTURE2D_DESC desc;
	backbuffer->GetDesc(&desc);
	int w = desc.Width;
	int h = desc.Height;

	// CPU-readable staging copy matching the backbuffer size.
	// NOTE(review): CopyResource requires a compatible format; this assumes the backbuffer
	// is an RGBA8 texture - confirm.
	D3D11_TEXTURE2D_DESC packDesc{};
	packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
	packDesc.BindFlags = 0;
	packDesc.Width = w;
	packDesc.Height = h;
	packDesc.ArraySize = 1;
	packDesc.MipLevels = 1;
	packDesc.Usage = D3D11_USAGE_STAGING;
	packDesc.SampleDesc.Count = 1;
	packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;

	ID3D11Texture2D *packTex = nullptr;
	if (FAILED(device_->CreateTexture2D(&packDesc, nullptr, &packTex)) || !packTex) {
		return false;
	}

	context_->CopyResource(packTex, backbuffer);

	// Mapping for read stalls until the GPU has finished the copy.
	D3D11_MAPPED_SUBRESOURCE map{};
	if (FAILED(context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map)) || !map.pData) {
		packTex->Release();
		return false;
	}

	buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true);

	// The mapped rows are RowPitch bytes apart; the debug buffer is tightly packed.
	for (int y = 0; y < h; y++) {
		uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4;
		const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y;
		memcpy(dest, src, 4 * w);
	}

	context_->Unmap(packTex, 0);
	packTex->Release();
	return true;
}
14 changes: 9 additions & 5 deletions GPU/D3D11/FramebufferManagerD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {

virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override;

bool GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes);
bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer);
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);
bool GetOutputFramebuffer(GPUDebugBuffer &buffer);
bool GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) override;
bool GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) override;
bool GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) override;
bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override;

virtual void RebindFramebuffer() override;

Expand Down Expand Up @@ -136,6 +136,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
ShaderManagerD3D11 *shaderManagerD3D11_;
DrawEngineD3D11 *drawEngine_;

// 1:1 Readback texture, 512x512 fixed
// For larger debug readbacks, we create/destroy textures on the fly.
ID3D11Texture2D *packTexture_;

// Used by post-processing shader
std::vector<Draw::Framebuffer *> extraFBOs_;

Expand All @@ -145,4 +149,4 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
AsyncPBO *pixelBufObj_; //this isn't that large
u8 currentPBO_;
#endif
};
};
16 changes: 0 additions & 16 deletions GPU/D3D11/GPU_D3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -981,18 +981,6 @@ void GPU_D3D11::DoState(PointerWrap &p) {
}
}

bool GPU_D3D11::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {
return framebufferManagerD3D11_->GetCurrentFramebuffer(buffer, type, maxRes);
}

bool GPU_D3D11::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
return framebufferManagerD3D11_->GetCurrentDepthbuffer(buffer);
}

bool GPU_D3D11::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
return framebufferManagerD3D11_->GetCurrentStencilbuffer(buffer);
}

bool GPU_D3D11::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
if (!gstate.isTextureMapEnabled()) {
return false;
Expand All @@ -1007,10 +995,6 @@ bool GPU_D3D11::GetCurrentClut(GPUDebugBuffer &buffer) {
return textureCacheD3D11_->GetCurrentClutBuffer(buffer);
}

bool GPU_D3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
return framebufferManagerD3D11_->GetOutputFramebuffer(buffer);
}

bool GPU_D3D11::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
return drawEngine_.GetCurrentSimpleVertices(count, vertices, indices);
}
Expand Down
Loading