Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Depth readback with built-in stretchblit #16905

Merged
merged 4 commits into from
Feb 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,7 @@ if(NOT MOBILE_DEVICE)
endif()

set(GPU_GLES
GPU/GLES/DepthBufferGLES.cpp
GPU/GLES/StencilBufferGLES.cpp
GPU/GLES/GPU_GLES.cpp
GPU/GLES/GPU_GLES.h
GPU/GLES/FragmentTestCacheGLES.cpp
Expand Down Expand Up @@ -1580,6 +1580,7 @@ set(GPU_SOURCES
${GPU_NEON}
GPU/Common/Draw2D.cpp
GPU/Common/Draw2D.h
GPU/Common/DepthBufferCommon.cpp
GPU/Common/TextureShaderCommon.cpp
GPU/Common/TextureShaderCommon.h
GPU/Common/DepalettizeShaderCommon.cpp
Expand Down
189 changes: 59 additions & 130 deletions GPU/GLES/DepthBufferGLES.cpp → GPU/Common/DepthBufferCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,70 +21,61 @@
#include "Common/LogReporting.h"
#include "Core/ConfigValues.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/GLES/DrawEngineGLES.h"
#include "GPU/GLES/FramebufferManagerGLES.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/GLES/TextureCacheGLES.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "Common/GPU/ShaderWriter.h"

// GLSL fragment shader source (raw string) for downloading depth through a color output.
// It samples the red channel of `tex` at v_texcoord, offsets/scales it with u_depthFactor.x/.y
// and clamps to [0, 1], multiplies by u_depthShift, keeps floor(mod(..., 256.0)) per channel,
// scales by u_depthTo8 and writes the .yzww swizzle as the output color.
// The preamble selects float precision on GL_ES and remaps varying/texture2D/gl_FragColor
// when __VERSION__ >= 130.
static const char *depth_dl_fs = R"(
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#endif
#if __VERSION__ >= 130
#define varying in
#define texture2D texture
#define gl_FragColor fragColor0
out vec4 fragColor0;
#endif
varying vec2 v_texcoord;
uniform vec4 u_depthFactor;
uniform vec4 u_depthShift;
uniform vec4 u_depthTo8;
uniform sampler2D tex;
void main() {
float depth = texture2D(tex, v_texcoord).r;
// At this point, clamped maps [0, 1] to [0, 65535].
float clamped = clamp((depth + u_depthFactor.x) * u_depthFactor.y, 0.0, 1.0);

vec4 enc = u_depthShift * clamped;
enc = floor(mod(enc, 256.0)) * u_depthTo8;
// Let's ignore the bits outside 16 bit precision.
gl_FragColor = enc.yzww;
}
)";

// GLSL vertex shader source (raw string) with a single vec2 attribute, a_position.
// It outputs v_texcoord = a_position * 2.0 and gl_Position = v_texcoord * 2.0 - 1.0, so inputs
// (0,0), (1,0), (0,1) land at clip-space (-1,-1), (3,-1), (-1,3).
// The preamble remaps attribute/varying when __VERSION__ >= 130.
static const char *depth_vs = R"(
#ifdef GL_ES
precision highp float;
#endif
#if __VERSION__ >= 130
#define attribute in
#define varying out
#endif
attribute vec2 a_position;
varying vec2 v_texcoord;
void main() {
v_texcoord = a_position * 2.0;
gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);
}
)";
// Vertex inputs passed to BeginVSMain for the generated depth download vertex shader:
// one vec2 named a_position, tagged with the position semantic.
static const InputDef vs_inputs[] = {
{ "vec2", "a_position", Draw::SEM_POSITION },
};

// CPU-side copy of the uniform data used by the depth download shader.
// Each member is four floats (one vec4). depthUBDesc describes these same three members
// with hard-coded byte offsets 0, 16 and 32 and uses sizeof(DepthUB) as the buffer size.
struct DepthUB {
	float u_depthFactor[4];
	float u_depthShift[4];
	float u_depthTo8[4];
};

// The offsets in depthUBDesc are written out by hand, so fail the build if the layout
// of this struct ever stops being three consecutive vec4s.
static_assert(sizeof(DepthUB) == 3 * 4 * sizeof(float), "DepthUB must stay three consecutive vec4s");

// Uniform declarations passed to BeginFSMain for the depth download fragment shader:
// three vec4s whose names match the members of DepthUB.
// NOTE(review): the trailing 0/1/2 is presumably each uniform's index/slot — confirm against UniformDef.
const UniformDef depthUniforms[] = {
{ "vec4", "u_depthFactor", 0 },
{ "vec4", "u_depthShift", 1},
{ "vec4", "u_depthTo8", 2},
};

// Uniform buffer description handed to CreateReadbackPipeline: total size is sizeof(DepthUB),
// followed by one FLOAT4 entry per DepthUB member.
// NOTE(review): the last number of each entry (0, 16, 32) looks like the member's byte offset
// inside DepthUB, and the two -1 fields look like unused locations — confirm against UniformBufferDesc.
const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), {
{ "u_depthFactor", -1, -1, UniformType::FLOAT4, 0 },
{ "u_depthShift", -1, -1, UniformType::FLOAT4, 16 },
{ "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 },
} };

// Sampler declarations passed to DeclareSamplers: a single sampler named "tex", listed with 0.
// The generated fragment shader samples it via SampleTexture2D("tex", ...).
static const SamplerDef samplers[] = {
{ 0, "tex" },
};

// Varyings shared by the generated vertex and fragment shaders: one highp vec2, v_texcoord,
// tagged with the TEXCOORD0 semantic. Passed to BeginVSMain/EndVSMain and BeginFSMain.
static const VaryingDef varyings[] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};

// Writes the depth download fragment shader into `writer`.
// The emitted main() reads the red channel of "tex" at v_texcoord, rescales and clamps it
// with u_depthFactor, then packs it into color channels with u_depthShift and u_depthTo8.
// The final color is handed to EndFSMain under the name "outColor".
void GenerateDepthDownloadFs(ShaderWriter &writer) {
	writer.DeclareSamplers(samplers);
	writer.BeginFSMain(depthUniforms, varyings);

	// Texture fetch, emitted in three pieces: declaration, sample expression, swizzle.
	writer.C(" float depth = ");
	writer.SampleTexture2D("tex", "v_texcoord");
	writer.C(".r; \n");

	// After the first statement below, clamped maps [0, 1] to [0, 65535].
	// The last statement drops the bits outside 16 bit precision.
	writer
		.C(" float clamped = clamp((depth + u_depthFactor.x) * u_depthFactor.y, 0.0, 1.0);\n")
		.C(" vec4 enc = u_depthShift * clamped;\n")
		.C(" enc = floor(mod(enc, 256.0)) * u_depthTo8;\n")
		.C(" vec4 outColor = enc.yzww;\n");

	writer.EndFSMain("outColor");
}

// Writes the depth download vertex shader into `writer`.
// The emitted main() sets v_texcoord = a_position * 2.0 and maps that to clip space with
// v_texcoord * 2.0 - 1.0. It takes the vs_inputs attributes, no uniforms, and the shared varyings.
void GenerateDepthDownloadVs(ShaderWriter &writer) {
	writer.BeginVSMain(vs_inputs, Slice<UniformDef>::empty(), varyings);
	writer.C("v_texcoord = a_position * 2.0;\n");
	// End this statement with a newline like every other emitted line, so that whatever
	// EndVSMain appends starts on its own line in the generated source.
	writer.C("gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);\n");
	writer.EndVSMain(varyings);
}

static const char *stencil_dl_fs = R"(
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
Expand Down Expand Up @@ -131,7 +122,7 @@ static bool SupportsDepthTexturing() {
return gl_extensions.ARB_texture_float;
}

static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag) {
Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag) {
using namespace Draw;

const ShaderLanguageDesc &shaderLanguageDesc = draw->GetShaderLanguageDesc();
Expand Down Expand Up @@ -173,7 +164,7 @@ static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const cha
return pipeline;
}

bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) {
bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) {
using namespace Draw;

if (!fbo) {
Expand All @@ -186,33 +177,43 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
}

// Pixel size always 4 here because we always request float or RGBA.
const u32 bufSize = w * h * 4;
const u32 bufSize = destW * destH * 4;
if (!convBuf_ || convBufSize_ < bufSize) {
delete[] convBuf_;
convBuf_ = new u8[bufSize];
convBufSize_ = bufSize;
}

const bool useColorPath = gl_extensions.IsGLES;
float scaleX = (float)destW / w;
float scaleY = (float)destH / h;

bool useColorPath = gl_extensions.IsGLES || scaleX != 1.0f || scaleY != 1.0f;
bool format16Bit = false;

if (useColorPath) {
if (!depthReadbackPipeline_) {
depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_vs, "depth_vs");
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
char depth_dl_fs[1024];
char depth_dl_vs[1024];
ShaderWriter fsWriter(depth_dl_fs, shaderLanguageDesc, ShaderStage::Fragment);
ShaderWriter vsWriter(depth_dl_vs, shaderLanguageDesc, ShaderStage::Vertex);
GenerateDepthDownloadFs(fsWriter);
GenerateDepthDownloadVs(vsWriter);
depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_dl_vs, "depth_dl_vs");
depthReadbackSampler_ = draw_->CreateSamplerState({});
}

shaderManager_->DirtyLastShader();
auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height());
draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync");
Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f };
Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f };
draw_->SetViewports(1, &viewport);

draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_DEPTH_BIT, 0);
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &depthReadbackSampler_);

// We must bind the program after starting the render pass.
draw_->SetScissorRect(0, 0, w, h);
draw_->SetScissorRect(0, 0, destW, destH);
draw_->BindPipeline(depthReadbackPipeline_);

DepthUB ub{};
Expand Down Expand Up @@ -241,7 +242,7 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
};
draw_->DrawUP(positions, 3);

draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackDepthbufferSync");
draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x * scaleX, y * scaleY, w * scaleX, h * scaleY, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackDepthbufferSync");

textureCache_->ForgetLastTexture();
// TODO: Use 4444 so we can copy lines directly (instead of 32 -> 16 on CPU)?
Expand All @@ -266,7 +267,7 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
// TODO: Apply this in the shader? May have precision issues if it becomes important to match.
// We downloaded float values directly in this case.
uint16_t *dest = pixels;
const GLfloat *packedf = (GLfloat *)convBuf_;
const float *packedf = (float *)convBuf_;
DepthScaleFactors depthScale = GetDepthScaleFactors();
for (int yp = 0; yp < h; ++yp) {
for (int xp = 0; xp < w; ++xp) {
Expand All @@ -287,75 +288,3 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
return true;
}

// Well, this is not depth, but it's depth/stencil related.
// Synchronously reads the stencil values of the (x, y, w, h) region of fbo into pixels,
// one byte per pixel, advancing pixelsStride bytes per row.
// Returns false when fbo is null or when the shader path is needed but unsupported;
// returns the draw context's result on the direct-copy path and true after the shader path.
bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) {
using namespace Draw;

if (!fbo) {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "ReadbackStencilbufferSync: bad fbo");
return false;
}

// Only GLES goes through the shader-based color path; otherwise copy stencil directly as S8.
const bool useColorPath = gl_extensions.IsGLES;
if (!useColorPath) {
return draw_->CopyFramebufferToMemorySync(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync");
}

// Unsupported below GLES 3.1 or without ARB_stencil_texturing.
// OES_texture_stencil8 is related, but used to specify texture data.
if ((gl_extensions.IsGLES && !gl_extensions.VersionGEThan(3, 1)) && !gl_extensions.ARB_stencil_texturing)
return false;

// Pixel size always 4 here because we always request RGBA back.
// Grow the shared conversion buffer if it is missing or too small; it is never shrunk here.
const u32 bufSize = w * h * 4;
if (!convBuf_ || convBufSize_ < bufSize) {
delete[] convBuf_;
convBuf_ = new u8[bufSize];
convBufSize_ = bufSize;
}

// Pipeline and sampler are created on first use and kept in members.
// NOTE(review): the stencil pipeline is created with &depthUBDesc — confirm that sharing the
// depth uniform buffer description is intended.
if (!stencilReadbackPipeline_) {
stencilReadbackPipeline_ = CreateReadbackPipeline(draw_, "stencil_dl", &depthUBDesc, stencil_dl_fs, "stencil_dl_fs", stencil_vs, "stencil_vs");
stencilReadbackSampler_ = draw_->CreateSamplerState({});
}

shaderManager_->DirtyLastShader();
// Render into a temporary FBO with the same dimensions as the source framebuffer.
auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height());
draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync");
Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f };
draw_->SetViewports(1, &viewport);

// Bind the source framebuffer's stencil channel as the texture the shader samples.
draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT, 0);
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilReadbackSampler_);

// We must bind the program after starting the render pass.
// NOTE(review): the scissor spans (0, 0)-(w, h) while the readback below starts at (x, y);
// confirm this is correct when x or y is non-zero.
draw_->SetScissorRect(0, 0, w, h);
draw_->BindPipeline(stencilReadbackPipeline_);

// Fullscreen triangle coordinates.
static const float positions[6] = {
0.0, 0.0,
1.0, 0.0,
0.0, 1.0,
};
draw_->DrawUP(positions, 3);

// Read the rendered RGBA result back into convBuf_, tightly packed (row stride of w pixels).
draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackStencilbufferSync");

textureCache_->ForgetLastTexture();

// TODO: Use 1/4 width to write all values directly and skip CPU conversion?
// Keep only the low byte of every 32-bit pixel as the output value.
uint8_t *dest = pixels;
const u32_le *packed32 = (u32_le *)convBuf_;
for (int yp = 0; yp < h; ++yp) {
for (int xp = 0; xp < w; ++xp) {
dest[xp] = packed32[xp] & 0xFF;
}
dest += pixelsStride;
packed32 += w;
}

gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
return true;
}
38 changes: 21 additions & 17 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ FramebufferManagerCommon::~FramebufferManagerCommon() {
bvfbs_.clear();

delete presentation_;
delete[] convBuf_;
}

void FramebufferManagerCommon::Init(int msaaLevel) {
Expand Down Expand Up @@ -2640,17 +2641,26 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32
}

bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
if (gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY);
} else {
buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY);
}
// No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer");
if (!retval) {
// Try ReadbackDepthbufferSync, in case GLES.

bool retval;
if (true) {
// Always use ReadbackDepthbufferSync (while we debug it)
buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY);
retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w);
retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h);
} else {
// Old code
if (gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY);
} else {
buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY);
}
// No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer");
if (!retval) {
// Try ReadbackDepthbufferSync, in case GLES.
buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY);
retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h);
}
}

// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
Expand Down Expand Up @@ -2748,7 +2758,7 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb,

if (channel == RASTER_DEPTH) {
_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride);
ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride, w, h);
} else {
draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync");
}
Expand All @@ -2760,12 +2770,6 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb,
gpuStats.numReadbacks++;
}

// Generic synchronous depth readback: asks the draw context to copy the depth channel of the
// (x, y, w, h) region of fbo into pixels with a row stride of pixelsStride, using the thin3d
// format that GE_FORMAT_DEPTH16 maps to. Returns whatever CopyFramebufferToMemorySync returns.
bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) {
Draw::DataFormat destFormat = GEFormatToThin3D(GE_FORMAT_DEPTH16);
// TODO: Apply depth scale factors if we don't have depth clamp.
return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, pixels, pixelsStride, "ReadbackDepthbufferSync");
}

// Generic synchronous stencil readback: forwards to the draw context, requesting the
// (x, y, w, h) region of fbo as S8 data written to pixels with a row stride of pixelsStride.
// Returns whatever CopyFramebufferToMemorySync returns.
// NOTE(review): the channel passed is Draw::FB_DEPTH_BIT even though the requested format is
// S8 (stencil) — confirm the backends expect this rather than FB_STENCIL_BIT.
bool FramebufferManagerCommon::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) {
return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync");
}
Expand Down
6 changes: 5 additions & 1 deletion GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ class FramebufferManagerCommon {
protected:
virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
// Used for when a shader is required, such as GLES.
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride);
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH);
virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride);
void SetViewport2D(int x, int y, int w, int h);
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
Expand Down Expand Up @@ -601,4 +601,8 @@ class FramebufferManagerCommon {

Draw2D draw2D_;
// The fragment shaders are "owned" by the pipelines since they're 1:1.

// Depth readback helper state
u8 *convBuf_ = nullptr;
u32 convBufSize_ = 0;
};
7 changes: 6 additions & 1 deletion GPU/Directx9/FramebufferManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@ FramebufferManagerDX9::FramebufferManagerDX9(Draw::DrawContext *draw)
// Destructor with an empty body: nothing is released explicitly here.
FramebufferManagerDX9::~FramebufferManagerDX9() {
}

bool FramebufferManagerDX9::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) {
bool FramebufferManagerDX9::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) {
// Don't yet support stretched readbacks here.
if (destW != w || destH != h) {
return false;
}

// We always read the depth buffer in 24_8 format.
LPDIRECT3DTEXTURE9 tex = (LPDIRECT3DTEXTURE9)draw_->GetFramebufferAPITexture(fbo, Draw::FB_DEPTH_BIT, 0);
if (!tex)
Expand Down
Loading