From f2a6c744bc9f30dfd1282bae35204744eb4cb862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 3 Feb 2023 18:51:59 +0100 Subject: [PATCH 1/4] Add built-in stretch functionality to depth readback shader path --- GPU/Common/FramebufferManagerCommon.cpp | 39 +++++++++++++++++-------- GPU/Common/FramebufferManagerCommon.h | 2 +- GPU/Directx9/FramebufferManagerDX9.cpp | 7 ++++- GPU/Directx9/FramebufferManagerDX9.h | 2 +- GPU/GLES/DepthBufferGLES.cpp | 18 ++++++++---- GPU/GLES/FramebufferManagerGLES.h | 2 +- 6 files changed, 48 insertions(+), 22 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index de73d52e29ea..9c7a4de70802 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2640,17 +2640,26 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 } bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false; - if (gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) { - buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY); - } else { - buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY); - } - // No need to free on failure, that's the caller's job (it likely will reuse a buffer.) - bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer"); - if (!retval) { - // Try ReadbackDepthbufferSync, in case GLES. + + bool retval; + if (true) { + // Always use ReadbackDepthbufferSync (while we debug it) buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY); - retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w); + retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h); + } else { + // Old code + if (gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) { + buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY); + } else { + buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY); + } + // No need to free on failure, that's the caller's job (it likely will reuse a buffer.) + retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer"); + if (!retval) { + // Try ReadbackDepthbufferSync, in case GLES. + buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY); + retval = ReadbackDepthbufferSync(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h); + } } // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe. @@ -2748,7 +2757,7 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, if (channel == RASTER_DEPTH) { _assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid"); - ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride); + ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride, w, h); } else { draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync"); } @@ -2760,8 +2769,14 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, gpuStats.numReadbacks++; } -bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) { +bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { Draw::DataFormat destFormat = GEFormatToThin3D(GE_FORMAT_DEPTH16); + + if (w != destW || h != destH) { + // This path can't handle stretch blits. That's fine, this path is going away later. + return false; + } + // TODO: Apply depth scale factors if we don't have depth clamp. return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, pixels, pixelsStride, "ReadbackDepthbufferSync"); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 4f0212f8cd1e..2e91e4b2a84c 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -453,7 +453,7 @@ class FramebufferManagerCommon { protected: virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel); // Used for when a shader is required, such as GLES. - virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride); + virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH); virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride); void SetViewport2D(int x, int y, int w, int h); Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); diff --git a/GPU/Directx9/FramebufferManagerDX9.cpp b/GPU/Directx9/FramebufferManagerDX9.cpp index da53a7b802a3..a3a37eea4c3f 100644 --- a/GPU/Directx9/FramebufferManagerDX9.cpp +++ b/GPU/Directx9/FramebufferManagerDX9.cpp @@ -53,7 +53,12 @@ FramebufferManagerDX9::FramebufferManagerDX9(Draw::DrawContext *draw) FramebufferManagerDX9::~FramebufferManagerDX9() { } -bool FramebufferManagerDX9::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) { +bool FramebufferManagerDX9::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { + // Don't yet support stretched readbacks here. + if (destW != w || destH != h) { + return false; + } + // We always read the depth buffer in 24_8 format. LPDIRECT3DTEXTURE9 tex = (LPDIRECT3DTEXTURE9)draw_->GetFramebufferAPITexture(fbo, Draw::FB_DEPTH_BIT, 0); if (!tex) diff --git a/GPU/Directx9/FramebufferManagerDX9.h b/GPU/Directx9/FramebufferManagerDX9.h index e189425b3871..bcbe3f7941b2 100644 --- a/GPU/Directx9/FramebufferManagerDX9.h +++ b/GPU/Directx9/FramebufferManagerDX9.h @@ -38,5 +38,5 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { ~FramebufferManagerDX9(); protected: - bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) override; + bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) override; }; diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 96072685eacf..39b9fed28302 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -173,7 +173,7 @@ static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const cha return pipeline; } -bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) { +bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { using namespace Draw; if (!fbo) { @@ -186,16 +186,22 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int } // Pixel size always 4 here because we always request float or RGBA. - const u32 bufSize = w * h * 4; + const u32 bufSize = destW * destH * 4; if (!convBuf_ || convBufSize_ < bufSize) { delete[] convBuf_; convBuf_ = new u8[bufSize]; convBufSize_ = bufSize; } - const bool useColorPath = gl_extensions.IsGLES; + float scaleX = (float)destW / w; + float scaleY = (float)destH / h; + + bool useColorPath = gl_extensions.IsGLES || scaleX != 1.0f || scaleY != 1.0f; bool format16Bit = false; + // For testing. DO NOT merge. + useColorPath = true; + if (useColorPath) { if (!depthReadbackPipeline_) { depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_vs, "depth_vs"); @@ -205,14 +211,14 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int shaderManager_->DirtyLastShader(); auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height()); draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync"); - Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; + Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f }; draw_->SetViewports(1, &viewport); draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_DEPTH_BIT, 0); draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &depthReadbackSampler_); // We must bind the program after starting the render pass. - draw_->SetScissorRect(0, 0, w, h); + draw_->SetScissorRect(0, 0, destW, destH); draw_->BindPipeline(depthReadbackPipeline_); DepthUB ub{}; @@ -241,7 +247,7 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int }; draw_->DrawUP(positions, 3); - draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackDepthbufferSync"); + draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x * scaleX, y * scaleY, w * scaleX, h * scaleY, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackDepthbufferSync"); textureCache_->ForgetLastTexture(); // TODO: Use 4444 so we can copy lines directly (instead of 32 -> 16 on CPU)? diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h index c9100143757a..528fc0a79af6 100644 --- a/GPU/GLES/FramebufferManagerGLES.h +++ b/GPU/GLES/FramebufferManagerGLES.h @@ -37,7 +37,7 @@ class FramebufferManagerGLES : public FramebufferManagerCommon { protected: void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; - bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) override; + bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) override; bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) override; private: From 4402530ca77d77bf9e0b8d94d1e7f2195753d9d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 3 Feb 2023 20:32:16 +0100 Subject: [PATCH 2/4] Use ShaderWriter to generate the depth readback shader --- GPU/GLES/DepthBufferGLES.cpp | 95 +++++++++++++++++------------------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index 39b9fed28302..b2e0ea1eef07 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -25,53 +25,13 @@ #include "GPU/GLES/FramebufferManagerGLES.h" #include "GPU/GLES/ShaderManagerGLES.h" #include "GPU/GLES/TextureCacheGLES.h" +#include "Common/GPU/ShaderWriter.h" -static const char *depth_dl_fs = R"( -#ifdef GL_ES -#ifdef GL_FRAGMENT_PRECISION_HIGH -precision highp float; -#else -precision mediump float; -#endif -#endif -#if __VERSION__ >= 130 -#define varying in -#define texture2D texture -#define gl_FragColor fragColor0 -out vec4 fragColor0; -#endif -varying vec2 v_texcoord; -uniform vec4 u_depthFactor; -uniform vec4 u_depthShift; -uniform vec4 u_depthTo8; -uniform sampler2D tex; -void main() { - float depth = texture2D(tex, v_texcoord).r; - // At this point, clamped maps [0, 1] to [0, 65535]. - float clamped = clamp((depth + u_depthFactor.x) * u_depthFactor.y, 0.0, 1.0); - - vec4 enc = u_depthShift * clamped; - enc = floor(mod(enc, 256.0)) * u_depthTo8; - // Let's ignore the bits outside 16 bit precision. - gl_FragColor = enc.yzww; -} -)"; -static const char *depth_vs = R"( -#ifdef GL_ES -precision highp float; -#endif -#if __VERSION__ >= 130 -#define attribute in -#define varying out -#endif -attribute vec2 a_position; -varying vec2 v_texcoord; -void main() { - v_texcoord = a_position * 2.0; - gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0); -} -)"; +static const InputDef inputs[2] = { + { "vec2", "a_position", Draw::SEM_POSITION }, + { "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0 }, +}; struct DepthUB { float u_depthFactor[4]; @@ -79,12 +39,45 @@ struct DepthUB { float u_depthTo8[4]; }; +const UniformDef depthUniforms[3] = { + { "vec4", "u_depthFactor", 0 }, + { "vec4", "u_depthShift", 1}, + { "vec4", "u_depthTo8", 2}, +}; + const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), { { "u_depthFactor", -1, -1, UniformType::FLOAT4, 0 }, { "u_depthShift", -1, -1, UniformType::FLOAT4, 16 }, { "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 }, } }; +static const SamplerDef samplers[1] = { + { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN }, +}; + +static const VaryingDef varyings[1] = { + { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, +}; + +void GenerateDepthDownloadFs(ShaderWriter &writer) { + writer.DeclareSamplers(samplers); + writer.BeginFSMain(depthUniforms, varyings); + writer.C(" float depth = texture2D(tex, v_texcoord).r;\n"); + // At this point, clamped maps [0, 1] to [0, 65535]. + writer.C(" float clamped = clamp((depth + u_depthFactor.x) * u_depthFactor.y, 0.0, 1.0);\n"); + writer.C(" vec4 enc = u_depthShift * clamped;\n"); + writer.C(" enc = floor(mod(enc, 256.0)) * u_depthTo8;\n"); + writer.C(" vec4 outColor = enc.yzww;\n"); // Let's ignore the bits outside 16 bit precision. + writer.EndFSMain("outColor"); +} + +void GenerateDepthDownloadVs(ShaderWriter &writer) { + writer.BeginVSMain(inputs, Slice::empty(), varyings); + writer.C("v_texcoord = a_position * 2.0;\n"); + writer.C("gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);"); + writer.EndVSMain(varyings); +} + static const char *stencil_dl_fs = R"( #ifdef GL_ES #ifdef GL_FRAGMENT_PRECISION_HIGH @@ -199,12 +192,16 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int bool useColorPath = gl_extensions.IsGLES || scaleX != 1.0f || scaleY != 1.0f; bool format16Bit = false; - // For testing. DO NOT merge. - useColorPath = true; - if (useColorPath) { if (!depthReadbackPipeline_) { - depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_vs, "depth_vs"); + const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc(); + char depth_dl_fs[1024]; + char depth_dl_vs[1024]; + ShaderWriter fsWriter(depth_dl_fs, shaderLanguageDesc, ShaderStage::Fragment); + ShaderWriter vsWriter(depth_dl_vs, shaderLanguageDesc, ShaderStage::Vertex); + GenerateDepthDownloadFs(fsWriter); + GenerateDepthDownloadVs(vsWriter); + depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_dl_vs, "depth_dl_vs"); depthReadbackSampler_ = draw_->CreateSamplerState({}); } From 23c8a79473a96765d507b5649b14d498e33ea266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 3 Feb 2023 20:53:49 +0100 Subject: [PATCH 3/4] Make depth readback through the "color path" work on all backends except D3D9 --- GPU/Common/FramebufferManagerCommon.cpp | 13 +------------ GPU/Common/FramebufferManagerCommon.h | 4 ++++ GPU/Directx9/FramebufferManagerDX9.h | 5 +---- GPU/GLES/DepthBufferGLES.cpp | 17 ++++++++--------- GPU/GLES/FramebufferManagerGLES.cpp | 4 ---- GPU/GLES/FramebufferManagerGLES.h | 6 ------ 6 files changed, 14 insertions(+), 35 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 9c7a4de70802..e94728c0bd5d 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -80,6 +80,7 @@ FramebufferManagerCommon::~FramebufferManagerCommon() { bvfbs_.clear(); delete presentation_; + delete[] convBuf_; } void FramebufferManagerCommon::Init(int msaaLevel) { @@ -2769,18 +2770,6 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, gpuStats.numReadbacks++; } -bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { - Draw::DataFormat destFormat = GEFormatToThin3D(GE_FORMAT_DEPTH16); - - if (w != destW || h != destH) { - // This path can't handle stretch blits. That's fine, this path is going away later. - return false; - } - - // TODO: Apply depth scale factors if we don't have depth clamp. - return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, pixels, pixelsStride, "ReadbackDepthbufferSync"); -} - bool FramebufferManagerCommon::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) { return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync"); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 2e91e4b2a84c..e66812c22e97 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -601,4 +601,8 @@ class FramebufferManagerCommon { Draw2D draw2D_; // The fragment shaders are "owned" by the pipelines since they're 1:1. + + // Depth readback helper state + u8 *convBuf_ = nullptr; + u32 convBufSize_ = 0; }; diff --git a/GPU/Directx9/FramebufferManagerDX9.h b/GPU/Directx9/FramebufferManagerDX9.h index bcbe3f7941b2..5ac924e4e14d 100644 --- a/GPU/Directx9/FramebufferManagerDX9.h +++ b/GPU/Directx9/FramebufferManagerDX9.h @@ -21,10 +21,6 @@ #include -// Keeps track of allocated FBOs. -// Also provides facilities for drawing and later converting raw -// pixel data. - #include "GPU/GPUCommon.h" #include "GPU/Common/FramebufferManagerCommon.h" @@ -38,5 +34,6 @@ class FramebufferManagerDX9 : public FramebufferManagerCommon { ~FramebufferManagerDX9(); protected: + // TODO: The non-color path of FramebufferManagerCommon::ReadbackDepthbufferSync seems to work just as well. bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) override; }; diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp index b2e0ea1eef07..b249331b1c6f 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/GLES/DepthBufferGLES.cpp @@ -28,9 +28,8 @@ #include "Common/GPU/ShaderWriter.h" -static const InputDef inputs[2] = { +static const InputDef vs_inputs[] = { { "vec2", "a_position", Draw::SEM_POSITION }, - { "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0 }, }; struct DepthUB { @@ -39,7 +38,7 @@ struct DepthUB { float u_depthTo8[4]; }; -const UniformDef depthUniforms[3] = { +const UniformDef depthUniforms[] = { { "vec4", "u_depthFactor", 0 }, { "vec4", "u_depthShift", 1}, { "vec4", "u_depthTo8", 2}, @@ -51,18 +50,18 @@ const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), { { "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 }, } }; -static const SamplerDef samplers[1] = { - { 0, "tex", SamplerFlags::ARRAY_ON_VULKAN }, +static const SamplerDef samplers[] = { + { 0, "tex" }, }; -static const VaryingDef varyings[1] = { +static const VaryingDef varyings[] = { { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, }; void GenerateDepthDownloadFs(ShaderWriter &writer) { writer.DeclareSamplers(samplers); writer.BeginFSMain(depthUniforms, varyings); - writer.C(" float depth = texture2D(tex, v_texcoord).r;\n"); + writer.C(" float depth = ").SampleTexture2D("tex", "v_texcoord").C(".r; \n"); // At this point, clamped maps [0, 1] to [0, 65535]. writer.C(" float clamped = clamp((depth + u_depthFactor.x) * u_depthFactor.y, 0.0, 1.0);\n"); writer.C(" vec4 enc = u_depthShift * clamped;\n"); @@ -72,7 +71,7 @@ void GenerateDepthDownloadFs(ShaderWriter &writer) { } void GenerateDepthDownloadVs(ShaderWriter &writer) { - writer.BeginVSMain(inputs, Slice::empty(), varyings); + writer.BeginVSMain(vs_inputs, Slice::empty(), varyings); writer.C("v_texcoord = a_position * 2.0;\n"); writer.C("gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);"); writer.EndVSMain(varyings); @@ -166,7 +165,7 @@ static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const cha return pipeline; } -bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { +bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) { using namespace Draw; if (!fbo) { diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index e5a2308fb4b0..4c0b7480f9ab 100644 --- a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -45,10 +45,6 @@ FramebufferManagerGLES::FramebufferManagerGLES(Draw::DrawContext *draw) : presentation_->SetLanguage(draw_->GetShaderLanguageDesc().shaderLanguage); } -FramebufferManagerGLES::~FramebufferManagerGLES() { - delete[] convBuf_; -} - void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { _assert_msg_(nvfb->fbo, "Expecting a valid nvfb in UpdateDownloadTempBuffer"); diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h index 528fc0a79af6..15c123a1e595 100644 --- a/GPU/GLES/FramebufferManagerGLES.h +++ b/GPU/GLES/FramebufferManagerGLES.h @@ -29,7 +29,6 @@ class GLRProgram; class FramebufferManagerGLES : public FramebufferManagerCommon { public: FramebufferManagerGLES(Draw::DrawContext *draw); - ~FramebufferManagerGLES(); void NotifyDisplayResized() override; @@ -37,10 +36,5 @@ class FramebufferManagerGLES : public FramebufferManagerCommon { protected: void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; - bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH) override; bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) override; - -private: - u8 *convBuf_ = nullptr; - u32 convBufSize_ = 0; }; From fb8aff5a8c710f4159b9dfec124d1c7b4869f6e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 3 Feb 2023 21:06:43 +0100 Subject: [PATCH 4/4] Split out the depth stuff from the file, move to GPU/Common --- CMakeLists.txt | 3 +- .../DepthBufferCommon.cpp} | 83 +-------- GPU/GLES/StencilBufferGLES.cpp | 170 ++++++++++++++++++ GPU/GPU.vcxproj | 8 +- GPU/GPU.vcxproj.filters | 9 +- UWP/GPU_UWP/GPU_UWP.vcxproj | 3 +- UWP/GPU_UWP/GPU_UWP.vcxproj.filters | 3 +- android/jni/Android.mk | 3 +- libretro/Makefile.common | 3 +- 9 files changed, 193 insertions(+), 92 deletions(-) rename GPU/{GLES/DepthBufferGLES.cpp => Common/DepthBufferCommon.cpp} (75%) create mode 100644 GPU/GLES/StencilBufferGLES.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 105200b0ea01..7403596347ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1499,7 +1499,7 @@ if(NOT MOBILE_DEVICE) endif() set(GPU_GLES - GPU/GLES/DepthBufferGLES.cpp + GPU/GLES/StencilBufferGLES.cpp GPU/GLES/GPU_GLES.cpp GPU/GLES/GPU_GLES.h GPU/GLES/FragmentTestCacheGLES.cpp @@ -1580,6 +1580,7 @@ set(GPU_SOURCES ${GPU_NEON} GPU/Common/Draw2D.cpp GPU/Common/Draw2D.h + GPU/Common/DepthBufferCommon.cpp GPU/Common/TextureShaderCommon.cpp GPU/Common/TextureShaderCommon.h GPU/Common/DepalettizeShaderCommon.cpp diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/Common/DepthBufferCommon.cpp similarity index 75% rename from GPU/GLES/DepthBufferGLES.cpp rename to GPU/Common/DepthBufferCommon.cpp index b249331b1c6f..620131a0bd03 100644 --- a/GPU/GLES/DepthBufferGLES.cpp +++ b/GPU/Common/DepthBufferCommon.cpp @@ -21,10 +21,9 @@ #include "Common/LogReporting.h" #include "Core/ConfigValues.h" #include "GPU/Common/GPUStateUtils.h" -#include "GPU/GLES/DrawEngineGLES.h" -#include "GPU/GLES/FramebufferManagerGLES.h" -#include "GPU/GLES/ShaderManagerGLES.h" -#include "GPU/GLES/TextureCacheGLES.h" +#include "GPU/Common/DrawEngineCommon.h" +#include "GPU/Common/FramebufferManagerCommon.h" +#include "GPU/Common/TextureCacheCommon.h" #include "Common/GPU/ShaderWriter.h" @@ -123,7 +122,7 @@ static bool SupportsDepthTexturing() { return gl_extensions.ARB_texture_float; } -static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag) { +Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag) { using namespace Draw; const ShaderLanguageDesc &shaderLanguageDesc = draw->GetShaderLanguageDesc(); @@ -268,7 +267,7 @@ bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, i // TODO: Apply this in the shader? May have precision issues if it becomes important to match. // We downloaded float values directly in this case. uint16_t *dest = pixels; - const GLfloat *packedf = (GLfloat *)convBuf_; + const float *packedf = (float *)convBuf_; DepthScaleFactors depthScale = GetDepthScaleFactors(); for (int yp = 0; yp < h; ++yp) { for (int xp = 0; xp < w; ++xp) { @@ -289,75 +288,3 @@ bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, i gstate_c.Dirty(DIRTY_ALL_RENDER_STATE); return true; } - -// Well, this is not depth, but it's depth/stencil related. -bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) { - using namespace Draw; - - if (!fbo) { - ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "ReadbackStencilbufferSync: bad fbo"); - return false; - } - - const bool useColorPath = gl_extensions.IsGLES; - if (!useColorPath) { - return draw_->CopyFramebufferToMemorySync(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync"); - } - - // Unsupported below GLES 3.1 or without ARB_stencil_texturing. - // OES_texture_stencil8 is related, but used to specify texture data. - if ((gl_extensions.IsGLES && !gl_extensions.VersionGEThan(3, 1)) && !gl_extensions.ARB_stencil_texturing) - return false; - - // Pixel size always 4 here because we always request RGBA back. - const u32 bufSize = w * h * 4; - if (!convBuf_ || convBufSize_ < bufSize) { - delete[] convBuf_; - convBuf_ = new u8[bufSize]; - convBufSize_ = bufSize; - } - - if (!stencilReadbackPipeline_) { - stencilReadbackPipeline_ = CreateReadbackPipeline(draw_, "stencil_dl", &depthUBDesc, stencil_dl_fs, "stencil_dl_fs", stencil_vs, "stencil_vs"); - stencilReadbackSampler_ = draw_->CreateSamplerState({}); - } - - shaderManager_->DirtyLastShader(); - auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height()); - draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync"); - Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; - draw_->SetViewports(1, &viewport); - - draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT, 0); - draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilReadbackSampler_); - - // We must bind the program after starting the render pass. - draw_->SetScissorRect(0, 0, w, h); - draw_->BindPipeline(stencilReadbackPipeline_); - - // Fullscreen triangle coordinates. - static const float positions[6] = { - 0.0, 0.0, - 1.0, 0.0, - 0.0, 1.0, - }; - draw_->DrawUP(positions, 3); - - draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackStencilbufferSync"); - - textureCache_->ForgetLastTexture(); - - // TODO: Use 1/4 width to write all values directly and skip CPU conversion? - uint8_t *dest = pixels; - const u32_le *packed32 = (u32_le *)convBuf_; - for (int yp = 0; yp < h; ++yp) { - for (int xp = 0; xp < w; ++xp) { - dest[xp] = packed32[xp] & 0xFF; - } - dest += pixelsStride; - packed32 += w; - } - - gstate_c.Dirty(DIRTY_ALL_RENDER_STATE); - return true; -} diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp new file mode 100644 index 000000000000..32cd92497d5d --- /dev/null +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -0,0 +1,170 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "Common/GPU/OpenGL/GLFeatures.h" +#include "Common/LogReporting.h" +#include "Core/ConfigValues.h" +#include "GPU/Common/GPUStateUtils.h" +#include "GPU/Common/DrawEngineCommon.h" +#include "GPU/Common/TextureCacheCommon.h" +#include "GPU/GLES/FramebufferManagerGLES.h" +#include "Common/GPU/ShaderWriter.h" + +static const InputDef vs_inputs[] = { + { "vec2", "a_position", Draw::SEM_POSITION }, +}; + +struct DepthUB { + float u_depthFactor[4]; + float u_depthShift[4]; + float u_depthTo8[4]; +}; + +const UniformDef depthUniforms[] = { + { "vec4", "u_depthFactor", 0 }, + { "vec4", "u_depthShift", 1}, + { "vec4", "u_depthTo8", 2}, +}; + +const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), { + { "u_depthFactor", -1, -1, UniformType::FLOAT4, 0 }, + { "u_depthShift", -1, -1, UniformType::FLOAT4, 16 }, + { "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 }, +} }; + +static const SamplerDef samplers[] = { + { 0, "tex" }, +}; + +static const VaryingDef varyings[] = { + { "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }, +}; + +static const char *stencil_dl_fs = R"( +#ifdef GL_ES +#ifdef GL_FRAGMENT_PRECISION_HIGH +precision highp float; +#else +precision mediump float; +#endif +#endif +#if __VERSION__ >= 130 +#define varying in +#define texture2D texture +#define gl_FragColor fragColor0 +out vec4 fragColor0; +#endif +varying vec2 v_texcoord; +lowp uniform usampler2D tex; +void main() { + uint stencil = texture2D(tex, v_texcoord).r; + float scaled = float(stencil) / 255.0; + gl_FragColor = vec4(scaled, scaled, scaled, scaled); +} +)"; + +static const char *stencil_vs = R"( +#ifdef GL_ES +precision highp float; +#endif +#if __VERSION__ >= 130 +#define attribute in +#define varying out +#endif +attribute vec2 a_position; +varying vec2 v_texcoord; +void main() { + v_texcoord = a_position * 2.0; + gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0); +} +)"; + +Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag); + +// Well, this is not depth, but it's depth/stencil related. +bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) { + using namespace Draw; + + if (!fbo) { + ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "ReadbackStencilbufferSync: bad fbo"); + return false; + } + + const bool useColorPath = gl_extensions.IsGLES; + if (!useColorPath) { + return draw_->CopyFramebufferToMemorySync(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync"); + } + + // Unsupported below GLES 3.1 or without ARB_stencil_texturing. + // OES_texture_stencil8 is related, but used to specify texture data. + if ((gl_extensions.IsGLES && !gl_extensions.VersionGEThan(3, 1)) && !gl_extensions.ARB_stencil_texturing) + return false; + + // Pixel size always 4 here because we always request RGBA back. + const u32 bufSize = w * h * 4; + if (!convBuf_ || convBufSize_ < bufSize) { + delete[] convBuf_; + convBuf_ = new u8[bufSize]; + convBufSize_ = bufSize; + } + + if (!stencilReadbackPipeline_) { + stencilReadbackPipeline_ = CreateReadbackPipeline(draw_, "stencil_dl", &depthUBDesc, stencil_dl_fs, "stencil_dl_fs", stencil_vs, "stencil_vs"); + stencilReadbackSampler_ = draw_->CreateSamplerState({}); + } + + shaderManager_->DirtyLastShader(); + auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height()); + draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync"); + Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; + draw_->SetViewports(1, &viewport); + + draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT, 0); + draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilReadbackSampler_); + + // We must bind the program after starting the render pass. + draw_->SetScissorRect(0, 0, w, h); + draw_->BindPipeline(stencilReadbackPipeline_); + + // Fullscreen triangle coordinates. + static const float positions[6] = { + 0.0, 0.0, + 1.0, 0.0, + 0.0, 1.0, + }; + draw_->DrawUP(positions, 3); + + draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackStencilbufferSync"); + + textureCache_->ForgetLastTexture(); + + // TODO: Use 1/4 width to write all values directly and skip CPU conversion? + uint8_t *dest = pixels; + const u32_le *packed32 = (u32_le *)convBuf_; + for (int yp = 0; yp < h; ++yp) { + for (int xp = 0; xp < w; ++xp) { + dest[xp] = packed32[xp] & 0xFF; + } + dest += pixelsStride; + packed32 += w; + } + + gstate_c.Dirty(DIRTY_ALL_RENDER_STATE); + return true; +} diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 860dbc38f306..0f8711b03b4d 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -454,6 +454,7 @@ + @@ -525,12 +526,6 @@ - - true - true - true - true - true true @@ -561,6 +556,7 @@ true true + true true diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index b8a990374ae4..828e4c526575 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -461,9 +461,6 @@ Debugger - - GLES - Debugger @@ -518,6 +515,12 @@ Common + + Common + + + GLES + diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj b/UWP/GPU_UWP/GPU_UWP.vcxproj index 05b783973699..369a75da7215 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj @@ -441,6 +441,7 @@ + @@ -531,4 +532,4 @@ - + \ No newline at end of file diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters index 2296b16a16e1..2481c765c966 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters @@ -60,6 +60,7 @@ + @@ -123,4 +124,4 @@ - + \ No newline at end of file diff --git a/android/jni/Android.mk b/android/jni/Android.mk index eae8a9c672c9..6932d49ecfc6 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -372,6 +372,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/Common/GPUStateUtils.cpp.arm \ $(SRC)/GPU/Common/SoftwareTransformCommon.cpp.arm \ $(SRC)/GPU/Common/ReinterpretFramebuffer.cpp \ + $(SRC)/GPU/Common/DepthBufferCommon.cpp \ $(SRC)/GPU/Common/VertexDecoderCommon.cpp.arm \ $(SRC)/GPU/Common/TextureCacheCommon.cpp.arm \ $(SRC)/GPU/Common/TextureScalerCommon.cpp.arm \ @@ -392,7 +393,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/Debugger/Record.cpp \ $(SRC)/GPU/Debugger/Stepping.cpp \ $(SRC)/GPU/GLES/FramebufferManagerGLES.cpp \ - $(SRC)/GPU/GLES/DepthBufferGLES.cpp \ + $(SRC)/GPU/GLES/StencilBufferGLES.cpp \ $(SRC)/GPU/GLES/GPU_GLES.cpp.arm \ $(SRC)/GPU/GLES/TextureCacheGLES.cpp.arm \ $(SRC)/GPU/GLES/DrawEngineGLES.cpp.arm \ diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 4095102bbad5..2b62ff352d15 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -391,6 +391,7 @@ SOURCES_CXX += \ $(GPUDIR)/Common/TextureCacheCommon.cpp \ $(GPUDIR)/Common/TextureScalerCommon.cpp \ $(GPUDIR)/Common/SoftwareTransformCommon.cpp \ + $(GPUDIR)/Common/DepthBufferCommon.cpp \ $(GPUDIR)/Common/StencilCommon.cpp \ $(GPUDIR)/Software/TransformUnit.cpp \ $(GPUDIR)/Software/SoftGpu.cpp \ @@ -409,7 +410,7 @@ SOURCES_CXX += \ $(GPUDIR)/Software/Rasterizer.cpp \ $(GPUDIR)/Software/RasterizerRectangle.cpp \ $(GPUDIR)/Software/RasterizerRegCache.cpp \ - $(GPUDIR)/GLES/DepthBufferGLES.cpp \ + $(GPUDIR)/GLES/StencilBufferGLES.cpp \ $(GPUDIR)/GLES/DrawEngineGLES.cpp \ $(GPUDIR)/GLES/GPU_GLES.cpp \ $(GPUDIR)/GLES/FragmentTestCacheGLES.cpp \