Skip to content

Commit

Permalink
Enable depth uploads on render-to-clut-buffer. Esoteric but needed for #11100. Compat flag for now.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Sep 12, 2022
1 parent 19e1f20 commit b4c133a
Show file tree
Hide file tree
Showing 15 changed files with 87 additions and 28 deletions.
1 change: 1 addition & 0 deletions Common/GPU/D3D11/thin3d_d3d11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ static DXGI_FORMAT dataFormatToD3D11(DataFormat format) {
case DataFormat::R8G8B8A8_UNORM_SRGB: return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
case DataFormat::B8G8R8A8_UNORM: return DXGI_FORMAT_B8G8R8A8_UNORM;
case DataFormat::B8G8R8A8_UNORM_SRGB: return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
case DataFormat::R16_UNORM: return DXGI_FORMAT_R16_UNORM;
case DataFormat::R16_FLOAT: return DXGI_FORMAT_R16_FLOAT;
case DataFormat::R16G16_FLOAT: return DXGI_FORMAT_R16G16_FLOAT;
case DataFormat::R16G16B16A16_FLOAT: return DXGI_FORMAT_R16G16B16A16_FLOAT;
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/D3D9/thin3d_d3d9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ static const D3DSTENCILOP stencilOpToD3D9[] = {

D3DFORMAT FormatToD3DFMT(DataFormat fmt) {
switch (fmt) {
case DataFormat::R16_UNORM: return D3DFMT_L16; // closest match, should be a fine substitution if we ignore channels except R.
case DataFormat::R8G8B8A8_UNORM: return D3DFMT_A8R8G8B8;
case DataFormat::B8G8R8A8_UNORM: return D3DFMT_A8R8G8B8;
case DataFormat::R4G4B4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // emulated
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/DataFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ enum class DataFormat : uint8_t {
A1R5G5B5_UNORM_PACK16, // A1 in the UPPER bit.
A1B5G5R5_UNORM_PACK16, // A1 in the UPPER bit. OpenGL-only.

R16_UNORM,

R16_FLOAT,
R16G16_FLOAT,
R16G16B16A16_FLOAT,
Expand Down
8 changes: 7 additions & 1 deletion Common/GPU/OpenGL/DataFormatGL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
namespace Draw {

// TODO: Also output storage format (GL_RGBA8 etc) for modern GL usage.
bool Thin3DFormatToFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuint &format, GLuint &type, int &alignment) {
bool Thin3DFormatToGLFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuint &format, GLuint &type, int &alignment) {
alignment = 4;
switch (fmt) {
case DataFormat::R16_UNORM:
internalFormat = GL_R16;
format = GL_R;
type = GL_UNSIGNED_SHORT;
break;

case DataFormat::R8G8B8A8_UNORM:
internalFormat = GL_RGBA;
format = GL_RGBA;
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/OpenGL/DataFormatGL.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@

namespace Draw {

bool Thin3DFormatToFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuint &format, GLuint &type, int &alignment);
bool Thin3DFormatToGLFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuint &format, GLuint &type, int &alignment);

}
4 changes: 2 additions & 2 deletions Common/GPU/OpenGL/GLQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps, bool ski

GLenum internalFormat, format, type;
int alignment;
Thin3DFormatToFormatAndType(step.texture_image.format, internalFormat, format, type, alignment);
Thin3DFormatToGLFormatAndType(step.texture_image.format, internalFormat, format, type, alignment);
if (step.texture_image.depth == 1) {
glTexImage2D(tex->target,
step.texture_image.level, internalFormat,
Expand Down Expand Up @@ -1276,7 +1276,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
// For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage.
GLuint internalFormat, format, type;
int alignment;
Thin3DFormatToFormatAndType(c.texture_subimage.format, internalFormat, format, type, alignment);
Thin3DFormatToGLFormatAndType(c.texture_subimage.format, internalFormat, format, type, alignment);
glTexSubImage2D(tex->target, c.texture_subimage.level, c.texture_subimage.x, c.texture_subimage.y, c.texture_subimage.width, c.texture_subimage.height, format, type, c.texture_subimage.data);
if (c.texture_subimage.allocType == GLRAllocType::ALIGNED) {
FreeAlignedMemory(c.texture_subimage.data);
Expand Down
8 changes: 8 additions & 0 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,11 @@ static int GetBpp(VkFormat format) {
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_B8G8R8A8_UNORM:
return 32;
case VK_FORMAT_R8_UNORM:
return 8;
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R16_UNORM:
return 16;
case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
Expand All @@ -586,6 +591,9 @@ static VkFormat DataFormatToVulkan(DataFormat format) {
case DataFormat::D32F: return VK_FORMAT_D32_SFLOAT;
case DataFormat::D32F_S8: return VK_FORMAT_D32_SFLOAT_S8_UINT;
case DataFormat::S8: return VK_FORMAT_S8_UINT;

case DataFormat::R16_UNORM: return VK_FORMAT_R16_UNORM;

case DataFormat::R16_FLOAT: return VK_FORMAT_R16_SFLOAT;
case DataFormat::R16G16_FLOAT: return VK_FORMAT_R16G16_SFLOAT;
case DataFormat::R16G16B16A16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT;
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/thin3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ size_t DataFormatSizeInBytes(DataFormat fmt) {
case DataFormat::R8G8B8A8_SNORM: return 4;
case DataFormat::R8G8B8A8_UINT: return 4;
case DataFormat::R8G8B8A8_SINT: return 4;

case DataFormat::R16_UNORM: return 2;

case DataFormat::R16_FLOAT: return 2;
case DataFormat::R16G16_FLOAT: return 4;
case DataFormat::R16G16B16A16_FLOAT: return 8;
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ typedef std::function<bool(uint8_t *data, const uint8_t *initData, uint32_t w, u
struct TextureDesc {
TextureType type;
DataFormat format;

int width;
int height;
int depth;
Expand Down
1 change: 1 addition & 0 deletions Core/Compatibility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "SplitFramebufferMargin", &flags_.SplitFramebufferMargin);
CheckSetting(iniFile, gameID, "ForceLowerResolutionForEffectsOn", &flags_.ForceLowerResolutionForEffectsOn);
CheckSetting(iniFile, gameID, "AllowDownloadCLUT", &flags_.AllowDownloadCLUT);
CheckSetting(iniFile, gameID, "UploadDepthForCLUTTextures", &flags_.UploadDepthForCLUTTextures);
}

void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
Expand Down
1 change: 1 addition & 0 deletions Core/Compatibility.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ struct CompatFlags {
bool SplitFramebufferMargin;
bool ForceLowerResolutionForEffectsOn;
bool AllowDownloadCLUT;
bool UploadDepthForCLUTTextures;
};

class IniFile;
Expand Down
11 changes: 6 additions & 5 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -745,13 +745,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
p.C(" uv_round = floor(uv * tsize);\n");
p.C(" int component = int(uv_round.x) & 3;\n");
p.C(" uv_round.x *= 0.25;\n");
p.C(" vec4 t = ivec4(").LoadTexture2D("tex", "ivec2(uv_round)", 0).C(");\n");
p.C(" uv_round /= tsize;\n");
p.C(" vec4 t = ").SampleTexture2D("tex", "uv_round").C(";\n");
p.C(" int index;\n");
p.C(" switch (component) {\n");
p.C(" case 0: index = int(t.x * 255.99); break;\n");
p.C(" case 1: index = int(t.y * 255.99); break;\n");
p.C(" case 2: index = int(t.z * 255.99); break;\n");
p.C(" case 3: index = int(t.w * 255.99); break;\n");
p.C(" case 0: index = int(t.x * 254.99); break;\n"); // TODO: Not sure why 254.99 instead of 255.99, but it's currently needed.
p.C(" case 1: index = int(t.y * 254.99); break;\n");
p.C(" case 2: index = int(t.z * 254.99); break;\n");
p.C(" case 3: index = int(t.w * 254.99); break;\n");
p.C(" }\n");
p.C(" t = ").LoadTexture2D("pal", "ivec2(index, 0)", 0).C(";\n");
break;
Expand Down
63 changes: 45 additions & 18 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer
vfb->fb_format = params.fb_format;
vfb->usageFlags = FB_USAGE_RENDER_COLOR;

u32 byteSize = ColorBufferByteSize(vfb);
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) {
framebufRangeEnd_ = params.fb_address + byteSize;
u32 colorByteSize = ColorBufferByteSize(vfb);
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufRangeEnd_) {
framebufRangeEnd_ = params.fb_address + colorByteSize;
}

// This is where we actually create the framebuffer. The true is "force".
Expand All @@ -499,9 +499,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer

// Assume that if we're clearing right when switching to a new framebuffer, we don't need to upload.
if (useBufferedRendering_ && params.isDrawing) {
gpu->PerformMemoryUpload(params.fb_address, byteSize);
gpu->PerformMemoryUpload(params.fb_address, colorByteSize);
// Alpha was already done by PerformMemoryUpload.
PerformStencilUpload(params.fb_address, byteSize, StencilUpload::STENCIL_IS_ZERO | StencilUpload::IGNORE_ALPHA);
PerformStencilUpload(params.fb_address, colorByteSize, StencilUpload::STENCIL_IS_ZERO | StencilUpload::IGNORE_ALPHA);
// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
}

Expand Down Expand Up @@ -551,9 +551,20 @@ void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
// by copying from any overlapping buffers with fresher content.
if (!isClearingDepth) {
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
}

// Special compatibility trick for Burnout Dominator lens flares. Not sure how to best generalize this. See issue #11100
if (PSP_CoreParameter().compat.flags().UploadDepthForCLUTTextures && (currentRenderVfb_->usageFlags & FB_USAGE_CLUT) != 0) {
// Set the flag, then upload memory contents to depth channel.
// Sanity check the depth buffer pointer.
if (currentRenderVfb_->z_address != 0 && currentRenderVfb_->z_address != currentRenderVfb_->fb_address) {
const u16 *src = (const u16 *)Memory::GetPointerUnchecked(currentRenderVfb_->z_address);
DrawPixels(currentRenderVfb_, 0, 0, (const u8 *)src, GE_FORMAT_DEPTH16, currentRenderVfb_->z_stride, currentRenderVfb_->width, currentRenderVfb_->height, RASTER_DEPTH, "Depth Upload");
}
}
}
// First time use of this framebuffer's depth buffer.
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;

currentRenderVfb_->depthBindSeq = GetBindSeqCount();
}

Expand Down Expand Up @@ -1022,22 +1033,19 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size) {
}

void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, RasterChannel channel, const char *tag) {
// Add depth support later for depth uploads.
_dbg_assert_(channel == RASTER_COLOR);

textureCache_->ForgetLastTexture();
shaderManager_->DirtyLastShader(); // On GL, important that this is BEFORE drawing
shaderManager_->DirtyLastShader();
float u0 = 0.0f, u1 = 1.0f;
float v0 = 0.0f, v1 = 1.0f;

DrawTextureFlags flags;
if (useBufferedRendering_ && vfb && vfb->fbo) {
flags = DRAWTEX_LINEAR;
flags = channel == RASTER_COLOR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
} else {
_dbg_assert_(channel == RASTER_COLOR);
// We are drawing directly to the back buffer so need to flip.
// Should more of this be handled by the presentation engine?
if (needBackBufferYSwap_)
Expand All @@ -1051,11 +1059,18 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_);
}

if (channel == RASTER_DEPTH) {
_dbg_assert_(srcPixelFormat == GE_FORMAT_DEPTH16);
flags = flags | DRAWTEX_DEPTH;
}

Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
if (pixelsTex) {
draw_->BindTextures(0, 1, &pixelsTex);
// TODO: Replace with BlitUsingRaster for simplicity.

// TODO: Replace with draw2D_.Blit() directly.
DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);

gpuStats.numUploads++;
pixelsTex->Release();
draw_->InvalidateCachedState();
Expand Down Expand Up @@ -1145,6 +1160,7 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
const u16_le *src16 = (const u16_le *)srcPixels + srcStride * y;
const u32_le *src32 = (const u32_le *)srcPixels + srcStride * y;
u32 *dst = (u32 *)(data + byteStride * y);
u16 *dst16 = (u16 *)(data + byteStride * y);
switch (srcPixelFormat) {
case GE_FORMAT_565:
if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
Expand Down Expand Up @@ -1177,18 +1193,28 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
memcpy(dst, src32, width * 4);
break;

case GE_FORMAT_INVALID:
case GE_FORMAT_DEPTH16:
_dbg_assert_msg_(false, "Invalid pixelFormat passed to DrawPixels().");
// TODO: Must take the depth range into account, unless it's already 0-1.
// TODO: Depending on the color buffer format used with this depth buffer, we need
// to do one of two different swizzle operations. However, for the only use of this so far,
// the Burnout lens flare trickery, swizzle doesn't matter since it's just a 0, 7fff, 0, 7fff pattern
// which comes out the same.
memcpy(dst16, src16, w * 2);
break;

case GE_FORMAT_INVALID:
// Bad
break;
}
}
return true;
};

// Note: For depth, we create an R16_UNORM texture, that'll be just fine for uploading depth through a shader,
// and likely more efficient.
Draw::TextureDesc desc{
Draw::TextureType::LINEAR2D,
preferredPixelsFormat_,
srcPixelFormat == GE_FORMAT_DEPTH16 ? Draw::DataFormat::R16_UNORM : preferredPixelsFormat_,
width,
height,
1,
Expand All @@ -1198,6 +1224,7 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
{ (uint8_t *)srcPixels },
generateTexture,
};

// Hot Shots Golf (#12355) does tons of these in a frame in some situations! So creating textures
// better be fast.
Draw::Texture *tex = draw_->CreateTexture(desc);
Expand All @@ -1221,7 +1248,7 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int
if (needBackBufferYSwap_) {
flags |= OutputFlags::BACKBUFFER_FLIPPED;
}
// DrawActiveTexture reverses these, probably to match "up".
// CopyToOutput reverses these, probably to match "up".
if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
flags |= OutputFlags::POSITION_FLIPPED;
}
Expand Down Expand Up @@ -2715,7 +2742,7 @@ void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, floa
// Rearrange to strip form.
std::swap(coord[2], coord[3]);

draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline(DRAW2D_COPY_COLOR));
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline((flags & DRAWTEX_DEPTH) ? DRAW2D_COPY_DEPTH : DRAW2D_COPY_COLOR));

gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
}
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ enum DrawTextureFlags {
DRAWTEX_NEAREST = 0,
DRAWTEX_LINEAR = 1,
DRAWTEX_TO_BACKBUFFER = 8,
DRAWTEX_DEPTH = 16,
};

inline DrawTextureFlags operator | (const DrawTextureFlags &lhs, const DrawTextureFlags &rhs) {
Expand Down
8 changes: 7 additions & 1 deletion assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,12 @@ ULKS46087 = true
[AllowDownloadCLUT]
# Temporary compatibility option, while developing a GPU CLUT-from-framebuffer path.

# Burnout Dominator - lens flare effect (issue )#11100)
# Burnout Dominator - lens flare effect (issue #11100)
ULUS10236 = true
ULES00703 = true

[UploadDepthForCLUTTextures]
# Burnout Dominator - lens flare effect (issue #11100)
# We need a preinitialized depth buffer
ULUS10236 = true
ULES00703 = true

0 comments on commit b4c133a

Please sign in to comment.