Skip to content

Commit

Permalink
Get depal-from-dynamic-CLUT working
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Sep 14, 2022
1 parent 51c97c7 commit d6d7a15
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 13 deletions.
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRe
width = _width;
height = _height;

_dbg_assert_(tag);

CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);

Expand Down
91 changes: 84 additions & 7 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
u32 cluthash;
if (hasClut) {
if (clutRenderAddress_ != 0xFFFFFFFF) {
gstate_c.curTextureXOffset = 0.0f;
gstate_c.curTextureYOffset = 0.0f;
hasClutGPU = true;
cluthash = 0; // Or should we use some other marker value?
} else {
Expand Down Expand Up @@ -1491,12 +1493,28 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
}
}

// Used for converting CLUT4 to CLUT8: unpacks 4-bit palette indices into one byte each.
// Every source byte holds two indices; the low nibble is emitted first, the high nibble second.
//   dest:     receives exactly srcWidth bytes (one index per byte).
//   src:      packed 4-bit indices, (srcWidth + 1) / 2 bytes are read.
//   srcWidth: number of indices (pixels) to expand; values <= 0 write nothing.
// For an odd srcWidth the high nibble of the last source byte is ignored, so dest
// is never written past srcWidth bytes.
// Could SIMD or whatever, though will hardly be a bottleneck.
static void Expand4To8Bits(u8 *dest, const u8 *src, int srcWidth) {
	const int fullPairs = srcWidth / 2;
	for (int i = 0; i < fullPairs; i++) {
		const u8 packed = src[i];
		dest[i * 2] = packed & 0xF;
		dest[i * 2 + 1] = packed >> 4;
	}
	// Odd width: only the low nibble of the trailing byte maps to a real pixel.
	if (srcWidth > 0 && (srcWidth & 1) != 0) {
		dest[srcWidth - 1] = src[fullPairs] & 0xF;
	}
}

CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags) {
u32 alphaSum = 0xFFFFFFFF;
u32 fullAlphaMask = 0x0;

bool expandTo32bit = (flags & TexDecodeFlags::EXPAND32) != 0;
bool reverseColors = (flags & TexDecodeFlags::REVERSE_COLORS) != 0;
bool toClut8 = (flags & TexDecodeFlags::TO_CLUT8) != 0;

if (toClut8 && format != GE_TFMT_CLUT8 && format != GE_TFMT_CLUT4) {
_dbg_assert_(false);
}

bool swizzled = gstate.isTextureSwizzled();
if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) {
Expand Down Expand Up @@ -1531,6 +1549,15 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
texptr = (u8 *)tmpTexBuf32_.data();
}

if (toClut8) {
// We just need to expand from 4 to 8 bits.
for (int y = 0; y < h; ++y) {
Expand4To8Bits((u8 *)out + outPitch * y, texptr + (bufw * y) / 2, w);
}
// We can't know anything about alpha.
return CHECKALPHA_ANY;
}

switch (clutformat) {
case GE_CMODE_16BIT_BGR5650:
case GE_CMODE_16BIT_ABGR5551:
Expand Down Expand Up @@ -1593,6 +1620,19 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
break;

case GE_TFMT_CLUT8:
if (toClut8) {
if (gstate.isTextureSwizzled()) {
tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
UnswizzleFromMem(tmpTexBuf32_.data(), bufw, texptr, bufw, h, 1);
texptr = (u8 *)tmpTexBuf32_.data();
}
// After deswizzling, we are in the correct format and can just copy.
for (int y = 0; y < h; ++y) {
memcpy((u8 *)out + outPitch * y, texptr + (bufw * y), w);
}
// We can't know anything about alpha.
return CHECKALPHA_ANY;
}
return ReadIndexedTex(out, outPitch, level, texptr, 1, bufw, reverseColors, expandTo32bit);

case GE_TFMT_CLUT16:
Expand Down Expand Up @@ -1878,10 +1918,18 @@ void TextureCacheCommon::ApplyTexture() {
InvalidateLastTexture();
}

entry->lastFrame = gpuStats.numFlips;
BindTexture(entry);
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
if (entry->status & TexCacheEntry::STATUS_CLUT_GPU) {
// Special process.
ApplyTextureDepal(entry);
entry->lastFrame = gpuStats.numFlips;
gstate_c.SetTextureFullAlpha(false);
gstate_c.SetTextureIs3D(false);
} else {
entry->lastFrame = gpuStats.numFlips;
BindTexture(entry);
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
}
}

static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
Expand Down Expand Up @@ -2093,6 +2141,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
}

// Applies depal to a normal (non-framebuffer) texture, pre-decoded to CLUT8 format.
void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
Draw2DPipeline *textureShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
Expand All @@ -2114,6 +2163,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
desc.depth = 1;
desc.z_stencil = false;
desc.numColorAttachments = 1;
desc.tag = "dynamic_clut";
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
dynamicClutReinterpreted_ = draw_->CreateFramebuffer(desc);
}
Expand Down Expand Up @@ -2144,7 +2194,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
float scaleFactorX = 1.0f;
Draw2DPipeline *reinterpret = framebufferManager_->GetReinterpretPipeline(src->fb_format, expectedCLUTBufferFormat, &scaleFactorX);
framebufferManager_->BlitUsingRaster(
dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, 512.0f, 1.0f, false, 1.0f, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR), "copy_clut");
dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, scaleFactorX * 512.0f, 1.0f, false, 1.0f, reinterpret, "reinterpret_clut");
clutFbo = dynamicClutReinterpreted_;
}

Expand All @@ -2158,8 +2208,8 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
const KnownVertexBounds &bounds = gstate_c.vertBounds;
float u1 = 0.0f;
float v1 = 0.0f;
float u2 = 1.0f;
float v2 = 1.0f;
float u2 = texWidth;
float v2 = texHeight;
if (bounds.minV < bounds.maxV) {
u1 = (bounds.minU + gstate_c.curTextureXOffset) * texWidth;
v1 = (bounds.minV + gstate_c.curTextureYOffset) * texHeight;
Expand Down Expand Up @@ -2232,6 +2282,15 @@ void TextureCacheCommon::Clear(bool delete_them) {
secondCacheSizeEstimate_ = 0;
}
videos_.clear();

if (dynamicClutFbo_) {
dynamicClutFbo_->Release();
dynamicClutFbo_ = nullptr;
}
if (dynamicClutReinterpreted_) {
dynamicClutReinterpreted_->Release();
dynamicClutReinterpreted_ = nullptr;
}
}

void TextureCacheCommon::DeleteTexture(TexCache::iterator it) {
Expand Down Expand Up @@ -2598,6 +2657,21 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
plan.maxPossibleLevels = log2i(std::min(plan.createW, plan.createH)) + 1;
}

if (entry->status & TexCacheEntry::TexStatus::STATUS_CLUT_GPU) {
_dbg_assert_(entry->format == GE_TFMT_CLUT4 || entry->format == GE_TFMT_CLUT8);
plan.decodeToClut8 = true;
// We only support 1 mip level when doing CLUT on GPU for now.
// Supporting more would be possible, just not very interesting until we need it.
plan.levelsToCreate = 1;
plan.levelsToLoad = 1;
plan.maxPossibleLevels = 1;
plan.scaleFactor = 1;
plan.saveTexture = false; // Can't yet save these properly.
// TODO: Also forcibly disable replacement, or check that the replacement is an 8-bit paletted texture.
} else {
plan.decodeToClut8 = false;
}

if (plan.levelsToCreate == 1) {
entry->status |= TexCacheEntry::STATUS_NO_MIPS;
} else {
Expand Down Expand Up @@ -2639,6 +2713,9 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM) {
texDecFlags |= TexDecodeFlags::EXPAND32;
}
if (entry.status & TexCacheEntry::STATUS_CLUT_GPU) {
texDecFlags |= TexDecodeFlags::TO_CLUT8;
}

CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, texDecFlags);
entry.SetAlphaStatus(alphaResult, srcLevel);
Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/TextureCacheCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class ShaderManagerCommon;
// Option bits passed to DecodeTextureLevel. Each flag occupies its own bit so
// several can be combined with the bitwise operators.
enum class TexDecodeFlags {
	// Decode to 32-bit output instead of keeping a 16-bit format.
	EXPAND32 = 1 << 0,
	// Request reversed color order in the decoded output.
	REVERSE_COLORS = 1 << 1,
	// Emit raw 8-bit palette indices (CLUT4 is expanded, CLUT8 is copied)
	// instead of looking colors up in the CLUT.
	TO_CLUT8 = 1 << 2,
};
ENUM_CLASS_BITOPS(TexDecodeFlags);

Expand Down Expand Up @@ -285,6 +286,9 @@ struct BuildTexturePlan {
bool replaceValid;
bool saveTexture;

// TODO: Expand32 should probably also be decided in PrepareBuildTexture.
bool decodeToClut8;

void GetMipSize(int level, int *w, int *h) const {
if (replaceValid) {
replaced->GetSize(level, *w, *h);
Expand Down
22 changes: 18 additions & 4 deletions GPU/Vulkan/TextureCacheVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ void main() {
)";

// Size in bytes of a single texel for the given texture format:
// 4 for VULKAN_8888_FORMAT, 1 for VULKAN_CLUT8_FORMAT, 2 for anything else.
// NOTE(review): the fallback of 2 presumably covers the 16-bit 4444/1555/565
// formats — confirm no other texel sizes can reach this helper.
static int VkFormatBytesPerPixel(VkFormat format) {
	if (format == VULKAN_8888_FORMAT) {
		return 4;
	}
	if (format == VULKAN_CLUT8_FORMAT) {
		return 1;
	}
	return 2;
}

SamplerCache::~SamplerCache() {
DeviceLost();
}
Expand Down Expand Up @@ -448,6 +457,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
if (plan.scaleFactor > 1) {
// Whether hardware or software scaling, this is the dest format.
dstFmt = VULKAN_8888_FORMAT;
} else if (plan.decodeToClut8) {
dstFmt = VULKAN_CLUT8_FORMAT;
}

// We don't generate mipmaps for 512x512 textures because they're almost exclusively used for menu backgrounds
Expand Down Expand Up @@ -479,7 +490,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
case VULKAN_4444_FORMAT: mapping = &VULKAN_4444_SWIZZLE; break;
case VULKAN_1555_FORMAT: mapping = &VULKAN_1555_SWIZZLE; break;
case VULKAN_565_FORMAT: mapping = &VULKAN_565_SWIZZLE; break;
default: mapping = &VULKAN_8888_SWIZZLE; break;
default: mapping = &VULKAN_8888_SWIZZLE; break; // no swizzle
}

VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
Expand Down Expand Up @@ -562,7 +573,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
int mipHeight;
plan.GetMipSize(i, &mipWidth, &mipHeight);

int bpp = actualFmt == VULKAN_8888_FORMAT ? 4 : 2; // output bpp
int bpp = VkFormatBytesPerPixel(actualFmt);
int stride = (mipWidth * bpp + 15) & ~15; // output stride
int uploadSize = stride * mipHeight;

Expand Down Expand Up @@ -602,7 +613,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
loadLevel(uploadSize, i, stride, plan.scaleFactor);
entry->vkTex->UploadMip(cmdInit, 0, mipWidth, mipHeight, i, texBuf, bufferOffset, stride / bpp);
} else if (computeUpload) {
int srcBpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2;
int srcBpp = VkFormatBytesPerPixel(dstFmt);
int srcStride = mipUnscaledWidth * srcBpp;
int srcSize = srcStride * mipUnscaledHeight;
loadLevel(srcSize, i == 0 ? plan.baseLevelSrc : i, srcStride, 1);
Expand Down Expand Up @@ -723,7 +734,7 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
_assert_msg_(texaddr != 0, "Can't load a texture from address null")

int bufw = GetTextureBufw(level, texaddr, tfmt);
int bpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2;
int bpp = VkFormatBytesPerPixel(dstFmt);

u32 *pixelData;
int decPitch;
Expand All @@ -732,6 +743,9 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT) {
texDecFlags |= TexDecodeFlags::EXPAND32;
}
if (entry.status & TexCacheEntry::STATUS_CLUT_GPU) {
texDecFlags |= TexDecodeFlags::TO_CLUT8;
}

if (scaleFactor > 1) {
tmpTexBufRearrange_.resize(std::max(bufw, w) * h);
Expand Down
1 change: 1 addition & 0 deletions GPU/Vulkan/VulkanUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ extern const VkComponentMapping VULKAN_8888_SWIZZLE;
#define VULKAN_1555_FORMAT VK_FORMAT_A1R5G5B5_UNORM_PACK16
#define VULKAN_565_FORMAT VK_FORMAT_B5G6R5_UNORM_PACK16 // TODO: Does not actually have mandatory support, though R5G6B5 does! See #14602
#define VULKAN_8888_FORMAT VK_FORMAT_R8G8B8A8_UNORM
#define VULKAN_CLUT8_FORMAT VK_FORMAT_R8_UNORM

// Manager for compute shaders that upload things (and those have two bindings: a storage buffer to read from and an image to write to).
class VulkanComputeShaderManager {
Expand Down
4 changes: 2 additions & 2 deletions assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1313,8 +1313,8 @@ ULES00703 = true
# Temporary compatibility option, while developing a GPU CLUT-from-framebuffer path.

# Burnout Dominator - lens flare effect (issue #11100)
ULUS10236 = true
ULES00703 = true
# ULUS10236 = true
# ULES00703 = true

[UploadDepthForCLUTTextures]
# Burnout Dominator - lens flare effect (issue #11100)
Expand Down

0 comments on commit d6d7a15

Please sign in to comment.