Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TexCache: Fix 16->32 colors with CLUT start pos #16358

Merged
merged 3 commits on Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,19 +105,19 @@ inline int dimHeight(u16 dim) {
return 1 << ((dim >> 8) & 0xFF);
}

// Vulkan color formats:
// TODO
TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D)
: draw_(draw), draw2D_(draw2D) {
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;

// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
// It's only possible to have 1KB of palette entries, although we allow 2KB in a hack.
clutBufRaw_ = (u32 *)AllocateAlignedMemory(2048, 16);
clutBufConverted_ = (u32 *)AllocateAlignedMemory(2048, 16);
// Here we need 2KB to expand a 1KB CLUT.
expandClut_ = (u32 *)AllocateAlignedMemory(2048, 16);

// Zap so we get consistent behavior if the game fails to load some of the CLUT.
memset(clutBufRaw_, 0, 1024 * sizeof(u32));
memset(clutBufConverted_, 0, 1024 * sizeof(u32));
memset(clutBufRaw_, 0, 2048);
memset(clutBufConverted_, 0, 2048);
clutBuf_ = clutBufConverted_;

// These buffers will grow if necessary, but most won't need more than this.
Expand All @@ -134,6 +134,7 @@ TextureCacheCommon::~TextureCacheCommon() {

FreeAlignedMemory(clutBufConverted_);
FreeAlignedMemory(clutBufRaw_);
FreeAlignedMemory(expandClut_);
}

// Produces a signed 1.23.8 value.
Expand Down Expand Up @@ -1236,8 +1237,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
return;
}

u32 startPos = gstate.getClutIndexStartPos();

_assert_(loadBytes <= 2048);
clutTotalBytes_ = loadBytes;
clutRenderAddress_ = 0xFFFFFFFF;

Expand Down Expand Up @@ -1320,6 +1320,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {

// A game can load a CLUT from outside valid memory without crashing; this should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
_assert_(bytes <= 2048);
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
if (GPURecord::IsActive())
performDownload = true;
Expand Down Expand Up @@ -1681,7 +1682,13 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
if (expandTo32bit) {
// We simply expand the CLUT to 32-bit, then we deindex as usual. Probably the fastest way.
const u16 *clut = GetCurrentRawClut<u16>() + clutSharingOffset;
ConvertFormatToRGBA8888(clutformat, expandClut_, clut, 16);
const int clutStart = gstate.getClutIndexStartPos();
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexMask() <= 16) {
ConvertFormatToRGBA8888(clutformat, expandClut_ + clutStart, clut + clutStart, 16);
} else {
// To be safe for shifts and wrap around, convert the entire CLUT.
ConvertFormatToRGBA8888(clutformat, expandClut_, clut, 512);
}
fullAlphaMask = 0xFF000000;
for (int y = 0; y < h; ++y) {
DeIndexTexture4<u32>((u32 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, expandClut_, &alphaSum);
Expand Down Expand Up @@ -1880,7 +1887,14 @@ CheckAlphaResult TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int l

if (expandTo32Bit && palFormat != GE_CMODE_32BIT_ABGR8888) {
const u16 *clut16raw = (const u16 *)clutBufRaw_ + clutSharingOffset;
ConvertFormatToRGBA8888(GEPaletteFormat(palFormat), expandClut_, clut16raw, 256);
// It's possible to access the latter half of the CLUT using the start pos.
const int clutStart = gstate.getClutIndexStartPos();
if (clutStart > 256) {
// Access wraps around when start + index goes over.
ConvertFormatToRGBA8888(GEPaletteFormat(palFormat), expandClut_, clut16raw, 512);
} else {
ConvertFormatToRGBA8888(GEPaletteFormat(palFormat), expandClut_ + clutStart, clut16raw + clutStart, 256);
}
clut32 = expandClut_;
palFormat = GE_CMODE_32BIT_ABGR8888;
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/TextureCacheCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ class TextureCacheCommon {

bool isBgraBackend_ = false;

u32 expandClut_[256];
u32 *expandClut_;
};

inline bool TexCacheEntry::Matches(u16 dim2, u8 format2, u8 maxLevel2) const {
Expand Down