Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement smooth depal lookups for Test Drive's strange usage. #15710

Merged
merged 4 commits into from
Aug 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 55 additions & 14 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static const VaryingDef varyings[1] = {
};

// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
const int shift = config.shift;
const int mask = config.mask;

Expand Down Expand Up @@ -140,7 +140,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
}

// FP only, to suit GL(ES) 2.0 and DX9
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
char lookupMethod[128] = "index.r";

const int shift = config.shift;
Expand Down Expand Up @@ -288,23 +288,64 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

// Emits the body of a depal fragment shader that looks up the palette with a
// floating-point coordinate derived directly from one color channel of the
// source texture, instead of extracting an integer index with shift/mask math.
//
// Only handles the cases where the CLUT shift/mask selects exactly one channel
// of a 5551 or 565 framebuffer:
//   5551: shift 0/5/10  -> r/g/b, mask 0x1F
//   565:  shift 0/5/11  -> r/g/b, mask 0x1F (r, b) or 0x3F (g)
// Anything else trips a debug assert; in builds where the assert is inactive the
// channel name stays "error", which ends up in the generated shader text.
//
// writer: receives the generated statements (declares `index`, `coord`, `outColor`).
// config: supplies bufferFormat, shift, mask and clutFormat.
void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
	const char *sourceChannel = "error";
	// A normalized N-bit channel value c represents the integer c * (2^N - 1), so a
	// 5-bit channel spans palette indices 0..31. Scaling by 32 would map a fully
	// saturated channel to index 32, one entry past the end of the ramp.
	float indexMultiplier = 31.0f;

	if (config.bufferFormat == GE_FORMAT_5551) {
		_dbg_assert_(config.mask == 0x1F);
		switch (config.shift) {
		case 0: sourceChannel = "r"; break;
		case 5: sourceChannel = "g"; break;
		case 10: sourceChannel = "b"; break;
		default: _dbg_assert_(false); break;
		}
	} else if (config.bufferFormat == GE_FORMAT_565) {
		_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);
		switch (config.shift) {
		case 0: sourceChannel = "r"; break;
		// Green has 6 bits in 565, so it spans indices 0..63.
		case 5: sourceChannel = "g"; indexMultiplier = 63.0f; break;
		case 11: sourceChannel = "b"; break;
		default: _dbg_assert_(false); break;
		}
	} else {
		_dbg_assert_(false);
	}

	writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);

	// Width of the palette texture in texels: 256 for 32-bit CLUTs, 512 otherwise.
	float texturePixels = 256.f;
	if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
		texturePixels = 512.f;
	}

	// The +0.5 addresses the center of the texel, so integral indices hit the
	// palette entry exactly.
	writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);
	writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
writer.DeclareSamplers(samplers);
writer.HighPrecisionFloat();
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config, lang);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config, lang);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
if (config.smoothedDepal) {
// Handles a limited set of cases, but doesn't need any integer math so we don't
// need two variants.
GenerateDepalSmoothed(writer, config);
} else {
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
}
}
writer.EndFSMain("outColor", FSFLAG_NONE);
}
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/DepalettizeShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct DepalConfig {
GEPaletteFormat clutFormat;
GETextureFormat textureFormat;
GEBufferFormat bufferFormat;
bool smoothedDepal;
};

void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);
Expand Down
28 changes: 28 additions & 0 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/GPU/thin3d.h"
#include "Core/Compatibility.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/System.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
Expand Down Expand Up @@ -88,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug;
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too.
bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL);
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;

Expand Down Expand Up @@ -590,6 +593,31 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
}
}
} else if (shaderDepal && smoothedDepal) {
// Specific mode for Test Drive. Fixes the banding.
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
// However it is good for precision on older hardware like PowerVR.
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
} else {
WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
}
// Restrictions on this are checked before setting the smoothed flag.
// Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the
// channels directly.
WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord);
WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n");
WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n");
WRITE(p, " float index0 = t.r;\n");
WRITE(p, " float mul = 32.0 / 256.0;\n");
WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh.
WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n");
WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n");
WRITE(p, " } else {\n");
WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n");
WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n");
WRITE(p, " }\n");
WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture);
} else {
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
Expand Down
2 changes: 2 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);

// Note how we here recompute some of the work already done in state mapping.
Expand Down Expand Up @@ -290,6 +291,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
}
id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture);
id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal);
id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal);
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
}

Expand Down
1 change: 1 addition & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ enum FShaderBit : uint8_t {
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_SHADER_SMOOTHED_DEPAL = 52,
};

static inline FShaderBit operator +(FShaderBit bit, int i) {
Expand Down
54 changes: 43 additions & 11 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1859,6 +1859,31 @@ bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
}
}

// Decides whether a CLUT lookup from a framebuffer may be done with interpolation
// between palette entries (higher color precision when the palette is a smooth ramp).
// This is only allowed when the CLUT index starts at 0, the index mask is within the
// detected ramp, and the shift/mask pair selects exactly one color channel of the
// framebuffer format. Any other combination may be doing something different, and
// interpolating would be wrong. Great enhancement for Test Drive.
//
// NOTE(review): the mask is compared with `<=` against rampLength; whether that is
// exact depends on how rampLength is counted by the caller - confirm.
static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) {
	// Bail out early unless the lookup starts at entry 0 and stays inside the ramp.
	if (gstate.getClutIndexStartPos() != 0 || gstate.getClutIndexMask() > rampLength) {
		return false;
	}

	const auto shift = gstate.getClutIndexShift();
	const auto mask = gstate.getClutIndexMask();

	if (framebufferFormat == GE_FORMAT_565) {
		// Red and blue are 5 bits wide, green is 6 bits wide.
		if (shift == 0 || shift == 11) {
			return mask == 0x1F;
		}
		if (shift == 5) {
			return mask == 0x3F;
		}
		return false;
	}

	if (framebufferFormat == GE_FORMAT_5551) {
		// All three color channels are 5 bits wide.
		const bool hitsChannel = shift == 0 || shift == 5 || shift == 10;
		return hitsChannel && mask == 0x1F;
	}

	// No other framebuffer format is supported for smoothed depal.
	return false;
}

void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) {
TextureShader *textureShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
Expand All @@ -1881,13 +1906,18 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
break;
}

const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
ClutTexture clutTexture{};
bool smoothedDepal = false;

if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength);

if (useShaderDepal) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();

// Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture.
Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
BindAsClutTexture(clutTexture);
BindAsClutTexture(clutTexture.texture);

framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
// Vulkan needs to do some extra work here to pick out the native handle from Draw.
Expand All @@ -1901,7 +1931,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer

// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_DEPAL);
gstate_c.SetUseShaderDepal(true);

gstate_c.SetUseShaderDepal(true, smoothedDepal);
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
Expand All @@ -1913,13 +1944,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
return;
}

textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
gstate_c.SetUseShaderDepal(false);
textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal);
gstate_c.SetUseShaderDepal(false, false);
}

if (textureShader) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
Draw::Texture *clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
draw_->BindTexture(0, nullptr);
draw_->BindTexture(1, nullptr);
Expand All @@ -1930,10 +1961,11 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
draw_->SetViewports(1, &vp);

draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
draw_->BindTexture(1, clutTexture);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler();
draw_->BindTexture(1, clutTexture.texture);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false);
Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal);
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);

textureShaderCache_->ApplyShader(textureShader,
framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight,
Expand All @@ -1958,7 +1990,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
BoundFramebufferTexture();

gstate_c.SetUseShaderDepal(false);
gstate_c.SetUseShaderDepal(false, false);
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
}

Expand Down
Loading