Skip to content

Commit

Permalink
Implement smoothed depal for the "old" depal path as well.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Aug 22, 2022
1 parent 2a6015c commit a2e2d00
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 31 deletions.
186 changes: 186 additions & 0 deletions GPU/Common/DepalettizeCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// Copyright (c) 2014- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#pragma once

#include <cstring>
#include <map>
#include <string>
#include <vector>

#include "Common/CommonTypes.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/thin3d.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/Draw2D.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"

// One entry of DepalShaderCache's shader map: a fragment shader module, a pipeline,
// and a string (presumably the generated shader source - confirm against the .cpp).
// Pointers default to null so a freshly created entry never carries garbage handles.
class DepalShader {
public:
	Draw::ShaderModule *fragShader = nullptr;
	Draw::Pipeline *pipeline = nullptr;
	std::string code;
};

// A palette (CLUT) texture together with bookkeeping data.
// All members have defaults so a default-constructed value is well-defined.
class DepalTexture {
public:
	Draw::Texture *texture = nullptr;
	// NOTE(review): presumably the frame number of the last use, consumed by
	// DepalShaderCache::Decimate() - confirm against the .cpp.
	int lastFrame = 0;
	// Number of leading palette entries, counted from entry 0, that are strictly
	// increasing (each value larger than the previous one).
	int rampLength = 0;
};

// Caches both shaders and palette textures.
// Caches both shaders and palette textures.
class DepalShaderCache {
public:
	DepalShaderCache(Draw::DrawContext *draw);
	~DepalShaderCache();

	// This also uploads the palette and binds the correct texture.
	DepalShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal);
	DepalTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);

	// Returns one of the two cached sampler states.
	// NOTE(review): presumably linearSampler_ when linearFilter is true, else nearestSampler_ - confirm.
	Draw::SamplerState *GetSampler(bool linearFilter);

	void Clear();
	void Decimate();
	std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
	std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);

	void DeviceLost();
	void DeviceRestore(Draw::DrawContext *draw);

private:
	// Packs the shader variant parameters into the 32-bit key used for cache_:
	//   bits  0-23  clutMode (only the low 24 bits are kept)
	//   bits 24+    pixelFormat
	//   bit  27     smoothedDepal
	//   bits 28+    texFormat
	// The fields are OR'd together without masking, so keys only stay distinct while
	// pixelFormat < 8 and texFormat < 16.
	// All shifts are done on uint32_t: shifting the enum values as (signed) int would
	// overflow for texFormat >= 8, which is undefined behavior before C++20.
	static uint32_t GenerateShaderID(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal) {
		const uint32_t clutBits = clutMode & 0xFFFFFF;
		const uint32_t pixelBits = static_cast<uint32_t>(pixelFormat) << 24;
		const uint32_t smoothBit = static_cast<uint32_t>(smoothedDepal) << 27;
		const uint32_t texBits = static_cast<uint32_t>(texFormat) << 28;
		return clutBits | pixelBits | texBits | smoothBit;
	}

	// Key for texCache_: the CLUT hash XOR'd with the palette format.
	static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) {
		// Simplistic.
		return clutHash ^ static_cast<uint32_t>(clutFormat);
	}

	Draw::DrawContext *draw_;
	Draw::ShaderModule *vertexShader_ = nullptr;
	Draw::SamplerState *nearestSampler_ = nullptr;
	Draw::SamplerState *linearSampler_ = nullptr;

	// Keyed by GenerateShaderID().
	std::map<u32, DepalShader *> cache_;
	// Keyed by GetClutID().
	std::map<u32, DepalTexture *> texCache_;
};

// TODO: Merge with DepalShaderCache?
// Draws a single quad (4 vertices) using a DepalShader's pipeline.
// Intended call order, as far as the methods show: construct, optionally ApplyBounds()
// to restrict the quad, Use() to bind the pipeline and build the vertices, then Shade().
// TODO: Merge with DepalShaderCache?
class TextureShaderApplier {
public:
	struct Pos {
		float x;
		float y;
	};
	struct UV {
		float u;
		float v;
	};

	// draw and shader are stored as raw pointers; nothing in this class frees them.
	// bufferW/bufferH are the sizes used by ApplyBounds() to normalize texel coordinates,
	// renderW/renderH are the viewport and scissor size used by Shade().
	// The quad starts out covering the full [-1, 1] clip range with UVs covering [0, 1].
	TextureShaderApplier(Draw::DrawContext *draw, DepalShader *shader, float bufferW, float bufferH, int renderW, int renderH)
		: draw_(draw), shader_(shader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
	}

	// Shrinks the quad and its UVs to the given texel bounds, shifted by (uoff, voff).
	// Does nothing when the bounds are not set (minV is not below maxV).
	void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
		// If min is not < max, then we don't have values (wasn't set during decode.)
		if (!(bounds.minV < bounds.maxV)) {
			return;
		}

		const float invWidth = 1.0f / bufferW_;
		const float invHeight = 1.0f / bufferH_;
		// Inverse of half = double.
		const float invHalfWidth = invWidth * 2.0f;
		const float invHalfHeight = invHeight * 2.0f;

		const int u1 = bounds.minU + uoff;
		const int v1 = bounds.minV + voff;
		const int u2 = bounds.maxU + uoff;
		const int v2 = bounds.maxV + voff;

		// Map texel coordinates to the [-1, 1] range.
		const float left = u1 * invHalfWidth - 1.0f;
		const float right = u2 * invHalfWidth - 1.0f;
		const float top = v1 * invHalfHeight - 1.0f;
		const float bottom = v2 * invHalfHeight - 1.0f;
		// Points are: BL, BR, TL, TR.
		pos_[0] = Pos{ left, bottom };
		pos_[1] = Pos{ right, bottom };
		pos_[2] = Pos{ left, top };
		pos_[3] = Pos{ right, top };

		// And also the UVs, same order, mapped to the [0, 1] range.
		const float uvleft = u1 * invWidth;
		const float uvright = u2 * invWidth;
		const float uvtop = v1 * invHeight;
		const float uvbottom = v2 * invHeight;
		uv_[0] = UV{ uvleft, uvbottom };
		uv_[1] = UV{ uvright, uvbottom };
		uv_[2] = UV{ uvleft, uvtop };
		uv_[3] = UV{ uvright, uvtop };

		// We need to reapply the texture next time since we cropped UV.
		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	}

	// Binds the shader's pipeline and assembles the four vertices from pos_ and uv_.
	void Use() {
		draw_->BindPipeline(shader_->pipeline);
		for (int i = 0; i < 4; i++) {
			// Each copy writes two floats starting at x (resp. u), so this relies on
			// Draw2DVertex storing the second coordinate directly after the first.
			memcpy(&verts_[i].x, &pos_[i], sizeof(Pos));
			memcpy(&verts_[i].u, &uv_[i], sizeof(UV));
		}
	}

	// Sets viewport and scissor to the render size and draws the 4 vertices built by Use().
	void Shade() {
		Draw::Viewport vp{ 0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f };
		// TODO: Half pixel offset for D3D9?
		draw_->SetViewports(1, &vp);
		draw_->SetScissorRect(0, 0, renderW_, renderH_);
		draw_->DrawUP((const uint8_t *)verts_, 4);
	}

protected:
	Draw::DrawContext *draw_;
	DepalShader *shader_;
	// Full-range defaults, in the same vertex order ApplyBounds() writes.
	Pos pos_[4] = {
		{ -1.0f, -1.0f },
		{  1.0f, -1.0f },
		{ -1.0f,  1.0f },
		{  1.0f,  1.0f },
	};
	UV uv_[4] = {
		{ 0.0f, 0.0f },
		{ 1.0f, 0.0f },
		{ 0.0f, 1.0f },
		{ 1.0f, 1.0f },
	};
	// Value-initialized so Shade() never reads indeterminate data if Use() was skipped.
	Draw2DVertex verts_[4]{};
	float bufferW_;
	float bufferH_;
	int renderW_;
	int renderH_;
};
69 changes: 55 additions & 14 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static const VaryingDef varyings[1] = {
};

// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
const int shift = config.shift;
const int mask = config.mask;

Expand Down Expand Up @@ -140,7 +140,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
}

// FP only, to suit GL(ES) 2.0 and DX9
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
char lookupMethod[128] = "index.r";

const int shift = config.shift;
Expand Down Expand Up @@ -288,23 +288,64 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

// Emits the fragment shader body for "smoothed" depal: one color channel of the source
// texture is scaled to a fractional palette index, and the palette is sampled at that
// position, without any integer math.
// Only 5551 and 565 buffer formats with a single-channel shift/mask are handled; anything
// else trips a debug assert and leaves the channel name as "error" in the emitted code.
void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
	const char *sourceChannel = "error";
	// Assumes the sampled channel is normalized so that the maximum channel value reads
	// as 1.0. The scale must then be the largest index (31 for 5 bits, 63 for 6 bits);
	// scaling by 32/64 would place the maximum half a texel past the last ramp entry.
	float indexMultiplier = 31.0f;

	if (config.bufferFormat == GE_FORMAT_5551) {
		_dbg_assert_(config.mask == 0x1F);
		switch (config.shift) {
		case 0: sourceChannel = "r"; break;
		case 5: sourceChannel = "g"; break;
		case 10: sourceChannel = "b"; break;
		default: _dbg_assert_(false);
		}
	} else if (config.bufferFormat == GE_FORMAT_565) {
		_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);
		switch (config.shift) {
		case 0: sourceChannel = "r"; break;
		// Green has 6 bits in 565.
		case 5: sourceChannel = "g"; indexMultiplier = 63.0f; break;
		case 11: sourceChannel = "b"; break;
		default: _dbg_assert_(false);
		}
	} else {
		_dbg_assert_(false);
	}

	writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);

	// Width in pixels used to normalize the palette coordinate: 256 for 32-bit CLUTs, 512 otherwise.
	const float texturePixels = (config.clutFormat == GE_CMODE_32BIT_ABGR8888) ? 256.f : 512.f;

	// +0.5 targets the texel center. Nine decimals print 1/256 and 1/512 exactly; plain %f
	// would truncate them to six.
	writer.F(" float coord = (index + 0.5) * %.9f;\n", 1.0 / texturePixels);
	writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

// Writes the complete depal fragment shader for the given shader language into `buffer`:
// declares the samplers, requests high precision floats, opens the fragment main, emits
// the palette lookup body, and closes main with "outColor" as the result.
// NOTE(review): this span is a flattened diff view, so the pre-change switch and the
// post-change if/else appear back to back. Presumably only the if/else version exists in
// the actual file - confirm before editing.
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
writer.DeclareSamplers(samplers);
writer.HighPrecisionFloat();
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
// Float-only generator for HLSL_D3D9 / GLSL_1xx, integer-based generator for the rest.
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config, lang);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config, lang);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
// The smoothed variant is chosen purely by config, independent of the shader language.
if (config.smoothedDepal) {
// Handles a limited set of cases, but doesn't need any integer math so we don't
// need two variants.
GenerateDepalSmoothed(writer, config);
} else {
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
}
}
writer.EndFSMain("outColor", FSFLAG_NONE);
}
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/DepalettizeShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct DepalConfig {
GEPaletteFormat clutFormat;
GETextureFormat textureFormat;
GEBufferFormat bufferFormat;
bool smoothedDepal;
};

void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);
Expand Down
18 changes: 12 additions & 6 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1906,12 +1906,17 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
break;
}

const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
ClutTexture clutTexture{};
bool smoothedDepal = false;

if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength);

if (useShaderDepal) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();

// Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture.
ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
BindAsClutTexture(clutTexture.texture);

framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
Expand All @@ -1927,7 +1932,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_DEPAL);

gstate_c.SetUseShaderDepal(true, CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength));
gstate_c.SetUseShaderDepal(true, smoothedDepal);
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
Expand All @@ -1939,7 +1944,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
return;
}

textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal);
gstate_c.SetUseShaderDepal(false, false);
}

Expand All @@ -1957,9 +1962,10 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer

draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
draw_->BindTexture(1, clutTexture.texture);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler();
Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false);
Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal);
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);

textureShaderCache_->ApplyShader(textureShader,
framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight,
Expand Down
Loading

0 comments on commit a2e2d00

Please sign in to comment.