From 6227e4f5ba84ff5d4ce69cb653b80717a2f49f93 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Fri, 11 May 2018 15:42:32 +0100 Subject: [PATCH] add GPU_GX2, doesn't work yet ... --- CMakeLists.txt | 57 +- GPU/Common/ShaderTranslation.cpp | 6 +- GPU/GPU.cpp | 18 +- GPU/GX2/DepalettizeShaderGX2.cpp | 194 ++++++ GPU/GX2/DepalettizeShaderGX2.h | 63 ++ GPU/GX2/DrawEngineGX2.cpp | 749 ++++++++++++++++++++++++ GPU/GX2/DrawEngineGX2.h | 220 +++++++ GPU/GX2/FragmentShaderGeneratorGX2.cpp | 25 + GPU/GX2/FragmentShaderGeneratorGX2.h | 24 + GPU/GX2/FramebufferManagerGX2.cpp | 519 ++++++++++++++++ GPU/GX2/FramebufferManagerGX2.h | 110 ++++ GPU/GX2/GPU_GX2.cpp | 368 ++++++++++++ GPU/GX2/GPU_GX2.h | 86 +++ GPU/GX2/GX2StaticShaders.c | 97 +++ GPU/GX2/GX2StaticShaders.h | 15 + GPU/GX2/GX2Util.cpp | 43 ++ GPU/GX2/GX2Util.h | 86 +++ GPU/GX2/ShaderManagerGX2.cpp | 248 ++++++++ GPU/GX2/ShaderManagerGX2.h | 119 ++++ GPU/GX2/StateMappingGX2.cpp | 422 +++++++++++++ GPU/GX2/StateMappingGX2.h | 81 +++ GPU/GX2/StencilBufferGX2.cpp | 205 +++++++ GPU/GX2/TextureCacheGX2.cpp | 743 +++++++++++++++++++++++ GPU/GX2/TextureCacheGX2.h | 103 ++++ GPU/GX2/TextureScalerGX2.cpp | 59 ++ GPU/GX2/TextureScalerGX2.h | 29 + GPU/GX2/VertexShaderGeneratorGX2.cpp | 26 + GPU/GX2/VertexShaderGeneratorGX2.h | 23 + UI/EmuScreen.cpp | 13 +- ext/native/thin3d/GX2Shaders.c | 1 - ext/native/thin3d/thin3d_gx2.cpp | 242 ++++---- ext/wiiu/imports.h | 8 + ext/wiiu/include/wiiu/gx2/common.h | 4 +- ext/wiiu/include/wiiu/gx2/context.h | 1 + ext/wiiu/include/wiiu/gx2/draw.h | 11 +- ext/wiiu/include/wiiu/gx2/enum.h | 22 +- ext/wiiu/include/wiiu/gx2/registers.h | 7 +- ext/wiiu/include/wiiu/gx2/shaders.h | 31 +- ext/wiiu/include/wiiu/gx2/shaders_asm.h | 3 + ext/wiiu/include/wiiu/gx2/surface.h | 5 + ext/wiiu/include/wiiu/gx2/texture.h | 2 - ext/wiiu/include/wiiu/os/debug.h | 2 + 42 files changed, 4931 insertions(+), 159 deletions(-) create mode 100644 GPU/GX2/DepalettizeShaderGX2.cpp create mode 100644 
GPU/GX2/DepalettizeShaderGX2.h create mode 100644 GPU/GX2/DrawEngineGX2.cpp create mode 100644 GPU/GX2/DrawEngineGX2.h create mode 100644 GPU/GX2/FragmentShaderGeneratorGX2.cpp create mode 100644 GPU/GX2/FragmentShaderGeneratorGX2.h create mode 100644 GPU/GX2/FramebufferManagerGX2.cpp create mode 100644 GPU/GX2/FramebufferManagerGX2.h create mode 100644 GPU/GX2/GPU_GX2.cpp create mode 100644 GPU/GX2/GPU_GX2.h create mode 100644 GPU/GX2/GX2StaticShaders.c create mode 100644 GPU/GX2/GX2StaticShaders.h create mode 100644 GPU/GX2/GX2Util.cpp create mode 100644 GPU/GX2/GX2Util.h create mode 100644 GPU/GX2/ShaderManagerGX2.cpp create mode 100644 GPU/GX2/ShaderManagerGX2.h create mode 100644 GPU/GX2/StateMappingGX2.cpp create mode 100644 GPU/GX2/StateMappingGX2.h create mode 100644 GPU/GX2/StencilBufferGX2.cpp create mode 100644 GPU/GX2/TextureCacheGX2.cpp create mode 100644 GPU/GX2/TextureCacheGX2.h create mode 100644 GPU/GX2/TextureScalerGX2.cpp create mode 100644 GPU/GX2/TextureScalerGX2.h create mode 100644 GPU/GX2/VertexShaderGeneratorGX2.cpp create mode 100644 GPU/GX2/VertexShaderGeneratorGX2.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 69012637cf30..f9dbc38c888b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1375,21 +1375,39 @@ set(GPU_D3D11 GPU/D3D11/VertexShaderGeneratorD3D11.h ) -if(NOT WIIU) +set(GPU_GX2 + GPU/GX2/DepalettizeShaderGX2.cpp + GPU/GX2/DepalettizeShaderGX2.h + GPU/GX2/DrawEngineGX2.cpp + GPU/GX2/DrawEngineGX2.h + GPU/GX2/FragmentShaderGeneratorGX2.cpp + GPU/GX2/FragmentShaderGeneratorGX2.h + GPU/GX2/FramebufferManagerGX2.cpp + GPU/GX2/FramebufferManagerGX2.h + GPU/GX2/GPU_GX2.cpp + GPU/GX2/GPU_GX2.h + GPU/GX2/GX2StaticShaders.c + GPU/GX2/GX2StaticShaders.h + GPU/GX2/GX2Util.cpp + GPU/GX2/GX2Util.h + GPU/GX2/ShaderManagerGX2.cpp + GPU/GX2/ShaderManagerGX2.h + GPU/GX2/StateMappingGX2.cpp + GPU/GX2/StateMappingGX2.h + GPU/GX2/StencilBufferGX2.cpp + GPU/GX2/TextureCacheGX2.cpp + GPU/GX2/TextureCacheGX2.h + GPU/GX2/TextureScalerGX2.cpp + 
GPU/GX2/TextureScalerGX2.h + GPU/GX2/VertexShaderGeneratorGX2.cpp + GPU/GX2/VertexShaderGeneratorGX2.h +) + +if(WIIU) + set(GPU_IMPLS ${GPU_GX2}) +else() # We build Vulkan even on Apple to avoid annoying build differences. - set(GPU_IMPLS ${GPU_GLES} ${GPU_VULKAN}) - set(GPU_SHADERS - GPU/Common/DepalettizeShaderCommon.cpp - GPU/Common/DepalettizeShaderCommon.h - GPU/Common/ShaderId.cpp - GPU/Common/ShaderId.h - GPU/Common/ShaderUniforms.cpp - GPU/Common/ShaderUniforms.h - GPU/Common/ShaderCommon.cpp - GPU/Common/ShaderCommon.h - GPU/Common/ShaderTranslation.cpp - GPU/Common/ShaderTranslation.h - ) + set(GPU_IMPLS ${GPU_GLES} ${GPU_VULKAN} ) endif() if(WIN32) list(APPEND GPU_IMPLS ${GPU_D3D9} ${GPU_D3D11}) @@ -1401,7 +1419,6 @@ endif() set(GPU_SOURCES ${GPU_IMPLS} ${GPU_NEON} - ${GPU_SHADERS} GPU/Common/FramebufferCommon.cpp GPU/Common/FramebufferCommon.h GPU/Common/GPUDebugInterface.cpp @@ -1418,6 +1435,16 @@ set(GPU_SOURCES GPU/Common/SoftwareTransformCommon.h GPU/Common/VertexDecoderCommon.cpp GPU/Common/VertexDecoderCommon.h + GPU/Common/DepalettizeShaderCommon.cpp + GPU/Common/DepalettizeShaderCommon.h + GPU/Common/ShaderId.cpp + GPU/Common/ShaderId.h + GPU/Common/ShaderCommon.cpp + GPU/Common/ShaderCommon.h + GPU/Common/ShaderUniforms.cpp + GPU/Common/ShaderUniforms.h + GPU/Common/ShaderTranslation.cpp + GPU/Common/ShaderTranslation.h GPU/Common/TransformCommon.cpp GPU/Common/TransformCommon.h GPU/Common/IndexGenerator.cpp diff --git a/GPU/Common/ShaderTranslation.cpp b/GPU/Common/ShaderTranslation.cpp index 4bc34908be9a..b3686b9edf2b 100644 --- a/GPU/Common/ShaderTranslation.cpp +++ b/GPU/Common/ShaderTranslation.cpp @@ -58,12 +58,12 @@ static EShLanguage GetLanguage(const Draw::ShaderStage stage) { void ShaderTranslationInit() { // TODO: We have TLS issues on UWP -#if !PPSSPP_PLATFORM(UWP) +#if !PPSSPP_PLATFORM(UWP) && !PPSSPP_PLATFORM(WIIU) glslang::InitializeProcess(); #endif } void ShaderTranslationShutdown() { -#if !PPSSPP_PLATFORM(UWP) +#if 
!PPSSPP_PLATFORM(UWP) && !PPSSPP_PLATFORM(WIIU) glslang::FinalizeProcess(); #endif } @@ -197,7 +197,7 @@ bool TranslateShader(std::string *dest, ShaderLanguage destLang, TranslatedShade *errorMessage = ""; } -#if PPSSPP_PLATFORM(UWP) +#if PPSSPP_PLATFORM(UWP) || PPSSPP_PLATFORM(WIIU) return false; #endif diff --git a/GPU/GPU.cpp b/GPU/GPU.cpp index cfb8db8a2ddf..dd8cb3880593 100644 --- a/GPU/GPU.cpp +++ b/GPU/GPU.cpp @@ -23,18 +23,19 @@ #include "GPU/GPU.h" #include "GPU/GPUInterface.h" +#include "GPU/Null/NullGpu.h" +#include "GPU/Software/SoftGpu.h" + #if PPSSPP_PLATFORM(UWP) #include "GPU/D3D11/GPU_D3D11.h" +#elif PPSSPP_PLATFORM(WIIU) +#include "GPU/GX2/GPU_GX2.h" #else -#ifndef __wiiu__ #include "GPU/GLES/GPU_GLES.h" -#endif #ifndef NO_VULKAN #include "GPU/Vulkan/GPU_Vulkan.h" #endif -#include "GPU/Null/NullGpu.h" -#include "GPU/Software/SoftGpu.h" #if defined(_WIN32) #include "GPU/Directx9/GPU_DX9.h" @@ -73,7 +74,7 @@ bool GPU_Init(GraphicsContext *ctx, Draw::DrawContext *draw) { SetGPU(new NullGPU()); break; case GPUCORE_GLES: -#ifndef __wiiu__ +#if !PPSSPP_PLATFORM(WIIU) SetGPU(new GPU_GLES(ctx, draw)); break; #else @@ -105,6 +106,13 @@ bool GPU_Init(GraphicsContext *ctx, Draw::DrawContext *draw) { SetGPU(new GPU_Vulkan(ctx, draw)); #endif break; + case GPUCORE_GX2: +#if PPSSPP_PLATFORM(WIIU) + SetGPU(new GPU_GX2(ctx, draw)); + break; +#else + return false; +#endif } return gpu != NULL; diff --git a/GPU/GX2/DepalettizeShaderGX2.cpp b/GPU/GX2/DepalettizeShaderGX2.cpp new file mode 100644 index 000000000000..d944bba45ce1 --- /dev/null +++ b/GPU/GX2/DepalettizeShaderGX2.cpp @@ -0,0 +1,194 @@ +// Copyright (c) 2014- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include +#include + +#include "base/basictypes.h" +#include "base/logging.h" +#include "Common/Log.h" +#include "Common/ColorConv.h" +#include "Common/StringUtils.h" +#include "Core/Reporting.h" +#include "GPU/GX2/TextureCacheGX2.h" +#include "GPU/GX2/DepalettizeShaderGX2.h" +#include "GPU/GX2/GX2Util.h" +#include "GPU/Common/DepalettizeShaderCommon.h" + +DepalShaderCacheGX2::DepalShaderCacheGX2(Draw::DrawContext *draw) { + static const GX2AttribStream depalAttribStream[] = { + { 0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _1), GX2_ENDIAN_SWAP_DEFAULT }, + { 1, 0, 12, GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _0), GX2_ENDIAN_SWAP_DEFAULT }, + }; + fetchShader_.size = GX2CalcFetchShaderSizeEx(countof(depalAttribStream), GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); + fetchShader_.program = (u8 *)MEM2_alloc(fetchShader_.size, GX2_SHADER_ALIGNMENT); + GX2InitFetchShaderEx(&fetchShader_, fetchShader_.program, countof(depalAttribStream), depalAttribStream, GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, fetchShader_.program, fetchShader_.size); + + context_ = (GX2ContextState *)draw->GetNativeObject(Draw::NativeObject::CONTEXT); +} + +DepalShaderCacheGX2::~DepalShaderCacheGX2() { + Clear(); + MEM2_free(fetchShader_.program); +} + +GX2Texture 
*DepalShaderCacheGX2::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut, bool expandTo32bit) { + const u32 clutId = GetClutID(clutFormat, clutHash); + + auto oldtex = texCache_.find(clutId); + if (oldtex != texCache_.end()) { + oldtex->second->lastFrame = gpuStats.numFlips; + return oldtex->second; + } + + int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512; + int bpp = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2; + GX2SurfaceFormat dstFmt; + u32 *expanded = nullptr; + if (expandTo32bit && clutFormat != GE_CMODE_32BIT_ABGR8888) { + expanded = new u32[texturePixels]; + switch (clutFormat) { + case GE_CMODE_16BIT_ABGR4444: + ConvertRGBA4444ToRGBA8888(expanded, (const u16 *)rawClut, texturePixels); + break; + case GE_CMODE_16BIT_ABGR5551: + ConvertRGBA5551ToRGBA8888(expanded, (const u16 *)rawClut, texturePixels); + break; + case GE_CMODE_16BIT_BGR5650: + ConvertRGBA565ToRGBA8888(expanded, (const u16 *)rawClut, texturePixels); + break; + } + rawClut = expanded; + dstFmt = GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + bpp = 4; + } else { + dstFmt = GetClutDestFormatGX2(clutFormat); + } + + DepalTextureGX2 *tex = new DepalTextureGX2(); + + tex->surface.width = texturePixels; + tex->surface.height = 1; + tex->surface.depth = 1; + tex->surface.dim = GX2_SURFACE_DIM_TEXTURE_1D; + tex->surface.tileMode = GX2_TILE_MODE_LINEAR_ALIGNED; + tex->surface.use = GX2_SURFACE_USE_TEXTURE; + tex->viewNumSlices = 1; + + tex->surface.format = dstFmt; + tex->compMap = GX2_COMP_SEL(_a, _r, _g, _b); + + GX2CalcSurfaceSizeAndAlignment(&tex->surface); + GX2InitTextureRegs(tex); + + tex->surface.image = MEM2_alloc(tex->surface.imageSize, tex->surface.alignment); + _assert_(tex->surface.image); + + if (bpp == 2) { + const u16 *src = (const u16 *)rawClut; + u16 *dst = (u16 *)tex->surface.image; + while (src < (u16 *)rawClut + texturePixels) { + *dst++ = __builtin_bswap16(*src++); + } + } else { + const u32 *src = (const u32 *)rawClut; + u32 *dst = (u32 
*)tex->surface.image; + while (src < (u32 *)rawClut + texturePixels) { + *dst++ = __builtin_bswap32(*src++); + } + } + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, tex->surface.image, tex->surface.imageSize); + + tex->lastFrame = gpuStats.numFlips; + texCache_[clutId] = tex; + + if (expandTo32bit) { + delete[] expanded; + } + return tex; +} + +void DepalShaderCacheGX2::Clear() { + for (auto shader = cache_.begin(); shader != cache_.end(); ++shader) { + delete shader->second; + } + cache_.clear(); + + for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) { + delete tex->second; + } + texCache_.clear(); +} + +void DepalShaderCacheGX2::Decimate() { + for (auto tex = texCache_.begin(); tex != texCache_.end();) { + if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) { + delete tex->second; + texCache_.erase(tex++); + } else { + ++tex; + } + } +} + +extern "C" GX2PixelShader GX2_fsCol; +DepalShaderCacheGX2::DepalShaderGX2::DepalShaderGX2(GEBufferFormat pixelFormat) : GX2PixelShader(GX2_fsCol) { + // TODO; + program = (u8*)MEM2_alloc(size, GX2_SHADER_ALIGNMENT); + memcpy(program, GX2_fsCol.program, size); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, program, size); +} + +GX2PixelShader *DepalShaderCacheGX2::GetDepalettizePixelShader(u32 clutMode, GEBufferFormat pixelFormat) { + u32 id = GenerateShaderID(clutMode, pixelFormat); + + auto shader = cache_.find(id); + if (shader != cache_.end()) { + return shader->second; + } + + DepalShaderGX2 *depal = new DepalShaderGX2(pixelFormat); + cache_[id] = depal; + + return depal; +} + +std::vector DepalShaderCacheGX2::DebugGetShaderIDs(DebugShaderType type) { + std::vector ids; + for (auto &iter : cache_) { + ids.push_back(StringFromFormat("%08x", iter.first)); + } + return ids; +} + +std::string DepalShaderCacheGX2::DebugGetShaderString(std::string idstr, DebugShaderType type, DebugShaderStringType stringType) { + u32 id; + sscanf(idstr.c_str(), "%08x", &id); + auto iter = cache_.find(id); + if 
(iter == cache_.end()) + return ""; + switch (stringType) { + case SHADER_STRING_SHORT_DESC: + return idstr; + case SHADER_STRING_SOURCE_CODE: + // TODO: disassemble shader + return "N/A"; + default: + return ""; + } +} diff --git a/GPU/GX2/DepalettizeShaderGX2.h b/GPU/GX2/DepalettizeShaderGX2.h new file mode 100644 index 000000000000..fc58aa5c1215 --- /dev/null +++ b/GPU/GX2/DepalettizeShaderGX2.h @@ -0,0 +1,63 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include +#include +#include +#include +#include + +#include "Common/CommonTypes.h" +#include "GPU/ge_constants.h" +#include "thin3d/thin3d.h" +#include "GPU/Common/DepalettizeShaderCommon.h" +#include "GPU/GX2/GX2StaticShaders.h" + +// Caches both shaders and palette textures. +class DepalShaderCacheGX2 : public DepalShaderCacheCommon { +public: + DepalShaderCacheGX2(Draw::DrawContext *draw); + ~DepalShaderCacheGX2(); + + // This also uploads the palette and binds the correct texture. 
+ GX2PixelShader *GetDepalettizePixelShader(u32 clutMode, GEBufferFormat pixelFormat); + GX2VertexShader *GetDepalettizeVertexShader() { return &defVShaderGX2; } + GX2FetchShader *GetFetchShader() { return &fetchShader_; } + GX2Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut, bool expandTo32bit); + void Clear(); + void Decimate(); + std::vector DebugGetShaderIDs(DebugShaderType type); + std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType); + +private: + struct DepalShaderGX2 : public GX2PixelShader { + DepalShaderGX2(GEBufferFormat pixelFormat); + ~DepalShaderGX2() { MEM2_free(program); } + }; + + struct DepalTextureGX2 : public GX2Texture { + DepalTextureGX2() : GX2Texture({}) {} + ~DepalTextureGX2() { MEM2_free(surface.image); } + int lastFrame; + }; + + GX2ContextState *context_; + GX2FetchShader fetchShader_ = {}; + + std::map cache_; + std::map texCache_; +}; diff --git a/GPU/GX2/DrawEngineGX2.cpp b/GPU/GX2/DrawEngineGX2.cpp new file mode 100644 index 000000000000..8437e8c13976 --- /dev/null +++ b/GPU/GX2/DrawEngineGX2.cpp @@ -0,0 +1,749 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#include + +#include "base/logging.h" +#include "base/timeutil.h" + +#include "Common/MemoryUtil.h" +#include "Core/MemMap.h" +#include "Core/Host.h" +#include "Core/System.h" +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/CoreTiming.h" + +#include "GPU/Math3D.h" +#include "GPU/GPUState.h" +#include "GPU/ge_constants.h" + +#include "GPU/Common/TextureDecoder.h" +#include "GPU/Common/SplineCommon.h" + +#include "GPU/Common/TransformCommon.h" +#include "GPU/Common/VertexDecoderCommon.h" +#include "GPU/Common/SoftwareTransformCommon.h" +#include "GPU/GX2/FramebufferManagerGX2.h" +#include "GPU/GX2/TextureCacheGX2.h" +#include "GPU/GX2/DrawEngineGX2.h" +#include "GPU/GX2/ShaderManagerGX2.h" +#include "GPU/GX2/GPU_GX2.h" + +static const GX2PrimitiveMode GX2prim[8] = { GX2_PRIMITIVE_MODE_POINTS, GX2_PRIMITIVE_MODE_LINES, GX2_PRIMITIVE_MODE_LINE_STRIP, GX2_PRIMITIVE_MODE_TRIANGLES, GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, GX2_PRIMITIVE_MODE_TRIANGLE_FAN, GX2_PRIMITIVE_MODE_QUADS, GX2_PRIMITIVE_MODE_INVALID }; + +#define VERTEXCACHE_DECIMATION_INTERVAL 17 + +enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 }; +enum { + VERTEX_PUSH_SIZE = 1024 * 1024 * 16, + INDEX_PUSH_SIZE = 1024 * 1024 * 4, +}; + +static const GX2AttribStream TransformedVertexElements[] = { + { 0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT }, + { 1, 0, 16, GX2_ATTRIB_FORMAT_FLOAT_32_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _1), GX2_ENDIAN_SWAP_DEFAULT }, + { 2, 0, 28, GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_r, _g, _b, _a), GX2_ENDIAN_SWAP_DEFAULT }, + { 3, 0, 32, GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_r, _g, _b, _a), GX2_ENDIAN_SWAP_DEFAULT }, +}; + +DrawEngineGX2::DrawEngineGX2(Draw::DrawContext *draw, GX2ContextState *context) : draw_(draw), context_(context), 
vai_(256), fetchShaderMap_(32), blendCache_(32), depthStencilCache_(64), rasterCache_(4) { + decOptions_.expandAllWeightsToFloat = true; + decOptions_.expand8BitNormalsToFloat = true; + + decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; + + // All this is a LOT of memory, need to see if we can cut down somehow. + decoded = (u8 *)MEM2_alloc(DECODED_VERTEX_BUFFER_SIZE, GX2_VERTEX_BUFFER_ALIGNMENT); + decIndex = (u16 *)MEM2_alloc(DECODED_INDEX_BUFFER_SIZE, GX2_INDEX_BUFFER_ALIGNMENT); + splineBuffer = (u8 *)MEM2_alloc(SPLINE_BUFFER_SIZE, GX2_UNIFORM_BLOCK_ALIGNMENT); + + indexGen.Setup(decIndex); + + InitDeviceObjects(); +} + +DrawEngineGX2::~DrawEngineGX2() { + DestroyDeviceObjects(); + FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE); + FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE); + FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE); +} + +void DrawEngineGX2::InitDeviceObjects() { + pushVerts_ = new PushBufferGX2(VERTEX_PUSH_SIZE, GX2_VERTEX_BUFFER_ALIGNMENT); + pushInds_ = new PushBufferGX2(INDEX_PUSH_SIZE, GX2_INDEX_BUFFER_ALIGNMENT); + + tessDataTransfer = new TessellationDataTransferGX2(context_); +} + +void DrawEngineGX2::ClearTrackedVertexArrays() { + vai_.Iterate([&](u32 hash, VertexArrayInfoGX2 *vai) { delete vai; }); + vai_.Clear(); +} + +void DrawEngineGX2::ClearInputLayoutMap() { + fetchShaderMap_.Iterate([&](const FetchShaderKey &key, GX2FetchShader *il) { + MEM2_free(il->program); + free(il); + }); + fetchShaderMap_.Clear(); +} + +void DrawEngineGX2::Resized() { + DrawEngineCommon::Resized(); + ClearInputLayoutMap(); +} + +void DrawEngineGX2::DestroyDeviceObjects() { + ClearTrackedVertexArrays(); + ClearInputLayoutMap(); + delete tessDataTransfer; + delete pushVerts_; + delete pushInds_; + depthStencilCache_.Iterate([&](const u64 &key, GX2DepthStencilControlReg *ds) { free(ds); }); + depthStencilCache_.Clear(); + blendCache_.Iterate([&](const u64 &key, GX2BlendState *bs) { free(bs); }); + blendCache_.Clear(); + 
rasterCache_.Iterate([&](const u32 &key, GX2RasterizerState *rs) { free(rs); }); + rasterCache_.Clear(); +} + +struct DeclTypeInfo { + GX2AttribFormat type; + const char *name; +}; + +static const DeclTypeInfo VComp[] = { + { GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32, "NULL" }, // DEC_NONE, + { GX2_ATTRIB_FORMAT_FLOAT_32, "D3DDECLTYPE_FLOAT1 " }, // DEC_FLOAT_1, + { GX2_ATTRIB_FORMAT_FLOAT_32_32, "D3DDECLTYPE_FLOAT2 " }, // DEC_FLOAT_2, + { GX2_ATTRIB_FORMAT_FLOAT_32_32_32, "D3DDECLTYPE_FLOAT3 " }, // DEC_FLOAT_3, + { GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32, "D3DDECLTYPE_FLOAT4 " }, // DEC_FLOAT_4, + + { GX2_ATTRIB_FORMAT_SNORM_8_8_8_8, "UNUSED" }, // DEC_S8_3, + + { GX2_ATTRIB_FORMAT_SNORM_16_16_16_16, "D3DDECLTYPE_SHORT4N " }, // DEC_S16_3, + { GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_1, + { GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_2, + { GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_3, + { GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_4, + + { GX2_ATTRIB_FORMAT_UINT_16, "UNUSED_DEC_U16_1" }, // DEC_U16_1, + { GX2_ATTRIB_FORMAT_UINT_16, "UNUSED_DEC_U16_2" }, // DEC_U16_2, + { GX2_ATTRIB_FORMAT_UNORM_16_16_16_16, "D3DDECLTYPE_USHORT4N " }, // DEC_U16_3, + { GX2_ATTRIB_FORMAT_UNORM_16_16_16_16, "D3DDECLTYPE_USHORT4N " }, // DEC_U16_4, +}; + +static void VertexAttribSetup(GX2AttribStream *VertexElement, u8 fmt, u8 offset, const char *semantic, u8 semantic_index = 0) { + VertexElement->location = semantic_index; + VertexElement->buffer = 0; + VertexElement->offset = offset; + VertexElement->format = VComp[fmt].type; + VertexElement->type = GX2_ATTRIB_INDEX_PER_VERTEX; + VertexElement->aluDivisor = 0; + VertexElement->mask = GX2_COMP_SEL(_x, _y, _z, _w); + VertexElement->endianSwap = GX2_ENDIAN_SWAP_DEFAULT; +} + +GX2FetchShader *DrawEngineGX2::SetupFetchShaderForDraw(GX2VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) { + // TODO: Instead of one for each vshader, 
we can reduce it to one for each type of shader + // that reads TEXCOORD or not, etc. Not sure if worth it. + FetchShaderKey key{ vshader, decFmt.id }; + GX2FetchShader *fetchShader = fetchShaderMap_.Get(key); + if (fetchShader) { + return fetchShader; + } else { + GX2AttribStream VertexElements[8]; + GX2AttribStream *VertexElement = &VertexElements[0]; + + // Vertices Elements orders + // WEIGHT + if (decFmt.w0fmt != 0) { + VertexAttribSetup(VertexElement, decFmt.w0fmt, decFmt.w0off, "TEXCOORD", 1); + VertexElement++; + } + + if (decFmt.w1fmt != 0) { + VertexAttribSetup(VertexElement, decFmt.w1fmt, decFmt.w1off, "TEXCOORD", 2); + VertexElement++; + } + + // TC + if (decFmt.uvfmt != 0) { + VertexAttribSetup(VertexElement, decFmt.uvfmt, decFmt.uvoff, "TEXCOORD", 0); + VertexElement++; + } + + // COLOR + if (decFmt.c0fmt != 0) { + VertexAttribSetup(VertexElement, decFmt.c0fmt, decFmt.c0off, "COLOR", 0); + VertexElement++; + } + // Never used ? + if (decFmt.c1fmt != 0) { + VertexAttribSetup(VertexElement, decFmt.c1fmt, decFmt.c1off, "COLOR", 1); + VertexElement++; + } + + // NORMAL + if (decFmt.nrmfmt != 0) { + VertexAttribSetup(VertexElement, decFmt.nrmfmt, decFmt.nrmoff, "NORMAL", 0); + VertexElement++; + } + + // POSITION + // Always + VertexAttribSetup(VertexElement, decFmt.posfmt, decFmt.posoff, "POSITION", 0); + VertexElement++; + + // Create fetchShader + fetchShader = new GX2FetchShader; + fetchShader->size = GX2CalcFetchShaderSize(VertexElement - VertexElements); + fetchShader->program = (u8 *)MEM2_alloc(fetchShader->size, GX2_SHADER_ALIGNMENT); + GX2InitFetchShader(fetchShader, fetchShader->program, VertexElement - VertexElements, VertexElements); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, fetchShader->program, fetchShader->size); + + // Add it to map + fetchShaderMap_.Insert(key, fetchShader); + return fetchShader; + } +} + +void DrawEngineGX2::MarkUnreliable(VertexArrayInfoGX2 *vai) { + vai->status = VertexArrayInfoGX2::VAI_UNRELIABLE; + + 
MEM2_free(vai->vbo); + vai->vbo = nullptr; + + MEM2_free(vai->ebo); + vai->ebo = nullptr; +} + +void DrawEngineGX2::BeginFrame() { + pushVerts_->Reset(); + pushInds_->Reset(); + + if (--decimationCounter_ <= 0) { + decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; + } else { + return; + } + + const int threshold = gpuStats.numFlips - VAI_KILL_AGE; + const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE; + int unreliableLeft = VAI_UNRELIABLE_KILL_MAX; + vai_.Iterate([&](u32 hash, VertexArrayInfoGX2 *vai) { + bool kill; + if (vai->status == VertexArrayInfoGX2::VAI_UNRELIABLE) { + // We limit killing unreliable so we don't rehash too often. + kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0; + } else { + kill = vai->lastFrame < threshold; + } + if (kill) { + delete vai; + vai_.Remove(hash); + } + }); + vai_.Maintain(); + + // Enable if you want to see vertex decoders in the log output. Need a better way. +#if 0 + char buffer[16384]; + for (std::map::iterator dec = decoderMap_.begin(); dec != decoderMap_.end(); ++dec) { + char *ptr = buffer; + ptr += dec->second->ToString(ptr); + // *ptr++ = '\n'; + NOTICE_LOG(G3D, buffer); + } +#endif +} + +VertexArrayInfoGX2::~VertexArrayInfoGX2() { + MEM2_free(vbo); + MEM2_free(ebo); +} + +static u32 SwapRB(u32 c) { return (c & 0xFF00FF00) | ((c >> 16) & 0xFF) | ((c << 16) & 0xFF0000); } + +// The inline wrapper in the header checks for numDrawCalls == 0 +void DrawEngineGX2::DoFlush() { + gpuStats.numFlushes++; + gpuStats.numTrackedVertexArrays = (int)vai_.size(); + + // This is not done on every drawcall, we should collect vertex data + // until critical state changes. That's when we draw (flush). 
+ + GEPrimitiveType prim = prevPrim_; + ApplyDrawState(prim); + + bool useHWTransform = CanUseHardwareTransform(prim); + + if (useHWTransform) { + void *vb_ = nullptr; + void *ib_ = nullptr; + + int vertexCount = 0; + int maxIndex = 0; + bool useElements = true; + + // Cannot cache vertex data with morph enabled. + bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK); + // Also avoid caching when software skinning. + if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) + useCache = false; + + if (useCache) { + u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263 + + VertexArrayInfoGX2 *vai = vai_.Get(id); + if (!vai) { + vai = new VertexArrayInfoGX2(); + vai_.Insert(id, vai); + } + + switch (vai->status) { + case VertexArrayInfoGX2::VAI_NEW: { + // Haven't seen this one before. + ReliableHashType dataHash = ComputeHash(); + vai->hash = dataHash; + vai->minihash = ComputeMiniHash(); + vai->status = VertexArrayInfoGX2::VAI_HASHING; + vai->drawsUntilNextFullHash = 0; + DecodeVerts(decoded); // writes to indexGen + vai->numVerts = indexGen.VertexCount(); + vai->prim = indexGen.Prim(); + vai->maxIndex = indexGen.MaxIndex(); + vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0; + goto rotateVBO; + } + + // Hashing - still gaining confidence about the buffer. + // But if we get this far it's likely to be worth creating a vertex buffer. + case VertexArrayInfoGX2::VAI_HASHING: { + vai->numDraws++; + if (vai->lastFrame != gpuStats.numFlips) { + vai->numFrames++; + } + if (vai->drawsUntilNextFullHash == 0) { + // Let's try to skip a full hash if mini would fail. 
+ const u32 newMiniHash = ComputeMiniHash(); + ReliableHashType newHash = vai->hash; + if (newMiniHash == vai->minihash) { + newHash = ComputeHash(); + } + if (newMiniHash != vai->minihash || newHash != vai->hash) { + MarkUnreliable(vai); + DecodeVerts(decoded); + goto rotateVBO; + } + if (vai->numVerts > 64) { + // exponential backoff up to 16 draws, then every 24 + vai->drawsUntilNextFullHash = std::min(24, vai->numFrames); + } else { + // Lower numbers seem much more likely to change. + vai->drawsUntilNextFullHash = 0; + } + // TODO: tweak + // if (vai->numFrames > 1000) { + // vai->status = VertexArrayInfo::VAI_RELIABLE; + //} + } else { + vai->drawsUntilNextFullHash--; + u32 newMiniHash = ComputeMiniHash(); + if (newMiniHash != vai->minihash) { + MarkUnreliable(vai); + DecodeVerts(decoded); + goto rotateVBO; + } + } + + if (vai->vbo == 0) { + DecodeVerts(decoded); + vai->numVerts = indexGen.VertexCount(); + vai->prim = indexGen.Prim(); + vai->maxIndex = indexGen.MaxIndex(); + vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0; + useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN; + if (!useElements && indexGen.PureCount()) { + vai->numVerts = indexGen.PureCount(); + } + + _dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching."); + + // TODO: Combine these two into one buffer? + u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); + vai->vbo = MEM2_alloc(size, GX2_VERTEX_BUFFER_ALIGNMENT); + memcpy(vai->vbo, decoded, size); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, vai->vbo, size); + if (useElements) { + u32 size = sizeof(short) * indexGen.VertexCount(); + vai->ebo = MEM2_alloc(size, GX2_INDEX_BUFFER_ALIGNMENT); + memcpy(vai->ebo, decIndex, size); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, vai->ebo, size); + } else { + vai->ebo = 0; + } + } else { + gpuStats.numCachedDrawCalls++; + useElements = vai->ebo ? 
true : false; + gpuStats.numCachedVertsDrawn += vai->numVerts; + gstate_c.vertexFullAlpha = vai->flags & VAI11_FLAG_VERTEXFULLALPHA; + } + vb_ = vai->vbo; + ib_ = vai->ebo; + vertexCount = vai->numVerts; + maxIndex = vai->maxIndex; + prim = static_cast(vai->prim); + break; + } + + // Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time. + case VertexArrayInfoGX2::VAI_RELIABLE: { + vai->numDraws++; + if (vai->lastFrame != gpuStats.numFlips) { + vai->numFrames++; + } + gpuStats.numCachedDrawCalls++; + gpuStats.numCachedVertsDrawn += vai->numVerts; + vb_ = vai->vbo; + ib_ = vai->ebo; + + vertexCount = vai->numVerts; + + maxIndex = vai->maxIndex; + prim = static_cast(vai->prim); + + gstate_c.vertexFullAlpha = vai->flags & VAI11_FLAG_VERTEXFULLALPHA; + break; + } + + case VertexArrayInfoGX2::VAI_UNRELIABLE: { + vai->numDraws++; + if (vai->lastFrame != gpuStats.numFlips) { + vai->numFrames++; + } + DecodeVerts(decoded); + goto rotateVBO; + } + } + + vai->lastFrame = gpuStats.numFlips; + } else { + DecodeVerts(decoded); + rotateVBO: + gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); + useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN; + vertexCount = indexGen.VertexCount(); + maxIndex = indexGen.MaxIndex(); + if (!useElements && indexGen.PureCount()) { + vertexCount = indexGen.PureCount(); + } + prim = indexGen.Prim(); + } + + VERBOSE_LOG(G3D, "Flush prim %i! 
%i verts in one go", prim, vertexCount); + bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; + if (gstate.isModeThrough()) { + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255); + } else { + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); + } + + ApplyDrawStateLate(true, dynState_.stencilRef); + + GX2VShader *vshader; + GX2PShader *fshader; + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform); + GX2FetchShader *fetchShader = SetupFetchShaderForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType()); + GX2SetPixelShader(fshader); + GX2SetVertexShader(vshader); + shaderManager_->UpdateUniforms(); + shaderManager_->BindUniforms(); + + GX2SetFetchShader(fetchShader); + u32 stride = dec_->GetDecVtxFmt().stride; + // GX2prim[prim]; + if (!vb_) { + // Push! 
+ u32 vOffset; + int vSize = (maxIndex + 1) * dec_->GetDecVtxFmt().stride; + u8 *vptr = pushVerts_->BeginPush(&vOffset, vSize); + memcpy(vptr, decoded, vSize); + pushVerts_->EndPush(); + void *buf = pushVerts_->Buf(); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, buf, vSize); + GX2SetAttribBuffer(0, vSize, stride, buf); + if (useElements) { + u32 iOffset; + int iSize = 2 * indexGen.VertexCount(); + u8 *iptr = pushInds_->BeginPush(&iOffset, iSize); + memcpy(iptr, decIndex, iSize); + pushInds_->EndPush(); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, iptr, iSize); + if (gstate_c.bezier || gstate_c.spline) + GX2DrawIndexedEx(GX2prim[prim], vertexCount, GX2_INDEX_TYPE_U16, iptr, 0, numPatches); + else + GX2DrawIndexedEx(GX2prim[prim], vertexCount, GX2_INDEX_TYPE_U16, iptr, 0, 1); + } else { + GX2DrawEx(GX2prim[prim], vertexCount, 0, 1); + } + } else { + GX2SetAttribBuffer(0, vertexCount * stride, stride, vb_); + if (useElements) { + if (gstate_c.bezier || gstate_c.spline) + GX2DrawIndexedEx(GX2prim[prim], vertexCount, GX2_INDEX_TYPE_U16, ib_, 0, numPatches); + else + GX2DrawIndexedEx(GX2prim[prim], vertexCount, GX2_INDEX_TYPE_U16, ib_, 0, 1); + } else { + GX2DrawEx(GX2prim[prim], vertexCount, 0, 1); + } + } + } else { + DecodeVerts(decoded); + bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE; + if (gstate.isModeThrough()) { + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255); + } else { + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); + } + + gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); + prim = indexGen.Prim(); + // Undo the strip optimization, not supported by the SW code yet. + if (prim == GE_PRIM_TRIANGLE_STRIP) + prim = GE_PRIM_TRIANGLES; + VERBOSE_LOG(G3D, "Flush prim %i SW! 
%i verts in one go", prim, indexGen.VertexCount()); + + int numTrans = 0; + bool drawIndexed = false; + u16 *inds = decIndex; + TransformedVertex *drawBuffer = NULL; + SoftwareTransformResult result{}; + SoftwareTransformParams params{}; + params.decoded = decoded; + params.transformed = transformed; + params.transformedExpanded = transformedExpanded; + params.fbman = framebufferManager_; + params.texCache = textureCache_; + params.allowClear = true; + params.allowSeparateAlphaClear = false; // GX2 doesn't support separate alpha clears + + int maxIndex = indexGen.MaxIndex(); + SoftwareTransform(prim, indexGen.VertexCount(), dec_->VertexType(), inds, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(), maxIndex, drawBuffer, numTrans, drawIndexed, ¶ms, &result); + + if (result.action == SW_DRAW_PRIMITIVES) { + ApplyDrawStateLate(result.setStencil, result.stencilValue); + + GX2VShader *vshader; + GX2PShader *fshader; + shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, false); + GX2SetPixelShader(fshader); + GX2SetVertexShader(vshader); + shaderManager_->UpdateUniforms(); + shaderManager_->BindUniforms(); + + // We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)! + // Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others. 
+ FetchShaderKey key{ vshader, 0xFFFFFFFF }; // Let's use 0xFFFFFFFF to signify TransformedVertex + GX2FetchShader *fetchShader = fetchShaderMap_.Get(key); + if (!fetchShader) { + fetchShader = new GX2FetchShader; + fetchShader->size = GX2CalcFetchShaderSize(ARRAY_SIZE(TransformedVertexElements)); + fetchShader->program = (u8 *)MEM2_alloc(fetchShader->size, GX2_SHADER_ALIGNMENT); + GX2InitFetchShader(fetchShader, fetchShader->program, ARRAY_SIZE(TransformedVertexElements), TransformedVertexElements); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, fetchShader->program, fetchShader->size); + fetchShaderMap_.Insert(key, fetchShader); + } + GX2SetFetchShader(fetchShader); + + u32 stride = sizeof(TransformedVertex); + u32 vOffset = 0; + int vSize = maxIndex * stride; + u8 *vptr = pushVerts_->BeginPush(&vOffset, vSize); + memcpy(vptr, drawBuffer, vSize); + pushVerts_->EndPush(); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, vptr, vSize); + GX2SetAttribBuffer(0, vSize, stride, vptr); + if (drawIndexed) { + u32 iOffset; + int iSize = sizeof(u16) * numTrans; + u8 *iptr = pushInds_->BeginPush(&iOffset, iSize); + memcpy(iptr, inds, iSize); + pushInds_->EndPush(); + GX2DrawIndexedEx(GX2prim[prim], numTrans, GX2_INDEX_TYPE_U16, iptr, 0, 1); + } else { + GX2DrawEx(GX2prim[prim], numTrans, 0, 1); + } + } else if (result.action == SW_CLEAR) { + u32 clearColor = result.color; + float clearDepth = result.depth; + + u32 clearFlag = 0; + + if (gstate.isClearModeColorMask()) + clearFlag |= Draw::FBChannel::FB_COLOR_BIT; + if (gstate.isClearModeAlphaMask()) + clearFlag |= Draw::FBChannel::FB_STENCIL_BIT; + if (gstate.isClearModeDepthMask()) + clearFlag |= Draw::FBChannel::FB_DEPTH_BIT; + + if (clearFlag & Draw::FBChannel::FB_DEPTH_BIT) { + framebufferManager_->SetDepthUpdated(); + } + if (clearFlag & Draw::FBChannel::FB_COLOR_BIT) { + framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); + } + + u8 clearStencil = clearColor >> 24; + draw_->Clear(clearFlag, 
clearColor, clearDepth, clearStencil); + + int scissorX2 = gstate.getScissorX2() + 1; + int scissorY2 = gstate.getScissorY2() + 1; + framebufferManager_->SetSafeSize(scissorX2, scissorY2); + if (g_Config.bBlockTransferGPU && (gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) { + int scissorX1 = gstate.getScissorX1(); + int scissorY1 = gstate.getScissorY1(); + framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); + } + } + } + + gpuStats.numDrawCalls += numDrawCalls; + gpuStats.numVertsSubmitted += vertexCountInDrawCalls_; + + indexGen.Reset(); + decodedVerts_ = 0; + numDrawCalls = 0; + vertexCountInDrawCalls_ = 0; + decodeCounter_ = 0; + dcid_ = 0; + prevPrim_ = GE_PRIM_INVALID; + gstate_c.vertexFullAlpha = true; + framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); + + // Now seems as good a time as any to reset the min/max coords, which we may examine later. + gstate_c.vertBounds.minU = 512; + gstate_c.vertBounds.minV = 512; + gstate_c.vertBounds.maxU = 0; + gstate_c.vertBounds.maxV = 0; + +#if 0 + // We only support GPU debugging on Windows, and that's the only use case for this. 
+	host->GPUNotifyDraw();
+#endif
+}
+
+// Each texel of the tessellation data textures is RGBA32F, i.e. four 32-bit words.
+static const int TESS_WORDS_PER_TEXEL = 4;
+
+// (Re)creates tex as a linear 1D RGBA32F texture that is `width` texels wide and binds it to
+// vertex texture unit `slot`. Any previous image is freed first.
+// Returns false, leaving tex.surface.image null, when the allocation fails.
+static bool RecreateTessTexture(GX2Texture &tex, int width, u32 slot) {
+	if (tex.surface.image) {
+		MEM2_free(tex.surface.image);
+		tex.surface.image = nullptr;
+	}
+	tex.surface.width = width;
+	tex.surface.height = 1;
+	tex.surface.depth = 1;
+	tex.surface.dim = GX2_SURFACE_DIM_TEXTURE_1D;
+	tex.surface.tileMode = GX2_TILE_MODE_LINEAR_ALIGNED;
+	tex.surface.use = GX2_SURFACE_USE_TEXTURE;
+	tex.viewNumSlices = 1;
+	tex.surface.format = GX2_SURFACE_FORMAT_FLOAT_R32_G32_B32_A32;
+	tex.compMap = GX2_COMP_SEL(_a, _r, _g, _b);
+	GX2CalcSurfaceSizeAndAlignment(&tex.surface);
+	GX2InitTextureRegs(&tex);
+	tex.surface.image = MEM2_alloc(tex.surface.imageSize, tex.surface.alignment);
+	if (!tex.surface.image) {
+		INFO_LOG(G3D, "Failed to create GX2 texture for HW tessellation");
+		return false;
+	}
+	GX2SetVertexTexture(&tex, slot);
+	return true;
+}
+
+// Copies `texels` RGBA32F texels from data into the texture image, byte-swapping every 32-bit
+// word, then flushes the CPU cache for the image so the GPU sees the new contents.
+static void UploadTessTexture(GX2Texture &tex, const float *data, int texels) {
+	if (!tex.surface.image || texels <= 0)
+		return;
+	const u32 *src = (const u32 *)data;
+	const u32 *end = src + texels * TESS_WORDS_PER_TEXEL;
+	u32 *dst = (u32 *)tex.surface.image;
+	while (src < end) {
+		*dst++ = __builtin_bswap32(*src++);
+	}
+	GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, tex.surface.image, tex.surface.imageSize);
+}
+
+// Sends spline/bezier control point data to the vertex shader through three 1D RGBA32F textures
+// bound to vertex texture units 0 (position), 1 (texcoord) and 2 (color).
+//   pos, tex, col: source arrays. Assumed to hold four floats per entry, matching the RGBA32F
+//                  texel layout of the destination textures - TODO confirm against the caller.
+//   size:          number of entries (texels) to upload.
+//   hasColor:      when false, only a single color texel is uploaded.
+//   hasTexCoords:  when false, the texcoord texture is left untouched.
+// A texture is only recreated when the requested size exceeds the size it was last created with.
+void DrawEngineGX2::TessellationDataTransferGX2::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
+	// Position
+	if (prevSize < size) {
+		if (!RecreateTessTexture(data_tex[0], size, 0)) {
+			prevSize = 0; // Retry on the next call instead of writing through a null image.
+			return; // TODO: Turn off HW tessellation if texture creation error occured.
+		}
+		prevSize = size;
+	}
+	UploadTessTexture(data_tex[0], pos, size);
+
+	// Texcoords
+	if (hasTexCoords) {
+		if (prevSizeTex < size) {
+			if (!RecreateTessTexture(data_tex[1], size, 1)) {
+				prevSizeTex = 0;
+				return; // TODO: Turn off HW tessellation if texture creation error occured.
+			}
+			prevSizeTex = size;
+		}
+		// The source is the texcoord array (the previous code re-read the position array here).
+		UploadTessTexture(data_tex[1], tex, size);
+	}
+
+	// Color
+	int sizeColor = hasColor ? size : 1;
+	if (prevSizeCol < sizeColor) {
+		if (!RecreateTessTexture(data_tex[2], sizeColor, 2)) {
+			prevSizeCol = 0;
+			return; // TODO: Turn off HW tessellation if texture creation error occured.
+		}
+		prevSizeCol = sizeColor;
+	}
+	// The copy is bounded by the color array itself (the previous bound was computed from pos).
+	UploadTessTexture(data_tex[2], col, sizeColor);
+}
diff --git a/GPU/GX2/DrawEngineGX2.h b/GPU/GX2/DrawEngineGX2.h
new file mode 100644
index 000000000000..86cedd2b4603
--- /dev/null
+++ b/GPU/GX2/DrawEngineGX2.h
@@ -0,0 +1,220 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include
+#include
+
+#include "Common/Hashmaps.h"
+#include "GPU/GPUState.h"
+#include "GPU/Common/GPUDebugInterface.h"
+#include "GPU/Common/IndexGenerator.h"
+#include "GPU/Common/VertexDecoderCommon.h"
+#include "GPU/Common/DrawEngineCommon.h"
+#include "GPU/Common/GPUStateUtils.h"
+#include "GPU/GX2/FragmentShaderGeneratorGX2.h"
+#include "GPU/GX2/StateMappingGX2.h"
+#include "GPU/GX2/GX2Util.h"
+
+struct DecVtxFormat;
+struct UVScale;
+
+class GX2VertexShader;
+class ShaderManagerGX2;
+class TextureCacheGX2;
+class FramebufferManagerGX2;
+
+// States transitions:
+// On creation: DRAWN_NEW
+// DRAWN_NEW -> DRAWN_HASHING
+// DRAWN_HASHING -> DRAWN_RELIABLE
+// DRAWN_HASHING -> DRAWN_UNRELIABLE
+// DRAWN_ONCE -> UNRELIABLE
+// DRAWN_RELIABLE -> DRAWN_SAFE
+// UNRELIABLE -> death
+// DRAWN_ONCE -> death
+// DRAWN_RELIABLE -> death
+
+enum {
+	VAI11_FLAG_VERTEXFULLALPHA = 1,
+};
+
+// Cache entry describing one tracked vertex array: its hashes, the cached vertex/index
+// buffers and the draw parameters needed to replay it.
+// Try to keep this POD.
+class VertexArrayInfoGX2 {
+public:
+	// Starts every entry in the VAI_NEW state with no buffers attached. All members are given
+	// a defined value, including hash, minihash and maxIndex, so a fresh entry never exposes
+	// indeterminate data.
+	VertexArrayInfoGX2() {
+		status = VAI_NEW;
+		hash = 0;
+		minihash = 0;
+		vbo = nullptr;
+		ebo = nullptr;
+		prim = GE_PRIM_INVALID;
+		numDraws = 0;
+		numFrames = 0;
+		lastFrame = gpuStats.numFlips;
+		numVerts = 0;
+		maxIndex = 0;
+		drawsUntilNextFullHash = 0;
+		flags = 0;
+	}
+	~VertexArrayInfoGX2();
+
+	enum Status : u8 {
+		VAI_NEW,
+		VAI_HASHING,
+		VAI_RELIABLE, // cache, don't hash
+		VAI_UNRELIABLE, // never cache
+	};
+
+	ReliableHashType hash;
+	u32 minihash;
+
+	void *vbo;
+	void *ebo;
+
+	// Precalculated parameter for drawRangeElements
+	u16 numVerts;
+	u16 maxIndex;
+	s8 prim;
+	Status status;
+
+	// ID information
+	int numDraws;
+	int numFrames;
+	int lastFrame; // So that we can forget.
+	u16 drawsUntilNextFullHash;
+	u8 flags;
+};
+
+// Handles transform, lighting and drawing.
+class DrawEngineGX2 : public DrawEngineCommon {
+public:
+	DrawEngineGX2(Draw::DrawContext *draw, GX2ContextState *context);
+	virtual ~DrawEngineGX2();
+
+	void SetShaderManager(ShaderManagerGX2 *shaderManager) { shaderManager_ = shaderManager; }
+	void SetTextureCache(TextureCacheGX2 *textureCache) { textureCache_ = textureCache; }
+	void SetFramebufferManager(FramebufferManagerGX2 *fbManager) { framebufferManager_ = fbManager; }
+	void InitDeviceObjects();
+	void DestroyDeviceObjects();
+
+	void BeginFrame();
+
+	// So that this can be inlined
+	// Does nothing when no draw calls are queued.
+	void Flush() {
+		if (!numDrawCalls)
+			return;
+		DoFlush();
+	}
+
+	// Decodes the queued vertices without drawing; does nothing when no draw calls are queued.
+	void FinishDeferred() {
+		if (!numDrawCalls)
+			return;
+		DecodeVerts(decoded);
+	}
+
+	void DispatchFlush() override { Flush(); }
+
+	void ClearTrackedVertexArrays() override;
+
+	void Resized() override;
+
+	void ClearInputLayoutMap();
+
+private:
+	void DoFlush();
+
+	void ApplyDrawState(int prim);
+	void ApplyDrawStateLate(bool applyStencilRef, u8 stencilRef);
+	void ResetShaderBlending();
+
+	GX2FetchShader *SetupFetchShaderForDraw(GX2VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt);
+
+	void MarkUnreliable(VertexArrayInfoGX2 *vai);
+
+	Draw::DrawContext *draw_; // Used for framebuffer related things exclusively.
+	GX2ContextState *context_;
+
+	PrehashMap vai_;
+
+	// Key for fetchShaderMap_: a vertex shader paired with a decoded vertex format id.
+	struct FetchShaderKey {
+		GX2VertexShader *vshader;
+		u32 decFmtId;
+		// Orders by decFmtId first, then by shader pointer.
+		bool operator<(const FetchShaderKey &other) const {
+			if (decFmtId < other.decFmtId)
+				return true;
+			if (decFmtId > other.decFmtId)
+				return false;
+			return vshader < other.vshader;
+		}
+	};
+
+	DenseHashMap fetchShaderMap_;
+
+	// Other
+	ShaderManagerGX2 *shaderManager_ = nullptr;
+	TextureCacheGX2 *textureCache_ = nullptr;
+	FramebufferManagerGX2 *framebufferManager_ = nullptr;
+
+	// Pushbuffers
+	// Null until created, like the other owned pointers in this class.
+	PushBufferGX2 *pushVerts_ = nullptr;
+	PushBufferGX2 *pushInds_ = nullptr;
+
+	// GX2 state object caches.
+
+	struct GX2BlendState {
+		GX2ColorControlReg color;
+		GX2BlendControlReg blend;
+		GX2BlendConstantColorReg constant;
+		GX2TargetChannelMaskReg mask;
+	};
+
+	struct GX2RasterizerState {
+		GX2FrontFace frontFace_;
+		BOOL cullFront_;
+		BOOL cullBack_;
+	};
+
+	DenseHashMap blendCache_;
+	DenseHashMap depthStencilCache_;
+	DenseHashMap rasterCache_;
+
+	// Keep the depth state between ApplyDrawState and ApplyDrawStateLate
+	GX2BlendState* blendState_ = nullptr;
+	GX2DepthStencilControlReg* depthStencilState_ = nullptr;
+	GX2RasterizerState* rasterState_ = nullptr;
+
+	// State keys
+	GX2StateKeys keys_{};
+	GX2DynamicState dynState_{};
+
+	// Hardware tessellation
+	class TessellationDataTransferGX2 : public TessellationDataTransfer {
+	private:
+		GX2ContextState *context_;
+		GX2Texture data_tex[3];
+
+	public:
+		// data_tex is value-initialized with braces (an array member cannot take a
+		// parenthesized initializer), so every surface.image starts out null.
+		TessellationDataTransferGX2(GX2ContextState *context_) : TessellationDataTransfer(), context_(context_), data_tex{} {}
+		// Frees whichever of the three texture images were allocated.
+		~TessellationDataTransferGX2() {
+			for (int i = 0; i < 3; i++) {
+				if (data_tex[i].surface.image) {
+					MEM2_free(data_tex[i].surface.image);
+				}
+			}
+		}
+		void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
+	};
+};
diff --git a/GPU/GX2/FragmentShaderGeneratorGX2.cpp b/GPU/GX2/FragmentShaderGeneratorGX2.cpp
new file mode 100644
index 000000000000..c6a9ff3bdf63
--- /dev/null
+++ b/GPU/GX2/FragmentShaderGeneratorGX2.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include
+#undef ARRAY_SIZE
+
+#include "GPU/Common/ShaderCommon.h"
+#include "GPU/GX2/FragmentShaderGeneratorGX2.h"
+
+// Placeholder: the body is empty, so neither id nor ps is read or written yet.
+void GenerateFragmentShaderGX2(const FShaderID &id, GX2PixelShader *ps) {
+}
diff --git a/GPU/GX2/FragmentShaderGeneratorGX2.h b/GPU/GX2/FragmentShaderGeneratorGX2.h
new file mode 100644
index 000000000000..71338fd54375
--- /dev/null
+++ b/GPU/GX2/FragmentShaderGeneratorGX2.h
@@ -0,0 +1,24 @@
+// Copyright (c) 2017- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include
+
+#include "GPU/Common/ShaderId.h"
+
+// Declaration of the fragment shader generator; the definition is currently an empty stub.
+void GenerateFragmentShaderGX2(const FShaderID &id, GX2PixelShader *ps);
diff --git a/GPU/GX2/FramebufferManagerGX2.cpp b/GPU/GX2/FramebufferManagerGX2.cpp
new file mode 100644
index 000000000000..94ae8b1a456c
--- /dev/null
+++ b/GPU/GX2/FramebufferManagerGX2.cpp
@@ -0,0 +1,519 @@
+// Copyright (c) 2017- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "base/display.h"
+#include "math/lin/matrix4x4.h"
+#include "ext/native/thin3d/thin3d.h"
+#include "base/basictypes.h"
+#include "file/vfs.h"
+#include "file/zip_read.h"
+#include "i18n/i18n.h"
+
+#include "Common/ColorConv.h"
+#include "Common/MathUtil.h"
+#include "Core/Host.h"
+#include "Core/MemMap.h"
+#include "Core/Config.h"
+#include "Core/System.h"
+#include "Core/Reporting.h"
+#include "GPU/ge_constants.h"
+#include "GPU/GPUState.h"
+#include "GPU/Debugger/Stepping.h"
+
+#include "GPU/Common/FramebufferCommon.h"
+#include "GPU/Common/ShaderTranslation.h"
+#include "GPU/Common/TextureDecoder.h"
+#include "GPU/Common/PostShader.h"
+#include "GPU/GX2/FramebufferManagerGX2.h"
+#include "GPU/GX2/ShaderManagerGX2.h"
+#include "GPU/GX2/TextureCacheGX2.h"
+#include "GPU/GX2/DrawEngineGX2.h"
+#include "GPU/GX2/GX2StaticShaders.h"
+
+#include "ext/native/thin3d/thin3d.h"
+
+#include
+#include
+
+// Attribute streams for the textured quad: a 3-float stream followed by a 2-float stream.
+// NOTE(review): the leading fields are presumably {location, buffer, byte offset} - confirm
+// against the GX2AttribStream declaration.
+// clang-format off
+const GX2AttribStream FramebufferManagerGX2::g_QuadAttribStream[2] = {
+	{ 0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _1), GX2_ENDIAN_SWAP_DEFAULT },
+	{ 1, 0, 12, GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _0), GX2_ENDIAN_SWAP_DEFAULT },
+};
+
+// STRIP geometry
+// Four vertices of five floats each (x, y, z, u, v) covering the whole clip-space square.
+__attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT)))
+const float FramebufferManagerGX2::fsQuadBuffer_[20] = {
+	-1.0f,-1.0f, 0.0f, 0.0f, 0.0f,
+	1.0f,-1.0f, 0.0f, 1.0f, 0.0f,
+	-1.0f, 1.0f, 0.0f, 0.0f, 1.0f,
+	1.0f, 1.0f, 0.0f, 1.0f, 1.0f,
+};
+// clang-format on
+
+// Fetches the GX2 context from the draw context, builds the quad fetch shader, allocates the
+// dynamic quad vertex buffer and the post-shader constant block, fills one stencil mask
+// register per 8-bit value, then initializes shader translation and compiles the post shader.
+FramebufferManagerGX2::FramebufferManagerGX2(Draw::DrawContext *draw) : FramebufferManagerCommon(draw) {
+	context_ = (GX2ContextState *)draw->GetNativeObject(Draw::NativeObject::CONTEXT);
+
+	quadFetchShader_.size = GX2CalcFetchShaderSize(ARRAY_SIZE(g_QuadAttribStream));
+	quadFetchShader_.program = (u8 *)MEM2_alloc(quadFetchShader_.size, GX2_SHADER_ALIGNMENT);
+	GX2InitFetchShader(&quadFetchShader_, quadFetchShader_.program, ARRAY_SIZE(g_QuadAttribStream), g_QuadAttribStream);
+	GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, quadFetchShader_.program, quadFetchShader_.size);
+
+	// NOTE(review): DrawActiveTexture copies four 5-float vertices into quadBuffer_; confirm
+	// that quadStride_ * sizeof(float) bytes is large enough for that.
+	quadBuffer_ = (float*)MEM2_alloc(quadStride_ * sizeof(float), GX2_VERTEX_BUFFER_ALIGNMENT);
+	postConstants_ = MEM2_alloc(ROUND_UP(sizeof(PostShaderUniforms), 64), GX2_UNIFORM_BLOCK_ALIGNMENT);
+
+	for (int i = 0; i < 256; i++) {
+		GX2InitStencilMaskReg(&stencilMaskStates_[i], i, 0xFF, 0xFF, i, 0xFF, 0xFF);
+	}
+
+	ShaderTranslationInit();
+	CompilePostShader();
+}
+
+// Shuts down shader translation, frees the MEM2 allocations owned by this object and releases
+// the temporary FBOs.
+FramebufferManagerGX2::~FramebufferManagerGX2() {
+	ShaderTranslationShutdown();
+
+	// Drawing cleanup
+	MEM2_free(quadFetchShader_.program);
+	MEM2_free(quadBuffer_);
+	MEM2_free(postConstants_);
+
+	if (drawPixelsTex_.surface.image)
+		MEM2_free(drawPixelsTex_.surface.image);
+
+	if (postFetchShader_.program) {
+		MEM2_free(postFetchShader_.program);
+	}
+
+	// FBO cleanup
+	for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) {
+		it->second.fbo->Release();
+	}
+
+	// Stencil cleanup
+	if (stencilValueBuffer_)
+		MEM2_free(stencilValueBuffer_);
+}
+
+// Stores the texture cache in both the GX2-typed and the common pointer.
+void FramebufferManagerGX2::SetTextureCache(TextureCacheGX2 *tc) {
+	textureCacheGX2_ = tc;
+	textureCache_ = tc;
+}
+
+void
FramebufferManagerGX2::SetShaderManager(ShaderManagerGX2 *sm) {
+	// Stored in both the GX2-typed and the common pointer.
+	shaderManagerGX2_ = sm;
+	shaderManager_ = sm;
+}
+
+// Stores the draw engine in both the GX2-typed and the common pointer.
+void FramebufferManagerGX2::SetDrawEngine(DrawEngineGX2 *td) {
+	drawEngineGX2_ = td;
+	drawEngine_ = td;
+}
+
+// Not implemented yet; the body is empty.
+void FramebufferManagerGX2::CompilePostShader() {
+	// TODO
+}
+
+// Converts a width x height block of source pixels to 32-bit texels in drawPixelsTex_.
+// The texture is freed and recreated when the requested size differs from the current one.
+// 16-bit source formats are expanded per pixel; 8888 rows are copied as-is.
+// srcStride is in pixels. u1 and v1 are not written by this function.
+void FramebufferManagerGX2::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
+	// TODO: Check / use D3DCAPS2_DYNAMICTEXTURES?
+	if (drawPixelsTex_.surface.image && (drawPixelsTex_.surface.width != width || drawPixelsTex_.surface.height != height)) {
+		MEM2_free(drawPixelsTex_.surface.image);
+		drawPixelsTex_ = {};
+	}
+
+	if (!drawPixelsTex_.surface.image) {
+		drawPixelsTex_.surface.width = width;
+		drawPixelsTex_.surface.height = height;
+		drawPixelsTex_.surface.depth = 1;
+		drawPixelsTex_.surface.dim = GX2_SURFACE_DIM_TEXTURE_2D;
+		drawPixelsTex_.surface.tileMode = GX2_TILE_MODE_LINEAR_ALIGNED;
+		drawPixelsTex_.surface.use = GX2_SURFACE_USE_TEXTURE;
+		drawPixelsTex_.viewNumSlices = 1;
+
+		drawPixelsTex_.surface.format = GX2_SURFACE_FORMAT_UINT_R8_G8_B8_A8;
+		drawPixelsTex_.compMap = GX2_COMP_SEL(_a, _r, _g, _b);
+
+		GX2CalcSurfaceSizeAndAlignment(&drawPixelsTex_.surface);
+		GX2InitTextureRegs(&drawPixelsTex_);
+
+		drawPixelsTex_.surface.image = MEM2_alloc(drawPixelsTex_.surface.imageSize, drawPixelsTex_.surface.alignment);
+		_assert_(drawPixelsTex_.surface.image);
+	}
+
+	for (int y = 0; y < height; y++) {
+		// Destination rows advance by the surface pitch, source rows by srcStride.
+		u32_le *dst = (u32_le *)drawPixelsTex_.surface.image + drawPixelsTex_.surface.pitch * y;
+		if (srcPixelFormat != GE_FORMAT_8888) {
+			const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
+			for (u32 x = 0; x < width; x++) {
+				switch (srcPixelFormat) {
+				case GE_FORMAT_565: dst[x] = RGB565ToRGBA8888(src[x]); break;
+				case GE_FORMAT_5551: dst[x] = RGBA5551ToRGBA8888(src[x]); break;
+				case GE_FORMAT_4444: dst[x] = RGBA4444ToRGBA8888(src[x]); break;
+				}
+			}
+		} else {
+			const u32_le *src = (const u32_le *)srcPixels + srcStride * y;
+			memcpy(dst, src, width * sizeof(u32_le));
+		}
+	}
+	// Flush the CPU writes so the GPU samples the new contents.
+	GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, drawPixelsTex_.surface.image, drawPixelsTex_.surface.imageSize);
+}
+
+// Draws the currently bound texture as a quad at (x, y) with size (w, h), given in a
+// destW x destH coordinate space, using the UV rectangle (u0, v0)-(u1, v1).
+// uvRotation rotates the UV assignment in quarter turns; positions are additionally multiplied
+// by g_display_rot_matrix when the display is rotated.
+void FramebufferManagerGX2::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) {
+	struct Coord {
+		Vec3 pos;
+		float u, v;
+	};
+	Coord coord[4] = {
+		{ { x, y, 0 }, u0, v0 },
+		{ { x + w, y, 0 }, u1, v0 },
+		{ { x + w, y + h, 0 }, u1, v1 },
+		{ { x, y + h, 0 }, u0, v1 },
+	};
+
+	static const short indices[4] = { 0, 1, 3, 2 };
+
+	if (uvRotation != ROTATION_LOCKED_HORIZONTAL) {
+		float temp[8];
+		int rotation = 0;
+		switch (uvRotation) {
+		case ROTATION_LOCKED_HORIZONTAL180: rotation = 2; break;
+		case ROTATION_LOCKED_VERTICAL: rotation = 1; break;
+		case ROTATION_LOCKED_VERTICAL180: rotation = 3; break;
+		}
+		// Shift each corner's UV pair by `rotation` corners, via a temporary copy.
+		for (int i = 0; i < 4; i++) {
+			temp[i * 2] = coord[((i + rotation) & 3)].u;
+			temp[i * 2 + 1] = coord[((i + rotation) & 3)].v;
+		}
+
+		for (int i = 0; i < 4; i++) {
+			coord[i].u = temp[i * 2];
+			coord[i].v = temp[i * 2 + 1];
+		}
+	}
+
+	// Map destination coordinates to the -1..1 range, flipping Y.
+	float invDestW = 1.0f / (destW * 0.5f);
+	float invDestH = 1.0f / (destH * 0.5f);
+	for (int i = 0; i < 4; i++) {
+		coord[i].pos.x = coord[i].pos.x * invDestW - 1.0f;
+		coord[i].pos.y = -(coord[i].pos.y * invDestH - 1.0f);
+	}
+
+	if (g_display_rotation != DisplayRotation::ROTATE_0) {
+		for (int i = 0; i < 4; i++) {
+			// backwards notation, should fix that...
+			coord[i].pos = coord[i].pos * g_display_rot_matrix;
+		}
+	}
+
+	// The above code is for FAN geometry but we can only do STRIP. So rearrange it a little.
+	memcpy(quadBuffer_, coord, sizeof(Coord));
+	memcpy(quadBuffer_ + 5, coord + 1, sizeof(Coord));
+	memcpy(quadBuffer_ + 10, coord + 3, sizeof(Coord));
+	memcpy(quadBuffer_ + 15, coord + 2, sizeof(Coord));
+	// quadBuffer_ was just written by the CPU; flush it before the GPU fetches from it, as the
+	// push-buffer uploads in DoFlush do.
+	GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, quadBuffer_, sizeof(coord));
+
+	GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
+	GX2SetColorControlReg(&StockGX2::blendDisabledColorWrite);
+	GX2SetTargetChannelMasksReg(&StockGX2::TargetChannelMasks[0xF]);
+	GX2SetDepthStencilControlReg(&StockGX2::depthStencilDisabled);
+	GX2SetPixelSampler((flags & DRAWTEX_LINEAR) ? &StockGX2::samplerLinear2DClamp : &StockGX2::samplerPoint2DClamp, 0);
+	GX2SetAttribBuffer(0, sizeof(coord), sizeof(*coord), quadBuffer_);
+	GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, 0, 1);
+	// The state set above bypasses the cached state, so mark it dirty for the next draw.
+	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE);
+}
+
+// Binds the quad fetch shader together with the default pixel and vertex shaders.
+void FramebufferManagerGX2::Bind2DShader() {
+	GX2SetFetchShader(&quadFetchShader_);
+	GX2SetPixelShader(&defPShaderGX2);
+	GX2SetVertexShader(&defVShaderGX2);
+}
+
+// Binds the post-processing shaders and uploads `uniforms` to uniform block 1 of both stages.
+// Falls back to the default 2D shaders (and zero extra FBOs) when post-processing is disabled
+// or no post pixel shader is available after the compile attempt.
+void FramebufferManagerGX2::BindPostShader(const PostShaderUniforms &uniforms) {
+	if (!postPixelShader_) {
+		if (usePostShader_) {
+			CompilePostShader();
+		}
+		// CompilePostShader() may leave postPixelShader_ null; never bind a null shader.
+		if (!usePostShader_ || !postPixelShader_) {
+			SetNumExtraFBOs(0);
+			GX2SetFetchShader(&quadFetchShader_);
+			GX2SetPixelShader(&defPShaderGX2);
+			GX2SetVertexShader(&defVShaderGX2);
+			return;
+		} else {
+			SetNumExtraFBOs(1);
+		}
+	}
+	GX2SetFetchShader(&postFetchShader_);
+	GX2SetPixelShader(postPixelShader_);
+	GX2SetVertexShader(postVertexShader_);
+
+	memcpy(postConstants_, &uniforms, sizeof(uniforms));
+	GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, postConstants_, sizeof(uniforms));
+	GX2SetVertexUniformBlock(1, sizeof(uniforms), postConstants_); // Probably not necessary
+	GX2SetPixelUniformBlock(1, sizeof(uniforms), postConstants_);
+}
+
+// Called when a framebuffer's format changes from `old`. Only the 565 case is handled: a
+// full-screen quad is drawn over the framebuffer with stencil writes enabled.
+// Does nothing without buffered rendering or when vfb has no FBO.
+void FramebufferManagerGX2::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) {
+	if (!useBufferedRendering_ || !vfb->fbo) {
+		return;
+	}
+
+	// Technically, we should at this point re-interpret the bytes of the old format to the new.
+	// That might get tricky, and could cause unnecessary slowness in some games.
+	// For now, we just clear alpha/stencil from 565, which fixes shadow issues in Kingdom Hearts.
+	// (it uses 565 to write zeros to the buffer, then 4444 to actually render the shadow.)
+	//
+	// The best way to do this may ultimately be to create a new FBO (combine with any resize?)
+	// and blit with a shader to that, then replace the FBO on vfb. Stencil would still be complex
+	// to exactly reproduce in 4444 and 8888 formats.
+	if (old == GE_FORMAT_565) {
+		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::KEEP });
+
+		// TODO: There's no way this does anything useful :(
+		GX2SetDepthStencilControlReg(&StockGX2::depthDisabledStencilWrite);
+		GX2SetStencilMask(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); // TODO, and maybe GX2SetStencilMaskReg?
+		GX2SetColorControlReg(&StockGX2::blendColorDisabled);
+		GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
+		GX2SetFetchShader(&quadFetchShader_);
+		GX2SetPixelShader(&defPShaderGX2);
+		GX2SetVertexShader(&defVShaderGX2);
+		GX2SetAttribBuffer(0, sizeof(fsQuadBuffer_), quadStride_, fsQuadBuffer_);
+		shaderManagerGX2_->DirtyLastShader();
+		GX2SetViewport( 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f);
+		GX2SetScissor(0, 0, vfb->renderWidth, vfb->renderHeight);
+		GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, 0, 1);
+	}
+
+	RebindFramebuffer();
+	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE);
+}
+
+// Copies the first three bytes of each of the c 32-bit pixels from srcp to dstp, leaving the
+// fourth byte of every destination pixel untouched.
+static void CopyPixelDepthOnly(u32 *dstp, const u32 *srcp, size_t c) {
+	for (size_t x = 0; x < c; ++x) {
+		memcpy(dstp + x, srcp + x, 3);
+	}
+}
+
+// Copies the depth buffer from src to dst when both share the same depth address and have
+// matching logical and render sizes. Skipped when slow framebuffer effects are disabled.
+void FramebufferManagerGX2::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) {
+	if (g_Config.bDisableSlowFramebufEffects) {
+		return;
+	}
+	bool
matchingDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0; + bool matchingSize = src->width == dst->width && src->height == dst->height; + bool matchingRenderSize = src->renderWidth == dst->renderWidth && src->renderHeight == dst->renderHeight; + if (matchingDepthBuffer && matchingSize && matchingRenderSize) { + // TODO: Currently, this copies depth AND stencil, which is a problem. See #9740. + draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, src->renderWidth, src->renderHeight, 1, Draw::FB_DEPTH_BIT); + RebindFramebuffer(); + dst->last_frame_depth_updated = gpuStats.numFlips; + } +} + +void FramebufferManagerGX2::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) { + if (!framebuffer->fbo || !useBufferedRendering_) { + // GX2SetPixelTexture(nullptr, 1); // TODO: what is the correct way to unbind a texture ? + gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; + return; + } + + // currentRenderVfb_ will always be set when this is called, except from the GE debugger. + // Let's just not bother with the copy in that case. + bool skipCopy = (flags & BINDFBCOLOR_MAY_COPY) == 0; + if (GPUStepping::IsStepping() || g_Config.bDisableSlowFramebufEffects) { + skipCopy = true; + } + // Currently rendering to this framebuffer. Need to make a copy. + if (!skipCopy && framebuffer == currentRenderVfb_) { + // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. 
+ Draw::Framebuffer *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (Draw::FBColorDepth)framebuffer->colorDepth); + if (renderCopy) { + VirtualFramebuffer copyInfo = *framebuffer; + copyInfo.fbo = renderCopy; + CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags); + RebindFramebuffer(); + draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, 0); + } else { + draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0); + } + } else if (framebuffer != currentRenderVfb_) { + draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0); + } else { + ERROR_LOG_REPORT_ONCE(GX2SelfTexture, G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags); + // Badness on GX2 to bind the currently rendered-to framebuffer as a texture. + // GX2SetPixelTexture(nullptr, 1); // TODO: what is the correct way to unbind a texture ? + gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; + return; + } +} + +bool FramebufferManagerGX2::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + nvfb->colorDepth = Draw::FBO_8888; + + nvfb->fbo = draw_->CreateFramebuffer({ nvfb->width, nvfb->height, 1, 1, true, (Draw::FBColorDepth)nvfb->colorDepth }); + if (!(nvfb->fbo)) { + ERROR_LOG(FRAMEBUF, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); + return false; + } + + draw_->BindFramebufferAsRenderTarget(nvfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }); + return true; +} + +void FramebufferManagerGX2::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) { + // Nothing to do here.
+} + +void FramebufferManagerGX2::SimpleBlit(Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, bool linearFilter) { + int destW, destH, srcW, srcH; + draw_->GetFramebufferDimensions(src, &srcW, &srcH); + draw_->GetFramebufferDimensions(dest, &destW, &destH); + + if (srcW == destW && srcH == destH && destX2 - destX1 == srcX2 - srcX1 && destY2 - destY1 == srcY2 - srcY1) { + // Optimize to a copy + draw_->CopyFramebufferImage(src, 0, (int)srcX1, (int)srcY1, 0, dest, 0, (int)destX1, (int)destY1, 0, (int)(srcX2 - srcX1), (int)(srcY2 - srcY1), 1, Draw::FB_COLOR_BIT); + return; + } + + float dX = 1.0f / (float)destW; + float dY = 1.0f / (float)destH; + float sX = 1.0f / (float)srcW; + float sY = 1.0f / (float)srcH; + struct Vtx { + float x, y, z, u, v; + }; + Vtx vtx[4] = { + { -1.0f + 2.0f * dX * destX1, 1.0f - 2.0f * dY * destY1, 0.0f, sX * srcX1, sY * srcY1 }, + { -1.0f + 2.0f * dX * destX2, 1.0f - 2.0f * dY * destY1, 0.0f, sX * srcX2, sY * srcY1 }, + { -1.0f + 2.0f * dX * destX1, 1.0f - 2.0f * dY * destY2, 0.0f, sX * srcX1, sY * srcY2 }, + { -1.0f + 2.0f * dX * destX2, 1.0f - 2.0f * dY * destY2, 0.0f, sX * srcX2, sY * srcY2 }, + }; + + memcpy(quadBuffer_, vtx, 4 * sizeof(Vtx)); + GX2Invalidate(GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER, quadBuffer_, 4 * sizeof(Vtx)); + + // Unbind the texture first to avoid the GX2 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily). 
+ draw_->BindTexture(0, nullptr); + draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }); + draw_->BindFramebufferAsTexture(src, 0, Draw::FB_COLOR_BIT, 0); + + Bind2DShader(); + GX2SetViewport( 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f ); + GX2SetScissor(0, 0, destW, destH); + GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE); + GX2SetColorControlReg(&StockGX2::blendDisabledColorWrite); + GX2SetTargetChannelMasksReg(&StockGX2::TargetChannelMasks[0xF]); + GX2SetDepthStencilControlReg(&StockGX2::depthStencilDisabled); + GX2SetPixelSampler(linearFilter ? &StockGX2::samplerLinear2DClamp : &StockGX2::samplerPoint2DClamp, 0); + GX2SetAttribBuffer(0, 4 * sizeof(Vtx), sizeof(Vtx), quadBuffer_); + GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, 0, 1); + + gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE); +} + +void FramebufferManagerGX2::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) { + if (!dst->fbo || !src->fbo || !useBufferedRendering_) { + // This can happen if they recently switched from non-buffered. + if (useBufferedRendering_) { + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }); + } + return; + } + + float srcXFactor = (float)src->renderWidth / (float)src->bufferWidth; + float srcYFactor = (float)src->renderHeight / (float)src->bufferHeight; + const int srcBpp = src->format == GE_FORMAT_8888 ? 
4 : 2; + if (srcBpp != bpp && bpp != 0) { + srcXFactor = (srcXFactor * bpp) / srcBpp; + } + int srcX1 = srcX * srcXFactor; + int srcX2 = (srcX + w) * srcXFactor; + int srcY1 = srcY * srcYFactor; + int srcY2 = (srcY + h) * srcYFactor; + + float dstXFactor = (float)dst->renderWidth / (float)dst->bufferWidth; + float dstYFactor = (float)dst->renderHeight / (float)dst->bufferHeight; + const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2; + if (dstBpp != bpp && bpp != 0) { + dstXFactor = (dstXFactor * bpp) / dstBpp; + } + int dstX1 = dstX * dstXFactor; + int dstX2 = (dstX + w) * dstXFactor; + int dstY1 = dstY * dstYFactor; + int dstY2 = (dstY + h) * dstYFactor; + + // Direct3D doesn't support rect -> self. + Draw::Framebuffer *srcFBO = src->fbo; + if (src == dst) { + Draw::Framebuffer *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (Draw::FBColorDepth)src->colorDepth); + SimpleBlit(tempFBO, dstX1, dstY1, dstX2, dstY2, src->fbo, srcX1, srcY1, srcX2, srcY2, false); + srcFBO = tempFBO; + } + SimpleBlit(dst->fbo, dstX1, dstY1, dstX2, dstY2, srcFBO, srcX1, srcY1, srcX2, srcY2, false); +} + +// Nobody calls this yet. 
+void FramebufferManagerGX2::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) { + if (!vfb->fbo) { + ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackDepthbuffer: vfb->fbo == 0"); + return; + } + + const u32 z_address = (0x04000000) | vfb->z_address; + // TODO +} + +void FramebufferManagerGX2::EndFrame() {} + +void FramebufferManagerGX2::DeviceLost() { DestroyAllFBOs(); } + +void FramebufferManagerGX2::DestroyAllFBOs() { + currentRenderVfb_ = nullptr; + displayFramebuf_ = nullptr; + prevDisplayFramebuf_ = nullptr; + prevPrevDisplayFramebuf_ = nullptr; + + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebuffer *vfb = vfbs_[i]; + INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); + DestroyFramebuf(vfb); + } + vfbs_.clear(); + + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebuffer *vfb = bvfbs_[i]; + DestroyFramebuf(vfb); + } + bvfbs_.clear(); + + for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) { + it->second.fbo->Release(); + } + tempFBOs_.clear(); + + SetNumExtraFBOs(0); +} + +void FramebufferManagerGX2::Resized() { + FramebufferManagerCommon::Resized(); + + if (UpdateSize()) { + DestroyAllFBOs(); + } + + // Might have a new post shader - let's compile it. + CompilePostShader(); +} diff --git a/GPU/GX2/FramebufferManagerGX2.h b/GPU/GX2/FramebufferManagerGX2.h new file mode 100644 index 000000000000..b3c408054eec --- /dev/null +++ b/GPU/GX2/FramebufferManagerGX2.h @@ -0,0 +1,110 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include +#include +#include + +#include + +// Keeps track of allocated FBOs. +// Also provides facilities for drawing and later converting raw +// pixel data. + +#include "GPU/GPUCommon.h" +#include "GPU/Common/FramebufferCommon.h" +#include "Core/Config.h" +#include "ext/native/thin3d/thin3d.h" + +class TextureCacheGX2; +class DrawEngineGX2; +class ShaderManagerGX2; + +class FramebufferManagerGX2 : public FramebufferManagerCommon { +public: + FramebufferManagerGX2(Draw::DrawContext *draw); + ~FramebufferManagerGX2(); + + void SetTextureCache(TextureCacheGX2 *tc); + void SetShaderManager(ShaderManagerGX2 *sm); + void SetDrawEngine(DrawEngineGX2 *td); + void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) override; + + void DestroyAllFBOs(); + + void EndFrame(); + void Resized() override; + void DeviceLost(); + void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override; + + void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) override; + + void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); + + virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override; + + // TODO: Remove + void *GetDynamicQuadBuffer() { return quadBuffer_; } + +protected: + // Used by ReadFramebufferToMemory and later framebuffer block copies + void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) override; + + bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; + void 
UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override; + +private: + void CompilePostShader(); + void BindPostShader(const PostShaderUniforms &uniforms) override; + void Bind2DShader() override; + void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) override; + void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h); + void SimpleBlit(Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, bool linearFilter); + + GX2ContextState *context_; + + // Used by DrawPixels + GX2Texture drawPixelsTex_ = {}; + + GX2FetchShader quadFetchShader_ = {}; + static const float fsQuadBuffer_[20]; + const u32 quadStride_ = ARRAY_SIZE(fsQuadBuffer_) * sizeof(float) / 4; + // Dynamic + float *quadBuffer_; + + int plainColorLoc_; + struct __attribute__((aligned(64))) StencilValueUB { + u32_le u_stencilValue[4]; + }; + StencilValueUB *stencilValueBuffer_ = nullptr; + GX2StencilMaskReg stencilMaskStates_[256]{}; + + TextureCacheGX2 *textureCacheGX2_; + ShaderManagerGX2 *shaderManagerGX2_; + DrawEngineGX2 *drawEngineGX2_; + + // Used by post-processing shader + // Postprocessing + const GX2VertexShader *postVertexShader_ = nullptr; + const GX2PixelShader *postPixelShader_ = nullptr; + GX2FetchShader postFetchShader_ = {}; + void *postConstants_ = nullptr; + + static const GX2AttribStream g_QuadAttribStream[2]; +}; diff --git a/GPU/GX2/GPU_GX2.cpp b/GPU/GX2/GPU_GX2.cpp new file mode 100644 index 000000000000..f2fe8bcaeea6 --- /dev/null +++ b/GPU/GX2/GPU_GX2.cpp @@ -0,0 +1,368 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "GPU/GX2/GPU_GX2.h" + +#include + +#include "Common/ChunkFile.h" +#include "Common/GraphicsContext.h" +#include "base/NativeApp.h" +#include "base/logging.h" +#include "profiler/profiler.h" +#include "i18n/i18n.h" +#include "Core/Debugger/Breakpoints.h" +#include "Core/MemMapHelpers.h" +#include "Core/MIPS/MIPS.h" +#include "Core/Host.h" +#include "Core/Config.h" +#include "Core/Reporting.h" +#include "Core/System.h" + +#include "GPU/GPUState.h" +#include "GPU/ge_constants.h" +#include "GPU/GeDisasm.h" + +#include "GPU/Common/FramebufferCommon.h" +#include "GPU/GX2/ShaderManagerGX2.h" +#include "GPU/GX2/GPU_GX2.h" +#include "GPU/GX2/FramebufferManagerGX2.h" +#include "GPU/GX2/DrawEngineGX2.h" +#include "GPU/GX2/TextureCacheGX2.h" +#include "GPU/GX2/GX2Util.h" + +#include "Core/HLE/sceKernelThread.h" +#include "Core/HLE/sceKernelInterrupt.h" +#include "Core/HLE/sceGe.h" + +GPU_GX2::GPU_GX2(GraphicsContext *gfxCtx, Draw::DrawContext *draw) + : GPUCommon(gfxCtx, draw), drawEngine_(draw, + (GX2ContextState *)draw->GetNativeObject(Draw::NativeObject::CONTEXT)) { + context_ = (GX2ContextState *)draw->GetNativeObject(Draw::NativeObject::CONTEXT); + lastVsync_ = g_Config.bVSync ? 
1 : 0; + + StockGX2::Init(); + + shaderManagerGX2_ = new ShaderManagerGX2(context_); + framebufferManagerGX2_ = new FramebufferManagerGX2(draw); + framebufferManager_ = framebufferManagerGX2_; + textureCacheGX2_ = new TextureCacheGX2(draw); + textureCache_ = textureCacheGX2_; + drawEngineCommon_ = &drawEngine_; + shaderManager_ = shaderManagerGX2_; + depalShaderCache_ = new DepalShaderCacheGX2(draw); + drawEngine_.SetShaderManager(shaderManagerGX2_); + drawEngine_.SetTextureCache(textureCacheGX2_); + drawEngine_.SetFramebufferManager(framebufferManagerGX2_); + framebufferManagerGX2_->Init(); + framebufferManagerGX2_->SetTextureCache(textureCacheGX2_); + framebufferManagerGX2_->SetShaderManager(shaderManagerGX2_); + framebufferManagerGX2_->SetDrawEngine(&drawEngine_); + textureCacheGX2_->SetFramebufferManager(framebufferManagerGX2_); + textureCacheGX2_->SetDepalShaderCache(depalShaderCache_); + textureCacheGX2_->SetShaderManager(shaderManagerGX2_); + + // Sanity check gstate + if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { + ERROR_LOG(G3D, "gstate has drifted out of sync!"); + } + + // No need to flush before the tex scale/offset commands if we are baking + // the tex scale/offset into the vertices anyway. + UpdateCmdInfo(); + CheckGPUFeatures(); + + BuildReportingInfo(); + + // Some of our defaults are different from hw defaults, let's assert them. + // We restore each frame anyway, but here is convenient for tests. 
+ textureCache_->NotifyConfigChanged(); +} + +GPU_GX2::~GPU_GX2() { + delete depalShaderCache_; + framebufferManagerGX2_->DestroyAllFBOs(); + delete framebufferManagerGX2_; + shaderManagerGX2_->ClearShaders(); + delete shaderManagerGX2_; + delete textureCacheGX2_; + draw_->BindPipeline(nullptr); +} + +void GPU_GX2::CheckGPUFeatures() { + u32 features = 0; + + features |= GPU_SUPPORTS_BLEND_MINMAX; + features |= GPU_PREFER_CPU_DOWNLOAD; + + // Accurate depth is required on AMD/nVidia (for reverse Z) so we ignore the compat flag to disable it on those. See #9545 + auto vendor = draw_->GetDeviceCaps().vendor; + + if (!PSP_CoreParameter().compat.flags().DisableAccurateDepth || vendor == Draw::GPUVendor::VENDOR_AMD || vendor == Draw::GPUVendor::VENDOR_NVIDIA) { + features |= GPU_SUPPORTS_ACCURATE_DEPTH; // Breaks text in PaRappa for some reason. + } + +#ifndef _M_ARM + // TODO: Do proper feature detection + features |= GPU_SUPPORTS_ANISOTROPY; +#endif + + features |= GPU_SUPPORTS_OES_TEXTURE_NPOT; + features |= GPU_SUPPORTS_LARGE_VIEWPORTS; + if (draw_->GetDeviceCaps().dualSourceBlend) + features |= GPU_SUPPORTS_DUALSOURCE_BLEND; + features |= GPU_SUPPORTS_ANY_COPY_IMAGE; + features |= GPU_SUPPORTS_TEXTURE_FLOAT; + features |= GPU_SUPPORTS_INSTANCE_RENDERING; + features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; + features |= GPU_SUPPORTS_FBO; + + uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::A4R4G4B4_UNORM_PACK16); + uint32_t fmt1555 = draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16); + uint32_t fmt565 = draw_->GetDataFormatSupport(Draw::DataFormat::R5G6B5_UNORM_PACK16); + if ((fmt4444 & Draw::FMT_TEXTURE) && (fmt565 & Draw::FMT_TEXTURE) && (fmt1555 & Draw::FMT_TEXTURE)) { + features |= GPU_SUPPORTS_16BIT_FORMATS; + } + + if (draw_->GetDeviceCaps().logicOpSupported) { + features |= GPU_SUPPORTS_LOGIC_OP; + } + + if (!g_Config.bHighQualityDepth && (features & GPU_SUPPORTS_ACCURATE_DEPTH) != 0) { + features |= 
GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT; + } else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) { + // Use fragment rounding on desktop and GLES3, most accurate. + features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT; + } else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) { + features |= GPU_ROUND_DEPTH_TO_16BIT; + } + + // The Phantasy Star hack :( + if (PSP_CoreParameter().compat.flags().DepthRangeHack && (features & GPU_SUPPORTS_ACCURATE_DEPTH) == 0) { + features |= GPU_USE_DEPTH_RANGE_HACK; + } + + if (PSP_CoreParameter().compat.flags().ClearToRAM) { + features |= GPU_USE_CLEAR_RAM_HACK; + } + + gstate_c.featureFlags = features; +} + +// Needs to be called on GPU thread, not reporting thread. +void GPU_GX2::BuildReportingInfo() { + using namespace Draw; + DrawContext *thin3d = gfxCtx_->GetDrawContext(); + + reportingPrimaryInfo_ = thin3d->GetInfoString(InfoField::VENDORSTRING); + reportingFullInfo_ = reportingPrimaryInfo_ + " - " + System_GetProperty(SYSPROP_GPUDRIVER_VERSION) + " - " + thin3d->GetInfoString(InfoField::SHADELANGVERSION); +} + +void GPU_GX2::DeviceLost() { + // Simply drop all caches and textures. + // FBOs appear to survive? Or no? + shaderManagerGX2_->ClearShaders(); + drawEngine_.ClearInputLayoutMap(); + textureCacheGX2_->Clear(false); + framebufferManagerGX2_->DeviceLost(); +} + +void GPU_GX2::DeviceRestore() { + // Nothing needed. 
+} + +void GPU_GX2::InitClear() { + bool useNonBufferedRendering = g_Config.iRenderingMode == FB_NON_BUFFERED_MODE; + if (useNonBufferedRendering) { + // device_->Clear(0, NULL, D3DCLEAR_STENCIL | D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.f, 0); + } +} + +void GPU_GX2::BeginHostFrame() { + GPUCommon::BeginHostFrame(); + UpdateCmdInfo(); + if (resized_) { + CheckGPUFeatures(); + framebufferManager_->Resized(); + drawEngine_.Resized(); + textureCacheGX2_->NotifyConfigChanged(); + shaderManagerGX2_->DirtyLastShader(); + resized_ = false; + } +} + +void GPU_GX2::ReapplyGfxState() { + GPUCommon::ReapplyGfxState(); + + // TODO: Dirty our caches for depth states etc +} + +void GPU_GX2::EndHostFrame() { + // Tell the DrawContext that it's time to reset everything. + draw_->BindPipeline(nullptr); +} + +void GPU_GX2::BeginFrame() { + GPUCommon::BeginFrame(); + + textureCacheGX2_->StartFrame(); + drawEngine_.BeginFrame(); + depalShaderCache_->Decimate(); + // fragmentTestCache_.Decimate(); + + shaderManagerGX2_->DirtyLastShader(); + + framebufferManagerGX2_->BeginFrame(); + gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX); +} + +void GPU_GX2::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) { + // TODO: Some games like Spongebob - Yellow Avenger, never change framebuffer, they blit to it. + // So breaking on frames doesn't work. Might want to move this to sceDisplay vsync. 
+ host->GPUNotifyDisplay(framebuf, stride, format); + framebufferManagerGX2_->SetDisplayFramebuffer(framebuf, stride, format); +} + +void GPU_GX2::CopyDisplayToOutput() { + GX2SetColorControlReg(&StockGX2::blendDisabledColorWrite); + GX2SetTargetChannelMasksReg(&StockGX2::TargetChannelMasks[0xF]); + + drawEngine_.Flush(); + + framebufferManagerGX2_->CopyDisplayToOutput(); + framebufferManagerGX2_->EndFrame(); + + // shaderManager_->EndFrame(); + shaderManagerGX2_->DirtyLastShader(); + + gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); +} + +void GPU_GX2::FinishDeferred() { + // This finishes reading any vertex data that is pending. + drawEngine_.FinishDeferred(); +} + +inline void GPU_GX2::CheckFlushOp(int cmd, u32 diff) { + const u8 cmdFlags = cmdInfo_[cmd].flags; + if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) { + if (dumpThisFrame_) { + NOTICE_LOG(G3D, "================ FLUSH ================"); + } + drawEngine_.Flush(); + } +} + +void GPU_GX2::PreExecuteOp(u32 op, u32 diff) { + CheckFlushOp(op >> 24, diff); +} + +void GPU_GX2::ExecuteOp(u32 op, u32 diff) { + const u8 cmd = op >> 24; + const CommandInfo info = cmdInfo_[cmd]; + const u8 cmdFlags = info.flags; + if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) { + (this->*info.func)(op, diff); + } else if (diff) { + uint64_t dirty = info.flags >> 8; + if (dirty) + gstate_c.Dirty(dirty); + } +} + +void GPU_GX2::GetStats(char *buffer, size_t bufsize) { + float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? 
(float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; + snprintf(buffer, bufsize - 1, + "DL processing time: %0.2f ms\n" + "Draw calls: %i, flushes %i, clears %i\n" + "Cached Draw calls: %i\n" + "Num Tracked Vertex Arrays: %i\n" + "GPU cycles executed: %d (%f per vertex)\n" + "Commands per call level: %i %i %i %i\n" + "Vertices submitted: %i\n" + "Cached, Uncached Vertices Drawn: %i, %i\n" + "FBOs active: %i\n" + "Textures active: %i, decoded: %i invalidated: %i\n" + "Readbacks: %d, uploads: %d\n" + "Vertex, Fragment shaders loaded: %i, %i\n", + gpuStats.msProcessingDisplayLists * 1000.0f, + gpuStats.numDrawCalls, + gpuStats.numFlushes, + gpuStats.numClears, + gpuStats.numCachedDrawCalls, + gpuStats.numTrackedVertexArrays, + gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles, + vertexAverageCycles, + gpuStats.gpuCommandsAtCallLevel[0], gpuStats.gpuCommandsAtCallLevel[1], gpuStats.gpuCommandsAtCallLevel[2], gpuStats.gpuCommandsAtCallLevel[3], + gpuStats.numVertsSubmitted, + gpuStats.numCachedVertsDrawn, + gpuStats.numUncachedVertsDrawn, + (int)framebufferManagerGX2_->NumVFBs(), + (int)textureCacheGX2_->NumLoadedTextures(), + gpuStats.numTexturesDecoded, + gpuStats.numTextureInvalidations, + gpuStats.numReadbacks, + gpuStats.numUploads, + shaderManagerGX2_->GetNumVertexShaders(), + shaderManagerGX2_->GetNumFragmentShaders() + ); +} + +void GPU_GX2::ClearCacheNextFrame() { + textureCacheGX2_->ClearNextFrame(); +} + +void GPU_GX2::ClearShaderCache() { + shaderManagerGX2_->ClearShaders(); + drawEngine_.ClearInputLayoutMap(); +} + +void GPU_GX2::DoState(PointerWrap &p) { + GPUCommon::DoState(p); + + // TODO: Some of these things may not be necessary. + // None of these are necessary when saving. 
+ if (p.mode == p.MODE_READ && !PSP_CoreParameter().frozen) { + textureCacheGX2_->Clear(true); + drawEngine_.ClearTrackedVertexArrays(); + + gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); + framebufferManagerGX2_->DestroyAllFBOs(); + } +} + +std::vector<std::string> GPU_GX2::DebugGetShaderIDs(DebugShaderType type) { + switch (type) { + case SHADER_TYPE_VERTEXLOADER: + return drawEngine_.DebugGetVertexLoaderIDs(); + case SHADER_TYPE_DEPAL: + return depalShaderCache_->DebugGetShaderIDs(type); + default: + return shaderManagerGX2_->DebugGetShaderIDs(type); + } +} + +std::string GPU_GX2::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) { + switch (type) { + case SHADER_TYPE_VERTEXLOADER: + return drawEngine_.DebugGetVertexLoaderString(id, stringType); + case SHADER_TYPE_DEPAL: + return depalShaderCache_->DebugGetShaderString(id, type, stringType); + default: + return shaderManagerGX2_->DebugGetShaderString(id, type, stringType); + } +} diff --git a/GPU/GX2/GPU_GX2.h b/GPU/GX2/GPU_GX2.h new file mode 100644 index 000000000000..169ed3667ecf --- /dev/null +++ b/GPU/GX2/GPU_GX2.h @@ -0,0 +1,86 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+ +#pragma once + +#include +#include +#include + +#include "GPU/GPUCommon.h" +#include "GPU/GX2/DrawEngineGX2.h" +#include "GPU/GX2/TextureCacheGX2.h" +#include "GPU/GX2/DepalettizeShaderGX2.h" +#include "GPU/Common/VertexDecoderCommon.h" + +class FramebufferManagerGX2; +class ShaderManagerGX2; +class LinkedShaderGX2; + +class GPU_GX2 : public GPUCommon { +public: + GPU_GX2(GraphicsContext *gfxCtx, Draw::DrawContext *draw); + ~GPU_GX2(); + + void CheckGPUFeatures() override; + void PreExecuteOp(u32 op, u32 diff) override; + void ExecuteOp(u32 op, u32 diff) override; + + void ReapplyGfxState() override; + void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override; + void GetStats(char *buffer, size_t bufsize) override; + void ClearCacheNextFrame() override; + void DeviceLost() override; // Only happens on Android. Drop all textures and shaders. + void DeviceRestore() override; + + void DoState(PointerWrap &p) override; + + void ClearShaderCache() override; + + // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. 
+ std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override; + std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; + + void BeginHostFrame() override; + void EndHostFrame() override; + +protected: + void FinishDeferred() override; + +private: + void Flush() { + drawEngine_.Flush(); + } + // void ApplyDrawState(int prim); + void CheckFlushOp(int cmd, u32 diff); + void BuildReportingInfo(); + + void InitClear() override; + void BeginFrame() override; + void CopyDisplayToOutput() override; + + GX2ContextState *context_; + + FramebufferManagerGX2 *framebufferManagerGX2_; + TextureCacheGX2 *textureCacheGX2_; + DepalShaderCacheGX2 *depalShaderCache_; + DrawEngineGX2 drawEngine_; + ShaderManagerGX2 *shaderManagerGX2_; + + int lastVsync_; + int vertexCost_ = 0; +}; diff --git a/GPU/GX2/GX2StaticShaders.c b/GPU/GX2/GX2StaticShaders.c new file mode 100644 index 000000000000..36daf768aec2 --- /dev/null +++ b/GPU/GX2/GX2StaticShaders.c @@ -0,0 +1,97 @@ + +#include "GPU/GX2/GX2StaticShaders.h" +#include +#include + +// clang-format off +__attribute__((aligned(GX2_SHADER_ALIGNMENT))) +static u64 depalVCode [32] = +{ + CALL_FS NO_BARRIER, + EXP_DONE(POS0, _R1, _x, _y, _z, _1), + EXP_DONE(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER + END_OF_PROGRAM +}; +// clang-format on + +GX2VertexShader defVShaderGX2 = { + { + .sq_pgm_resources_vs.num_gprs = 3, + .sq_pgm_resources_vs.stack_size = 1, + .spi_vs_out_config.vs_export_count = 1, + .num_spi_vs_out_id = 1, + { + { .semantic_0 = 0x00, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 =
0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + { .semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF }, + }, + .sq_vtx_semantic_clear = ~0x3, + .num_sq_vtx_semantic = 2, + { 0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, + .vgt_hos_reuse_depth.reuse_depth = 0x10, + }, /* regs */ + sizeof(depalVCode), + (uint8_t *)&depalVCode, + GX2_SHADER_MODE_UNIFORM_BLOCK, +}; + +// clang-format off +__attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { + u64 cf[32]; + u64 tex[1 * 2]; +} quadPCode = { + { + TEX(32, 1) VALID_PIX, + EXP_DONE(PIX0, _R0, _x, _y, _z, _w) + END_OF_PROGRAM + }, + { + TEX_SAMPLE(_R0, _x, _y, _z, _w, _R0, _x, _y, _0, _0, _t0, _s0) + } +}; +// clang-format on + +GX2PixelShader defPShaderGX2 = { + { + .sq_pgm_resources_ps.num_gprs = 2, + .sq_pgm_exports_ps.export_mode = 0x2, + .spi_ps_in_control_0.num_interp = 2, + .spi_ps_in_control_0.persp_gradient_ena = 1, + .spi_ps_in_control_0.baryc_sample_cntl = spi_baryc_cntl_centers_only, + .num_spi_ps_input_cntl = 2, + { { .semantic = 0, .default_val = 1 }, { .semantic = 1, .default_val = 1 } }, + .cb_shader_mask.output0_enable = 0xF, + .cb_shader_control.rt0_enable = TRUE, + .db_shader_control.z_order = db_z_order_early_z_then_late_z, + }, /* regs */ + sizeof(quadPCode), + (uint8_t *)&quadPCode, + GX2_SHADER_MODE_UNIFORM_BLOCK, +}; + +// TODO +// clang-format off +__attribute__((aligned(GX2_SHADER_ALIGNMENT))) static const u32 stencilPCode[] = +{ + 0x20004040, 0x000004a4, 0x04000000, 0x00004085, // 0x0000 + 0x30000000, 0x0000c080, 
0x22004040, 0x000020a0, 0x06000000, 0x0100c086, 0x2b000000, 0x000000a8, 0x00000000, 0x00002094, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80001f80, 0x101d0060, 0x000c1f80, 0x0c210000, // 0x0040 + 0x000c1f00, 0x900c0000, 0x00ac1f80, 0x80000020, 0x71fd7f43, 0x00000000, 0xfe041f80, 0x000a0000, 0xfe001f80, 0x80350000, 0xffa01f80, 0x00180040, 0xff000000, 0x00000000, 0x80c01f81, 0x00180020, 0xfe041f80, 0x00230000, 0xf8001f80, 0x900c0000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x10000000, 0x00fe0ff0, 0x00008010, 0xecdfea0d +}; +// clang-format on + +GX2PixelShader stencilUploadPSshaderGX2 = { + .regs.vals = { 0x00000101, 0x00000002, 0x14000001, 0x00000000, // 0x0000 + 0x00000001, 0x00000100, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000000f, 0x00000001, 0x00000050, 0x00000000 }, + sizeof(stencilPCode), + (uint8_t *)stencilPCode, + GX2_SHADER_MODE_UNIFORM_BLOCK, +}; diff --git a/GPU/GX2/GX2StaticShaders.h b/GPU/GX2/GX2StaticShaders.h new file mode 100644 index 000000000000..36fd398bd576 --- /dev/null +++ b/GPU/GX2/GX2StaticShaders.h @@ 
-0,0 +1,15 @@ +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern GX2VertexShader defVShaderGX2; +extern GX2PixelShader defPShaderGX2; +extern GX2PixelShader stencilUploadPSshaderGX2; + +#ifdef __cplusplus +} +#endif diff --git a/GPU/GX2/GX2Util.cpp b/GPU/GX2/GX2Util.cpp new file mode 100644 index 000000000000..222fb6c57ba3 --- /dev/null +++ b/GPU/GX2/GX2Util.cpp @@ -0,0 +1,43 @@ +#include "ppsspp_config.h" + +#include +#include +#include +#include + +#include "base/logging.h" +#include "base/stringutil.h" + +#include "GX2Util.h" + +GX2DepthStencilControlReg StockGX2::depthStencilDisabled; +GX2DepthStencilControlReg StockGX2::depthDisabledStencilWrite; +GX2TargetChannelMaskReg StockGX2::TargetChannelMasks[16]; +GX2StencilMaskReg StockGX2::stencilMask; +GX2ColorControlReg StockGX2::blendDisabledColorWrite; +GX2ColorControlReg StockGX2::blendColorDisabled; +GX2Sampler StockGX2::samplerPoint2DWrap; +GX2Sampler StockGX2::samplerLinear2DWrap; +GX2Sampler StockGX2::samplerPoint2DClamp; +GX2Sampler StockGX2::samplerLinear2DClamp; + +void StockGX2::Init() { + GX2InitColorControlReg(&blendDisabledColorWrite, GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE); + GX2InitColorControlReg(&blendColorDisabled, GX2_LOGIC_OP_COPY, GX2_DISABLE, GX2_DISABLE, GX2_DISABLE); + for(int i = 0; i < countof(TargetChannelMasks); i++) + GX2InitTargetChannelMasksReg(TargetChannelMasks + i, (GX2ChannelMask)i, (GX2ChannelMask)i, (GX2ChannelMask)i, (GX2ChannelMask)i, (GX2ChannelMask)i, (GX2ChannelMask)i, (GX2ChannelMask)i, (GX2ChannelMask)i); + + GX2InitDepthStencilControlReg(&depthStencilDisabled, GX2_DISABLE, GX2_DISABLE, GX2_COMPARE_FUNC_NEVER, GX2_DISABLE, GX2_DISABLE, GX2_COMPARE_FUNC_NEVER, GX2_STENCIL_FUNCTION_KEEP, GX2_STENCIL_FUNCTION_KEEP, GX2_STENCIL_FUNCTION_KEEP, GX2_COMPARE_FUNC_NEVER, GX2_STENCIL_FUNCTION_KEEP, GX2_STENCIL_FUNCTION_KEEP, GX2_STENCIL_FUNCTION_KEEP); + GX2InitDepthStencilControlReg(&depthDisabledStencilWrite, GX2_DISABLE, 
GX2_DISABLE, GX2_COMPARE_FUNC_ALWAYS, GX2_ENABLE, GX2_ENABLE, GX2_COMPARE_FUNC_ALWAYS, GX2_STENCIL_FUNCTION_REPLACE, GX2_STENCIL_FUNCTION_REPLACE, GX2_STENCIL_FUNCTION_REPLACE, GX2_COMPARE_FUNC_ALWAYS, GX2_STENCIL_FUNCTION_REPLACE, GX2_STENCIL_FUNCTION_REPLACE, GX2_STENCIL_FUNCTION_REPLACE); + GX2InitStencilMaskReg(&stencilMask, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + + GX2InitSampler(&samplerPoint2DWrap, GX2_TEX_CLAMP_MODE_WRAP, GX2_TEX_XY_FILTER_MODE_POINT); + GX2InitSampler(&samplerPoint2DClamp, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_POINT); + GX2InitSampler(&samplerLinear2DWrap, GX2_TEX_CLAMP_MODE_WRAP, GX2_TEX_XY_FILTER_MODE_LINEAR); + GX2InitSampler(&samplerLinear2DClamp, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_POINT); + + GX2InitSamplerBorderType(&samplerPoint2DWrap, GX2_TEX_BORDER_TYPE_WHITE); + GX2InitSamplerBorderType(&samplerPoint2DClamp, GX2_TEX_BORDER_TYPE_WHITE); + GX2InitSamplerBorderType(&samplerLinear2DWrap, GX2_TEX_BORDER_TYPE_WHITE); + GX2InitSamplerBorderType(&samplerLinear2DClamp, GX2_TEX_BORDER_TYPE_WHITE); +} diff --git a/GPU/GX2/GX2Util.h b/GPU/GX2/GX2Util.h new file mode 100644 index 000000000000..0f6de60906e6 --- /dev/null +++ b/GPU/GX2/GX2Util.h @@ -0,0 +1,86 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#pragma once + +#include +#include +#include +#include + +class PushBufferGX2 { +public: + PushBufferGX2(u32 size, u32 align) : align_(align) { + size = (size + align_ - 1) & ~(align_ - 1); + buffer_ = (u8 *)MEM1_alloc(size, align_); + } + PushBufferGX2(PushBufferGX2 &) = delete; + ~PushBufferGX2() { + MEM1_free(buffer_); + } + void *Buf() const { + return buffer_; + } + + // Should be done each frame + void Reset() { + pos_ = 0; + push_size_ = 0; + } + + u8 *BeginPush(u32 *offset, u32 size) { + size = (size + align_ - 1) & ~(align_ - 1); + if (pos_ + size > size_) { + // Wrap! Note that with this method, since we return the same buffer as before, you have to do the draw immediately after. + EndPush(); + pos_ = 0; + } + *offset = pos_; + u8 *retval = (u8 *)buffer_ + pos_; + push_size_ += size; + return retval; + } + void EndPush() { + if(push_size_) { + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, buffer_ + pos_, push_size_); + pos_ += push_size_; + push_size_ = 0; + } + } + +private: + u32 size_; + u32 align_; + u8 *buffer_; + u32 pos_; + u32 push_size_ ; +}; + +class StockGX2 { +public: + static void Init(); + static GX2DepthStencilControlReg depthStencilDisabled; + static GX2DepthStencilControlReg depthDisabledStencilWrite; + static GX2TargetChannelMaskReg TargetChannelMasks[16]; + static GX2StencilMaskReg stencilMask; + static GX2ColorControlReg blendDisabledColorWrite; + static GX2ColorControlReg blendColorDisabled; + static GX2Sampler samplerPoint2DWrap; + static GX2Sampler samplerLinear2DWrap; + static GX2Sampler samplerPoint2DClamp; + static GX2Sampler samplerLinear2DClamp; +}; diff --git a/GPU/GX2/ShaderManagerGX2.cpp b/GPU/GX2/ShaderManagerGX2.cpp new file mode 100644 index 000000000000..bc61c294a89d --- /dev/null +++ b/GPU/GX2/ShaderManagerGX2.cpp @@ -0,0 +1,248 @@ +// Copyright (c) 2015- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" + +#include +#include + +#include "base/logging.h" +#include "math/lin/matrix4x4.h" +#include "math/math_util.h" +#include "math/dataconv.h" +#include "util/text/utf8.h" +#include "Common/Common.h" +#include "Core/Config.h" +#include "Core/Reporting.h" +#include "GPU/Math3D.h" +#include "GPU/GPUState.h" +#include "GPU/ge_constants.h" +#include "GPU/GX2/ShaderManagerGX2.h" +#include "GPU/GX2/FragmentShaderGeneratorGX2.h" +#include "GPU/GX2/VertexShaderGeneratorGX2.h" +#include "GPU/GX2/GX2Util.h" + + +std::string GX2PShader::GetShaderString(DebugShaderStringType type) const { + switch (type) { + case SHADER_STRING_SHORT_DESC: + return FragmentShaderDesc(id_); + case SHADER_STRING_SOURCE_CODE: + default: + return "N/A"; + } +} + +std::string GX2VShader::GetShaderString(DebugShaderStringType type) const { + switch (type) { + case SHADER_STRING_SHORT_DESC: + return VertexShaderDesc(id_); + case SHADER_STRING_SOURCE_CODE: + default: + return "N/A"; + } +} + +ShaderManagerGX2::ShaderManagerGX2(GX2ContextState *context) + : lastVShader_(nullptr), lastFShader_(nullptr) { + memset(&ub_base, 0, sizeof(ub_base)); + memset(&ub_lights, 0, sizeof(ub_lights)); + memset(&ub_bones, 0, sizeof(ub_bones)); + + INFO_LOG(G3D, "sizeof(ub_base): %d", 
(int)sizeof(ub_base)); + INFO_LOG(G3D, "sizeof(ub_lights): %d", (int)sizeof(ub_lights)); + INFO_LOG(G3D, "sizeof(ub_bones): %d", (int)sizeof(ub_bones)); + + push_base = MEM2_alloc(sizeof(ub_base), GX2_UNIFORM_BLOCK_ALIGNMENT); + push_lights = MEM2_alloc(sizeof(ub_lights), GX2_UNIFORM_BLOCK_ALIGNMENT); + push_bones = MEM2_alloc(sizeof(ub_bones), GX2_UNIFORM_BLOCK_ALIGNMENT); +} + +ShaderManagerGX2::~ShaderManagerGX2() { + MEM2_free(push_base); + MEM2_free(push_lights); + MEM2_free(push_bones); + ClearShaders(); +} + +void ShaderManagerGX2::Clear() { + for (auto iter = fsCache_.begin(); iter != fsCache_.end(); ++iter) { + delete iter->second; + } + for (auto iter = vsCache_.begin(); iter != vsCache_.end(); ++iter) { + delete iter->second; + } + fsCache_.clear(); + vsCache_.clear(); + lastFSID_.set_invalid(); + lastVSID_.set_invalid(); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); +} + +void ShaderManagerGX2::ClearShaders() { + Clear(); + DirtyLastShader(); + gstate_c.Dirty(DIRTY_ALL_UNIFORMS); +} + +void ShaderManagerGX2::DirtyLastShader() { + lastFSID_.set_invalid(); + lastVSID_.set_invalid(); + lastVShader_ = nullptr; + lastFShader_ = nullptr; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); +} + +uint64_t ShaderManagerGX2::UpdateUniforms() { + uint64_t dirty = gstate_c.GetDirtyUniforms(); + if (dirty != 0) { + if (dirty & DIRTY_BASE_UNIFORMS) { + BaseUpdateUniforms(&ub_base, dirty, true); + memcpy(push_base, &ub_base, sizeof(ub_base)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, push_base, sizeof(ub_base)); + } + if (dirty & DIRTY_LIGHT_UNIFORMS) { + LightUpdateUniforms(&ub_lights, dirty); + memcpy(push_lights, &ub_lights, sizeof(ub_lights)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, push_lights, sizeof(ub_lights)); + } + if (dirty & DIRTY_BONE_UNIFORMS) { + BoneUpdateUniforms(&ub_bones, dirty); + memcpy(push_bones, &ub_bones, sizeof(ub_bones)); + 
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, push_bones, sizeof(ub_bones)); + } + } + gstate_c.CleanUniforms(); + return dirty; +} + +void ShaderManagerGX2::BindUniforms() { + GX2SetVertexUniformBlock(1, sizeof(ub_base), push_base); + GX2SetVertexUniformBlock(2, sizeof(ub_lights), push_lights); + GX2SetVertexUniformBlock(3, sizeof(ub_bones), push_bones); + GX2SetPixelUniformBlock(1, sizeof(ub_base), push_base); +} + +void ShaderManagerGX2::GetShaders(int prim, u32 vertType, GX2VShader **vshader, GX2PShader **fshader, bool useHWTransform) { + VShaderID VSID; + FShaderID FSID; + + if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) { + gstate_c.Clean(DIRTY_VERTEXSHADER_STATE); + ComputeVertexShaderID(&VSID, vertType, useHWTransform); + } else { + VSID = lastVSID_; + } + + if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) { + gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE); + ComputeFragmentShaderID(&FSID); + } else { + FSID = lastFSID_; + } + + // Just update uniforms if this is the same shader as last time. + if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) { + *vshader = lastVShader_; + *fshader = lastFShader_; + // Already all set, no need to look up in shader maps. + return; + } + + VSCache::iterator vsIter = vsCache_.find(VSID); + GX2VShader *vs; + if (vsIter == vsCache_.end()) { + // Vertex shader not in cache. Let's compile it. + // TODO: + vs = new GX2VShader(VSID, useHWTransform); + vsCache_[VSID] = vs; + } else { + vs = vsIter->second; + } + lastVSID_ = VSID; + + FSCache::iterator fsIter = fsCache_.find(FSID); + GX2PShader *fs; + if (fsIter == fsCache_.end()) { + // Fragment shader not in cache. Let's compile it. 
+ // TODO: + fs = new GX2PShader(FSID, useHWTransform); + fsCache_[FSID] = fs; + } else { + fs = fsIter->second; + } + + lastFSID_ = FSID; + + lastVShader_ = vs; + lastFShader_ = fs; + + *vshader = vs; + *fshader = fs; +} + +std::vector ShaderManagerGX2::DebugGetShaderIDs(DebugShaderType type) { + std::string id; + std::vector ids; + switch (type) { + case SHADER_TYPE_VERTEX: + { + for (auto iter : vsCache_) { + iter.first.ToString(&id); + ids.push_back(id); + } + break; + } + case SHADER_TYPE_FRAGMENT: + { + for (auto iter : fsCache_) { + iter.first.ToString(&id); + ids.push_back(id); + } + break; + } + default: + break; + } + return ids; +} + +std::string ShaderManagerGX2::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) { + ShaderID shaderId; + shaderId.FromString(id); + switch (type) { + case SHADER_TYPE_VERTEX: + { + auto iter = vsCache_.find(VShaderID(shaderId)); + if (iter == vsCache_.end()) { + return ""; + } + return iter->second->GetShaderString(stringType); + } + + case SHADER_TYPE_FRAGMENT: + { + auto iter = fsCache_.find(FShaderID(shaderId)); + if (iter == fsCache_.end()) { + return ""; + } + return iter->second->GetShaderString(stringType); + } + default: + return "N/A"; + } +} diff --git a/GPU/GX2/ShaderManagerGX2.h b/GPU/GX2/ShaderManagerGX2.h new file mode 100644 index 000000000000..c2c224bae653 --- /dev/null +++ b/GPU/GX2/ShaderManagerGX2.h @@ -0,0 +1,119 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. 
+ +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include +#include +#include + +#include "base/basictypes.h" +#include "GPU/Common/ShaderCommon.h" +#include "GPU/Common/ShaderId.h" +#include "GPU/Common/ShaderUniforms.h" + +#include "GPU/GX2/GX2StaticShaders.h" + +class GX2PShader : public GX2PixelShader { +public: + // TODO: fixme + GX2PShader(FShaderID id, bool useHWTransform) : GX2PixelShader(defPShaderGX2), id_(id), useHWTransform_(useHWTransform) {} + ~GX2PShader() { + if (program && program != defPShaderGX2.program) { + MEM2_free(program); + } + } + + const std::string source() const { return "N/A"; } + bool UseHWTransform() const { return useHWTransform_; } + std::string GetShaderString(DebugShaderStringType type) const; + +protected: + bool useHWTransform_; + FShaderID id_; +}; + +class GX2VShader : public GX2VertexShader { +public: + // TODO: fixme + GX2VShader(VShaderID id, bool useHWTransform) : GX2VertexShader(defVShaderGX2), id_(id), failed_(false), useHWTransform_(useHWTransform) {} + ~GX2VShader() { + if (program && program != defVShaderGX2.program) { + MEM2_free(program); + } + } + + const std::string source() const { return "N/A"; } + const u8 *bytecode() const { return program; } + bool UseHWTransform() const { return useHWTransform_; } + std::string GetShaderString(DebugShaderStringType type) const; + +protected: + bool failed_; + bool useHWTransform_; + VShaderID id_; +}; + +class ShaderManagerGX2 : public ShaderManagerCommon { +public: + ShaderManagerGX2(GX2ContextState *context); + ~ShaderManagerGX2(); + + void GetShaders(int prim, u32 vertType, GX2VShader **vshader, GX2PShader **fshader, bool useHWTransform); + void ClearShaders(); + void DirtyLastShader() override; + + int GetNumVertexShaders() const { return 
(int)vsCache_.size(); } + int GetNumFragmentShaders() const { return (int)fsCache_.size(); } + + std::vector DebugGetShaderIDs(DebugShaderType type); + std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType); + + uint64_t UpdateUniforms(); + void BindUniforms(); + + // TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer. + // Applies dirty changes and copies the buffer. + bool IsBaseDirty() { return true; } + bool IsLightDirty() { return true; } + bool IsBoneDirty() { return true; } + +private: + void Clear(); + + typedef std::map FSCache; + FSCache fsCache_; + + typedef std::map VSCache; + VSCache vsCache_; + + // Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time. + UB_VS_FS_Base ub_base; + UB_VS_Lights ub_lights; + UB_VS_Bones ub_bones; + + // Not actual pushbuffers + void *push_base; + void *push_lights; + void *push_bones; + + GX2PShader *lastFShader_; + GX2VShader *lastVShader_; + + FShaderID lastFSID_; + VShaderID lastVSID_; +}; diff --git a/GPU/GX2/StateMappingGX2.cpp b/GPU/GX2/StateMappingGX2.cpp new file mode 100644 index 000000000000..080d2c86713e --- /dev/null +++ b/GPU/GX2/StateMappingGX2.cpp @@ -0,0 +1,422 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "math/dataconv.h" + +#include "GPU/Math3D.h" +#include "GPU/GPUState.h" +#include "GPU/ge_constants.h" +#include "GPU/Common/GPUStateUtils.h" +#include "Core/System.h" +#include "Core/Config.h" +#include "Core/Reporting.h" + +#include "GPU/Common/FramebufferCommon.h" +#include "GPU/GX2/DrawEngineGX2.h" +#include "GPU/GX2/StateMappingGX2.h" +#include "GPU/GX2/FramebufferManagerGX2.h" +#include "GPU/GX2/TextureCacheGX2.h" + +#include + +// clang-format off +// These tables all fit into u8s. +static const GX2BlendMode GX2BlendFactorLookup[(size_t)BlendFactor::COUNT] = { + GX2_BLEND_MODE_ZERO, + GX2_BLEND_MODE_ONE, + GX2_BLEND_MODE_SRC_COLOR, + GX2_BLEND_MODE_INV_SRC_COLOR, + GX2_BLEND_MODE_DST_COLOR, + GX2_BLEND_MODE_INV_DST_COLOR, + GX2_BLEND_MODE_SRC_ALPHA, + GX2_BLEND_MODE_INV_SRC_ALPHA, + GX2_BLEND_MODE_DST_ALPHA, + GX2_BLEND_MODE_INV_DST_ALPHA, + GX2_BLEND_MODE_BLEND_FACTOR, + GX2_BLEND_MODE_INV_BLEND_FACTOR, + GX2_BLEND_MODE_BLEND_FACTOR, + GX2_BLEND_MODE_INV_BLEND_FACTOR, + GX2_BLEND_MODE_SRC1_COLOR, + GX2_BLEND_MODE_INV_SRC1_COLOR, + GX2_BLEND_MODE_SRC1_ALPHA, + GX2_BLEND_MODE_INV_SRC1_ALPHA, +}; + +static const GX2BlendCombineMode GX2BlendEqLookup[(size_t)BlendEq::COUNT] = { + GX2_BLEND_COMBINE_MODE_ADD, + GX2_BLEND_COMBINE_MODE_SUB, + GX2_BLEND_COMBINE_MODE_REV_SUB, + GX2_BLEND_COMBINE_MODE_MIN, + GX2_BLEND_COMBINE_MODE_MAX, +}; + +static const GX2CompareFunction compareOps[] = { + GX2_COMPARE_FUNC_NEVER, + GX2_COMPARE_FUNC_ALWAYS, + GX2_COMPARE_FUNC_EQUAL, + GX2_COMPARE_FUNC_NOT_EQUAL, + GX2_COMPARE_FUNC_LESS, + GX2_COMPARE_FUNC_LEQUAL, + GX2_COMPARE_FUNC_GREATER, + GX2_COMPARE_FUNC_GEQUAL, +}; + +static const GX2StencilFunction stencilOps[] = { + GX2_STENCIL_FUNCTION_KEEP, + GX2_STENCIL_FUNCTION_ZERO, + GX2_STENCIL_FUNCTION_REPLACE, + GX2_STENCIL_FUNCTION_INV, + 
GX2_STENCIL_FUNCTION_INCR_CLAMP, + GX2_STENCIL_FUNCTION_DECR_CLAMP, + GX2_STENCIL_FUNCTION_KEEP, // reserved + GX2_STENCIL_FUNCTION_KEEP, // reserved +}; + +static const GX2PrimitiveMode primToGX2[8] = { + GX2_PRIMITIVE_MODE_POINTS, + GX2_PRIMITIVE_MODE_LINES, + GX2_PRIMITIVE_MODE_LINE_STRIP, + GX2_PRIMITIVE_MODE_TRIANGLES, + GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, + GX2_PRIMITIVE_MODE_TRIANGLE_FAN, + GX2_PRIMITIVE_MODE_TRIANGLES, +}; + +static const GX2LogicOp logicOps[] = { + GX2_LOGIC_OP_CLEAR, + GX2_LOGIC_OP_AND, + GX2_LOGIC_OP_REV_AND, + GX2_LOGIC_OP_COPY, + GX2_LOGIC_OP_INV_AND, + GX2_LOGIC_OP_NOP, + GX2_LOGIC_OP_XOR, + GX2_LOGIC_OP_OR, + GX2_LOGIC_OP_NOR, + GX2_LOGIC_OP_EQUIV, + GX2_LOGIC_OP_INV, + GX2_LOGIC_OP_REV_OR, + GX2_LOGIC_OP_INV_COPY, + GX2_LOGIC_OP_INV_OR, + GX2_LOGIC_OP_NOT_AND, + GX2_LOGIC_OP_SET, +}; +// clang-format on + +void DrawEngineGX2::ResetShaderBlending() { + if (fboTexBound_) { + // GX2SetPixelTexture(nullptr, 0); + fboTexBound_ = false; + } +} + +class FramebufferManagerGX2; +class ShaderManagerGX2; + +void DrawEngineGX2::ApplyDrawState(int prim) { + dynState_.topology = primToGX2[prim]; + + if (!gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) { + // nothing to do + return; + } + + bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; + // Blend + if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) { + gstate_c.SetAllowShaderBlend(!g_Config.bDisableSlowFramebufEffects); + if (gstate.isModeClear()) { + keys_.blend.value = 0; // full wipe + keys_.blend.blendEnable = false; + dynState_.useBlendColor = false; + // Color Test + bool alphaMask = gstate.isClearModeAlphaMask(); + bool colorMask = gstate.isClearModeColorMask(); + keys_.blend.colorWriteMask = (GX2ChannelMask)((colorMask ? (1 | 2 | 4) : 0) | (alphaMask ? 8 : 0)); + } else { + keys_.blend.value = 0; + // Set blend - unless we need to do it in the shader. 
+ GenericBlendState blendState; + ConvertBlendState(blendState, gstate_c.allowShaderBlend); + if (blendState.applyShaderBlending) { + if (ApplyShaderBlending()) { + // We may still want to do something about stencil -> alpha. + ApplyStencilReplaceAndLogicOp(blendState.replaceAlphaWithStencil, blendState); + } else { + // Until next time, force it off. + ResetShaderBlending(); + gstate_c.SetAllowShaderBlend(false); + } + } else if (blendState.resetShaderBlending) { + ResetShaderBlending(); + } + + if (blendState.enabled) { + keys_.blend.blendEnable = true; + keys_.blend.logicOpEnable = false; + keys_.blend.blendOpColor = GX2BlendEqLookup[(size_t)blendState.eqColor]; + keys_.blend.blendOpAlpha = GX2BlendEqLookup[(size_t)blendState.eqAlpha]; + keys_.blend.srcColor = GX2BlendFactorLookup[(size_t)blendState.srcColor]; + keys_.blend.srcAlpha = GX2BlendFactorLookup[(size_t)blendState.srcAlpha]; + keys_.blend.destColor = GX2BlendFactorLookup[(size_t)blendState.dstColor]; + keys_.blend.destAlpha = GX2BlendFactorLookup[(size_t)blendState.dstAlpha]; + if (blendState.dirtyShaderBlend) { + gstate_c.Dirty(DIRTY_SHADERBLEND); + } + dynState_.useBlendColor = blendState.useBlendColor; + if (blendState.useBlendColor) { + dynState_.blendColor = blendState.blendColor; + } + } else { + keys_.blend.blendEnable = false; + dynState_.useBlendColor = false; + } + + if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) { + // Logic Ops + if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) { + keys_.blend.blendEnable = false; // Can't have both blend & logic op - although I think the PSP can! + keys_.blend.logicOpEnable = true; + keys_.blend.logicOp = logicOps[gstate.getLogicOp()]; + } else { + keys_.blend.logicOpEnable = false; + } + } + + // PSP color/alpha mask is per bit but we can only support per byte. + // But let's do that, at least. And let's try a threshold. 
+ bool rmask = (gstate.pmskc & 0xFF) < 128; + bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128; + bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128; + bool amask = (gstate.pmska & 0xFF) < 128; + +#ifndef MOBILE_DEVICE + u8 abits = (gstate.pmska >> 0) & 0xFF; + u8 rbits = (gstate.pmskc >> 0) & 0xFF; + u8 gbits = (gstate.pmskc >> 8) & 0xFF; + u8 bbits = (gstate.pmskc >> 16) & 0xFF; + if ((rbits != 0 && rbits != 0xFF) || (gbits != 0 && gbits != 0xFF) || (bbits != 0 && bbits != 0xFF)) { + WARN_LOG_REPORT_ONCE(rgbmask, G3D, "Unsupported RGB mask: r=%02x g=%02x b=%02x", rbits, gbits, bbits); + } + if (abits != 0 && abits != 0xFF) { + // The stencil part of the mask is supported. + WARN_LOG_REPORT_ONCE(amask, G3D, "Unsupported alpha/stencil mask: %02x", abits); + } +#endif + + // Let's not write to alpha if stencil isn't enabled. + if (!gstate.isStencilTestEnabled()) { + amask = false; + } else { + // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. + if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { + amask = false; + } + } + + keys_.blend.colorWriteMask = (GX2ChannelMask)((rmask ? 1 : 0) | (gmask ? 2 : 0) | (bmask ? 4 : 0) | (amask ? 8 : 0)); + } + + GX2BlendState *bs1 = blendCache_.Get(keys_.blend.value); + if (bs1 == nullptr) { + bs1 = new GX2BlendState; + GX2InitColorControlReg(&bs1->color, keys_.blend.logicOpEnable ? 
keys_.blend.logicOp : GX2_LOGIC_OP_COPY, keys_.blend.blendEnable, false, keys_.blend.colorWriteMask != 0); + GX2InitTargetChannelMasksReg(&bs1->mask, keys_.blend.colorWriteMask, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA); + GX2InitBlendControlReg(&bs1->blend, GX2_RENDER_TARGET_0, keys_.blend.srcColor, keys_.blend.destColor, keys_.blend.blendOpColor, keys_.blend.srcAlpha && keys_.blend.destAlpha, keys_.blend.srcAlpha, keys_.blend.destAlpha, keys_.blend.blendOpAlpha); + blendCache_.Insert(keys_.blend.value, bs1); + } + blendState_ = bs1; + } + + if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) { + keys_.raster.value = 0; + keys_.raster.frontFace = GX2_FRONT_FACE_CCW; + // Set cull + if (!gstate.isModeClear() && !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled()) { + keys_.raster.cullFront = !!gstate.getCullMode(); + keys_.raster.cullBack = !gstate.getCullMode(); + } else { + keys_.raster.cullFront = GX2_DISABLE; + keys_.raster.cullBack = GX2_DISABLE; + } + GX2RasterizerState *rs = rasterCache_.Get(keys_.raster.value); + if (rs == nullptr) { + rs = new GX2RasterizerState({ keys_.raster.frontFace, keys_.raster.cullFront, keys_.raster.cullBack }); + rasterCache_.Insert(keys_.raster.value, rs); + } + rasterState_ = rs; + } + + if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) { + if (gstate.isModeClear()) { + keys_.depthStencil.value = 0; + keys_.depthStencil.depthTestEnable = true; + keys_.depthStencil.depthCompareOp = GX2_COMPARE_FUNC_ALWAYS; + keys_.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask(); + if (gstate.isClearModeDepthMask()) { + framebufferManager_->SetDepthUpdated(); + } + + // Stencil Test + bool alphaMask = gstate.isClearModeAlphaMask(); + if (alphaMask) { + keys_.depthStencil.stencilTestEnable = true; + keys_.depthStencil.stencilCompareFunc = GX2_COMPARE_FUNC_ALWAYS; + keys_.depthStencil.stencilPassOp = 
GX2_STENCIL_FUNCTION_REPLACE; + keys_.depthStencil.stencilFailOp = GX2_STENCIL_FUNCTION_REPLACE; + keys_.depthStencil.stencilDepthFailOp = GX2_STENCIL_FUNCTION_REPLACE; + dynState_.useStencil = true; + // In clear mode, the stencil value is set to the alpha value of the vertex. + // A normal clear will be 2 points, the second point has the color. + // We override this value in the pipeline from software transform for clear rectangles. + dynState_.stencilRef = 0xFF; + keys_.depthStencil.stencilWriteMask = 0xFF; + } else { + keys_.depthStencil.stencilTestEnable = false; + dynState_.useStencil = false; + } + + } else { + keys_.depthStencil.value = 0; + // Depth Test + if (gstate.isDepthTestEnabled()) { + keys_.depthStencil.depthTestEnable = true; + keys_.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()]; + keys_.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled(); + if (gstate.isDepthWriteEnabled()) { + framebufferManager_->SetDepthUpdated(); + } + } else { + keys_.depthStencil.depthTestEnable = false; + keys_.depthStencil.depthWriteEnable = false; + keys_.depthStencil.depthCompareOp = GX2_COMPARE_FUNC_ALWAYS; + } + + GenericStencilFuncState stencilState; + ConvertStencilFuncState(stencilState); + + // Stencil Test + if (stencilState.enabled) { + keys_.depthStencil.stencilTestEnable = true; + keys_.depthStencil.stencilCompareFunc = compareOps[stencilState.testFunc]; + keys_.depthStencil.stencilPassOp = stencilOps[stencilState.zPass]; + keys_.depthStencil.stencilFailOp = stencilOps[stencilState.sFail]; + keys_.depthStencil.stencilDepthFailOp = stencilOps[stencilState.zFail]; + keys_.depthStencil.stencilCompareMask = stencilState.testMask; + keys_.depthStencil.stencilWriteMask = stencilState.writeMask; + dynState_.useStencil = true; + dynState_.stencilRef = stencilState.testRef; + } else { + keys_.depthStencil.stencilTestEnable = false; + dynState_.useStencil = false; + } + } + GX2DepthStencilControlReg *ds = 
depthStencilCache_.Get(keys_.depthStencil.value); + if (ds == nullptr) { + ds = new GX2DepthStencilControlReg; + GX2InitDepthStencilControlReg(ds, keys_.depthStencil.depthTestEnable, keys_.depthStencil.depthWriteEnable, keys_.depthStencil.depthCompareOp, keys_.depthStencil.stencilTestEnable, keys_.depthStencil.stencilTestEnable, keys_.depthStencil.stencilCompareFunc, keys_.depthStencil.stencilPassOp, keys_.depthStencil.stencilDepthFailOp, keys_.depthStencil.stencilFailOp, keys_.depthStencil.stencilCompareFunc, keys_.depthStencil.stencilPassOp, keys_.depthStencil.stencilDepthFailOp, keys_.depthStencil.stencilFailOp); + depthStencilCache_.Insert(keys_.depthStencil.value, ds); + } + depthStencilState_ = ds; + } + + if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) { + ViewportAndScissor vpAndScissor; + ConvertViewportAndScissor(useBufferedRendering, framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(), framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(), vpAndScissor); + + float depthMin = vpAndScissor.depthRangeMin; + float depthMax = vpAndScissor.depthRangeMax; + + if (depthMin < 0.0f) + depthMin = 0.0f; + if (depthMax > 1.0f) + depthMax = 1.0f; + if (vpAndScissor.dirtyDepth) { + gstate_c.Dirty(DIRTY_DEPTHRANGE); + } + + Draw::Viewport &vp = dynState_.viewport; + vp.TopLeftX = vpAndScissor.viewportX; + vp.TopLeftY = vpAndScissor.viewportY; + vp.Width = vpAndScissor.viewportW; + vp.Height = vpAndScissor.viewportH; + vp.MinDepth = depthMin; + vp.MaxDepth = depthMax; + + if (vpAndScissor.dirtyProj) { + gstate_c.Dirty(DIRTY_PROJMATRIX); + } + + GX2_RECT &scissor = dynState_.scissor; + if (vpAndScissor.scissorEnable) { + scissor.left = vpAndScissor.scissorX; + scissor.top = vpAndScissor.scissorY; + scissor.right = vpAndScissor.scissorX + std::max(0, vpAndScissor.scissorW); + scissor.bottom = vpAndScissor.scissorY + std::max(0, vpAndScissor.scissorH); + } else { + scissor.left = 0; + scissor.top = 0; + 
scissor.right = framebufferManager_->GetRenderWidth(); + scissor.bottom = framebufferManager_->GetRenderHeight(); + } + } + + if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { + textureCache_->SetTexture(); + gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); + } +} + +void DrawEngineGX2::ApplyDrawStateLate(bool applyStencilRef, uint8_t stencilRef) { + if (!gstate.isModeClear()) { + if (fboTexNeedBind_) { + framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY); + // No sampler required, we do a plain Load in the pixel shader. + fboTexBound_ = true; + fboTexNeedBind_ = false; + } + textureCache_->ApplyTexture(); + } + + // we go through Draw here because it automatically handles screen rotation, as needed in UWP on mobiles. + if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) { + draw_->SetViewports(1, &dynState_.viewport); + draw_->SetScissorRect(dynState_.scissor.left, dynState_.scissor.top, dynState_.scissor.right - dynState_.scissor.left, dynState_.scissor.bottom - dynState_.scissor.top); + } + if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) { + GX2SetCullOnlyControl(rasterState_->frontFace_, rasterState_->cullFront_, rasterState_->cullBack_); + } + if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) { + // Need to do this AFTER ApplyTexture because the process of depallettization can ruin the blend state. 
+ float blendColor[4]; + Uint8x4ToFloat4(blendColor, dynState_.blendColor); + GX2SetBlendControlReg(&blendState_->blend); + GX2SetColorControlReg(&blendState_->color); + GX2SetTargetChannelMasksReg(&blendState_->mask); + GX2SetBlendConstantColorReg((GX2BlendConstantColorReg *)blendColor); + } + if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE) || applyStencilRef) { + GX2SetDepthStencilControlReg(depthStencilState_); + if (!applyStencilRef) + stencilRef = dynState_.stencilRef; + GX2SetStencilMask(0xFF, 0xFF, stencilRef, 0xFF, 0xFF, stencilRef); + } + gstate_c.Clean(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_BLEND_STATE); + + // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects. + if (fboTexBound_) + gstate_c.Dirty(DIRTY_BLEND_STATE); +} diff --git a/GPU/GX2/StateMappingGX2.h b/GPU/GX2/StateMappingGX2.h new file mode 100644 index 000000000000..0af6b4febc2a --- /dev/null +++ b/GPU/GX2/StateMappingGX2.h @@ -0,0 +1,81 @@ +#pragma once + +#include +#include + +#include "thin3d/thin3d.h" + +// TODO: Do this more progressively. No need to compute the entire state if the entire state hasn't changed. 
+
+struct GX2BlendKey { // Packed blend state: `value` and the bit-fields share storage through the union.
+	union {
+		uint64_t value;
+		struct {
+			// Blend
+			bool blendEnable : 1;
+			GX2BlendMode srcColor : 5;
+			GX2BlendMode destColor : 5;
+			GX2BlendMode srcAlpha : 5;
+			GX2BlendMode destAlpha : 5;
+			GX2BlendCombineMode blendOpColor : 3;
+			GX2BlendCombineMode blendOpAlpha : 3;
+			bool logicOpEnable : 1;
+			GX2LogicOp logicOp : 8;
+			GX2ChannelMask colorWriteMask : 4; // Declared widths total 40 bits, below the 64 of `value`.
+		};
+	};
+};
+
+struct GX2DepthStencilKey { // Packed depth/stencil state; `value` is what the depth-stencil register cache is looked up by.
+	union {
+		uint64_t value;
+		struct {
+			// Depth/Stencil
+			bool depthTestEnable : 1;
+			bool depthWriteEnable : 1;
+			GX2CompareFunction depthCompareOp : 4; // GX2_COMPARISON (-1 and we could fit it in 3 bits)
+			bool stencilTestEnable : 1;
+			GX2CompareFunction stencilCompareFunc : 4; // GX2_COMPARISON
+			GX2StencilFunction stencilPassOp : 4; // GX2_STENCIL_OP
+			GX2StencilFunction stencilFailOp : 4; // GX2_STENCIL_OP
+			GX2StencilFunction stencilDepthFailOp : 4; // GX2_STENCIL_OP
+			unsigned int stencilWriteMask : 8; // Unfortunately these are baked into the state on GX2
+			unsigned int stencilCompareMask : 8; // NOTE(review): ApplyDrawStateLate passes literal 0xFF masks to GX2SetStencilMask - confirm these two fields are consumed.
+		};
+	};
+};
+
+struct GX2RasterKey { // Packed rasterizer state (winding order and which faces are culled).
+	union {
+		uint32_t value;
+		struct {
+			GX2FrontFace frontFace : 1; // NOTE(review): 1-bit enum field; presumably GX2FrontFace only takes the values 0 and 1 - confirm.
+			bool cullFront : 1;
+			bool cullBack : 1;
+		};
+	};
+};
+
+// In GX2 we cache blend state objects etc, and we simply emit keys, which are then also used to create these objects.
+struct GX2StateKeys {
+	GX2BlendKey blend;
+	GX2DepthStencilKey depthStencil;
+	GX2RasterKey raster;
+};
+
+struct GX2_RECT { // Rectangle stored as edges, not as position + size.
+	int left;
+	int top;
+	int right; // The draw engine stores x + width here and subtracts `left` again when setting the scissor.
+	int bottom; // Likewise y + height.
+};
+
+struct GX2DynamicState { // State kept next to the cached register objects and applied separately from them.
+	int topology;
+	bool useBlendColor;
+	uint32_t blendColor; // Four packed 8-bit channels; expanded with Uint8x4ToFloat4 before being handed to GX2.
+	bool useStencil;
+	uint8_t stencilRef; // Reference value used when the caller does not supply its own stencil ref.
+	Draw::Viewport viewport;
+	GX2_RECT scissor;
+};
diff --git a/GPU/GX2/StencilBufferGX2.cpp b/GPU/GX2/StencilBufferGX2.cpp
new file mode 100644
index 000000000000..432014e97d4f
--- /dev/null
+++ b/GPU/GX2/StencilBufferGX2.cpp
@@ -0,0 +1,205 @@
+// Copyright (c) 2014- PPSSPP Project.
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "base/logging.h" + +#include "ext/native/thin3d/thin3d.h" +#include "Core/Reporting.h" +#include "GPU/Common/StencilCommon.h" +#include "GPU/GX2/FramebufferManagerGX2.h" +#include "GPU/GX2/FragmentShaderGeneratorGX2.h" +#include "GPU/GX2/ShaderManagerGX2.h" +#include "GPU/GX2/TextureCacheGX2.h" +#include "GPU/GX2/GX2Util.h" +#include "GPU/GX2/GX2StaticShaders.h" + +static const char *stencil_ps = R"( +SamplerState samp : register(s0); +Texture2D tex : register(t0); +cbuffer base : register(b0) { + int4 u_stencilValue; +}; +struct PS_IN { + float2 v_texcoord0 : TEXCOORD0; +}; +float4 main(PS_IN In) : SV_Target { + float4 index = tex.Sample(samp, In.v_texcoord0); + int indexBits = int(index.a * 255.99); + if ((indexBits & u_stencilValue.x) == 0) + discard; + return index.aaaa; +} +)"; + +// static const char *stencil_ps_fast; + +static const char *stencil_vs = R"( +struct VS_IN { + float4 a_position : POSITION; + float2 a_texcoord0 : TEXCOORD0; +}; +struct VS_OUT { + float2 v_texcoord0 : TEXCOORD0; + float4 position : SV_Position; +}; +VS_OUT main(VS_IN In) { + VS_OUT Out; + Out.position = In.a_position; + Out.v_texcoord0 = In.a_texcoord0; + return Out; +} +)"; + +// TODO : If SV_StencilRef is available (?) 
then this can be done in a single pass.
+bool FramebufferManagerGX2::NotifyStencilUpload(u32 addr, int size, bool skipZero) { // Copies stencil bits from the PSP image at `addr` into the matching framebuffer; true if anything was drawn. `size` is unused.
+	if (!MayIntersectFramebuffer(addr)) {
+		return false;
+	}
+
+	VirtualFramebuffer *dstBuffer = nullptr;
+	for (size_t i = 0; i < vfbs_.size(); ++i) {
+		VirtualFramebuffer *vfb = vfbs_[i];
+		if (MaskedEqual(vfb->fb_address, addr)) {
+			dstBuffer = vfb; // No break: the last matching framebuffer wins.
+		}
+	}
+	if (!dstBuffer) {
+		return false;
+	}
+
+	int values = 0; // Number of distinct stencil values the format can hold (2, 16 or 256).
+	u8 usedBits = 0; // Union of the stencil bits actually present in the source image.
+
+	const u8 *src = Memory::GetPointer(addr);
+	if (!src) {
+		return false;
+	}
+
+	switch (dstBuffer->format) {
+	case GE_FORMAT_565:
+		// Well, this doesn't make much sense.
+		return false;
+	case GE_FORMAT_5551:
+		usedBits = StencilBits5551(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
+		values = 2;
+		break;
+	case GE_FORMAT_4444:
+		usedBits = StencilBits4444(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
+		values = 16;
+		break;
+	case GE_FORMAT_8888:
+		usedBits = StencilBits8888(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
+		values = 256;
+		break;
+	case GE_FORMAT_INVALID:
+		// Impossible.
+		break;
+	}
+
+	if (usedBits == 0) {
+		if (skipZero) {
+			// Common when creating buffers, it's already 0. We're done.
+			return false;
+		}
+
+		// Clear stencil+alpha but not color. Only way is to draw a quad.
+		GX2SetColorControlReg(&StockGX2::blendDisabledColorWrite);
+		GX2SetTargetChannelMasksReg(&StockGX2::TargetChannelMasks[0x8]);
+		GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
+		GX2SetDepthStencilControlReg(&StockGX2::depthDisabledStencilWrite);
+		GX2SetAttribBuffer(0, 4 * quadStride_, quadStride_, fsQuadBuffer_); // The clear path draws the static fsQuadBuffer_ quad.
+		GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, 0, 1);
+
+		gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
+		return true;
+	}
+
+	if (!stencilValueBuffer_) { // Lazily allocate the uniform block once and keep it.
+		static_assert(!(sizeof(StencilValueUB) & 0x3F), "sizeof(StencilValueUB) must to be aligned to 64bytes!");
+		stencilValueBuffer_ = (StencilValueUB *)MEM2_alloc(sizeof(StencilValueUB), GX2_UNIFORM_BLOCK_ALIGNMENT);
+		memset(stencilValueBuffer_, 0, sizeof(StencilValueUB));
+		GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, stencilValueBuffer_, sizeof(StencilValueUB));
+	}
+
+	shaderManagerGX2_->DirtyLastShader();
+
+	u16 w = dstBuffer->renderWidth;
+	u16 h = dstBuffer->renderHeight;
+	float u1 = 1.0f;
+	float v1 = 1.0f;
+	MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight, u1, v1);
+	if (dstBuffer->fbo) {
+		draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR });
+	} else {
+		// something is wrong...
+	}
+	GX2SetViewport(0.0f, 0.0f, (float)w, (float)h, 0.0f, 1.0f);
+	gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
+
+	float coord[20] = {
+		-1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, u1, 0.0f, -1.0f, -1.0f, 0.0f, 0.0f, v1, 1.0f, -1.0f, 0.0f, u1, v1,
+	};
+
+	memcpy(quadBuffer_, coord, sizeof(float) * 4 * 5);
+	GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, quadBuffer_, sizeof(float) * 4 * 5); // CPU-written data: use the CPU_ mode, as the texture cache does for its quad.
+
+	shaderManagerGX2_->DirtyLastShader();
+	textureCacheGX2_->ForgetLastTexture();
+
+	GX2SetColorControlReg(&StockGX2::blendColorDisabled);
+	GX2SetTargetChannelMasksReg(&StockGX2::TargetChannelMasks[0x0]);
+	GX2SetFetchShader(&quadFetchShader_);
+	GX2SetPixelShader(&stencilUploadPSshaderGX2); // TODO
+	GX2SetVertexShader(&defVShaderGX2);
+	GX2SetPixelTexture(&drawPixelsTex_, 0);
+	GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
+	GX2SetAttribBuffer(0, 4 * quadStride_, quadStride_, quadBuffer_); // Bind the quad filled in above (carries u1/v1), not the static fsQuadBuffer_.
+	GX2SetPixelSampler(&StockGX2::samplerPoint2DClamp, 0);
+	GX2SetDepthStencilControlReg(&StockGX2::depthDisabledStencilWrite);
+	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE);
+
+	for (int i = 1; i < values; i += i) { // i doubles each pass: one draw per stencil bit.
+		if (!(usedBits & i)) {
+			// It's already zero, let's skip it.
+			continue;
+		}
+		uint8_t mask = 0;
+		uint8_t value = 0;
+		if (dstBuffer->format == GE_FORMAT_4444) {
+			mask = i | (i << 4);
+			value = i * 16;
+		} else if (dstBuffer->format == GE_FORMAT_5551) {
+			mask = 0xFF;
+			value = i * 128;
+		} else {
+			mask = i;
+			value = i;
+		}
+
+		GX2SetDepthStencilControlReg(&StockGX2::depthDisabledStencilWrite);
+		GX2SetStencilMaskReg(&stencilMaskStates_[mask]);
+
+		stencilValueBuffer_->u_stencilValue[0] = value; // NOTE(review): one uniform block is rewritten for every draw - confirm earlier draws have consumed it.
+		GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, stencilValueBuffer_, sizeof(StencilValueUB));
+		GX2SetPixelUniformBlock(1, sizeof(StencilValueUB), stencilValueBuffer_);
+		GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, 0, 1);
+	}
+	RebindFramebuffer();
+	return true;
+}
diff --git a/GPU/GX2/TextureCacheGX2.cpp b/GPU/GX2/TextureCacheGX2.cpp
new file mode 100644
index 000000000000..36600a48554a
--- /dev/null
+++ b/GPU/GX2/TextureCacheGX2.cpp
@@ -0,0 +1,743 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+ +#include +#include +#include +#include + +#include + +#include "Core/MemMap.h" +#include "Core/Reporting.h" +#include "GPU/ge_constants.h" +#include "GPU/GPUState.h" +#include "GPU/GX2/FragmentShaderGeneratorGX2.h" +#include "GPU/GX2/TextureCacheGX2.h" +#include "GPU/GX2/FramebufferManagerGX2.h" +#include "GPU/GX2/ShaderManagerGX2.h" +#include "GPU/GX2/DepalettizeShaderGX2.h" +#include "GPU/GX2/GX2Util.h" +#include "GPU/Common/FramebufferCommon.h" +#include "GPU/Common/TextureDecoder.h" +#include "Core/Config.h" +#include "Core/Host.h" + +#include "ext/xxhash.h" +#include "math/math_util.h" + +#define INVALID_TEX (GX2Texture *)(-1LL) + +SamplerCacheGX2::~SamplerCacheGX2() { + for (auto &iter : cache_) { + delete iter.second; + } +} + +GX2Sampler *SamplerCacheGX2::GetOrCreateSampler(const SamplerCacheKey &key) { + auto iter = cache_.find(key); + if (iter != cache_.end()) { + return iter->second; + } + GX2Sampler *sampler = new GX2Sampler; + + GX2TexClampMode sClamp = key.sClamp ? GX2_TEX_CLAMP_MODE_CLAMP : GX2_TEX_CLAMP_MODE_WRAP; + GX2TexClampMode tClamp = key.tClamp ? GX2_TEX_CLAMP_MODE_CLAMP : GX2_TEX_CLAMP_MODE_WRAP; + GX2InitSampler(sampler, sClamp, key.magFilt ? GX2_TEX_XY_FILTER_MODE_LINEAR : GX2_TEX_XY_FILTER_MODE_POINT); + GX2InitSamplerClamping(sampler, sClamp, tClamp, sClamp); + // TODO: GX2TexAnisoRatio ? + GX2InitSamplerXYFilter(sampler, key.minFilt ? GX2_TEX_XY_FILTER_MODE_LINEAR : GX2_TEX_XY_FILTER_MODE_POINT, key.magFilt ? GX2_TEX_XY_FILTER_MODE_LINEAR : GX2_TEX_XY_FILTER_MODE_POINT, GX2_TEX_ANISO_RATIO_NONE); + GX2InitSamplerZMFilter(sampler, GX2_TEX_Z_FILTER_MODE_POINT, key.mipFilt ? 
GX2_TEX_MIP_FILTER_MODE_LINEAR : GX2_TEX_MIP_FILTER_MODE_POINT); + GX2InitSamplerBorderType(sampler, GX2_TEX_BORDER_TYPE_WHITE); + + cache_[key] = sampler; + return sampler; +} + +TextureCacheGX2::TextureCacheGX2(Draw::DrawContext *draw) : TextureCacheCommon(draw) { + context_ = (GX2ContextState *)draw->GetNativeObject(Draw::NativeObject::CONTEXT); + + isBgraBackend_ = true; + lastBoundTexture = INVALID_TEX; + + SetupTextureDecoder(); + + nextTexture_ = nullptr; +} + +TextureCacheGX2::~TextureCacheGX2() { + // pFramebufferVertexDecl->Release(); + Clear(true); +} + +void TextureCacheGX2::SetFramebufferManager(FramebufferManagerGX2 *fbManager) { + framebufferManagerGX2_ = fbManager; + framebufferManager_ = fbManager; +} + +void TextureCacheGX2::ReleaseTexture(TexCacheEntry *entry, bool delete_them) { + GX2Texture *texture = (GX2Texture *)entry->texturePtr; + if (texture) { + if (delete_them) { + MEM2_free(texture->surface.image); + delete texture; + } + entry->texturePtr = nullptr; + } +} + +void TextureCacheGX2::ForgetLastTexture() { + InvalidateLastTexture(); + gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); + // GX2SetPixelTexture(nullptr, 0); +} + +void TextureCacheGX2::InvalidateLastTexture(TexCacheEntry *entry) { + if (!entry || entry->texturePtr == lastBoundTexture) { + lastBoundTexture = INVALID_TEX; + } +} + +void TextureCacheGX2::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight, SamplerCacheKey &key) { + int minFilt; + int magFilt; + bool sClamp; + bool tClamp; + float lodBias; + GETexLevelMode mode; + GetSamplingParams(minFilt, magFilt, sClamp, tClamp, lodBias, 0, 0, mode); + + key.minFilt = minFilt & 1; + key.mipFilt = 0; + key.magFilt = magFilt & 1; + key.sClamp = sClamp; + key.tClamp = tClamp; + + // Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case. + // This happens whether we have OES_texture_npot or not. 
+ int w = gstate.getTextureWidth(0); + int h = gstate.getTextureHeight(0); + if (w != bufferWidth || h != bufferHeight) { + key.sClamp = true; + key.tClamp = true; + } +} + +void TextureCacheGX2::StartFrame() { + InvalidateLastTexture(); + timesInvalidatedAllThisFrame_ = 0; + + if (texelsScaledThisFrame_) { + // INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_); + } + texelsScaledThisFrame_ = 0; + if (clearCacheNextFrame_) { + Clear(true); + clearCacheNextFrame_ = false; + } else { + Decimate(); + } +} + +void TextureCacheGX2::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) { + const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16)); + // Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier. + // If not, we're going to hash random data, which hopefully doesn't cause a performance issue. + // + // TODO: Actually, this seems like a hack. The game can upload part of a CLUT and reference other data. + // clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash + // unrelated old entries for small palettes. + // Adding clutBaseBytes may just be mitigating this for some usage patterns. + const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_); + + clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888); + clutBuf_ = clutBufRaw_; + + // Special optimization: fonts typically draw clut4 with just alpha values in a single color. 
+	clutAlphaLinear_ = false;
+	clutAlphaLinearColor_ = 0;
+	if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {
+		const u16_le *clut = GetCurrentClut();
+		clutAlphaLinear_ = true;
+		clutAlphaLinearColor_ = clut[15] & 0x0FFF;
+		for (int i = 0; i < 16; ++i) {
+			u16 step = clutAlphaLinearColor_ | (i << 12);
+			if (clut[i] != step) {
+				clutAlphaLinear_ = false;
+				break;
+			}
+		}
+	}
+
+	clutLastFormat_ = gstate.clutformat;
+}
+
+void TextureCacheGX2::BindTexture(TexCacheEntry *entry) {
+	GX2Texture *texture = (GX2Texture *)entry->texturePtr;
+	if (texture != lastBoundTexture) {
+		GX2SetPixelTexture(texture, 0);
+		lastBoundTexture = texture;
+	}
+	SamplerCacheKey key{};
+	UpdateSamplingParams(*entry, key);
+	GX2Sampler *sampler = samplerCache_.GetOrCreateSampler(key);
+	GX2SetPixelSampler(sampler, 0);
+}
+
+void TextureCacheGX2::Unbind() {
+	// GX2SetPixelTexture(nullptr, 0);
+	InvalidateLastTexture();
+}
+
+class TextureShaderApplierGX2 { // Draws one textured quad with a given pixel shader. Call order: ApplyBounds() (optional), Use(), Shade().
+public:
+	struct Pos {
+		Pos(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
+		Pos() {}
+
+		float x;
+		float y;
+		float z;
+	};
+	struct UV {
+		UV(float u_, float v_) : u(u_), v(v_) {}
+		UV() {}
+
+		float u;
+		float v;
+	};
+
+	struct PosUV {
+		Pos pos;
+		UV uv;
+	};
+
+	TextureShaderApplierGX2(GX2ContextState *context, GX2PixelShader *pshader, void *dynamicBuffer, float bufferW, float bufferH, int renderW, int renderH, float xoff, float yoff) : context_(context), pshader_(pshader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
+		static const Pos pos[4] = {
+			{ -1, 1, 0 },
+			{ 1, 1, 0 },
+			{ -1, -1, 0 },
+			{ 1, -1, 0 },
+		};
+		static const UV uv[4] = {
+			{ 0, 0 },
+			{ 1, 0 },
+			{ 0, 1 },
+			{ 1, 1 },
+		};
+
+		for (int i = 0; i < 4; ++i) { // Start from a full quad shifted by (xoff, yoff).
+			verts_[i].pos = pos[i];
+			verts_[i].pos.x += xoff;
+			verts_[i].pos.y += yoff;
+			verts_[i].uv = uv[i];
+		}
+		vbuffer_ = dynamicBuffer; // Only remembered here; Use() uploads verts_ so ApplyBounds() edits reach the GPU.
+	}
+
+	void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff, float xoff, float yoff) { // Shrinks the quad and its UVs to the known vertex bounds, when there are any.
+		// If min is not < max, then we don't have values (wasn't set during decode.)
+		if (bounds.minV < bounds.maxV) {
+			const float invWidth = 1.0f / bufferW_;
+			const float invHeight = 1.0f / bufferH_;
+			// Inverse of half = double.
+			const float invHalfWidth = invWidth * 2.0f;
+			const float invHalfHeight = invHeight * 2.0f;
+
+			const int u1 = bounds.minU + uoff;
+			const int v1 = bounds.minV + voff;
+			const int u2 = bounds.maxU + uoff;
+			const int v2 = bounds.maxV + voff;
+
+			const float left = u1 * invHalfWidth - 1.0f + xoff;
+			const float right = u2 * invHalfWidth - 1.0f + xoff;
+			const float top = v1 * invHalfHeight - 1.0f + yoff;
+			const float bottom = v2 * invHalfHeight - 1.0f + yoff;
+			float z = 0.0f;
+			// Points are: BL, BR, TL, TR.
+			verts_[0].pos = Pos(left, bottom, z);
+			verts_[1].pos = Pos(right, bottom, z);
+			verts_[2].pos = Pos(left, top, z);
+			verts_[3].pos = Pos(right, top, z);
+
+			// And also the UVs, same order.
+			const float uvleft = u1 * invWidth;
+			const float uvright = u2 * invWidth;
+			const float uvtop = v1 * invHeight;
+			const float uvbottom = v2 * invHeight;
+			verts_[0].uv = UV(uvleft, uvbottom);
+			verts_[1].uv = UV(uvright, uvbottom);
+			verts_[2].uv = UV(uvleft, uvtop);
+			verts_[3].uv = UV(uvright, uvtop);
+
+			// We need to reapply the texture next time since we cropped UV.
+			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
+		}
+	}
+
+	void Use(GX2VertexShader *vshader, GX2FetchShader *fshader) { // Uploads the final vertices, then binds the three shaders.
+		memcpy(vbuffer_, &verts_[0], sizeof(verts_)); // Copy after ApplyBounds() so the cropped quad is what gets drawn.
+		GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, vbuffer_, sizeof(verts_));
+		GX2SetPixelShader(pshader_);
+		GX2SetVertexShader(vshader);
+		GX2SetFetchShader(fshader);
+	}
+
+	void Shade() { // Sets fixed draw state, draws the quad as a 4-vertex strip and dirties the state it clobbered.
+		GX2SetViewport(0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f);
+		GX2SetScissor(0, 0, renderW_, renderH_);
+		GX2SetColorControlReg(&StockGX2::blendDisabledColorWrite);
+		GX2SetTargetChannelMasksReg(&StockGX2::TargetChannelMasks[0xF]);
+		GX2SetDepthStencilControlReg(&StockGX2::depthStencilDisabled);
+		GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
+		GX2SetAttribBuffer(0, 4 * stride_, stride_, (u8*)vbuffer_ + offset_);
+		GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, 0, 1);
+		gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
+	}
+
+protected:
+	GX2ContextState *context_;
+	GX2PixelShader *pshader_;
+	void *vbuffer_; // Caller-owned vertex memory; not freed by this class.
+	PosUV verts_[4]; // CPU-side copy of the quad, in triangle-strip order.
+	u32 stride_ = sizeof(PosUV);
+	u32 offset_ = 0;
+	float bufferW_;
+	float bufferH_;
+	int renderW_;
+	int renderH_;
+};
+
+void TextureCacheGX2::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) {
+	GX2PixelShader *pshader = nullptr;
+	u32 clutMode = gstate.clutformat & 0xFFFFFF;
+	if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) {
+		pshader = depalShaderCache_->GetDepalettizePixelShader(clutMode, framebuffer->drawnFormat);
+	}
+
+	if (pshader) {
+		bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
+		const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
+		GX2Texture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_, expand32);
+
+		Draw::Framebuffer *depalFBO = framebufferManagerGX2_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, Draw::FBO_8888);
+		shaderManager_->DirtyLastShader();
+		draw_->BindPipeline(nullptr);
+
+		float xoff = -0.5f / framebuffer->renderWidth;
+
float yoff = 0.5f / framebuffer->renderHeight; + + TextureShaderApplierGX2 shaderApply(context_, pshader, framebufferManagerGX2_->GetDynamicQuadBuffer(), framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight, xoff, yoff); + shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset, xoff, yoff); + shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader(), depalShaderCache_->GetFetchShader()); + + GX2SetPixelTexture(clutTexture, 1); + framebufferManagerGX2_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY); + GX2SetPixelSampler(&StockGX2::samplerPoint2DWrap, 0); + draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); + shaderApply.Shade(); + + framebufferManagerGX2_->RebindFramebuffer(); + draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0); + + const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); + const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; + + TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatGX2(clutFormat), clutTotalColors, clutTotalColors, 1); + gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); + } else { + entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; + + framebufferManagerGX2_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); + + gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); + framebufferManagerGX2_->RebindFramebuffer(); // Probably not necessary. 
+ } + SamplerCacheKey samplerKey{}; + SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); + GX2Sampler *sampler = samplerCache_.GetOrCreateSampler(samplerKey); + GX2SetPixelSampler(sampler, 0); + InvalidateLastTexture(); + + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE); +} + +void TextureCacheGX2::BuildTexture(TexCacheEntry *const entry) { + entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK; + + // For the estimate, we assume cluts always point to 8888 for simplicity. + cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); + + // TODO: If a framebuffer is attached here, might end up with a bad entry.texture. + // Should just always create one here or something (like GLES.) + + if (entry->framebuffer) { + // Nothing else to do here. + return; + } + + if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) { + ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff); + // Proceeding here can cause a crash. + return; + } + + // Adjust maxLevel to actually present levels.. + bool badMipSizes = false; + int maxLevel = entry->maxLevel; + for (int i = 0; i <= maxLevel; i++) { + // If encountering levels pointing to nothing, adjust max level. + u32 levelTexaddr = gstate.getTextureAddress(i); + if (!Memory::IsValidAddress(levelTexaddr)) { + maxLevel = i - 1; + break; + } + + // If size reaches 1, stop, and override maxlevel. 
+ int tw = gstate.getTextureWidth(i); + int th = gstate.getTextureHeight(i); + if (tw == 1 || th == 1) { + maxLevel = i; + break; + } + + if (i > 0 && gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) { + if (tw != 1 && tw != (gstate.getTextureWidth(i - 1) >> 1)) + badMipSizes = true; + else if (th != 1 && th != (gstate.getTextureHeight(i - 1) >> 1)) + badMipSizes = true; + } + } + + int scaleFactor = standardScaleFactor_; + + // Rachet down scale factor in low-memory mode. + if (lowMemoryMode_) { + // Keep it even, though, just in case of npot troubles. + scaleFactor = scaleFactor > 4 ? 4 : (scaleFactor > 2 ? 2 : 1); + } + + u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0; + int w = gstate.getTextureWidth(0); + int h = gstate.getTextureHeight(0); + ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h); + if (replaced.GetSize(0, w, h)) { + // We're replacing, so we won't scale. + scaleFactor = 1; + entry->status |= TexCacheEntry::STATUS_IS_SCALED; + maxLevel = replaced.MaxLevel(); + badMipSizes = false; + } + + // Don't scale the PPGe texture. + if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd()) + scaleFactor = 1; + if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1) { + // Remember for later that we /wanted/ to scale this texture. + entry->status |= TexCacheEntry::STATUS_TO_SCALE; + scaleFactor = 1; + } + + if (scaleFactor != 1) { + if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) { + entry->status |= TexCacheEntry::STATUS_TO_SCALE; + scaleFactor = 1; + } else { + entry->status &= ~TexCacheEntry::STATUS_TO_SCALE; + entry->status |= TexCacheEntry::STATUS_IS_SCALED; + texelsScaledThisFrame_ += w * h; + } + } + + // Seems to cause problems in Tactics Ogre. 
+ if (badMipSizes) { + maxLevel = 0; + } + + GX2SurfaceFormat dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat()); + + if (IsFakeMipmapChange()) { + // NOTE: Since the level is not part of the cache key, we assume it never changes. + u8 level = std::max(0, gstate.getTexLevelOffset16() / 16); + LoadTextureLevel(*entry, replaced, level, maxLevel, scaleFactor, dstFmt); + } else { + LoadTextureLevel(*entry, replaced, 0, maxLevel, scaleFactor, dstFmt); + } + + if (!entry->texturePtr) { + return; + } + + // Mipmapping is only enabled when texture scaling is disabled. + if (maxLevel > 0 && scaleFactor == 1) { + for (int i = 1; i <= maxLevel; i++) { + LoadTextureLevel(*entry, replaced, i, maxLevel, scaleFactor, dstFmt); + } + } + + if (maxLevel == 0) { + entry->status |= TexCacheEntry::STATUS_BAD_MIPS; + } else { + entry->status &= ~TexCacheEntry::STATUS_BAD_MIPS; + } + if (replaced.Valid()) { + entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus())); + } +} + +GX2SurfaceFormat GetClutDestFormatGX2(GEPaletteFormat format) { + switch (format) { + case GE_CMODE_16BIT_ABGR4444: return GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4; + case GE_CMODE_16BIT_ABGR5551: return GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1; + case GE_CMODE_16BIT_BGR5650: return GX2_SURFACE_FORMAT_UNORM_R5_G6_B5; + case GE_CMODE_32BIT_ABGR8888: return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + } + // Should never be here ! 
+ return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; +} + +GX2SurfaceFormat TextureCacheGX2::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const { + if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS)) { + return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + } + + switch (format) { + case GE_TFMT_CLUT4: + case GE_TFMT_CLUT8: + case GE_TFMT_CLUT16: + case GE_TFMT_CLUT32: return GetClutDestFormatGX2(clutFormat); + case GE_TFMT_4444: return GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4; + case GE_TFMT_5551: return GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1; + case GE_TFMT_5650: return GX2_SURFACE_FORMAT_UNORM_R5_G6_B5; + case GE_TFMT_8888: + case GE_TFMT_DXT1: + case GE_TFMT_DXT3: + case GE_TFMT_DXT5: + default: return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + } +} + +TexCacheEntry::TexStatus TextureCacheGX2::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) { + CheckAlphaResult res; + switch (dstFmt) { + case GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4: res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h); break; + case GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1: res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h); break; + case GX2_SURFACE_FORMAT_UNORM_R5_G6_B5: + // Never has any alpha. 
+ res = CHECKALPHA_FULL; + break; + default: res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h); break; + } + + return (TexCacheEntry::TexStatus)res; +} + +ReplacedTextureFormat FromGX2Format(u32 fmt) { + switch (fmt) { + case GX2_SURFACE_FORMAT_UNORM_R5_G6_B5: return ReplacedTextureFormat::F_5650; + case GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1: return ReplacedTextureFormat::F_5551; + case GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4: return ReplacedTextureFormat::F_4444; + case GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8: + default: return ReplacedTextureFormat::F_8888; + } +} + +GX2SurfaceFormat ToDXGIFormat(ReplacedTextureFormat fmt) { + switch (fmt) { + case ReplacedTextureFormat::F_5650: return GX2_SURFACE_FORMAT_UNORM_R5_G6_B5; + case ReplacedTextureFormat::F_5551: return GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1; + case ReplacedTextureFormat::F_4444: return GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4; + case ReplacedTextureFormat::F_8888: + default: return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + } +} + +void TextureCacheGX2::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, GX2SurfaceFormat dstFmt) { + int w = gstate.getTextureWidth(level); + int h = gstate.getTextureHeight(level); + + GX2Texture *texture = DxTex(&entry); + if ((level == 0 || IsFakeMipmapChange()) && texture == nullptr) { + // Create texture + int levels = scaleFactor == 1 ? 
maxLevel + 1 : 1; + int tw = w, th = h; + GX2SurfaceFormat tfmt = dstFmt; + if (replaced.GetSize(level, tw, th)) { + tfmt = ToDXGIFormat(replaced.Format(level)); + } else { + tw *= scaleFactor; + th *= scaleFactor; + if (scaleFactor > 1) { + tfmt = GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + } + } + + texture = new GX2Texture; + texture->surface.width = tw; + texture->surface.height = th; + texture->surface.depth = 1; + texture->surface.dim = GX2_SURFACE_DIM_TEXTURE_2D; + texture->surface.tileMode = GX2_TILE_MODE_LINEAR_ALIGNED; + texture->surface.use = GX2_SURFACE_USE_TEXTURE; + texture->viewNumSlices = 1; + texture->surface.format = tfmt; + texture->compMap = GX2_COMP_SEL(_a, _r, _g, _b); +#if 0 // TODO: mipmapping + texture->surface.mipLevels = IsFakeMipmapChange() ? 1 : levels; +#endif + + GX2CalcSurfaceSizeAndAlignment(&texture->surface); + GX2InitTextureRegs(texture); + texture->surface.image = MEM2_alloc(texture->surface.imageSize, texture->surface.alignment); + _assert_(texture->surface.image); + + entry.texturePtr = texture; + } + + gpuStats.numTexturesDecoded++; + // For UpdateSubresource, we can't decode directly into the texture so we allocate a buffer :( + u32 *mapData = nullptr; + int mapRowPitch = 0; + if (replaced.GetSize(level, w, h)) { + mapData = (u32 *)AllocateAlignedMemory(w * h * sizeof(u32), 16); + mapRowPitch = w * 4; + replaced.Load(level, mapData, mapRowPitch); + dstFmt = ToDXGIFormat(replaced.Format(level)); + } else { + GETextureFormat tfmt = (GETextureFormat)entry.format; + GEPaletteFormat clutformat = gstate.getClutPaletteFormat(); + u32 texaddr = gstate.getTextureAddress(level); + int bufw = GetTextureBufw(level, texaddr, tfmt); + int bpp = dstFmt == GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8 ? 4 : 2; + u32 *pixelData; + int decPitch; + if (scaleFactor > 1) { + tmpTexBufRearrange_.resize(std::max(bufw, w) * h); + pixelData = tmpTexBufRearrange_.data(); + // We want to end up with a neatly packed texture for scaling. 
+ decPitch = w * bpp; + mapData = (u32 *)AllocateAlignedMemory(sizeof(u32) * (w * scaleFactor) * (h * scaleFactor), 16); + mapRowPitch = w * scaleFactor * 4; + } else { + mapRowPitch = std::max(bufw, w) * bpp; + size_t bufSize = sizeof(u32) * (mapRowPitch / bpp) * h; + mapData = (u32 *)AllocateAlignedMemory(bufSize, 16); + if (!mapData) { + ERROR_LOG(G3D, "Ran out of RAM trying to allocate a temporary texture upload buffer (alloc size: %d, %dx%d)", bufSize, mapRowPitch / sizeof(u32), h); + return; + } + pixelData = (u32 *)mapData; + decPitch = mapRowPitch; + } + + bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS); + DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, expand32); + + // We check before scaling since scaling shouldn't invent alpha from a full alpha texture. + if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) { + TexCacheEntry::TexStatus alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h); + entry.SetAlphaStatus(alphaStatus, level); + } else { + entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN); + } + + if (scaleFactor > 1) { + u32 scaleFmt = (u32)dstFmt; + scaler.ScaleAlways((u32 *)mapData, pixelData, scaleFmt, w, h, scaleFactor); + pixelData = (u32 *)mapData; + + // We always end up at 8888. Other parts assume this. + assert(scaleFmt == GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8); + bpp = sizeof(u32); + decPitch = w * bpp; + + if (decPitch != mapRowPitch) { + // Rearrange in place to match the requested pitch. + // (it can only be larger than w * bpp, and a match is likely.) + // Note! This is bad because it reads the mapped memory! TODO: Look into if DX9 does this right. 
+ for (int y = h - 1; y >= 0; --y) { + memcpy((u8 *)mapData + mapRowPitch * y, (u8 *)mapData + decPitch * y, w * bpp); + } + decPitch = mapRowPitch; + } + } + + if (replacer_.Enabled()) { + ReplacedTextureDecodeInfo replacedInfo; + replacedInfo.cachekey = entry.CacheKey(); + replacedInfo.hash = entry.fullhash; + replacedInfo.addr = entry.addr; + replacedInfo.isVideo = videos_.find(entry.addr & 0x3FFFFFFF) != videos_.end(); + replacedInfo.isFinal = (entry.status & TexCacheEntry::STATUS_TO_SCALE) == 0; + replacedInfo.scaleFactor = scaleFactor; + replacedInfo.fmt = FromGX2Format(dstFmt); + + replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, level, w, h); + } + } +#if 0 // TODO: mipmapping + if (IsFakeMipmapChange()) + context_->UpdateSubresource(texture, 0, nullptr, mapData, mapRowPitch, 0); + else + context_->UpdateSubresource(texture, level, nullptr, mapData, mapRowPitch, 0); +#endif + FreeAlignedMemory(mapData); +} + +bool TextureCacheGX2::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) { + SetTexture(false); + if (!nextTexture_) + return false; + + // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer. + TexCacheEntry *entry = nextTexture_; + ApplyTexture(); + + // TODO: Centralize. + if (entry->framebuffer) { + VirtualFramebuffer *vfb = entry->framebuffer; + buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); + bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth); + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); + // We may have blitted to a temp FBO. 
+ framebufferManager_->RebindFramebuffer(); + return retval; + } + + GX2Texture *texture = (GX2Texture *)entry->texturePtr; + if (!texture) + return false; + + if (texture->surface.format != GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8) { + // TODO: Support the other formats + return false; + } +#if 0 // TODO: mipmapping + int width = texture->surface.width >> level; + int height = texture->surface.height >> level; +#else + int width = texture->surface.width; + int height = texture->surface.height; +#endif + buffer.Allocate(width, height, GPU_DBG_FORMAT_8888); + + for (int y = 0; y < height; y++) { + memcpy(buffer.GetData() + 4 * width * y, (const uint8_t *)texture->surface.image + texture->surface.pitch * y, 4 * width); + } + + return true; +} diff --git a/GPU/GX2/TextureCacheGX2.h b/GPU/GX2/TextureCacheGX2.h new file mode 100644 index 000000000000..37f45440b375 --- /dev/null +++ b/GPU/GX2/TextureCacheGX2.h @@ -0,0 +1,103 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#pragma once + +#include +#include +#include + +#include "Common/CommonWindows.h" + +#include "GPU/GPU.h" +#include "GPU/GPUInterface.h" +#include "GPU/GX2/TextureScalerGX2.h" +#include "GPU/Common/TextureCacheCommon.h" + +struct VirtualFramebuffer; + +class FramebufferManagerGX2; +class DepalShaderCacheGX2; +class ShaderManagerGX2; + +class SamplerCacheGX2 { +public: + SamplerCacheGX2() {} + ~SamplerCacheGX2(); + GX2Sampler* GetOrCreateSampler(const SamplerCacheKey &key); + +private: + std::map cache_; +}; + +class TextureCacheGX2 : public TextureCacheCommon { +public: + TextureCacheGX2(Draw::DrawContext *draw); + ~TextureCacheGX2(); + + void StartFrame(); + + void SetFramebufferManager(FramebufferManagerGX2 *fbManager); + void SetDepalShaderCache(DepalShaderCacheGX2 *dpCache) { + depalShaderCache_ = dpCache; + } + void SetShaderManager(ShaderManagerGX2 *sm) { + shaderManager_ = sm; + } + + void ForgetLastTexture() override; + void InvalidateLastTexture(TexCacheEntry *entry = nullptr) override; + + void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight, SamplerCacheKey &key); + bool GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) override; + +protected: + void BindTexture(TexCacheEntry *entry) override; + void Unbind() override; + void ReleaseTexture(TexCacheEntry *entry, bool delete_them) override; + +private: + void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, GX2SurfaceFormat dstFmt); + GX2SurfaceFormat GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const; + TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h); + void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override; + + void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override; + void BuildTexture(TexCacheEntry *const entry) override; + + GX2ContextState *context_; + + GX2Texture 
*&DxTex(TexCacheEntry *entry) { + return (GX2Texture *&)entry->texturePtr; + } + + TextureScalerGX2 scaler; + + SamplerCacheGX2 samplerCache_; + + GX2Texture *lastBoundTexture; + + int decimationCounter_; + int texelsScaledThisFrame_; + int timesInvalidatedAllThisFrame_; + + FramebufferManagerGX2 *framebufferManagerGX2_; + DepalShaderCacheGX2 *depalShaderCache_; + ShaderManagerGX2 *shaderManager_; +}; + +GX2SurfaceFormat GetClutDestFormatGX2(GEPaletteFormat format); diff --git a/GPU/GX2/TextureScalerGX2.cpp b/GPU/GX2/TextureScalerGX2.cpp new file mode 100644 index 000000000000..697a5f65a9cc --- /dev/null +++ b/GPU/GX2/TextureScalerGX2.cpp @@ -0,0 +1,59 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include +#include "Common/ColorConv.h" +#include "Common/ThreadPools.h" +#include "GPU/Common/TextureScalerCommon.h" +#include "GPU/GX2/TextureScalerGX2.h" +#include "GPU/GX2/GPU_GX2.h" + +#undef _1 + +int TextureScalerGX2::BytesPerPixel(u32 format) { + return format == GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8 ? 
4 : 2; +} + +u32 TextureScalerGX2::Get8888Format() { + return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; +} + +void TextureScalerGX2::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) { + switch (format) { + case GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8: + dest = source; // already fine + break; + + case GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4: + GlobalThreadPool::Loop(std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height); + break; + + case GX2_SURFACE_FORMAT_UNORM_R5_G6_B5: + GlobalThreadPool::Loop(std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height); + break; + + case GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1: + GlobalThreadPool::Loop(std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height); + break; + + default: + dest = source; + ERROR_LOG(G3D, "iXBRZTexScaling: unsupported texture format"); + } +} diff --git a/GPU/GX2/TextureScalerGX2.h b/GPU/GX2/TextureScalerGX2.h new file mode 100644 index 000000000000..753b39827376 --- /dev/null +++ b/GPU/GX2/TextureScalerGX2.h @@ -0,0 +1,29 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#pragma once + +#include "Common/CommonTypes.h" +#include "GPU/Common/TextureScalerCommon.h" + +class TextureScalerGX2 : public TextureScalerCommon { +private: + // NOTE: We use GE formats, GX2 doesn't support 4444 + void ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) override; + int BytesPerPixel(u32 format) override; + u32 Get8888Format() override; +}; diff --git a/GPU/GX2/VertexShaderGeneratorGX2.cpp b/GPU/GX2/VertexShaderGeneratorGX2.cpp new file mode 100644 index 000000000000..2d00bd1fd244 --- /dev/null +++ b/GPU/GX2/VertexShaderGeneratorGX2.cpp @@ -0,0 +1,26 @@ +// Copyright (c) 2017- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include +#undef ARRAY_SIZE + +#include "GPU/Common/ShaderCommon.h" +#include "GPU/GX2/VertexShaderGeneratorGX2.h" + +void GenerateVertexShaderGX2(const VShaderID &id, GX2VertexShader *vs) { + // TODO; +} diff --git a/GPU/GX2/VertexShaderGeneratorGX2.h b/GPU/GX2/VertexShaderGeneratorGX2.h new file mode 100644 index 000000000000..05acfff97539 --- /dev/null +++ b/GPU/GX2/VertexShaderGeneratorGX2.h @@ -0,0 +1,23 @@ +// Copyright (c) 2017- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include +#include "GPU/Common/ShaderId.h" + +void GenerateVertexShaderGX2(const VShaderID &id, GX2VertexShader *vs); diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 776c03487b9a..6e57067f619f 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -197,12 +197,16 @@ void EmuScreen::bootGame(const std::string &filename) { CoreParameter coreParam{}; coreParam.cpuCore = (CPUCore)g_Config.iCpuCore; +#if PPSSPP_PLATFORM(UWP) + coreParam.gpuCore = GPUCORE_DIRECTX11; +#elif PPSSPP_PLATFORM(WIIU) + coreParam.gpuCore = GPUCORE_GX2; +#else coreParam.gpuCore = GPUCORE_GLES; switch (GetGPUBackend()) { case GPUBackend::DIRECT3D11: coreParam.gpuCore = GPUCORE_DIRECTX11; break; -#if !PPSSPP_PLATFORM(UWP) case GPUBackend::OPENGL: coreParam.gpuCore = GPUCORE_GLES; break; @@ -212,13 +216,8 @@ void EmuScreen::bootGame(const std::string &filename) { case GPUBackend::VULKAN: coreParam.gpuCore = GPUCORE_VULKAN; break; -#endif -#ifdef __wiiu__ - case GPUBackend::GX2: - coreParam.gpuCore = GPUCORE_NULL; - break; -#endif } +#endif if (g_Config.bSoftwareRendering) { coreParam.gpuCore = GPUCORE_SOFTWARE; } diff --git a/ext/native/thin3d/GX2Shaders.c b/ext/native/thin3d/GX2Shaders.c index 0d14ba8bf601..14cb0c2617de 100644 --- a/ext/native/thin3d/GX2Shaders.c 
+++ b/ext/native/thin3d/GX2Shaders.c @@ -126,7 +126,6 @@ GX2PixelShader GX2_fsCol = { .size = sizeof(fsColCode), .program = (uint8_t *)&fsColCode, .mode = GX2_SHADER_MODE_UNIFORM_BLOCK, -// .samplerVarCount = countof(samplers), samplers, }; // clang-format off __attribute__((aligned(GX2_SHADER_ALIGNMENT))) diff --git a/ext/native/thin3d/thin3d_gx2.cpp b/ext/native/thin3d/thin3d_gx2.cpp index ee46a6434e69..0f5b6d68a4b3 100644 --- a/ext/native/thin3d/thin3d_gx2.cpp +++ b/ext/native/thin3d/thin3d_gx2.cpp @@ -53,61 +53,41 @@ static const GX2TexClampMode taddrToGX2[] = { }; static GX2SurfaceFormat dataFormatToGX2SurfaceFormat(DataFormat format) { switch (format) { - case DataFormat::R32_FLOAT: - return GX2_SURFACE_FORMAT_FLOAT_R32; + case DataFormat::R32_FLOAT: return GX2_SURFACE_FORMAT_FLOAT_R32; case DataFormat::R32G32_FLOAT: return GX2_SURFACE_FORMAT_FLOAT_R32_G32; // case DataFormat::R32G32B32_FLOAT: // return GX2_SURFACE_FORMAT_FLOAT_R32_G32_B32; - case DataFormat::R32G32B32A32_FLOAT: - return GX2_SURFACE_FORMAT_FLOAT_R32_G32_B32_A32; - case DataFormat::A4R4G4B4_UNORM_PACK16: - return GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4; - case DataFormat::A1R5G5B5_UNORM_PACK16: - return GX2_SURFACE_FORMAT_UNORM_A1_B5_G5_R5; - case DataFormat::R5G5B5A1_UNORM_PACK16: - return GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1; - case DataFormat::R5G6B5_UNORM_PACK16: - return GX2_SURFACE_FORMAT_UNORM_R5_G6_B5; + case DataFormat::R32G32B32A32_FLOAT: return GX2_SURFACE_FORMAT_FLOAT_R32_G32_B32_A32; + case DataFormat::A4R4G4B4_UNORM_PACK16: return GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4; + case DataFormat::A1R5G5B5_UNORM_PACK16: return GX2_SURFACE_FORMAT_UNORM_A1_B5_G5_R5; + case DataFormat::R5G5B5A1_UNORM_PACK16: return GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1; + case DataFormat::R5G6B5_UNORM_PACK16: return GX2_SURFACE_FORMAT_UNORM_R5_G6_B5; case DataFormat::R8G8B8_UNORM: - case DataFormat::R8G8B8A8_UNORM: - return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; - case DataFormat::R8G8B8A8_UNORM_SRGB: - return 
GX2_SURFACE_FORMAT_SRGB_R8_G8_B8_A8; - case DataFormat::R16_FLOAT: - return GX2_SURFACE_FORMAT_FLOAT_R16; - case DataFormat::R16G16_FLOAT: - return GX2_SURFACE_FORMAT_FLOAT_R16_G16; - case DataFormat::R16G16B16A16_FLOAT: - return GX2_SURFACE_FORMAT_FLOAT_R16_G16_B16_A16; - case DataFormat::D24_S8: - return GX2_SURFACE_FORMAT_FLOAT_D24_S8; - case DataFormat::D16: - return GX2_SURFACE_FORMAT_UNORM_R16; - case DataFormat::D32F: - return GX2_SURFACE_FORMAT_FLOAT_R32; + case DataFormat::R8G8B8A8_UNORM: return GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; + case DataFormat::R8G8B8A8_UNORM_SRGB: return GX2_SURFACE_FORMAT_SRGB_R8_G8_B8_A8; + case DataFormat::R16_FLOAT: return GX2_SURFACE_FORMAT_FLOAT_R16; + case DataFormat::R16G16_FLOAT: return GX2_SURFACE_FORMAT_FLOAT_R16_G16; + case DataFormat::R16G16B16A16_FLOAT: return GX2_SURFACE_FORMAT_FLOAT_R16_G16_B16_A16; + case DataFormat::D24_S8: return GX2_SURFACE_FORMAT_FLOAT_D24_S8; + case DataFormat::D16: return GX2_SURFACE_FORMAT_UNORM_R16; + case DataFormat::D32F: return GX2_SURFACE_FORMAT_FLOAT_R32; case DataFormat::ETC1: - default: - return GX2_SURFACE_FORMAT_INVALID; + default: return GX2_SURFACE_FORMAT_INVALID; } } static u32 dataFormatToGX2SurfaceCompSelect(DataFormat format) { switch (format) { case DataFormat::R16_FLOAT: - case DataFormat::R32_FLOAT: - return GX2_COMP_SEL(_r, _0, _0, _1); + case DataFormat::R32_FLOAT: return GX2_COMP_SEL(_r, _0, _0, _1); case DataFormat::R16G16_FLOAT: - case DataFormat::R32G32_FLOAT: - return GX2_COMP_SEL(_r, _g, _0, _1); + case DataFormat::R32G32_FLOAT: return GX2_COMP_SEL(_r, _g, _0, _1); case DataFormat::R8G8B8_UNORM: - case DataFormat::R5G6B5_UNORM_PACK16: - return GX2_COMP_SEL(_r, _g, _b, _1); + case DataFormat::R5G6B5_UNORM_PACK16: return GX2_COMP_SEL(_r, _g, _b, _1); case DataFormat::B8G8R8A8_UNORM: - case DataFormat::B8G8R8A8_UNORM_SRGB: - return GX2_COMP_SEL(_a, _r, _g, _b); - default: - return GX2_COMP_SEL(_a, _b, _g, _r); + case DataFormat::B8G8R8A8_UNORM_SRGB: return 
GX2_COMP_SEL(_a, _r, _g, _b); + default: return GX2_COMP_SEL(_a, _b, _g, _r); } } @@ -119,62 +99,43 @@ static int dataFormatToSwapSize(DataFormat format) { case DataFormat::R16_FLOAT: case DataFormat::D16: case DataFormat::R16G16_FLOAT: - case DataFormat::R16G16B16A16_FLOAT: - return 2; - default: - return 4; + case DataFormat::R16G16B16A16_FLOAT: return 2; + default: return 4; } } static GX2AttribFormat dataFormatToGX2AttribFormat(DataFormat format) { switch (format) { - case DataFormat::R8_UNORM: - return GX2_ATTRIB_FORMAT_UNORM_8; - case DataFormat::R8G8_UNORM: - return GX2_ATTRIB_FORMAT_UNORM_8_8; + case DataFormat::R8_UNORM: return GX2_ATTRIB_FORMAT_UNORM_8; + case DataFormat::R8G8_UNORM: return GX2_ATTRIB_FORMAT_UNORM_8_8; case DataFormat::B8G8R8A8_UNORM: - case DataFormat::R8G8B8A8_UNORM: - return GX2_ATTRIB_FORMAT_UNORM_8_8_8_8; - case DataFormat::R8G8B8A8_UINT: - return GX2_ATTRIB_FORMAT_UINT_8_8_8_8; - case DataFormat::R8G8B8A8_SNORM: - return GX2_ATTRIB_FORMAT_SNORM_8_8_8_8; - case DataFormat::R8G8B8A8_SINT: - return GX2_ATTRIB_FORMAT_SINT_8_8_8_8; - case DataFormat::R32_FLOAT: - return GX2_ATTRIB_FORMAT_FLOAT_32; - case DataFormat::R32G32_FLOAT: - return GX2_ATTRIB_FORMAT_FLOAT_32_32; - case DataFormat::R32G32B32_FLOAT: - return GX2_ATTRIB_FORMAT_FLOAT_32_32_32; - case DataFormat::R32G32B32A32_FLOAT: - return GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32; + case DataFormat::R8G8B8A8_UNORM: return GX2_ATTRIB_FORMAT_UNORM_8_8_8_8; + case DataFormat::R8G8B8A8_UINT: return GX2_ATTRIB_FORMAT_UINT_8_8_8_8; + case DataFormat::R8G8B8A8_SNORM: return GX2_ATTRIB_FORMAT_SNORM_8_8_8_8; + case DataFormat::R8G8B8A8_SINT: return GX2_ATTRIB_FORMAT_SINT_8_8_8_8; + case DataFormat::R32_FLOAT: return GX2_ATTRIB_FORMAT_FLOAT_32; + case DataFormat::R32G32_FLOAT: return GX2_ATTRIB_FORMAT_FLOAT_32_32; + case DataFormat::R32G32B32_FLOAT: return GX2_ATTRIB_FORMAT_FLOAT_32_32_32; + case DataFormat::R32G32B32A32_FLOAT: return GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32; - default: - return 
(GX2AttribFormat)-1; + default: return (GX2AttribFormat)-1; } } static u32 dataFormatToGX2AttribCompSelect(DataFormat format) { switch (format) { case DataFormat::R8_UNORM: - case DataFormat::R32_FLOAT: - return GX2_COMP_SEL(_x, _0, _0, _1); + case DataFormat::R32_FLOAT: return GX2_COMP_SEL(_x, _0, _0, _1); case DataFormat::R8G8_UNORM: - case DataFormat::R32G32_FLOAT: - return GX2_COMP_SEL(_x, _y, _0, _1); - case DataFormat::R32G32B32_FLOAT: - return GX2_COMP_SEL(_x, _y, _z, _1); + case DataFormat::R32G32_FLOAT: return GX2_COMP_SEL(_x, _y, _0, _1); + case DataFormat::R32G32B32_FLOAT: return GX2_COMP_SEL(_x, _y, _z, _1); case DataFormat::R8G8B8A8_UNORM_SRGB: case DataFormat::B8G8R8A8_UNORM: - case DataFormat::B8G8R8A8_UNORM_SRGB: - return GX2_COMP_SEL(_b, _g, _r, _a); + case DataFormat::B8G8R8A8_UNORM_SRGB: return GX2_COMP_SEL(_b, _g, _r, _a); case DataFormat::R8G8B8A8_UNORM: case DataFormat::R8G8B8A8_SNORM: case DataFormat::R8G8B8A8_UINT: - case DataFormat::R8G8B8A8_SINT: - return GX2_COMP_SEL(_a, _b, _g, _r); - default: - return GX2_COMP_SEL(_x, _y, _z, _w); + case DataFormat::R8G8B8A8_SINT: return GX2_COMP_SEL(_a, _b, _g, _r); + default: return GX2_COMP_SEL(_x, _y, _z, _w); } } @@ -223,9 +184,7 @@ class GX2Buffer : public Buffer { size_ = (size_ + 0x3F) & ~0x3F; /* fallthrough */ default: - case GENERIC: - align = GX2_UNIFORM_BLOCK_ALIGNMENT; - invMode_ = GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK; + case GENERIC: align = GX2_UNIFORM_BLOCK_ALIGNMENT; invMode_ = GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK; } data_ = (u8 *)MEM1_alloc(size_, align); } @@ -249,14 +208,14 @@ class GX2BlendState : public BlendState { GX2BlendState(const BlendStateDesc &desc) { GX2InitBlendControlReg(®, GX2_RENDER_TARGET_0, blendToGX2[(int)desc.srcCol], blendToGX2[(int)desc.dstCol], blendOpToGX2[(int)desc.eqCol], (int)desc.srcAlpha && (int)desc.dstAlpha, blendToGX2[(int)desc.srcAlpha], blendToGX2[(int)desc.dstAlpha], blendOpToGX2[(int)desc.eqAlpha]); GX2InitColorControlReg(&color_reg, 
desc.logicEnabled ? logicOpToGX2[(int)desc.logicOp] : GX2_LOGIC_OP_COPY, desc.enabled, false, desc.colorMask != 0); + GX2InitTargetChannelMasksReg(&mask_reg, (GX2ChannelMask)desc.colorMask, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA, GX2_CHANNEL_MASK_RGBA); logicEnabled = desc.logicEnabled; - colorMask = desc.colorMask; } ~GX2BlendState() {} GX2BlendControlReg reg; GX2ColorControlReg color_reg; + GX2TargetChannelMaskReg mask_reg; bool logicEnabled; - int colorMask; }; class GX2RasterState : public RasterState { @@ -456,7 +415,56 @@ class GX2TextureObject : public Texture { class GX2Framebuffer : public Framebuffer { public: - GX2Framebuffer(const FramebufferDesc &desc) { DEBUG_LINE(); } + GX2Framebuffer(const FramebufferDesc &desc) { + _assert_(desc.numColorAttachments == 1); + _assert_(desc.depth == 1); + colorBuffer.surface.width = desc.width; + colorBuffer.surface.height = desc.height; + colorBuffer.surface.depth = 1; + colorBuffer.surface.dim = GX2_SURFACE_DIM_TEXTURE_2D; + colorBuffer.surface.tileMode = GX2_TILE_MODE_DEFAULT; + colorBuffer.surface.use = (GX2SurfaceUse)(GX2_SURFACE_USE_COLOR_BUFFER | GX2_SURFACE_USE_TEXTURE); + colorBuffer.viewNumSlices = 1; + switch (desc.colorDepth) { + case FBO_565: colorBuffer.surface.format = GX2_SURFACE_FORMAT_UNORM_R5_G6_B5; break; + case FBO_4444: colorBuffer.surface.format = GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4; break; + case FBO_5551: colorBuffer.surface.format = GX2_SURFACE_FORMAT_UNORM_R5_G5_B5_A1; break; + default: + case FBO_8888: colorBuffer.surface.format = GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8; break; + } + GX2CalcSurfaceSizeAndAlignment(&colorBuffer.surface); + GX2InitColorBufferRegs(&colorBuffer); + colorBuffer.surface.image = MEM2_alloc(colorBuffer.surface.imageSize, colorBuffer.surface.alignment); + _assert_(colorBuffer.surface.image); + GX2Invalidate(GX2_INVALIDATE_MODE_COLOR_BUFFER, colorBuffer.surface.image, 
colorBuffer.surface.imageSize); + colorTexture.surface = colorBuffer.surface; + GX2InitTextureRegs(&colorTexture); + if (desc.depth) { + depthBuffer.surface.width = desc.width; + depthBuffer.surface.height = desc.height; + depthBuffer.surface.depth = 1; + depthBuffer.surface.dim = GX2_SURFACE_DIM_TEXTURE_2D; + depthBuffer.surface.tileMode = GX2_TILE_MODE_DEFAULT; + depthBuffer.surface.use = (GX2SurfaceUse)(GX2_SURFACE_USE_DEPTH_BUFFER | GX2_SURFACE_USE_TEXTURE); + depthBuffer.viewNumSlices = 1; + depthBuffer.surface.format = GX2_SURFACE_FORMAT_FLOAT_D24_S8; + GX2CalcSurfaceSizeAndAlignment(&depthBuffer.surface); + GX2InitDepthBufferRegs(&depthBuffer); + depthBuffer.surface.image = MEM2_alloc(depthBuffer.surface.imageSize, depthBuffer.surface.alignment); + _assert_(depthBuffer.surface.image); + GX2Invalidate(GX2_INVALIDATE_MODE_DEPTH_BUFFER, depthBuffer.surface.image, depthBuffer.surface.imageSize); + depthTexture.surface = depthBuffer.surface; + GX2InitTextureRegs(&depthTexture); + } + } + ~GX2Framebuffer() { + MEM2_free(colorBuffer.surface.image); + MEM2_free(depthBuffer.surface.image); + } + GX2ColorBuffer colorBuffer = {}; + GX2DepthBuffer depthBuffer = {}; + GX2Texture colorTexture = {}; + GX2Texture depthTexture = {}; }; static GX2VertexShaderModule vsCol(&GX2_vsCol); @@ -539,28 +547,19 @@ class GX2DrawContext : public DrawContext { std::string GetInfoString(InfoField info) const override { switch (info) { - case APIVERSION: - return "1"; - case VENDORSTRING: - return "AMD"; - case VENDOR: - return ""; - case DRIVER: - return "-"; - case SHADELANGVERSION: - return "AMD R700 microcode"; - case APINAME: - return "GX2"; - default: - return "?"; + case APIVERSION: return "1"; + case VENDORSTRING: return "AMD"; + case VENDOR: return ""; + case DRIVER: return "-"; + case SHADELANGVERSION: return "AMD R700 microcode"; + case APINAME: return "GX2"; + default: return "?"; } } uintptr_t GetNativeObject(NativeObject obj) override { - DEBUG_LINE(); - DEBUG_VAR(obj); 
switch (obj) { - case NativeObject::CONTEXT: + case NativeObject::CONTEXT: return (uintptr_t)context_state_; case NativeObject::CONTEXT_EX: case NativeObject::DEVICE: case NativeObject::DEVICE_EX: @@ -577,6 +576,8 @@ class GX2DrawContext : public DrawContext { case NativeObject::BOUND_TEXTURE1_IMAGEVIEW: case NativeObject::RENDER_MANAGER: default: + DEBUG_LINE(); + DEBUG_VAR(obj); return 0; } } @@ -620,8 +621,7 @@ void GX2DrawContext::HandleEvent(Event ev, int width, int height, void *param1, case Event::GOT_BACKBUFFER: { break; } - case Event::PRESENTED: - break; + case Event::PRESENTED: break; } } @@ -656,6 +656,7 @@ void GX2DrawContext::BindPipeline(Pipeline *pipeline) { } GX2SetBlendControlReg(&pipeline_->blend_->reg); GX2SetColorControlReg(&pipeline_->blend_->color_reg); + GX2SetTargetChannelMasksReg(&pipeline_->blend_->mask_reg); GX2SetDepthStencilControlReg(&pipeline_->depthStencil_->reg_); GX2SetCullOnlyControl(pipeline_->raster_->frontFace_, pipeline_->raster_->cullFront_, pipeline_->raster_->cullBack_); if (pipeline_->ubo) { @@ -796,7 +797,6 @@ void GX2DrawContext::BindSamplerStates(int start, int count, SamplerState **stat } void GX2DrawContext::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) { - DEBUG_LINE(); float f[4]; Uint8x4ToFloat4(f, colorval); @@ -827,10 +827,18 @@ bool GX2DrawContext::CopyFramebufferToMemorySync(Framebuffer *src, int channelBi return false; } -void GX2DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { +void GX2DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo_, const RenderPassInfo &rp) { + GX2Framebuffer *fbo = (GX2Framebuffer *)fbo_; if (fbo) { - DEBUG_LINE(); + GX2SetColorBuffer(&fbo->colorBuffer, GX2_RENDER_TARGET_0); + if (fbo->depthBuffer.surface.image) + GX2SetDepthBuffer(&fbo->depthBuffer); + } else { + GX2SetColorBuffer(color_buffer_, GX2_RENDER_TARGET_0); + if (depth_buffer_->surface.image) + GX2SetDepthBuffer(depth_buffer_); } + float f[4]; 
Uint8x4ToFloat4(f, rp.clearColor); int flags = 0; @@ -849,14 +857,28 @@ void GX2DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const Rende GX2SetContextState(context_state_); } -void GX2DrawContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { DEBUG_LINE(); } +void GX2DrawContext::BindFramebufferAsTexture(Framebuffer *fbo_, int binding, FBChannel channelBit, int attachment) { + GX2Framebuffer *fbo = (GX2Framebuffer *)fbo_; + _assert_(channelBit == FB_COLOR_BIT); -uintptr_t GX2DrawContext::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit, int attachment) { - DEBUG_LINE(); + GX2SetPixelTexture(&fbo->colorTexture, binding); +} + +uintptr_t GX2DrawContext::GetFramebufferAPITexture(Framebuffer *fbo_, int channelBit, int attachment) { + GX2Framebuffer *fbo = (GX2Framebuffer *)fbo_; + if (channelBit == FB_COLOR_BIT) { + return (uintptr_t)&fbo->colorTexture; + } else { + return (uintptr_t)&fbo->depthTexture; + } return 0; } -void GX2DrawContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) { DEBUG_LINE(); } +void GX2DrawContext::GetFramebufferDimensions(Framebuffer *fbo_, int *w, int *h) { + GX2Framebuffer *fbo = (GX2Framebuffer *)fbo_; + *w = fbo->colorBuffer.surface.width; + *h = fbo->colorBuffer.surface.height; +} DrawContext *T3DCreateGX2Context(GX2ContextState *context_state, GX2ColorBuffer *color_buffer, GX2DepthBuffer *depth_buffer) { return new GX2DrawContext(context_state, color_buffer, depth_buffer); } diff --git a/ext/wiiu/imports.h b/ext/wiiu/imports.h index 27dc41b03399..aa7987b98cd4 100644 --- a/ext/wiiu/imports.h +++ b/ext/wiiu/imports.h @@ -92,6 +92,7 @@ IMPORT(MEMCreateExpHeapEx); IMPORT(MEMDestroyExpHeap); IMPORT(MEMAllocFromExpHeapEx); IMPORT(MEMFreeToExpHeap); +IMPORT(MEMGetTotalFreeSizeForExpHeap); IMPORT(MEMGetSizeForMBlockExpHeap); IMPORT(MEMAllocFromFrmHeapEx); IMPORT(MEMFreeToFrmHeap); @@ -197,12 +198,15 @@ IMPORT(GX2SetViewport); IMPORT(GX2SetScissor); 
IMPORT(GX2SetDepthOnlyControl); IMPORT(GX2InitDepthStencilControlReg); +IMPORT(GX2InitStencilMaskReg); IMPORT(GX2SetColorControl); IMPORT(GX2InitColorControlReg); +IMPORT(GX2InitTargetChannelMasksReg); IMPORT(GX2SetBlendControl); IMPORT(GX2InitBlendControlReg); IMPORT(GX2SetBlendControlReg); IMPORT(GX2SetColorControlReg); +IMPORT(GX2SetTargetChannelMasksReg); IMPORT(GX2SetDepthStencilControlReg); IMPORT(GX2SetBlendConstantColor); IMPORT(GX2SetBlendConstantColorReg); @@ -232,6 +236,9 @@ IMPORT(GX2InitSamplerXYFilter); IMPORT(GX2InitSamplerZMFilter); IMPORT(GX2SetPixelTexture); IMPORT(GX2SetPixelSampler); +IMPORT(GX2SetVertexTexture); +IMPORT(GX2SetStencilMask); +IMPORT(GX2SetStencilMaskReg); IMPORT(GX2ClearColor); IMPORT(GX2ClearBuffersEx); IMPORT(GX2ClearDepthStencilEx); @@ -248,6 +255,7 @@ IMPORT(GX2DrawEx); IMPORT(GX2DrawIndexedEx); IMPORT(GX2WaitForFlip); IMPORT(GX2GetSwapStatus); +IMPORT(GX2ResetGPU); IMPORT_END(); diff --git a/ext/wiiu/include/wiiu/gx2/common.h b/ext/wiiu/include/wiiu/gx2/common.h index f11909bff268..bacd555e5ae3 100644 --- a/ext/wiiu/include/wiiu/gx2/common.h +++ b/ext/wiiu/include/wiiu/gx2/common.h @@ -16,7 +16,9 @@ #ifndef GX2_COMP_SEL #define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) - +#ifdef __cplusplus +#include +#endif #define _x 0 #define _y 1 #define _z 2 diff --git a/ext/wiiu/include/wiiu/gx2/context.h b/ext/wiiu/include/wiiu/gx2/context.h index f06c80bbd091..b4caf8ec8454 100644 --- a/ext/wiiu/include/wiiu/gx2/context.h +++ b/ext/wiiu/include/wiiu/gx2/context.h @@ -31,6 +31,7 @@ void GX2SetupContextStateEx(GX2ContextState *state, BOOL unk1); void GX2GetContextStateDisplayList(GX2ContextState *state, void *outDisplayList, uint32_t *outSize); void GX2SetContextState(GX2ContextState *state); void GX2SetDefaultState(); +void GX2ResetGPU(); #ifdef __cplusplus } diff --git a/ext/wiiu/include/wiiu/gx2/draw.h b/ext/wiiu/include/wiiu/gx2/draw.h index 85f2ff4c59b1..cc9b91a07cd3 100644 --- 
a/ext/wiiu/include/wiiu/gx2/draw.h +++ b/ext/wiiu/include/wiiu/gx2/draw.h @@ -1,13 +1,14 @@ #pragma once #include #include "enum.h" - +#include +#include #ifdef __cplusplus extern "C" { #endif -void GX2SetAttribBuffer(uint32_t index, uint32_t size, uint32_t stride, void *buffer); +void GX2SetAttribBuffer(uint32_t index, uint32_t size, uint32_t stride, const void *buffer); void GX2DrawEx(GX2PrimitiveMode mode, uint32_t count, @@ -48,4 +49,8 @@ void GX2SetPrimitiveRestartIndex(uint32_t index); } #endif -/** @} */ +#if 0 +#include "event.h" +#define GX2DrawEx(mode,count,offset, numInstances) do{GX2DrawEx(mode,count,offset, numInstances); GX2DrawDone(); DEBUG_BREAK_ONCE();}while(0) +#define GX2DrawIndexedEx(mode,count,indexType,indices,offset,numInstances) do{GX2DrawIndexedEx(mode,count,indexType,indices,offset,numInstances); GX2DrawDone(); DEBUG_BREAK_ONCE();}while(0) +#endif diff --git a/ext/wiiu/include/wiiu/gx2/enum.h b/ext/wiiu/include/wiiu/gx2/enum.h index 600316b671c6..f999a8520320 100644 --- a/ext/wiiu/include/wiiu/gx2/enum.h +++ b/ext/wiiu/include/wiiu/gx2/enum.h @@ -23,9 +23,9 @@ typedef enum GX2AlphaToMaskMode typedef enum GX2AttribFormat { - GX2_ATTRIB_FORMAT_UNORM_8 = 0x0, - GX2_ATTRIB_FORMAT_UNORM_8_8 = 0x04, - GX2_ATTRIB_FORMAT_UNORM_8_8_8_8 = 0x0A, + GX2_ATTRIB_FORMAT_UNORM_8 = 0x000, + GX2_ATTRIB_FORMAT_UNORM_8_8 = 0x004, + GX2_ATTRIB_FORMAT_UNORM_8_8_8_8 = 0x00A, GX2_ATTRIB_FORMAT_UINT_8 = 0x100, GX2_ATTRIB_FORMAT_UINT_8_8 = 0x104, @@ -39,6 +39,22 @@ typedef enum GX2AttribFormat GX2_ATTRIB_FORMAT_SINT_8_8 = 0x304, GX2_ATTRIB_FORMAT_SINT_8_8_8_8 = 0x30A, + GX2_ATTRIB_FORMAT_UNORM_16 = 0x002, + GX2_ATTRIB_FORMAT_UNORM_16_16 = 0x007, + GX2_ATTRIB_FORMAT_UNORM_16_16_16_16 = 0x00E, + + GX2_ATTRIB_FORMAT_UINT_16 = 0x102, + GX2_ATTRIB_FORMAT_UINT_16_16 = 0x107, + GX2_ATTRIB_FORMAT_UINT_16_16_16_16 = 0x10E, + + GX2_ATTRIB_FORMAT_SNORM_16 = 0x202, + GX2_ATTRIB_FORMAT_SNORM_16_16 = 0x207, + GX2_ATTRIB_FORMAT_SNORM_16_16_16_16 = 0x20E, + + 
GX2_ATTRIB_FORMAT_SINT_16 = 0x302, + GX2_ATTRIB_FORMAT_SINT_16_16 = 0x307, + GX2_ATTRIB_FORMAT_SINT_16_16_16_16 = 0x30E, + GX2_ATTRIB_FORMAT_FLOAT_32 = 0x806, GX2_ATTRIB_FORMAT_FLOAT_32_32 = 0x80d, GX2_ATTRIB_FORMAT_FLOAT_32_32_32 = 0x811, diff --git a/ext/wiiu/include/wiiu/gx2/registers.h b/ext/wiiu/include/wiiu/gx2/registers.h index 2e712943a72d..4e7688870b6e 100644 --- a/ext/wiiu/include/wiiu/gx2/registers.h +++ b/ext/wiiu/include/wiiu/gx2/registers.h @@ -228,8 +228,13 @@ void GX2InitViewportReg(GX2ViewportReg *reg, float x, float y, float width, floa void GX2GetViewportReg(GX2ViewportReg *reg, float *x, float *y, float *width, float *height, float *nearZ, float *farZ); void GX2SetViewportReg(GX2ViewportReg *reg); + #ifdef __cplusplus } #endif -/** @} */ +#if 0 +#include +#define GX2SetViewport(x, y, width, height, nearZ, farZ) do{GX2SetViewport(x, y, width, height, nearZ, farZ); printf("(%f,%f,%f,%f,%f,%f)\n", x, y, width, height, nearZ, farZ); DEBUG_LINE();}while(0) +#define GX2SetViewportReg(reg) do{GX2SetViewportReg(reg); DEBUG_LINE();}while(0) +#endif diff --git a/ext/wiiu/include/wiiu/gx2/shaders.h b/ext/wiiu/include/wiiu/gx2/shaders.h index ceb883e7af31..4d5caf52993b 100644 --- a/ext/wiiu/include/wiiu/gx2/shaders.h +++ b/ext/wiiu/include/wiiu/gx2/shaders.h @@ -536,16 +536,16 @@ uint32_t GX2CalcFetchShaderSizeEx(uint32_t attribs, GX2FetchShaderType fetchShad GX2TessellationMode tesellationMode); void GX2InitFetchShaderEx(GX2FetchShader *fetchShader, uint8_t *buffer, uint32_t attribCount, - GX2AttribStream *attribs, GX2FetchShaderType type, GX2TessellationMode tessMode); + const GX2AttribStream *attribs, GX2FetchShaderType type, GX2TessellationMode tessMode); -void GX2SetFetchShader(GX2FetchShader *shader); -void GX2SetVertexShader(GX2VertexShader *shader); -void GX2SetPixelShader(GX2PixelShader *shader); -void GX2SetGeometryShader(GX2GeometryShader *shader); +void GX2SetFetchShader(const GX2FetchShader *shader); +void GX2SetVertexShader(const 
GX2VertexShader *shader); +void GX2SetPixelShader(const GX2PixelShader *shader); +void GX2SetGeometryShader(const GX2GeometryShader *shader); -void GX2SetVertexSampler(GX2Sampler *sampler, uint32_t id); -void GX2SetPixelSampler(GX2Sampler *sampler, uint32_t id); -void GX2SetGeometrySampler(GX2Sampler *sampler, uint32_t id); +void GX2SetVertexSampler(const GX2Sampler *sampler, uint32_t id); +void GX2SetPixelSampler(const GX2Sampler *sampler, uint32_t id); +void GX2SetGeometrySampler(const GX2Sampler *sampler, uint32_t id); void GX2SetVertexUniformReg(uint32_t offset, uint32_t count, uint32_t *data); void GX2SetPixelUniformReg(uint32_t offset, uint32_t count, uint32_t *data); void GX2SetVertexUniformBlock(uint32_t location, uint32_t size, const void *data); @@ -576,6 +576,21 @@ uint32_t GX2GetVertexShaderStackEntries(GX2VertexShader *shader); uint32_t GX2GetGeometryShaderGPRs(GX2GeometryShader *shader); uint32_t GX2GetGeometryShaderStackEntries(GX2GeometryShader *shader); +static inline +void GX2InitFetchShader(GX2FetchShader *fetchShader, uint8_t *buffer, uint32_t attribCount, + const GX2AttribStream *attribs) +{ + GX2InitFetchShaderEx(fetchShader, buffer, attribCount, attribs, + GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); +} + +static inline uint32_t GX2CalcFetchShaderSize(uint32_t attribs) +{ + return GX2CalcFetchShaderSizeEx(attribs, + GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); +} + + #ifdef __cplusplus } #endif diff --git a/ext/wiiu/include/wiiu/gx2/shaders_asm.h b/ext/wiiu/include/wiiu/gx2/shaders_asm.h index 43c84391edf9..b08ed53a6b3d 100644 --- a/ext/wiiu/include/wiiu/gx2/shaders_asm.h +++ b/ext/wiiu/include/wiiu/gx2/shaders_asm.h @@ -96,6 +96,9 @@ #ifndef GX2_COMP_SEL #define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) +#ifdef __cplusplus +#include +#endif #define _x 0 #define _y 1 #define _z 2 diff --git a/ext/wiiu/include/wiiu/gx2/surface.h 
b/ext/wiiu/include/wiiu/gx2/surface.h index 9b1ffa37c4e3..9e41ffac1621 100644 --- a/ext/wiiu/include/wiiu/gx2/surface.h +++ b/ext/wiiu/include/wiiu/gx2/surface.h @@ -76,3 +76,8 @@ void GX2SetClearDepthStencil(GX2DepthBuffer *depthBuffer, float depth, uint8_t s #ifdef __cplusplus } #endif +#if 0 +#include +#define GX2SetColorBuffer(colorBuffer, target) do{GX2SetColorBuffer(colorBuffer, target); DEBUG_PTR(colorBuffer); DEBUG_LINE();}while(0) +#define GX2SetDepthBuffer(depthBuffer) do{GX2SetDepthBuffer(depthBuffer); DEBUG_PTR(depthBuffer); DEBUG_LINE();}while(0) +#endif diff --git a/ext/wiiu/include/wiiu/gx2/texture.h b/ext/wiiu/include/wiiu/gx2/texture.h index 1fd42830da90..03e30e3387f0 100644 --- a/ext/wiiu/include/wiiu/gx2/texture.h +++ b/ext/wiiu/include/wiiu/gx2/texture.h @@ -25,5 +25,3 @@ void GX2SetGeometryTexture(GX2Texture *texture, uint32_t unit); #ifdef __cplusplus } #endif - -/** @} */ diff --git a/ext/wiiu/include/wiiu/os/debug.h b/ext/wiiu/include/wiiu/os/debug.h index 4047f6e85e9d..14b10b11d374 100644 --- a/ext/wiiu/include/wiiu/os/debug.h +++ b/ext/wiiu/include/wiiu/os/debug.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -35,6 +36,7 @@ void OSSetDABR(BOOL allCores, void* addr, BOOL reads, BOOL writes); /* #define DEBUG_HOLD() do{printf("%s@%s:%d.\n",__FUNCTION__, __FILE__, __LINE__);fflush(stdout);wait_for_input();}while(0) */ #define DEBUG_LINE() do{printf("%s:%4d %s().\n", __FILE__, __LINE__, __FUNCTION__);fflush(stdout);}while(0) #define DEBUG_BREAK() do{DEBUG_LINE();__asm__ volatile (".int 0x0FE00016");}while(0) +#define DEBUG_BREAK_ONCE() do{static bool debug_break_done; if(!debug_break_done){debug_break_done=true; DEBUG_LINE(); __asm__ volatile (".int 0x0FE00016");}}while(0) #define DEBUG_CRASH() do{DEBUG_LINE(); *(u32*)0 = 0;}while(0) #define DEBUG_STR(X) do{printf( "%s: %s\n", #X, (char*)(X));fflush(stdout);}while(0) #define DEBUG_VAR(X) do{printf( "%-20s: 0x%08" PRIX32 "\n", #X, 
(uint32_t)(X));fflush(stdout);}while(0)