Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Render a software depth buffer in parallel with HW rendering #19748

Merged
merged 28 commits into from
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
58adb37
GE debugger improvements
hrydgard Dec 18, 2024
b442183
Add "Realtime" checkbox to pixel viewer
hrydgard Dec 19, 2024
c5ad81e
Add DepthRaster.cpp/h. Rasterize depth rectangles, some triangles
hrydgard Dec 17, 2024
d27d8c9
Remove subpixel precision. Some sketching.
hrydgard Dec 19, 2024
09afe36
One less operation in the inner loop
hrydgard Dec 19, 2024
72c954d
Add convenient wrappers
hrydgard Dec 19, 2024
c92b3b6
Move prototype cross simd wrapper structs to CrossSIMD.h
hrydgard Dec 20, 2024
c7f0eab
DepthRaster: Premultiply world-view-proj matrices
hrydgard Dec 20, 2024
dd31518
DepthRaster: Merge the decode and transform steps
hrydgard Dec 20, 2024
bdb5f3a
Reorganize the depth vertex pipeline for future optimizations
hrydgard Dec 20, 2024
bdf4b69
Warning fixes, minor cleanup
hrydgard Dec 20, 2024
de45960
Reformat CrossSIMD.h for easier editing. Add some new methods.
hrydgard Dec 20, 2024
03b9f98
Add more functionality to CrossSIMD.h, like fast matrix mul and some c…
hrydgard Dec 20, 2024
6a1010a
Use CrossSIMD to optimize DecodeAndTransformForDepthRaster
hrydgard Dec 20, 2024
0b009c1
CrossSIMD: Add reciprocal, clamp, swaplowerelements, etc
hrydgard Dec 20, 2024
67078d4
Depth raster: Switch to a SoA data layout for the screen space verts
hrydgard Dec 20, 2024
820e736
Speed up DepthRasterClipIndexedTriangles with CrossSIMD
hrydgard Dec 20, 2024
65692d0
CrossSIMD: possible buildfix?
hrydgard Dec 21, 2024
a344d02
DepthRaster: Fix bug where we used the wrong vertex count.
hrydgard Dec 20, 2024
f886578
DepthRaster: Fix backface culling
hrydgard Dec 20, 2024
ad18098
Minor sign check optimization
hrydgard Dec 20, 2024
d1b50ea
Comment
hrydgard Dec 21, 2024
2051d55
CrossSIMD: Add a bunch more functionality for use by the rasterizer
hrydgard Dec 21, 2024
399570e
CrossSIMD: make the transpose function compatible with ARM32
hrydgard Dec 21, 2024
73ae6da
Reimplement the depth rasterizer with SIMD.
hrydgard Dec 21, 2024
5df88fc
Convert the rect implementation to CrossSIMD
hrydgard Dec 21, 2024
8cd86b4
AnyZeroSignBit arm fix, more crosssimd fixes. Now works on ARM.
hrydgard Dec 21, 2024
80cb57f
Cleanup
hrydgard Dec 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1906,6 +1906,8 @@ set(GPU_SOURCES
GPU/Common/Draw2D.cpp
GPU/Common/Draw2D.h
GPU/Common/DepthBufferCommon.cpp
GPU/Common/DepthRaster.cpp
GPU/Common/DepthRaster.h
GPU/Common/TextureShaderCommon.cpp
GPU/Common/TextureShaderCommon.h
GPU/Common/DepalettizeShaderCommon.cpp
Expand Down
4 changes: 2 additions & 2 deletions Common/Data/Convert/ColorConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void ConvertBGRA8888ToRGB888(u8 *dst, const u32 *src, u32 numPixels) {
}

#if PPSSPP_ARCH(SSE2)
// fp64's improved version, see #19751
// fp64's improved SSE2 version, see #19751. SSE4 no longer required here.
static inline void ConvertRGBA8888ToRGBA5551(__m128i *dstp, const __m128i *srcp, u32 sseChunks) {
const __m128i maskRB = _mm_set1_epi32(0x00F800F8);
const __m128i maskGA = _mm_set1_epi32(0x8000F800);
Expand All @@ -76,7 +76,7 @@ static inline void ConvertRGBA8888ToRGBA5551(__m128i *dstp, const __m128i *srcp,
__m128i c0 = _mm_load_si128(&srcp[i + 0]);
__m128i c1 = _mm_load_si128(&srcp[i + 1]);

__m128i rb0 = _mm_and_si128(c0, maskRB); // 00000000bbbbb00000000000rrrrr000
__m128i rb0 = _mm_and_si128(c0, maskRB); // 00000000bbbbb00000000000rrrrr000 (each 32-bit lane)
__m128i rb1 = _mm_and_si128(c1, maskRB); // 00000000bbbbb00000000000rrrrr000
__m128i ga0 = _mm_and_si128(c0, maskGA); // a000000000000000ggggg00000000000
__m128i ga1 = _mm_and_si128(c1, maskGA); // a000000000000000ggggg00000000000
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/DataFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ bool DataFormatIsDepthStencil(DataFormat fmt);
// A format is treated as a color format exactly when
// DataFormatIsDepthStencil() reports that it is not a depth/stencil format.
inline bool DataFormatIsColor(DataFormat fmt) {
	const bool depthStencil = DataFormatIsDepthStencil(fmt);
	return !depthStencil;
}
int DataFormatNumChannels(DataFormat fmt);
bool DataFormatIsBlockCompressed(DataFormat fmt, int *blockSize);

// Limited format support for now.
Expand Down
8 changes: 7 additions & 1 deletion Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -803,9 +803,15 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanBarrierBatch *postBarriers, Vu
}

VkComponentMapping r8AsAlpha[4] = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R };
VkComponentMapping r8AsColor[4] = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE };

VkComponentMapping *swizzle = nullptr;
switch (desc.swizzle) {
case TextureSwizzle::R8_AS_ALPHA: swizzle = r8AsAlpha; break;
case TextureSwizzle::R8_AS_GRAYSCALE: swizzle = r8AsColor; break;
}
VulkanBarrierBatch barrier;
if (!vkTex_->CreateDirect(width_, height_, 1, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits, &barrier, desc.swizzle == TextureSwizzle::R8_AS_ALPHA ? r8AsAlpha : nullptr)) {
if (!vkTex_->CreateDirect(width_, height_, 1, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits, &barrier, swizzle)) {
ERROR_LOG(Log::G3D, "Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_);
return false;
}
Expand Down
19 changes: 19 additions & 0 deletions Common/GPU/thin3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,25 @@ bool DataFormatIsBlockCompressed(DataFormat fmt, int *blockSize) {
}
}

// Returns the number of channels in a pixel of the given format:
//   1 for the depth-only and single-channel (R) formats listed below,
//   4 for the RGBA/BGRA formats listed below (UNORM and UNORM_SRGB),
//   0 for every format this function does not list.
int DataFormatNumChannels(DataFormat fmt) {
	int channels = 0;  // Remains 0 for formats that are not handled below.
	switch (fmt) {
	// Depth-only and single-channel formats.
	case DataFormat::D16:
	case DataFormat::D32F:
	case DataFormat::R8_UNORM:
	case DataFormat::R16_UNORM:
	case DataFormat::R16_FLOAT:
	case DataFormat::R32_FLOAT:
		channels = 1;
		break;

	// Four-channel formats, in either channel order.
	case DataFormat::R8G8B8A8_UNORM:
	case DataFormat::R8G8B8A8_UNORM_SRGB:
	case DataFormat::B8G8R8A8_UNORM:
	case DataFormat::B8G8R8A8_UNORM_SRGB:
		channels = 4;
		break;

	default:
		break;
	}
	return channels;
}

RefCountedObject::~RefCountedObject() {
const int rc = refcount_.load();
_dbg_assert_msg_(rc == 0xDEDEDE, "Unexpected refcount %d in object of type '%s'", rc, name_);
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ typedef std::function<bool(uint8_t *data, const uint8_t *initData, uint32_t w, u
// Selects how the channels of a texture are presented when it is sampled.
// The numeric values are spelled out; they match the implicit numbering.
enum class TextureSwizzle {
	// No remapping requested.
	DEFAULT = 0,
	// The R channel is exposed as alpha; the Vulkan backend maps this to
	// the component swizzle (ONE, ONE, ONE, R).
	R8_AS_ALPHA = 1,
	// The R channel is replicated into the color channels; the Vulkan backend
	// maps this to the component swizzle (R, R, R, ONE).
	R8_AS_GRAYSCALE = 2,
};

struct TextureDesc {
Expand Down
Loading
Loading