From f7783b50a83883b4c62eea5853d8636f5cb1fa72 Mon Sep 17 00:00:00 2001 From: Try Date: Mon, 18 Nov 2024 22:54:09 +0100 Subject: [PATCH] fixes for mesh-shader * fix writing of standalone variables to output * fix for unused variable warning for `spvThreadCount` --- ...er-basic-lines.msl3.spv14.vk.nocompat.mesh | 3 +- ...basic-triangle.msl3.spv14.vk.nocompat.mesh | 3 +- ...r-flat-varying.msl3.spv14.vk.nocompat.mesh | 121 ++++++++++++++++++ ...er-basic-lines.msl3.spv14.vk.nocompat.mesh | 3 +- ...basic-triangle.msl3.spv14.vk.nocompat.mesh | 3 +- ...r-flat-varying.msl3.spv14.vk.nocompat.mesh | 121 ++++++++++++++++++ ...r-flat-varying.msl3.spv14.vk.nocompat.mesh | 23 ++++ spirv_msl.cpp | 4 +- 8 files changed, 275 insertions(+), 6 deletions(-) create mode 100644 reference/opt/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh create mode 100644 reference/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh create mode 100644 shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh diff --git a/reference/opt/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh b/reference/opt/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh index 5d0d9287e..d7c4871b9 100644 --- a/reference/opt/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh @@ -168,7 +168,7 @@ void _4(threadgroup spvUnsafeArray& gl_PrimitiveLineIndicesEXT, threa return; } spvMesh.set_primitive_count(spvMeshSizes.y); - const uint spvThreadCount = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); + const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); const uint spvVI = gl_LocalInvocationIndex; if (gl_LocalInvocationIndex < spvMeshSizes.x) { @@ -177,6 +177,7 @@ void _4(threadgroup spvUnsafeArray& gl_PrimitiveLineIndicesEXT, threa spvV.gl_PointSize = gl_MeshVerticesEXT[spvVI].gl_PointSize; spvV.gl_ClipDistance[0] = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; spvV.gl_ClipDistance_0 = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.vOut = vOut[spvVI]; spvV.outputs_a = outputs[spvVI].a; spvV.outputs_b = outputs[spvVI].b; spvMesh.set_vertex(spvVI, spvV); diff --git a/reference/opt/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh b/reference/opt/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh index 6e7889e6b..f94a68b16 100644 --- a/reference/opt/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh @@ -168,7 +168,7 @@ void _4(threadgroup spvUnsafeArray& gl_MeshVerticesEXT, return; } spvMesh.set_primitive_count(spvMeshSizes.y); - const uint spvThreadCount = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); + const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); const uint spvVI = gl_LocalInvocationIndex; if (gl_LocalInvocationIndex < spvMeshSizes.x) { @@ -177,6 +177,7 @@ void _4(threadgroup spvUnsafeArray& gl_MeshVerticesEXT, spvV.gl_PointSize = gl_MeshVerticesEXT[spvVI].gl_PointSize; spvV.gl_ClipDistance[0] = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; spvV.gl_ClipDistance_0 = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.vOut = vOut[spvVI]; spvV.outputs_a = outputs[spvVI].a; spvV.outputs_b = outputs[spvVI].b; spvMesh.set_vertex(spvVI, spvV); diff --git a/reference/opt/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh b/reference/opt/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh new file mode 100644 index 000000000..b1e06402a --- /dev/null +++ b/reference/opt/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } + + object_data T& operator [] (size_t pos) object_data + { + return elements[pos]; + } + constexpr const object_data T& operator [] (size_t pos) const object_data + { + return elements[pos]; + } +}; + +void spvSetMeshOutputsEXT(uint gl_LocalInvocationIndex, threadgroup uint2& spvMeshSizes, uint vertexCount, uint primitiveCount) +{ + if (gl_LocalInvocationIndex == 0) + { + spvMeshSizes.x = vertexCount; + spvMeshSizes.y = primitiveCount; + } +} + +struct gl_MeshPerVertexEXT +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 3u, 4u); + +struct spvPerVertex +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; + float gl_ClipDistance_0 [[user(clip0)]]; + float fOut [[user(locn0)]]; + uint uiOut [[user(locn1)]]; +}; + +using spvMesh_t = mesh; + +static inline __attribute__((always_inline)) +void _4(threadgroup spvUnsafeArray& gl_MeshVerticesEXT, thread uint& gl_LocalInvocationIndex, thread uint3& gl_GlobalInvocationID, threadgroup spvUnsafeArray& fOut, threadgroup spvUnsafeArray& uiOut, threadgroup uint2& spvMeshSizes) +{ + spvSetMeshOutputsEXT(gl_LocalInvocationIndex, spvMeshSizes, 24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + fOut[gl_LocalInvocationIndex] = float(gl_GlobalInvocationID.x); + uiOut[gl_LocalInvocationIndex] = gl_GlobalInvocationID.y; +} + +[[mesh]] void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], spvMesh_t spvMesh) +{ + threadgroup uint2 spvMeshSizes; + threadgroup spvUnsafeArray gl_MeshVerticesEXT; + threadgroup spvUnsafeArray fOut; + threadgroup spvUnsafeArray uiOut; + if (gl_LocalInvocationIndex == 0) spvMeshSizes.y = 0u; + _4(gl_MeshVerticesEXT, gl_LocalInvocationIndex, gl_GlobalInvocationID, fOut, uiOut, spvMeshSizes); + threadgroup_barrier(mem_flags::mem_threadgroup); + if (spvMeshSizes.y == 0) + { + return; + } + spvMesh.set_primitive_count(spvMeshSizes.y); + const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); + const uint spvVI = gl_LocalInvocationIndex; + if (gl_LocalInvocationIndex < spvMeshSizes.x) + { + spvPerVertex spvV = {}; + spvV.gl_Position = gl_MeshVerticesEXT[spvVI].gl_Position; + spvV.gl_ClipDistance[0] = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.gl_ClipDistance_0 = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.fOut = fOut[spvVI]; + spvV.uiOut = uiOut[spvVI]; + spvMesh.set_vertex(spvVI, spvV); + } +} + diff --git a/reference/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh b/reference/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh index 2811b4587..62b96ec39 100644 --- a/reference/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh +++ b/reference/shaders-msl/mesh/mesh-shader-basic-lines.msl3.spv14.vk.nocompat.mesh @@ -177,7 +177,7 @@ void _4(threadgroup spvUnsafeArray& gl_PrimitiveLineIndicesEXT, threa return; } spvMesh.set_primitive_count(spvMeshSizes.y); - const uint spvThreadCount = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); + const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); const uint spvVI = gl_LocalInvocationIndex; if (gl_LocalInvocationIndex < spvMeshSizes.x) { @@ -186,6 +186,7 @@ void _4(threadgroup spvUnsafeArray& gl_PrimitiveLineIndicesEXT, threa spvV.gl_PointSize = gl_MeshVerticesEXT[spvVI].gl_PointSize; spvV.gl_ClipDistance[0] = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; spvV.gl_ClipDistance_0 = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.vOut = vOut[spvVI]; spvV.outputs_a = outputs[spvVI].a; spvV.outputs_b = outputs[spvVI].b; spvMesh.set_vertex(spvVI, spvV); diff --git a/reference/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh b/reference/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh index b40dd48ec..8c02206a8 100644 --- a/reference/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh +++ b/reference/shaders-msl/mesh/mesh-shader-basic-triangle.msl3.spv14.vk.nocompat.mesh @@ -165,7 +165,7 @@ void _4(threadgroup spvUnsafeArray& gl_MeshVerticesEXT, return; } spvMesh.set_primitive_count(spvMeshSizes.y); - const uint spvThreadCount = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); + const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); const uint spvVI = gl_LocalInvocationIndex; if (gl_LocalInvocationIndex < spvMeshSizes.x) { @@ -174,6 +174,7 @@ void _4(threadgroup spvUnsafeArray& gl_MeshVerticesEXT, spvV.gl_PointSize = gl_MeshVerticesEXT[spvVI].gl_PointSize; spvV.gl_ClipDistance[0] = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; spvV.gl_ClipDistance_0 = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.vOut = vOut[spvVI]; spvV.outputs_a = outputs[spvVI].a; spvV.outputs_b = outputs[spvVI].b; spvMesh.set_vertex(spvVI, spvV); diff --git a/reference/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh b/reference/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh new file mode 100644 index 000000000..b1e06402a --- /dev/null +++ b/reference/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } + + object_data T& operator [] (size_t pos) object_data + { + return elements[pos]; + } + constexpr const object_data T& operator [] (size_t pos) const object_data + { + return elements[pos]; + } +}; + +void spvSetMeshOutputsEXT(uint gl_LocalInvocationIndex, threadgroup uint2& spvMeshSizes, uint vertexCount, uint primitiveCount) +{ + if (gl_LocalInvocationIndex == 0) + { + spvMeshSizes.x = vertexCount; + spvMeshSizes.y = primitiveCount; + } +} + +struct gl_MeshPerVertexEXT +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 3u, 4u); + +struct spvPerVertex +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; + float gl_ClipDistance_0 [[user(clip0)]]; + float fOut [[user(locn0)]]; + uint uiOut [[user(locn1)]]; +}; + +using spvMesh_t = mesh; + +static inline __attribute__((always_inline)) +void _4(threadgroup spvUnsafeArray& gl_MeshVerticesEXT, thread uint& gl_LocalInvocationIndex, thread uint3& gl_GlobalInvocationID, threadgroup spvUnsafeArray& fOut, threadgroup spvUnsafeArray& uiOut, threadgroup uint2& spvMeshSizes) +{ + spvSetMeshOutputsEXT(gl_LocalInvocationIndex, spvMeshSizes, 24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + fOut[gl_LocalInvocationIndex] = float(gl_GlobalInvocationID.x); + uiOut[gl_LocalInvocationIndex] = gl_GlobalInvocationID.y; +} + +[[mesh]] void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], spvMesh_t spvMesh) +{ + threadgroup uint2 spvMeshSizes; + threadgroup spvUnsafeArray gl_MeshVerticesEXT; + threadgroup spvUnsafeArray fOut; + threadgroup spvUnsafeArray uiOut; + if (gl_LocalInvocationIndex == 0) spvMeshSizes.y = 0u; + _4(gl_MeshVerticesEXT, gl_LocalInvocationIndex, gl_GlobalInvocationID, fOut, uiOut, spvMeshSizes); + threadgroup_barrier(mem_flags::mem_threadgroup); + if (spvMeshSizes.y == 0) + { + return; + } + spvMesh.set_primitive_count(spvMeshSizes.y); + const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z); + const uint spvVI = gl_LocalInvocationIndex; + if (gl_LocalInvocationIndex < spvMeshSizes.x) + { + spvPerVertex spvV = {}; + spvV.gl_Position = gl_MeshVerticesEXT[spvVI].gl_Position; + spvV.gl_ClipDistance[0] = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.gl_ClipDistance_0 = gl_MeshVerticesEXT[spvVI].gl_ClipDistance[0]; + spvV.fOut = fOut[spvVI]; + spvV.uiOut = uiOut[spvVI]; + spvMesh.set_vertex(spvVI, spvV); + } +} + diff --git a/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh b/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh new file mode 100644 index 000000000..60d47aca8 --- /dev/null +++ b/shaders-msl/mesh/mesh-shader-flat-varying.msl3.spv14.vk.nocompat.mesh @@ -0,0 +1,23 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_ClipDistance[1]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out float fOut[]; +layout(location = 1) out flat uint uiOut[]; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + fOut[gl_LocalInvocationIndex] = float(gl_GlobalInvocationID.x); + uiOut[gl_LocalInvocationIndex] = gl_GlobalInvocationID.y; +} diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 94a1765f8..79efc1eda 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -19215,7 +19215,7 @@ void CompilerMSL::emit_mesh_outputs() end_scope(); statement("spvMesh.set_primitive_count(spvMeshSizes.y);"); - statement("const uint spvThreadCount = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z);"); + statement("const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z);"); if (mesh_out_per_vertex != 0) { @@ -19240,7 +19240,7 @@ void CompilerMSL::emit_mesh_outputs() uint32_t orig_id = get_extended_member_decoration(type_vert.self, index, SPIRVCrossDecorationInterfaceMemberIndex); // Clip/cull distances are special-case - if (orig_id == (~0u)) + if (orig_var == 0 && orig_id == (~0u)) continue; auto &orig = get(orig_var);