From aa2562ef0afd9246a9720f546b9fe9b9f1b851fe Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 29 Nov 2024 15:44:26 +1100 Subject: [PATCH] Metal: Support Apple4 GPUs (2017 era iOS devices) Closes #99682 --- drivers/metal/metal_device_properties.h | 25 +- drivers/metal/metal_device_properties.mm | 1 + drivers/metal/metal_objects.h | 21 +- drivers/metal/metal_objects.mm | 449 +++++++++-- drivers/metal/rendering_device_driver_metal.h | 6 +- .../metal/rendering_device_driver_metal.mm | 155 ++-- thirdparty/README.md | 2 +- thirdparty/spirv-cross/spirv.hpp | 12 +- thirdparty/spirv-cross/spirv_common.hpp | 10 +- thirdparty/spirv-cross/spirv_cross.cpp | 16 +- .../spirv-cross/spirv_cross_parsed_ir.cpp | 7 +- thirdparty/spirv-cross/spirv_glsl.cpp | 116 ++- thirdparty/spirv-cross/spirv_msl.cpp | 747 ++++++++++++++++-- thirdparty/spirv-cross/spirv_msl.hpp | 16 +- thirdparty/spirv-cross/spirv_reflect.cpp | 2 + 15 files changed, 1353 insertions(+), 232 deletions(-) diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index 7467e8ceb49a..af7746b9ac54 100644 --- a/drivers/metal/metal_device_properties.h +++ b/drivers/metal/metal_device_properties.h @@ -71,18 +71,19 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) { }; struct API_AVAILABLE(macos(11.0), ios(14.0)) MetalFeatures { - uint32_t mslVersion; - MTLGPUFamily highestFamily; - MTLLanguageVersion mslVersionEnum; - SampleCount supportedSampleCounts; - long hostMemoryPageSize; - bool layeredRendering; - bool multisampleLayeredRendering; - bool quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */ - bool simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */ - bool simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */ - bool tessellationShader; /**< If true, tessellation shaders are supported. */ - bool imageCubeArray; /**< If true, image cube arrays are supported. */ + uint32_t mslVersion = 0; + MTLGPUFamily highestFamily = MTLGPUFamilyApple4; + MTLLanguageVersion mslVersionEnum = MTLLanguageVersion1_2; + SampleCount supportedSampleCounts = SampleCount1; + long hostMemoryPageSize = 0; + bool layeredRendering = false; + bool multisampleLayeredRendering = false; + bool quadPermute = false; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */ + bool simdPermute = false; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */ + bool simdReduction = false; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */ + bool tessellationShader = false; /**< If true, tessellation shaders are supported. */ + bool imageCubeArray = false; /**< If true, image cube arrays are supported. 
*/ + MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1; }; struct MetalLimits { diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm index 857fa8c66ee1..2c3f99dd50c2 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.mm @@ -98,6 +98,7 @@ features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4]; features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6]; features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7]; + features.argument_buffers_tier = p_device.argumentBuffersSupport; MTLCompileOptions *opts = [MTLCompileOptions new]; features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version. diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index 1870221b22c0..4cb929a9081f 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -696,11 +696,12 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDShader { public: CharString name; Vector sets; + bool uses_argument_buffers = true; virtual void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) = 0; - MDShader(CharString p_name, Vector p_sets) : - name(p_name), sets(p_sets) {} + MDShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers) : + name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {} virtual ~MDShader() = default; }; @@ -719,7 +720,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDComputeShader final : public MDSha void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; - MDComputeShader(CharString p_name, Vector p_sets, MDLibrary *p_kernel); + MDComputeShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel); }; class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderShader final : public MDShader { @@ -746,8 +747,9 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderShader final : public MDShad void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; MDRenderShader(CharString p_name, - bool p_needs_view_mask_buffer, Vector p_sets, + bool p_needs_view_mask_buffer, + bool p_uses_argument_buffers, MDLibrary *p_vert, MDLibrary *p_frag); }; @@ -783,12 +785,21 @@ struct BoundUniformSet { }; class API_AVAILABLE(macos(11.0), ios(14.0)) MDUniformSet { +private: + void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state); + void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state); + void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state); + void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state); + public: uint32_t index; LocalVector uniforms; HashMap bound_uniforms; - BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id p_device); + void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state); + void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state); + + BoundUniformSet &bound_uniform_set(MDShader *p_shader, id p_device, ResourceUsageMap &p_resource_usage); }; class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline { diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index a3a2f75fef60..b44c496e4f6f 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -249,7 +249,7 @@ const MDSubpass &subpass = render.get_subpass(); uint32_t vertex_count 
= p_rects.size() * 6 * subpass.view_count; - simd::float4 vertices[vertex_count]; + simd::float4 *vertices = ALLOCA_ARRAY(simd::float4, vertex_count); simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT]; Size2i size = render.frameBuffer->size; @@ -362,7 +362,7 @@ if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) { size_t len = render.scissors.size(); - MTLScissorRect rects[len]; + MTLScissorRect *rects = ALLOCA_ARRAY(MTLScissorRect, len); for (size_t i = 0; i < len; i++) { rects[i] = render.clip_to_render_area(render.scissors[i]); } @@ -466,9 +466,7 @@ uint64_t set_uniforms = render.uniform_set_mask; render.uniform_set_mask = 0; - id enc = render.encoder; MDRenderShader *shader = render.pipeline->shader; - id device = enc.device; while (set_uniforms != 0) { // Find the index of the next set bit. @@ -479,25 +477,7 @@ if (set == nullptr || set->index >= (uint32_t)shader->sets.size()) { continue; } - UniformSet const &set_info = shader->sets[set->index]; - - BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); - bus.merge_into(render.resource_usage); - - // Set the buffer for the vertex stage. - { - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX); - if (offset) { - [enc setVertexBuffer:bus.buffer offset:*offset atIndex:set->index]; - } - } - // Set the buffer for the fragment stage. - { - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT); - if (offset) { - [enc setFragmentBuffer:bus.buffer offset:*offset atIndex:set->index]; - } - } + set->bind_uniforms(shader, render); } } @@ -968,54 +948,21 @@ void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) { DEV_ASSERT(type == MDCommandBufferStateType::Compute); - id enc = compute.encoder; - id device = enc.device; - MDShader *shader = (MDShader *)(p_shader.id); - UniformSet const &set_info = shader->sets[p_set_index]; - MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id); - BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); - bus.merge_into(compute.resource_usage); - - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE); - if (offset) { - [enc setBuffer:bus.buffer offset:*offset atIndex:p_set_index]; - } + set->bind_uniforms(shader, compute); } void MDCommandBuffer::compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) { DEV_ASSERT(type == MDCommandBufferStateType::Compute); - id enc = compute.encoder; - id device = enc.device; - MDShader *shader = (MDShader *)(p_shader.id); - thread_local LocalVector<__unsafe_unretained id> buffers; - thread_local LocalVector offsets; - - buffers.resize(p_set_count); - offsets.resize(p_set_count); - + // TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:] for (size_t i = 0u; i < p_set_count; ++i) { - UniformSet const &set_info = shader->sets[p_first_set_index + i]; - MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id); - BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); - bus.merge_into(compute.resource_usage); - - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE); - if (offset) { - buffers[i] = bus.buffer; - offsets[i] = *offset; - } else { - buffers[i] = nullptr; - offsets[i] = 0u; - } + set->bind_uniforms(shader, compute); } - - [enc setBuffers:buffers.ptr() offsets:offsets.ptr() withRange:NSMakeRange(p_first_set_index, p_set_count)]; } 
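For reference, the batched form that the TODO above mentions (and that the removed compute path used) binds a contiguous range of buffer indices in one encoder call. A minimal sketch, assuming only Metal's documented -[MTLComputeCommandEncoder setBuffers:offsets:withRange:]; the helper and its names are illustrative, not part of the patch:

    #import <Metal/Metal.h>

    // Bind `count` argument buffers for sets [first_set, first_set + count) in a
    // single call, instead of `count` separate -setBuffer:offset:atIndex: calls.
    static void bind_set_buffers(id<MTLComputeCommandEncoder> enc,
            id<MTLBuffer> __unsafe_unretained const *buffers,
            NSUInteger const *offsets,
            NSUInteger first_set, NSUInteger count) {
        [enc setBuffers:buffers offsets:offsets withRange:NSMakeRange(first_set, count)];
    }

Routing each set through MDUniformSet::bind_uniforms keeps one code path for both the argument-buffer and direct-binding cases; the TODO tracks restoring the batched call where it still applies.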
void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { @@ -1052,8 +999,11 @@ type = MDCommandBufferStateType::None; } -MDComputeShader::MDComputeShader(CharString p_name, Vector p_sets, MDLibrary *p_kernel) : - MDShader(p_name, p_sets), kernel(p_kernel) { +MDComputeShader::MDComputeShader(CharString p_name, + Vector p_sets, + bool p_uses_argument_buffers, + MDLibrary *p_kernel) : + MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) { } void MDComputeShader::encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) { @@ -1071,15 +1021,19 @@ } MDRenderShader::MDRenderShader(CharString p_name, - bool p_needs_view_mask_buffer, Vector p_sets, + bool p_needs_view_mask_buffer, + bool p_uses_argument_buffers, MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : - MDShader(p_name, p_sets), needs_view_mask_buffer(p_needs_view_mask_buffer), vert(p_vert), frag(p_frag) { + MDShader(p_name, p_sets, p_uses_argument_buffers), + needs_view_mask_buffer(p_needs_view_mask_buffer), + vert(p_vert), + frag(p_frag) { } void MDRenderShader::encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) { DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render); - id enc = p_cb->render.encoder; + id __unsafe_unretained enc = p_cb->render.encoder; void const *ptr = p_data.ptr(); size_t length = p_data.size() * sizeof(uint32_t); @@ -1093,9 +1047,373 @@ } } -BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id p_device) { +void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) { + DEV_ASSERT(p_shader->uses_argument_buffers); + DEV_ASSERT(p_state.encoder != nil); + + UniformSet const &set_info = p_shader->sets[index]; + + id __unsafe_unretained enc = p_state.encoder; + id __unsafe_unretained device = enc.device; + + BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage); + + // Set the buffer for the vertex stage. + { + uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX); + if (offset) { + [enc setVertexBuffer:bus.buffer offset:*offset atIndex:index]; + } + } + // Set the buffer for the fragment stage. + { + uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT); + if (offset) { + [enc setFragmentBuffer:bus.buffer offset:*offset atIndex:index]; + } + } +} + +void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) { + DEV_ASSERT(!p_shader->uses_argument_buffers); + DEV_ASSERT(p_state.encoder != nil); + + id __unsafe_unretained enc = p_state.encoder; + + UniformSet const &set = p_shader->sets[index]; + + for (uint32_t i = 0; i < uniforms.size(); i++) { + RDD::BoundUniform const &uniform = uniforms[i]; + UniformInfo ui = set.uniforms[i]; + + static const RDC::ShaderStage stage_usages[2] = { RDC::ShaderStage::SHADER_STAGE_VERTEX, RDC::ShaderStage::SHADER_STAGE_FRAGMENT }; + for (const RDC::ShaderStage stage : stage_usages) { + ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage); + + BindingInfo *bi = ui.bindings.getptr(stage); + if (bi == nullptr) { + // No binding for this stage. + continue; + } + + if ((ui.active_stages & stage_usage) == 0) { + // Not active for this state, so don't bind anything. 
+ continue; + } + + switch (uniform.type) { + case RDD::UNIFORM_TYPE_SAMPLER: { + size_t count = uniform.ids.size(); + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j].id); + } + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; + } else { + [enc setFragmentSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { + size_t count = uniform.ids.size() / 2; + id __unsafe_unretained *textures = ALLOCA_ARRAY(id __unsafe_unretained, count); + id __unsafe_unretained *samplers = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (uint32_t j = 0; j < count; j += 1) { + id sampler = rid::get(uniform.ids[j * 2 + 0]); + id texture = rid::get(uniform.ids[j * 2 + 1]); + samplers[j] = sampler; + textures[j] = texture; + } + BindingInfo *sbi = ui.bindings_secondary.getptr(stage); + if (sbi) { + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; + } else { + [enc setFragmentSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; + } + } + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTextures:textures withRange:NSMakeRange(bi->index, count)]; + } else { + [enc setFragmentTextures:textures withRange:NSMakeRange(bi->index, count)]; + } + } break; + case RDD::UNIFORM_TYPE_TEXTURE: { + size_t count = uniform.ids.size(); + if (count == 1) { + id obj = rid::get(uniform.ids[0]); + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTexture:obj atIndex:bi->index]; + } else { + [enc setFragmentTexture:obj atIndex:bi->index]; + } + } else { + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)]; + } else { + [enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } + } break; + case RDD::UNIFORM_TYPE_IMAGE: { + size_t count = uniform.ids.size(); + if (count == 1) { + id obj = rid::get(uniform.ids[0]); + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTexture:obj atIndex:bi->index]; + } else { + [enc setFragmentTexture:obj atIndex:bi->index]; + } + + BindingInfo *sbi = ui.bindings_secondary.getptr(stage); + if (sbi) { + id tex = obj.parentTexture ? 
obj.parentTexture : obj; + id buf = tex.buffer; + if (buf) { + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; + } else { + [enc setFragmentBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; + } + } + } + } else { + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)]; + } else { + [enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } + } break; + case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { + CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: { + id buffer = rid::get(uniform.ids[0]); + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexBuffer:buffer offset:0 atIndex:bi->index]; + } else { + [enc setFragmentBuffer:buffer offset:0 atIndex:bi->index]; + } + } break; + case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { + id buffer = rid::get(uniform.ids[0]); + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexBuffer:buffer offset:0 atIndex:bi->index]; + } else { + [enc setFragmentBuffer:buffer offset:0 atIndex:bi->index]; + } + } break; + case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { + size_t count = uniform.ids.size(); + if (count == 1) { + id obj = rid::get(uniform.ids[0]); + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTexture:obj atIndex:bi->index]; + } else { + [enc setFragmentTexture:obj atIndex:bi->index]; + } + } else { + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + + if (stage == RDD::SHADER_STAGE_VERTEX) { + [enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)]; + } else { + [enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } + } break; + default: { + DEV_ASSERT(false); + } + } + } + } +} + +void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) { + if (p_shader->uses_argument_buffers) { + bind_uniforms_argument_buffers(p_shader, p_state); + } else { + bind_uniforms_direct(p_shader, p_state); + } +} + +void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) { + DEV_ASSERT(p_shader->uses_argument_buffers); + DEV_ASSERT(p_state.encoder != nil); + + UniformSet const &set_info = p_shader->sets[index]; + + id enc = p_state.encoder; + id device = enc.device; + + BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage); + + uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE); + if (offset) { + [enc setBuffer:bus.buffer offset:*offset atIndex:index]; + } +} + +void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) { + DEV_ASSERT(!p_shader->uses_argument_buffers); + DEV_ASSERT(p_state.encoder != nil); + + id __unsafe_unretained enc = p_state.encoder; + + UniformSet const &set = p_shader->sets[index]; + + for (uint32_t i = 0; i < uniforms.size(); i++) { + 
RDD::BoundUniform const &uniform = uniforms[i]; + UniformInfo ui = set.uniforms[i]; + + const RDC::ShaderStage stage = RDC::ShaderStage::SHADER_STAGE_COMPUTE; + const ShaderStageUsage stage_usage = ShaderStageUsage(1 << stage); + + BindingInfo *bi = ui.bindings.getptr(stage); + if (bi == nullptr) { + // No binding for this stage. + continue; + } + + if ((ui.active_stages & stage_usage) == 0) { + // Not active for this state, so don't bind anything. + continue; + } + + switch (uniform.type) { + case RDD::UNIFORM_TYPE_SAMPLER: { + size_t count = uniform.ids.size(); + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j].id); + } + [enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { + size_t count = uniform.ids.size() / 2; + id __unsafe_unretained *textures = ALLOCA_ARRAY(id __unsafe_unretained, count); + id __unsafe_unretained *samplers = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (uint32_t j = 0; j < count; j += 1) { + id sampler = rid::get(uniform.ids[j * 2 + 0]); + id texture = rid::get(uniform.ids[j * 2 + 1]); + samplers[j] = sampler; + textures[j] = texture; + } + BindingInfo *sbi = ui.bindings_secondary.getptr(stage); + if (sbi) { + [enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; + } + [enc setTextures:textures withRange:NSMakeRange(bi->index, count)]; + } break; + case RDD::UNIFORM_TYPE_TEXTURE: { + size_t count = uniform.ids.size(); + if (count == 1) { + id obj = rid::get(uniform.ids[0]); + [enc setTexture:obj atIndex:bi->index]; + } else { + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + case RDD::UNIFORM_TYPE_IMAGE: { + size_t count = uniform.ids.size(); + if (count == 1) { + id obj = rid::get(uniform.ids[0]); + [enc setTexture:obj atIndex:bi->index]; + + BindingInfo *sbi = ui.bindings_secondary.getptr(stage); + if (sbi) { + id tex = obj.parentTexture ? 
obj.parentTexture : obj; + id buf = tex.buffer; + if (buf) { + [enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; + } + } + } else { + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { + CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: { + id buffer = rid::get(uniform.ids[0]); + [enc setBuffer:buffer offset:0 atIndex:bi->index]; + } break; + case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { + id buffer = rid::get(uniform.ids[0]); + [enc setBuffer:buffer offset:0 atIndex:bi->index]; + } break; + case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { + size_t count = uniform.ids.size(); + if (count == 1) { + id obj = rid::get(uniform.ids[0]); + [enc setTexture:obj atIndex:bi->index]; + } else { + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + default: { + DEV_ASSERT(false); + } + } + } +} + +void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) { + if (p_shader->uses_argument_buffers) { + bind_uniforms_argument_buffers(p_shader, p_state); + } else { + bind_uniforms_direct(p_shader, p_state); + } +} + +BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id p_device, ResourceUsageMap &p_resource_usage) { BoundUniformSet *sus = bound_uniforms.getptr(p_shader); if (sus != nullptr) { + sus->merge_into(p_resource_usage); return *sus; } @@ -1261,6 +1579,7 @@ BoundUniformSet bs = { .buffer = enc_buffer, .usage_to_resources = usage_to_resources }; bound_uniforms.insert(p_shader, bs); + bs.merge_into(p_resource_usage); return bound_uniforms.get(p_shader); } diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 52eb0f79300d..0fff49da4106 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -61,7 +61,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public uint32_t version_major = 2; uint32_t version_minor = 0; - MetalDeviceProperties *metal_device_properties = nullptr; + MetalDeviceProperties *device_properties = nullptr; PixelFormats *pixel_formats = nullptr; std::unique_ptr resource_cache; @@ -431,10 +431,10 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public id get_device() const { return device; } PixelFormats &get_pixel_formats() const { return *pixel_formats; } MDResourceCache &get_resource_cache() const { return *resource_cache; } - MetalDeviceProperties const &get_device_properties() const { return *metal_device_properties; } + MetalDeviceProperties const &get_device_properties() const { return *device_properties; } _FORCE_INLINE_ uint32_t get_metal_buffer_index_for_vertex_attribute_binding(uint32_t p_binding) { - return (metal_device_properties->limits.maxPerStageBufferCount - 1) - 
p_binding; + return (device_properties->limits.maxPerStageBufferCount - 1) - p_binding; } size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const; diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index dea02f699775..dbb982efe823 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -218,7 +218,7 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve // desc.compressionType = MTLTextureCompressionTypeLossy; if (p_format.samples > TEXTURE_SAMPLES_1) { - SampleCount supported = (*metal_device_properties).find_nearest_supported_sample_count(p_format.samples); + SampleCount supported = (*device_properties).find_nearest_supported_sample_count(p_format.samples); if (supported > SampleCount1) { bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY; @@ -277,7 +277,7 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve // Usage. MTLResourceOptions options = 0; - const bool supports_memoryless = (*metal_device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*metal_device_properties).features.highestFamily < MTLGPUFamilyMac1; + const bool supports_memoryless = (*device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*device_properties).features.highestFamily < MTLGPUFamilyMac1; if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) { options = MTLResourceStorageModeMemoryless | MTLResourceHazardTrackingModeTracked; desc.storageMode = MTLStorageModeMemoryless; @@ -1058,7 +1058,7 @@ static const API_AVAILABLE(macos(11.0), ios(14.0)) MTLSamplerBorderColor SAMPLER #pragma mark - Shader -const uint32_t SHADER_BINARY_VERSION = 3; +const uint32_t SHADER_BINARY_VERSION = 4; // region Serialization @@ -1503,6 +1503,9 @@ void deserialize(BufReader &p_reader) { p_reader.read(index); p_reader.read(uniforms); } + UniformSetData() = default; + UniformSetData(uint32_t p_index) : + index(p_index) {} }; struct PushConstantData { @@ -1536,6 +1539,11 @@ void deserialize(BufReader &p_reader) { }; struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { + enum Flags : uint32_t { + NONE = 0, + NEEDS_VIEW_MASK_BUFFER = 1 << 0, + USES_ARGUMENT_BUFFERS = 1 << 1, + }; CharString shader_name; // The Metal language version specified when compiling SPIR-V to MSL. // Format is major * 10000 + minor * 100 + patch. 
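Packing these into a Flags bitmask replaces what were two full uint32_t fields (is_compute and needs_view_mask_buffer, removed in the next hunk) and leaves room for USES_ARGUMENT_BUFFERS; because the serialized layout changes, SHADER_BINARY_VERSION above is bumped from 3 to 4 so stale shader caches are regenerated. An illustrative use of the accessors added below (example usage, not code from the patch):

    ShaderBinaryData data;
    data.set_needs_view_mask_buffer(true); // flags |= NEEDS_VIEW_MASK_BUFFER
    data.set_uses_argument_buffers(false); // flags &= ~USES_ARGUMENT_BUFFERS
    // data.flags now equals ShaderBinaryData::NEEDS_VIEW_MASK_BUFFER and is
    // serialized as a single uint32_t; is_compute() is derived from the stages
    // vector rather than stored separately.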
@@ -1543,8 +1551,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { uint32_t vertex_input_mask = UINT32_MAX; uint32_t fragment_output_mask = UINT32_MAX; uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX; - uint32_t is_compute = UINT32_MAX; - uint32_t needs_view_mask_buffer = UINT32_MAX; + uint32_t flags = NONE; ComputeSize compute_local_size; PushConstantData push_constant; LocalVector stages; @@ -1557,17 +1564,46 @@ MTLLanguageVersion get_msl_version() const { return MTLLanguageVersion((major << 0x10) + minor); } + bool is_compute() const { + return std::any_of(stages.begin(), stages.end(), [](ShaderStageData const &e) { + return e.stage == RD::ShaderStage::SHADER_STAGE_COMPUTE; + }); + } + + bool needs_view_mask_buffer() const { + return flags & NEEDS_VIEW_MASK_BUFFER; + } + + void set_needs_view_mask_buffer(bool p_value) { + if (p_value) { + flags |= NEEDS_VIEW_MASK_BUFFER; + } else { + flags &= ~NEEDS_VIEW_MASK_BUFFER; + } + } + + bool uses_argument_buffers() const { + return flags & USES_ARGUMENT_BUFFERS; + } + + void set_uses_argument_buffers(bool p_value) { + if (p_value) { + flags |= USES_ARGUMENT_BUFFERS; + } else { + flags &= ~USES_ARGUMENT_BUFFERS; + } + } + size_t serialize_size() const { size_t size = 0; size += sizeof(uint32_t) + shader_name.length(); // shader_name - size += sizeof(uint32_t); // msl_version - size += sizeof(uint32_t); // vertex_input_mask - size += sizeof(uint32_t); // fragment_output_mask - size += sizeof(uint32_t); // spirv_specialization_constants_ids_mask - size += sizeof(uint32_t); // is_compute - size += sizeof(uint32_t); // needs_view_mask_buffer - size += compute_local_size.serialize_size(); // compute_local_size - size += push_constant.serialize_size(); // push_constant + size += sizeof(msl_version); + size += sizeof(vertex_input_mask); + size += sizeof(fragment_output_mask); + size += sizeof(spirv_specialization_constants_ids_mask); + size += sizeof(flags); + size += compute_local_size.serialize_size(); + size += push_constant.serialize_size(); size += sizeof(uint32_t); // stages.size() for (ShaderStageData const &e : stages) { size += e.serialize_size(); @@ -1589,8 +1625,7 @@ void serialize(BufWriter &p_writer) const { p_writer.write(vertex_input_mask); p_writer.write(fragment_output_mask); p_writer.write(spirv_specialization_constants_ids_mask); - p_writer.write(is_compute); - p_writer.write(needs_view_mask_buffer); + p_writer.write(flags); p_writer.write(compute_local_size); p_writer.write(push_constant); p_writer.write(VectorView(stages)); @@ -1604,8 +1639,7 @@ void deserialize(BufReader &p_reader) { p_reader.read(vertex_input_mask); p_reader.read(fragment_output_mask); p_reader.read(spirv_specialization_constants_ids_mask); - p_reader.read(is_compute); - p_reader.read(needs_view_mask_buffer); + p_reader.read(flags); p_reader.read(compute_local_size); p_reader.read(push_constant); p_reader.read(stages); @@ -1952,14 +1986,13 @@ void deserialize(BufReader &p_reader) { .y = spirv_data.compute_local_size[1], .z = spirv_data.compute_local_size[2], }; - bin_data.is_compute = spirv_data.is_compute; bin_data.push_constant.size = spirv_data.push_constant_size; bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages; - bin_data.needs_view_mask_buffer = shader_meta.has_multiview ? 
1 : 0; + bin_data.set_needs_view_mask_buffer(shader_meta.has_multiview); for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) { const ::Vector &spirv_set = spirv_data.uniform_sets[i]; - UniformSetData set{ .index = i }; + UniformSetData set(i); for (const ShaderUniform &spirv_uniform : spirv_set) { UniformData binding{}; binding.type = spirv_uniform.type; @@ -1999,10 +2032,25 @@ void deserialize(BufReader &p_reader) { #endif #if TARGET_OS_IOS - msl_options.ios_use_simdgroup_functions = (*metal_device_properties).features.simdPermute; + msl_options.ios_use_simdgroup_functions = (*device_properties).features.simdPermute; #endif - msl_options.argument_buffers = true; + bool disable_argument_buffers = false; + if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") { + disable_argument_buffers = true; + } + + if (device_properties->features.argument_buffers_tier >= MTLArgumentBuffersTier2 && !disable_argument_buffers) { + msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; + msl_options.argument_buffers = true; + bin_data.set_uses_argument_buffers(true); + } else { + msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1; + // Tier 1 argument buffers don't support writable textures, so we disable them completely. + msl_options.argument_buffers = false; + bin_data.set_uses_argument_buffers(false); + } + msl_options.force_active_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers. // msl_options.pad_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers. msl_options.texture_buffer_native = true; // Enable texture buffer support. @@ -2088,8 +2136,9 @@ void deserialize(BufReader &p_reader) { return res; }; - auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector &resources, Writable writable) { - for (Resource const &res : resources) { + auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector &p_resources, Writable p_writable) { + for (Resource const &res : p_resources) { + auto name = compiler.get_name(res.id); uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); UniformData *found = nullptr; @@ -2195,7 +2244,7 @@ void deserialize(BufReader &p_reader) { } // Update writable. - if (writable == Writable::Maybe) { + if (p_writable == Writable::Maybe) { if (basetype == BT::Struct) { Bitset flags = compiler.get_buffer_block_flags(res.id); if (!flags.get(spv::DecorationNonWritable)) { @@ -2384,6 +2433,11 @@ void deserialize(BufReader &p_reader) { ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer"); } + // We need to regenerate the shader if the cache is moved to an incompatible device. 
+ ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && binary_data.uses_argument_buffers(), + ShaderID(), + "Shader was generated with argument buffers, but device has limited support"); + MTLCompileOptions *options = [MTLCompileOptions new]; options.languageVersion = binary_data.get_msl_version(); HashMap libraries; @@ -2505,8 +2559,12 @@ void deserialize(BufReader &p_reader) { } MDShader *shader = nullptr; - if (binary_data.is_compute) { - MDComputeShader *cs = new MDComputeShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_COMPUTE]); + if (binary_data.is_compute()) { + MDComputeShader *cs = new MDComputeShader( + binary_data.shader_name, + uniform_sets, + binary_data.uses_argument_buffers(), + libraries[ShaderStage::SHADER_STAGE_COMPUTE]); uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE); if (binding) { @@ -2520,7 +2578,13 @@ void deserialize(BufReader &p_reader) { #endif shader = cs; } else { - MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, (bool)binary_data.needs_view_mask_buffer, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); + MDRenderShader *rs = new MDRenderShader( + binary_data.shader_name, + uniform_sets, + binary_data.needs_view_mask_buffer(), + binary_data.uses_argument_buffers(), + libraries[ShaderStage::SHADER_STAGE_VERTEX], + libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX); if (vert_binding) { @@ -2547,7 +2611,7 @@ void deserialize(BufReader &p_reader) { r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute; + r_shader_desc.is_compute = binary_data.is_compute(); r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x; r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y; r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z; @@ -2572,7 +2636,7 @@ void deserialize(BufReader &p_reader) { RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) { //p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend. - MDUniformSet *set = new MDUniformSet(); + MDUniformSet *set = memnew(MDUniformSet); Vector bound_uniforms; bound_uniforms.resize(p_uniforms.size()); for (uint32_t i = 0; i < p_uniforms.size(); i += 1) { @@ -2586,7 +2650,7 @@ void deserialize(BufReader &p_reader) { void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) { MDUniformSet *obj = (MDUniformSet *)p_uniform_set.id; - delete obj; + memdelete(obj); } void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { @@ -2800,7 +2864,7 @@ static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p uint32_t layerCnt = p_subresources.layer_count; uint32_t layerEnd = layerStart + layerCnt; - MetalFeatures const &features = (*metal_device_properties).features; + MetalFeatures const &features = (*device_properties).features; // Iterate across mipmap levels and layers, and perform and empty render to clear each. 
for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { @@ -3057,7 +3121,7 @@ bool isArrayTexture(MTLTextureType p_type) { MTLPixelFormat format = pf.getMTLPixelFormat(a.format); mda.format = format; if (a.samples > TEXTURE_SAMPLES_1) { - mda.samples = (*metal_device_properties).find_nearest_supported_sample_count(a.samples); + mda.samples = (*device_properties).find_nearest_supported_sample_count(a.samples); } mda.loadAction = LOAD_ACTIONS[a.load_op]; mda.storeAction = STORE_ACTIONS[a.store_op]; @@ -3436,7 +3500,7 @@ bool isArrayTexture(MTLTextureType p_type) { } if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) { - pipeline->sample_count = (*metal_device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count); + pipeline->sample_count = (*device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count); } desc.rasterSampleCount = static_cast(pipeline->sample_count); desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage; @@ -3815,7 +3879,7 @@ bool isArrayTexture(MTLTextureType p_type) { } uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { - MetalDeviceProperties const &props = (*metal_device_properties); + MetalDeviceProperties const &props = (*device_properties); MetalLimits const &limits = props.limits; #if defined(DEV_ENABLED) @@ -3911,11 +3975,13 @@ bool isArrayTexture(MTLTextureType p_type) { case LIMIT_SUBGROUP_MAX_SIZE: return limits.maxSubgroupSize; case LIMIT_SUBGROUP_IN_SHADERS: - return (int64_t)limits.subgroupSupportedShaderStages; + return (uint64_t)limits.subgroupSupportedShaderStages; case LIMIT_SUBGROUP_OPERATIONS: - return (int64_t)limits.subgroupSupportedOperations; + return (uint64_t)limits.subgroupSupportedOperations; UNKNOWN(LIMIT_VRS_TEXEL_WIDTH); UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT); + UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH); + UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT); default: ERR_FAIL_V(0); } @@ -4042,11 +4108,11 @@ bool isArrayTexture(MTLTextureType p_type) { // Set the pipeline cache ID based on the Metal version. pipeline_cache_id = "metal-driver-" + get_api_version(); - metal_device_properties = memnew(MetalDeviceProperties(device)); + device_properties = memnew(MetalDeviceProperties(device)); pixel_formats = memnew(PixelFormats(device)); - if (metal_device_properties->features.layeredRendering) { + if (device_properties->features.layeredRendering) { multiview_capabilities.is_supported = true; - multiview_capabilities.max_view_count = metal_device_properties->limits.maxViewports; + multiview_capabilities.max_view_count = device_properties->limits.maxViewports; // NOTE: I'm not sure what the limit is as I don't see it referenced anywhere multiview_capabilities.max_instance_count = UINT32_MAX; @@ -4057,11 +4123,10 @@ bool isArrayTexture(MTLTextureType p_type) { print_verbose("- Metal multiview not supported"); } - // Check required features and abort if any of them is missing. - if (!metal_device_properties->features.imageCubeArray) { - // NOTE: Apple A11 (Apple4) GPUs support image cube arrays, which are devices from 2017 and newer. - String error_string = vformat("Your Apple GPU does not support the following features which are required to use Metal-based renderers in Godot:\n\n"); - if (!metal_device_properties->features.imageCubeArray) { + // The Metal renderer requires Apple4 family. This is 2017 era A11 chips and newer. 
+ if (device_properties->features.highestFamily < MTLGPUFamilyApple4) { + String error_string = vformat("Your Apple GPU does not support the following features, which are required to use Metal-based renderers in Godot:\n\n"); + if (!device_properties->features.imageCubeArray) { error_string += "- No support for image cube arrays.\n"; } diff --git a/thirdparty/README.md b/thirdparty/README.md index 57701e94ee38..df0743afa682 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -881,7 +881,7 @@ proposed by these libraries and better integrate them with Godot. ## spirv-cross - Upstream: https://github.com/KhronosGroup/SPIRV-Cross -- Version: vulkan-sdk-1.3.290.0 (5d127b917f080c6f052553c47170ec0ba702e54f, 2024) +- Version: git (6173e24b31f09a0c3217103a130e74c4ddec14a6, 2024) - License: Apache 2.0 Files extracted from upstream source: diff --git a/thirdparty/spirv-cross/spirv.hpp b/thirdparty/spirv-cross/spirv.hpp index f2ee9096bdd5..5047b9b30230 100644 --- a/thirdparty/spirv-cross/spirv.hpp +++ b/thirdparty/spirv-cross/spirv.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2020 The Khronos Group Inc. +// Copyright (c) 2014-2024 The Khronos Group Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and/or associated documentation files (the "Materials"), @@ -507,6 +507,7 @@ enum Decoration { DecorationNoUnsignedWrap = 4470, DecorationWeightTextureQCOM = 4487, DecorationBlockMatchTextureQCOM = 4488, + DecorationBlockMatchSamplerQCOM = 4499, DecorationExplicitInterpAMD = 4999, DecorationOverrideCoverageNV = 5248, DecorationPassthroughNV = 5250, @@ -992,6 +993,7 @@ enum Capability { CapabilityTextureSampleWeightedQCOM = 4484, CapabilityTextureBoxFilterQCOM = 4485, CapabilityTextureBlockMatchQCOM = 4486, + CapabilityTextureBlockMatch2QCOM = 4498, CapabilityFloat16ImageAMD = 5008, CapabilityImageGatherBiasLodAMD = 5009, CapabilityFragmentMaskAMD = 5010, @@ -1601,6 +1603,10 @@ enum Op { OpImageBoxFilterQCOM = 4481, OpImageBlockMatchSSDQCOM = 4482, OpImageBlockMatchSADQCOM = 4483, + OpImageBlockMatchWindowSSDQCOM = 4500, + OpImageBlockMatchWindowSADQCOM = 4501, + OpImageBlockMatchGatherSSDQCOM = 4502, + OpImageBlockMatchGatherSADQCOM = 4503, OpGroupIAddNonUniformAMD = 5000, OpGroupFAddNonUniformAMD = 5001, OpGroupFMinNonUniformAMD = 5002, @@ -2280,6 +2286,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpImageBoxFilterQCOM: *hasResult = true; *hasResultType = true; break; case OpImageBlockMatchSSDQCOM: *hasResult = true; *hasResultType = true; break; case OpImageBlockMatchSADQCOM: *hasResult = true; *hasResultType = true; break; + case OpImageBlockMatchWindowSSDQCOM: *hasResult = true; *hasResultType = true; break; + case OpImageBlockMatchWindowSADQCOM: *hasResult = true; *hasResultType = true; break; + case OpImageBlockMatchGatherSSDQCOM: *hasResult = true; *hasResultType = true; break; + case OpImageBlockMatchGatherSADQCOM: *hasResult = true; *hasResultType = true; break; case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; diff --git a/thirdparty/spirv-cross/spirv_common.hpp b/thirdparty/spirv-cross/spirv_common.hpp index 93b266977097..b70536d9ecc0 100644 --- a/thirdparty/spirv-cross/spirv_common.hpp +++ b/thirdparty/spirv-cross/spirv_common.hpp @@ -578,7 +578,9 @@ struct SPIRType : IVariant // Keep 
internal types at the end. ControlPointArray, Interpolant, - Char + Char, + // MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader. + MeshGridProperties }; // Scalar/vector/matrix support. @@ -746,6 +748,10 @@ struct SPIRExpression : IVariant // A list of expressions which this expression depends on. SmallVector expression_dependencies; + // Similar as expression dependencies, but does not stop the tracking for force-temporary variables. + // We need to know the full chain from store back to any SSA variable. + SmallVector invariance_dependencies; + // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. SmallVector implied_read_expressions; @@ -1598,6 +1604,8 @@ struct AccessChainMeta bool flattened_struct = false; bool relaxed_precision = false; bool access_meshlet_position_y = false; + bool chain_is_builtin = false; + spv::BuiltIn builtin = {}; }; enum ExtendedDecorations diff --git a/thirdparty/spirv-cross/spirv_cross.cpp b/thirdparty/spirv-cross/spirv_cross.cpp index 8c3e7d381202..3492f0b3ed9b 100644 --- a/thirdparty/spirv-cross/spirv_cross.cpp +++ b/thirdparty/spirv-cross/spirv_cross.cpp @@ -1850,6 +1850,11 @@ const SmallVector &Compiler::get_case_list(const SPIRBlock &blo const auto &type = get(constant->constant_type); width = type.width; } + else if (const auto *op = maybe_get(block.condition)) + { + const auto &type = get(op->basetype); + width = type.width; + } else if (const auto *var = maybe_get(block.condition)) { const auto &type = get(var->basetype); @@ -2564,6 +2569,15 @@ void Compiler::add_active_interface_variable(uint32_t var_id) void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) { + auto *ptr_e = maybe_get(dst); + + if (is_position_invariant() && ptr_e && maybe_get(source_expression)) + { + auto &deps = ptr_e->invariance_dependencies; + if (std::find(deps.begin(), deps.end(), source_expression) == deps.end()) + deps.push_back(source_expression); + } + // Don't inherit any expression dependencies if the expression in dst // is not a forwarded temporary. if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) || @@ -2572,7 +2586,7 @@ void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_exp return; } - auto &e = get(dst); + auto &e = *ptr_e; auto *phi = maybe_get(source_expression); if (phi && phi->phi_variable) { diff --git a/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp index 3072cd8abb05..b05afeb3f57e 100644 --- a/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -564,7 +564,8 @@ Bitset ParsedIR::get_buffer_block_type_flags(const SPIRType &type) const Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const { auto &type = get(var.basetype); - assert(type.basetype == SPIRType::Struct); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW("Cannot get buffer block flags for non-buffer variable."); // Some flags like non-writable, non-readable are actually found // as member decorations. 
If all members have a decoration set, propagate @@ -927,6 +928,8 @@ void ParsedIR::reset_all_of_type(Types type) void ParsedIR::add_typed_id(Types type, ID id) { + assert(id < ids.size()); + if (loop_iteration_depth_hard != 0) SPIRV_CROSS_THROW("Cannot add typed ID while looping over it."); @@ -1029,6 +1032,8 @@ ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_ void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set) { + assert(id < ids.size()); + auto &constant_type = get(type); if (constant_type.pointer) diff --git a/thirdparty/spirv-cross/spirv_glsl.cpp b/thirdparty/spirv-cross/spirv_glsl.cpp index fad1132e82a7..6c1d5208b980 100644 --- a/thirdparty/spirv-cross/spirv_glsl.cpp +++ b/thirdparty/spirv-cross/spirv_glsl.cpp @@ -2764,6 +2764,8 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) block_qualifier = "patch "; else if (has_decoration(var.self, DecorationPerPrimitiveEXT)) block_qualifier = "perprimitiveEXT "; + else if (has_decoration(var.self, DecorationPerVertexKHR)) + block_qualifier = "pervertexEXT "; else block_qualifier = ""; @@ -3691,11 +3693,11 @@ void CompilerGLSL::emit_resources() auto &type = this->get(undef.basetype); // OpUndef can be void for some reason ... if (type.basetype == SPIRType::Void) - return; + continue; // This will break. It is bogus and should not be legal. if (type_is_top_level_block(type)) - return; + continue; string initializer; if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) @@ -6436,7 +6438,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t if (splat) { res += convert_to_string(c.scalar(vector, 0)); - if (is_legacy()) + if (is_legacy() && !has_extension("GL_EXT_gpu_shader4")) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. @@ -6455,7 +6457,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t else { res += convert_to_string(c.scalar(vector, i)); - if (is_legacy()) + if (is_legacy() && !has_extension("GL_EXT_gpu_shader4")) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
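The GL_EXT_gpu_shader4 checks above narrow the legacy workaround: previously every unsigned suffix was dropped on legacy targets, but the extension makes `u` literals valid even on otherwise-legacy GLSL versions. A sketch of the resulting rule, written as an illustrative stand-alone helper rather than the compiler's actual code path:

    #include <cstdint>
    #include <string>

    static std::string uint_literal(uint32_t v, bool is_legacy, bool has_gpu_shader4) {
        std::string s = std::to_string(v);
        if (!is_legacy || has_gpu_shader4) {
            s += "u"; // the target can express native unsigned literals
        }
        return s; // e.g. uint_literal(3, true, false) == "3", faked as signed
    }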
@@ -10208,6 +10210,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool pending_array_enclose = false; bool dimension_flatten = false; bool access_meshlet_position_y = false; + bool chain_is_builtin = false; + spv::BuiltIn chained_builtin = {}; if (auto *base_expr = maybe_get(base)) { @@ -10365,6 +10369,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice auto builtin = ir.meta[base].decoration.builtin_type; bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; + chain_is_builtin = true; + chained_builtin = builtin; + switch (builtin) { case BuiltInCullDistance: @@ -10500,6 +10507,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice { access_meshlet_position_y = true; } + + chain_is_builtin = true; + chained_builtin = builtin; } else { @@ -10719,6 +10729,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice meta->storage_physical_type = physical_type; meta->relaxed_precision = relaxed_precision; meta->access_meshlet_position_y = access_meshlet_position_y; + meta->chain_is_builtin = chain_is_builtin; + meta->builtin = chained_builtin; } return expr; @@ -11764,13 +11776,13 @@ void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression // Allow trivially forwarded expressions like OpLoad or trivial shuffles, // these will be marked as having suppressed usage tracking. // Our only concern is to make sure arithmetic operations are done in similar ways. - if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && - forced_invariant_temporaries.count(expr.self) == 0) + if (forced_invariant_temporaries.count(expr.self) == 0) { - force_temporary_and_recompile(expr.self); + if (!expression_suppresses_usage_tracking(expr.self)) + force_temporary_and_recompile(expr.self); forced_invariant_temporaries.insert(expr.self); - for (auto &dependent : expr.expression_dependencies) + for (auto &dependent : expr.invariance_dependencies) disallow_forwarding_in_expression_chain(get(dependent)); } } @@ -12334,6 +12346,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) flattened_structs[ops[1]] = true; if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) set_decoration(ops[1], DecorationRelaxedPrecision); + if (meta.chain_is_builtin) + set_decoration(ops[1], DecorationBuiltIn, meta.builtin); // If we have some expression dependencies in our access chain, this access chain is technically a forwarded // temporary which could be subject to invalidation. @@ -13227,13 +13241,24 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - // Needs special handling. + auto &out_type = get(result_type); + bool forward = should_forward(op0) && should_forward(op1); - auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, int_type, op0, op1, false); + + // Needs special handling. 
+ auto expr = join(cast_op0, " - ", cast_op1, " * ", "(", cast_op0, " / ", cast_op1, ")"); if (implicit_integer_promotion) + { expr = join(type_to_glsl(get(result_type)), '(', expr, ')'); + } + else if (out_type.basetype != int_type) + { + expected_type.basetype = int_type; + expr = join(bitcast_glsl_op(out_type, expected_type), '(', expr, ')'); + } emit_op(result_type, result_id, expr, forward); inherit_expression_dependencies(result_id, op0); @@ -14481,6 +14506,50 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpImageBlockMatchWindowSSDQCOM: + case OpImageBlockMatchWindowSADQCOM: + case OpImageBlockMatchGatherSSDQCOM: + case OpImageBlockMatchGatherSADQCOM: + { + require_extension_internal("GL_QCOM_image_processing2"); + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + string expr; + switch (opcode) + { + case OpImageBlockMatchWindowSSDQCOM: + expr = "textureBlockMatchWindowSSDQCOM"; + break; + case OpImageBlockMatchWindowSADQCOM: + expr = "textureBlockMatchWindowSADQCOM"; + break; + case OpImageBlockMatchGatherSSDQCOM: + expr = "textureBlockMatchGatherSSDQCOM"; + break; + case OpImageBlockMatchGatherSADQCOM: + expr = "textureBlockMatchGatherSADQCOM"; + break; + default: + SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing2."); + } + expr += "("; + + bool forward = false; + expr += to_expression(ops[2]); + expr += ", " + to_expression(ops[3]); + + expr += ", " + to_non_uniform_aware_expression(ops[4]); + expr += ", " + to_expression(ops[5]); + expr += ", " + to_expression(ops[6]); + + expr += ")"; + emit_op(result_type_id, id, expr, forward); + + inherit_expression_dependencies(id, ops[3]); + inherit_expression_dependencies(id, ops[5]); + break; + } + // Compute case OpControlBarrier: case OpMemoryBarrier: @@ -15622,7 +15691,16 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) if (type.pointer) { - if (arg.write_count && arg.read_count) + // If we're passing around block types to function, we really mean reference in a pointer sense, + // but DXC does not like inout for mesh blocks, so workaround that. out is technically not correct, + // but it works in practice due to legalization. It's ... not great, but you gotta do what you gotta do. + // GLSL will never hit this case since it's not valid. 
+ if (type.storage == StorageClassOutput && get_execution_model() == ExecutionModelMeshEXT && + has_decoration(type.self, DecorationBlock) && is_builtin_type(type) && arg.write_count) + { + direction = "out "; + } + else if (arg.write_count && arg.read_count) direction = "inout "; else if (arg.write_count) direction = "out "; @@ -15899,7 +15977,7 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*m case DimBuffer: if (options.es && options.version < 320) require_extension_internal("GL_EXT_texture_buffer"); - else if (!options.es && options.version < 300) + else if (!options.es && options.version < 140) require_extension_internal("GL_EXT_texture_buffer_object"); res += "Buffer"; break; @@ -16442,6 +16520,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) { auto &var = get(v); var.deferred_declaration = false; + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + continue; if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup)) { @@ -17608,7 +17688,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (!collapsed_switch) { - if (block_like_switch || is_legacy_es()) + if (block_like_switch || is_legacy()) { // ESSL 1.0 is not guaranteed to support do/while. if (is_legacy_es()) @@ -17638,7 +17718,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // Default case. if (!block_like_switch) { - if (is_legacy_es()) + if (is_legacy()) statement("else"); else statement("default:"); @@ -17646,7 +17726,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } else { - if (is_legacy_es()) + if (is_legacy()) { statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix), ")"); @@ -17698,7 +17778,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (block.default_block == block.next_block) { - if (is_legacy_es()) + if (is_legacy()) statement("else"); else statement("default:"); @@ -17712,7 +17792,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (!collapsed_switch) { - if (block_like_switch && !is_legacy_es()) + if ((block_like_switch || is_legacy()) && !is_legacy_es()) end_scope_decl("while(false)"); else end_scope(); diff --git a/thirdparty/spirv-cross/spirv_msl.cpp b/thirdparty/spirv-cross/spirv_msl.cpp index 383ce688e981..642fcfa59a39 100644 --- a/thirdparty/spirv-cross/spirv_msl.cpp +++ b/thirdparty/spirv-cross/spirv_msl.cpp @@ -202,6 +202,9 @@ uint32_t CompilerMSL::get_resource_array_size(const SPIRType &type, uint32_t id) { uint32_t array_size = to_array_size_literal(type); + if (id == 0) + return array_size; + // If we have argument buffers, we need to honor the ABI by using the correct array size // from the layout. Only use shader declared size if we're not using argument buffers. 
uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); @@ -269,7 +272,7 @@ void CompilerMSL::build_implicit_builtins() (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) || active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); - bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); + bool need_local_invocation_index = (msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId)) || is_mesh_shader(); bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); bool force_frag_depth_passthrough = get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input && @@ -278,7 +281,7 @@ void CompilerMSL::build_implicit_builtins() if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation || - has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough) + has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough || is_mesh_shader()) { bool has_frag_coord = false; bool has_sample_id = false; @@ -325,6 +328,13 @@ void CompilerMSL::build_implicit_builtins() } } + if (builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || + builtin == BuiltInPrimitiveTriangleIndicesEXT) + { + builtin_mesh_primitive_indices_id = var.self; + } + if (var.storage != StorageClassInput) return; @@ -1057,6 +1067,53 @@ void CompilerMSL::build_implicit_builtins() set_decoration(var_id, DecorationBuiltIn, BuiltInPosition); mark_implicit_builtin(StorageClassOutput, BuiltInPosition, var_id); } + + if (is_mesh_shader()) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create variable to store meshlet size. 
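+ // Sketch of the resulting layout: spvMeshSizes is a threadgroup uint2 whose
+ // .x receives the vertex count and .y the primitive count passed to
+ // SetMeshOutputsEXT; .y == 0 doubles as the "nothing to render" sentinel
+ // tested in emit_mesh_outputs().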
+ uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 2); + SPIRType uint_type_ptr = get(type_id); + uint_type_ptr.op = OpTypePointer; + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassWorkgroup; + + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + set(var_id, type_ptr_id, StorageClassWorkgroup); + set_name(var_id, "spvMeshSizes"); + builtin_mesh_sizes_id = var_id; + } + + if (get_execution_model() == spv::ExecutionModelTaskEXT) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + SPIRType mesh_grid_type { OpTypeStruct }; + mesh_grid_type.basetype = SPIRType::MeshGridProperties; + set(type_id, mesh_grid_type); + + SPIRType mesh_grid_type_ptr = mesh_grid_type; + mesh_grid_type_ptr.op = spv::OpTypePointer; + mesh_grid_type_ptr.pointer = true; + mesh_grid_type_ptr.pointer_depth++; + mesh_grid_type_ptr.parent_type = type_id; + mesh_grid_type_ptr.storage = StorageClassOutput; + + auto &ptr_in_type = set(type_ptr_id, mesh_grid_type_ptr); + ptr_in_type.self = type_id; + set(var_id, type_ptr_id, StorageClassOutput); + set_name(var_id, "spvMgp"); + builtin_task_grid_id = var_id; + } } // Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active. @@ -1509,6 +1566,10 @@ void CompilerMSL::emit_entry_point_declarations() statement(CompilerGLSL::variable_decl(var), ";"); var.deferred_declaration = false; } + + // Holds SetMeshOutputsEXT information. Threadgroup since first thread wins. + if (processing_entry_point && is_mesh_shader()) + statement("threadgroup uint2 spvMeshSizes;"); } string CompilerMSL::compile() @@ -1544,6 +1605,8 @@ string CompilerMSL::compile() backend.native_pointers = true; backend.nonuniform_qualifier = ""; backend.support_small_type_sampling_result = true; + backend.force_merged_mesh_block = false; + backend.force_gl_in_out_block = get_execution_model() == ExecutionModelMeshEXT; backend.supports_empty_struct = true; backend.support_64bit_switch = true; backend.boolean_in_struct_remapped_type = SPIRType::Short; @@ -1559,6 +1622,9 @@ string CompilerMSL::compile() capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; + if (is_mesh_shader() && !get_entry_point().flags.get(ExecutionModeOutputPoints)) + msl_options.enable_point_size_builtin = false; + // Initialize array here rather than constructor, MSVC 2013 workaround. 
for (auto &id : next_metal_resource_ids) id = 0; @@ -1566,6 +1632,11 @@ string CompilerMSL::compile() fixup_anonymous_struct_names(); fixup_type_alias(); replace_illegal_names(); + if (get_execution_model() == ExecutionModelMeshEXT) + { + // Emit proxy entry-point for the sake of copy-pass + emit_mesh_entry_point(); + } sync_entry_point_aliases_and_names(); build_function_control_flow_graphs_and_analyze(); @@ -1576,8 +1647,7 @@ string CompilerMSL::compile() preprocess_op_codes(); build_implicit_builtins(); - if (needs_manual_helper_invocation_updates() && - (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + if (needs_manual_helper_invocation_updates() && needs_helper_invocation) { string builtin_helper_invocation = builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput); string discard_expr = join(builtin_helper_invocation, " = true, discard_fragment()"); @@ -1618,9 +1688,17 @@ string CompilerMSL::compile() // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. qual_pos_var_name = ""; - stage_out_var_id = add_interface_block(StorageClassOutput); - patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); - stage_in_var_id = add_interface_block(StorageClassInput); + if (is_mesh_shader()) + { + fixup_implicit_builtin_block_names(get_execution_model()); + } + else + { + stage_out_var_id = add_interface_block(StorageClassOutput); + patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); + stage_in_var_id = add_interface_block(StorageClassInput); + } + if (is_tese_shader()) patch_stage_in_var_id = add_interface_block(StorageClassInput, true); @@ -1629,6 +1707,12 @@ string CompilerMSL::compile() if (is_tessellation_shader()) stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput); + if (is_mesh_shader()) + { + mesh_out_per_vertex = add_meshlet_block(false); + mesh_out_per_primitive = add_meshlet_block(true); + } + // Metal vertex functions that define no output must disable rasterization and return void. 
if (!stage_out_var_id) is_rasterization_disabled = true; @@ -1721,7 +1805,7 @@ void CompilerMSL::preprocess_op_codes() (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) || (need_subpass_input_ms && !msl_options.use_framebuffer_fetch_subpasses)))) needs_sample_id = true; - if (preproc.needs_helper_invocation) + if (preproc.needs_helper_invocation || active_input_builtins.get(BuiltInHelperInvocation)) needs_helper_invocation = true; // OpKill is removed by the parser, so we need to identify those by inspecting @@ -1763,12 +1847,18 @@ void CompilerMSL::localize_global_variables() { uint32_t v_id = *iter; auto &var = get(v_id); - if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup) + if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassTaskPayloadWorkgroupEXT) { if (!variable_is_lut(var)) entry_func.add_local_variable(v_id); iter = global_variables.erase(iter); } + else if (var.storage == StorageClassOutput && is_mesh_shader()) + { + entry_func.add_local_variable(v_id); + iter = global_variables.erase(iter); + } else iter++; } @@ -2058,8 +2148,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: } case OpDemoteToHelperInvocation: - if (needs_manual_helper_invocation_updates() && - (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + if (needs_manual_helper_invocation_updates() && needs_helper_invocation) added_arg_ids.insert(builtin_helper_invocation_id); break; @@ -2107,18 +2196,30 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: break; } + case OpSetMeshOutputsEXT: + { + if (builtin_local_invocation_index_id != 0) + added_arg_ids.insert(builtin_local_invocation_index_id); + if (builtin_mesh_sizes_id != 0) + added_arg_ids.insert(builtin_mesh_sizes_id); + break; + } + default: break; } if (needs_manual_helper_invocation_updates() && b.terminator == SPIRBlock::Kill && - (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + needs_helper_invocation) added_arg_ids.insert(builtin_helper_invocation_id); // TODO: Add all other operations which can affect memory. // We should consider a more unified system here to reduce boiler-plate. // This kind of analysis is done in several places ... 
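// (For instance, OpStore, OpImageWrite and the OpAtomic* family can all touch
// global memory that would then need to be plumbed through as an argument.)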
} + + if (b.terminator == SPIRBlock::EmitMeshTasks && builtin_task_grid_id != 0) + added_arg_ids.insert(builtin_task_grid_id); } function_global_vars[func_id] = added_arg_ids; @@ -2208,6 +2309,17 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: if (is_tese_shader() && msl_options.raw_buffer_tese_input && var.storage == StorageClassInput) set_decoration(next_id, DecorationNonWritable); } + else if (is_builtin && is_mesh_shader()) + { + uint32_t next_id = ir.increase_bound_by(1); + func.add_parameter(type_id, next_id, true); + auto &v = set(next_id, type_id, StorageClassFunction, 0, arg_id); + v.storage = StorageClassWorkgroup; + + // Ensure the existing variable has a valid name and the new variable has all the same meta info + set_name(arg_id, ensure_valid_name(to_name(arg_id), "v")); + ir.meta[next_id] = ir.meta[arg_id]; + } else if (is_builtin && has_decoration(p_type->self, DecorationBlock)) { // Get the pointee type @@ -4492,6 +4604,42 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla return ib_ptr_var_id; } +uint32_t CompilerMSL::add_meshlet_block(bool per_primitive) +{ + // Accumulate the variables that should appear in the interface struct. + SmallVector vars; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (var.storage != StorageClassOutput || var.self == builtin_mesh_primitive_indices_id) + return; + if (is_per_primitive_variable(var) != per_primitive) + return; + vars.push_back(&var); + }); + + if (vars.empty()) + return 0; + + uint32_t next_id = ir.increase_bound_by(1); + auto &type = set(next_id, SPIRType(OpTypeStruct)); + type.basetype = SPIRType::Struct; + + InterfaceBlockMeta meta; + for (auto *p_var : vars) + { + meta.strip_array = true; + meta.allow_local_declaration = false; + add_variable_to_interface_block(StorageClassOutput, "", type, *p_var, meta); + } + + if (per_primitive) + set_name(type.self, "spvPerPrimitive"); + else + set_name(type.self, "spvPerVertex"); + + return next_id; +} + // Ensure that the type is compatible with the builtin. // If it is, simply return the given type ID. // Otherwise, create a new type, and return it's ID. @@ -5484,6 +5632,19 @@ void CompilerMSL::emit_custom_templates() begin_scope(); statement("return elements[pos];"); end_scope(); + if (get_execution_model() == spv::ExecutionModelMeshEXT || + get_execution_model() == spv::ExecutionModelTaskEXT) + { + statement(""); + statement("object_data T& operator [] (size_t pos) object_data"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const object_data T& operator [] (size_t pos) const object_data"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + } end_scope_decl(); statement(""); break; @@ -7601,6 +7762,28 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplMulExtended: + // Compiler may hit an internal error with mulhi, but doesn't when encapsulated for some reason. 
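+ // Reconstructed for reference, the helper assembled by the statements below reads:
+ //   template <typename T, typename U, typename V>
+ //   [[clang::optnone]] T spvMulExtended(V l, V r)
+ //   {
+ //       return T{U(l * r), U(mulhi(l, r))};
+ //   }
+ // [[clang::optnone]] is presumably what keeps the mulhi pattern from being re-fused.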
+ statement("template"); + statement("[[clang::optnone]] T spvMulExtended(V l, V r)"); + begin_scope(); + statement("return T{U(l * r), U(mulhi(l, r))};"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSetMeshOutputsEXT: + statement("void spvSetMeshOutputsEXT(uint gl_LocalInvocationIndex, threadgroup uint2& spvMeshSizes, uint vertexCount, uint primitiveCount)"); + begin_scope(); + statement("if (gl_LocalInvocationIndex == 0)"); + begin_scope(); + statement("spvMeshSizes.x = vertexCount;"); + statement("spvMeshSizes.y = primitiveCount;"); + end_scope(); + end_scope(); + statement(""); + break; + default: break; } @@ -7702,6 +7885,23 @@ void CompilerMSL::emit_resources() emit_interface_block(patch_stage_out_var_id); emit_interface_block(stage_in_var_id); emit_interface_block(patch_stage_in_var_id); + + if (get_execution_model() == ExecutionModelMeshEXT) + { + auto &execution = get_entry_point(); + const char *topology = ""; + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + topology = "topology::triangle"; + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + topology = "topology::line"; + else if (execution.flags.get(ExecutionModeOutputPoints)) + topology = "topology::point"; + + const char *per_primitive = mesh_out_per_primitive ? "spvPerPrimitive" : "void"; + statement("using spvMesh_t = mesh<", "spvPerVertex, ", per_primitive, ", ", execution.output_vertices, ", ", + execution.output_primitives, ", ", topology, ">;"); + statement(""); + } } // Emit declarations for the specialization Metal function constants @@ -7725,7 +7925,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() mark_scalar_layout_structs(type); }); - bool builtin_block_type_is_required = false; + bool builtin_block_type_is_required = is_mesh_shader(); // Very special case. If gl_PerVertex is initialized as an array (tessellation) // we have to potentially emit the gl_PerVertex struct type so that we can emit a constant LUT. ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { @@ -9552,13 +9752,13 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; auto &type = get(result_type); + auto &op_type = get(type.member_types[0]); auto input_type = opcode == OpSMulExtended ? 
int_type : uint_type;
 string cast_op0, cast_op1;
 binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false);
- emit_uninitialized_temporary_expression(result_type, result_id);
- statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", cast_op0, " * ", cast_op1, ";");
- statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", cast_op0, ", ", cast_op1, ");");
+ auto expr = join("spvMulExtended<", type_to_glsl(type), ", ", type_to_glsl(op_type), ">(", cast_op0, ", ", cast_op1, ")");
+ emit_op(result_type, result_id, expr, true);
 break;
 }
@@ -9917,6 +10117,14 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 break;
 }
+ case OpSetMeshOutputsEXT:
+ {
+ flush_variable_declaration(builtin_mesh_primitive_indices_id);
+ add_spv_func_and_recompile(SPVFuncImplSetMeshOutputsEXT);
+ statement("spvSetMeshOutputsEXT(gl_LocalInvocationIndex, spvMeshSizes, ", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
+ break;
+ }
+
 default:
 CompilerGLSL::emit_instruction(instruction);
 break;
@@ -9958,8 +10166,13 @@ void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse)
 void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
 {
- if (get_execution_model() != ExecutionModelGLCompute && !is_tesc_shader())
+ auto model = get_execution_model();
+
+ if (model != ExecutionModelGLCompute && model != ExecutionModelTaskEXT &&
+ model != ExecutionModelMeshEXT && !is_tesc_shader())
+ {
 return;
+ }
 uint32_t exe_scope = id_exe_scope ? evaluate_constant_u32(id_exe_scope) : uint32_t(ScopeInvocation);
 uint32_t mem_scope = id_mem_scope ? evaluate_constant_u32(id_mem_scope) : uint32_t(ScopeInvocation);
@@ -10305,6 +10518,13 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 {
 auto obj_expression = to_expression(obj);
 auto split_index = obj_expression.find_first_of('@');
+ bool needs_reinterpret = opcode == OpAtomicUMax || opcode == OpAtomicUMin || opcode == OpAtomicSMax || opcode == OpAtomicSMin;
+ needs_reinterpret &= type.basetype != expected_type;
+ SPIRVariable *backing_var = nullptr;
+
+ // Register the texture-cast helper up front, so any required recompile is
+ // requested before we end up in "force recompile later" mode below.
+ if (needs_reinterpret && (backing_var = maybe_get_backing_variable(obj)))
+ add_spv_func_and_recompile(SPVFuncImplTextureCast);
 // Will only be false if we're in "force recompile later" mode.
 if (split_index != string::npos)
 {
@@ -10315,27 +10535,21 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 // Handle problem cases with sign where we need signed min/max on a uint image for example.
 // It seems to work to cast the texture type itself, even if it is probably wildly outside of spec,
 // but SPIR-V requires this to work.
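// As an illustration (hypothetical texture type): an OpAtomicSMax on a uint-backed
// image ends up operating through
//   spvTextureCast<texture2d<int, access::read_write>>(img)
// so the signedness of the min/max follows the opcode rather than the declared
// texture type.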
- if ((opcode == OpAtomicUMax || opcode == OpAtomicUMin || - opcode == OpAtomicSMax || opcode == OpAtomicSMin) && - type.basetype != expected_type) + if (needs_reinterpret && backing_var) { - auto *backing_var = maybe_get_backing_variable(obj); - if (backing_var) - { - add_spv_func_and_recompile(SPVFuncImplTextureCast); + assert(spv_function_implementations.count(SPVFuncImplTextureCast) && "Should have been added above"); - const auto *backing_type = &get(backing_var->basetype); - while (backing_type->op != OpTypeImage) - backing_type = &get(backing_type->parent_type); + const auto *backing_type = &get(backing_var->basetype); + while (backing_type->op != OpTypeImage) + backing_type = &get(backing_type->parent_type); - auto img_type = *backing_type; - auto tmp_type = type; - tmp_type.basetype = expected_type; - img_type.image.type = ir.increase_bound_by(1); - set(img_type.image.type, tmp_type); + auto img_type = *backing_type; + auto tmp_type = type; + tmp_type.basetype = expected_type; + img_type.image.type = ir.increase_bound_by(1); + set(img_type.image.type, tmp_type); - image_expr = join("spvTextureCast<", type_to_glsl(img_type, obj), ">(", image_expr, ")"); - } + image_expr = join("spvTextureCast<", type_to_glsl(img_type, obj), ">(", image_expr, ")"); } exp += join(image_expr, ".", op, "("); @@ -10999,6 +11213,21 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) if (ir.ids[initializer].get_type() == TypeNone || ir.ids[initializer].get_type() == TypeExpression) set(ed_var.initializer, "{}", ed_var.basetype, true); } + + // add `taskPayloadSharedEXT` variable to entry-point arguments + for (auto &v : func.local_variables) + { + auto &var = get(v); + if (var.storage != StorageClassTaskPayloadWorkgroupEXT) + continue; + + add_local_variable_name(v); + SPIRFunction::Parameter arg = {}; + arg.id = v; + arg.type = var.basetype; + arg.alias_global_variable = true; + decl += join(", ", argument_decl(arg), " [[payload]]"); + } } for (auto &arg : func.arguments) @@ -11316,7 +11545,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool if (args.has_array_offsets) { forward = forward && should_forward(args.offset); - farg_str += ", " + to_expression(args.offset); + farg_str += ", " + to_unpacked_expression(args.offset); } // Const offsets gather or swizzled gather puts the component before the other args. 
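// Note on the to_unpacked_expression() changes in the hunks below: when an argument
// is backed by a packed physical type (e.g. a packed_float3 struct member), the
// unpacked form re-applies the logical vector type, emitting float3(v.coord) rather
// than handing the raw packed member to the sampling intrinsic.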
@@ -11329,7 +11558,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool // Texture coordinates forward = forward && should_forward(args.coord); - auto coord_expr = to_enclosed_expression(args.coord); + auto coord_expr = to_enclosed_unpacked_expression(args.coord); auto &coord_type = expression_type(args.coord); bool coord_is_fp = type_is_floating_point(coord_type); bool is_cube_fetch = false; @@ -11453,14 +11682,14 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool if (type.basetype != SPIRType::UInt) tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, args.offset), ", 0)"); else - tex_coords += join(" + uint2(", to_enclosed_expression(args.offset), ", 0)"); + tex_coords += join(" + uint2(", to_enclosed_unpacked_expression(args.offset), ", 0)"); } else { if (type.basetype != SPIRType::UInt) tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset); else - tex_coords += " + " + to_enclosed_expression(args.offset); + tex_coords += " + " + to_enclosed_unpacked_expression(args.offset); } } @@ -11547,10 +11776,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool string dref_expr; if (args.base.is_proj) - dref_expr = join(to_enclosed_expression(args.dref), " / ", + dref_expr = join(to_enclosed_unpacked_expression(args.dref), " / ", to_extract_component_expression(args.coord, alt_coord_component)); else - dref_expr = to_expression(args.dref); + dref_expr = to_unpacked_expression(args.dref); if (sampling_type_needs_f32_conversion(dref_type)) dref_expr = convert_to_f32(dref_expr, 1); @@ -11601,7 +11830,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(bias); - farg_str += ", bias(" + to_expression(bias) + ")"; + farg_str += ", bias(" + to_unpacked_expression(bias) + ")"; } // Metal does not support LOD for 1D textures. 
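// As a sketch of the generated calls (assuming a 2D sample), the bias and level
// operands above and below land in MSL's trailing sampler-argument slots:
//   tex.sample(smp, coord, bias(b));
//   tex.sample(smp, coord, level(l));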
@@ -11610,7 +11839,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool forward = forward && should_forward(lod); if (args.base.is_fetch) { - farg_str += ", " + to_expression(lod); + farg_str += ", " + to_unpacked_expression(lod); } else if (msl_options.sample_dref_lod_array_as_grad && args.dref && imgtype.image.arrayed) { @@ -11667,12 +11896,12 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool extent = "float3(1.0)"; break; } - farg_str += join(", ", grad_opt, "(", grad_coord, "exp2(", to_expression(lod), " - 0.5) / ", extent, - ", exp2(", to_expression(lod), " - 0.5) / ", extent, ")"); + farg_str += join(", ", grad_opt, "(", grad_coord, "exp2(", to_unpacked_expression(lod), " - 0.5) / ", extent, + ", exp2(", to_unpacked_expression(lod), " - 0.5) / ", extent, ")"); } else { - farg_str += ", level(" + to_expression(lod) + ")"; + farg_str += ", level(" + to_unpacked_expression(lod) + ")"; } } else if (args.base.is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) && @@ -11718,7 +11947,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool grad_opt = "unsupported_gradient_dimension"; break; } - farg_str += join(", ", grad_opt, "(", grad_coord, to_expression(grad_x), ", ", to_expression(grad_y), ")"); + farg_str += join(", ", grad_opt, "(", grad_coord, to_unpacked_expression(grad_x), ", ", to_unpacked_expression(grad_y), ")"); } if (args.min_lod) @@ -11727,7 +11956,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up."); forward = forward && should_forward(args.min_lod); - farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")"; + farg_str += ", min_lod_clamp(" + to_unpacked_expression(args.min_lod) + ")"; } // Add offsets @@ -11736,7 +11965,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool if (args.offset && !args.base.is_fetch && !args.has_array_offsets) { forward = forward && should_forward(args.offset); - offset_expr = to_expression(args.offset); + offset_expr = to_unpacked_expression(args.offset); offset_type = &expression_type(args.offset); } @@ -11802,7 +12031,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool { forward = forward && should_forward(args.sample); farg_str += ", "; - farg_str += to_expression(args.sample); + farg_str += to_unpacked_expression(args.sample); } *p_forward = forward; @@ -12454,12 +12683,50 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ ((stage_out_var_id && get_stage_out_struct_type().self == type.self && variable_storage_requires_stage_io(StorageClassOutput)) || (stage_in_var_id && get_stage_in_struct_type().self == type.self && - variable_storage_requires_stage_io(StorageClassInput))); + variable_storage_requires_stage_io(StorageClassInput))) || + is_mesh_shader(); if (is_ib_in_out && is_member_builtin(type, index, &builtin)) is_using_builtin_array = true; array_type = type_to_array_glsl(physical_type, orig_id); } + if (is_mesh_shader()) + { + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) + { + if (builtin == BuiltInPrimitiveShadingRateKHR) + { + // not supported in metal 3.0 + is_using_builtin_array = false; + return ""; + } + + SPIRType metallic_type = *declared_type; + if (builtin == BuiltInCullPrimitiveEXT) + metallic_type.basetype = SPIRType::Boolean; + else if (builtin == 
BuiltInPrimitiveId || builtin == BuiltInLayer || builtin == BuiltInViewportIndex) + metallic_type.basetype = SPIRType::UInt; + + is_using_builtin_array = true; + std::string result; + if (has_member_decoration(type.self, orig_id, DecorationBuiltIn)) + { + // avoid '_RESERVED_IDENTIFIER_FIXUP_' in variable name + result = join(type_to_glsl(metallic_type, orig_id, false), " ", qualifier, + builtin_to_glsl(builtin, StorageClassOutput), member_attribute_qualifier(type, index), + array_type, ";"); + } + else + { + result = join(type_to_glsl(metallic_type, orig_id, false), " ", qualifier, + to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";"); + } + is_using_builtin_array = false; + return result; + } + } + if (orig_id) { auto *data_type = declared_type; @@ -12513,6 +12780,16 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_ statement("char _m", index, "_pad", "[", pad_len, "];"); } + BuiltIn builtin = BuiltInMax; + if (is_mesh_shader() && is_member_builtin(type, index, &builtin)) + { + if (!has_active_builtin(builtin, StorageClassOutput) && !has_active_builtin(builtin, StorageClassInput)) + { + // Do not emit unused builtins in mesh-output blocks + return; + } + } + // Handle HLSL-style 0-based vertex/instance index. builtin_declaration = true; statement(to_struct_member(type, member_type_id, index, qualifier)); @@ -12586,9 +12863,11 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } - // Vertex and tessellation evaluation function outputs - if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) || is_tese_shader()) && - type.storage == StorageClassOutput) + bool use_semantic_stage_output = is_mesh_shader() || is_tese_shader() || + (execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation); + + // Vertex, mesh and tessellation evaluation function outputs + if ((type.storage == StorageClassOutput || is_mesh_shader()) && use_semantic_stage_output) { if (is_builtin) { @@ -12607,6 +12886,9 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in /* fallthrough */ case BuiltInPosition: case BuiltInLayer: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInPrimitiveId: return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); case BuiltInClipDistance: @@ -12760,17 +13042,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in else quals = member_location_attribute_qualifier(type, index); - if (builtin == BuiltInBaryCoordKHR || builtin == BuiltInBaryCoordNoPerspKHR) + if (builtin == BuiltInBaryCoordKHR && has_member_decoration(type.self, index, DecorationNoPerspective)) { - if (has_member_decoration(type.self, index, DecorationFlat) || - has_member_decoration(type.self, index, DecorationCentroid) || - has_member_decoration(type.self, index, DecorationSample) || - has_member_decoration(type.self, index, DecorationNoPerspective)) - { - // NoPerspective is baked into the builtin type. - SPIRV_CROSS_THROW( - "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs."); - } + // NoPerspective is baked into the builtin type. 
+ SPIRV_CROSS_THROW("NoPerspective decorations are not supported for BaryCoord inputs."); } // Don't bother decorating integers with the 'flat' attribute; it's @@ -12788,6 +13063,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { if (!quals.empty()) quals += ", "; + + if (builtin == BuiltInBaryCoordNoPerspKHR || builtin == BuiltInBaryCoordKHR) + SPIRV_CROSS_THROW("Centroid interpolation not supported for barycentrics in MSL."); + if (has_member_decoration(type.self, index, DecorationNoPerspective)) quals += "centroid_no_perspective"; else @@ -12797,17 +13076,27 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { if (!quals.empty()) quals += ", "; + + if (builtin == BuiltInBaryCoordNoPerspKHR || builtin == BuiltInBaryCoordKHR) + SPIRV_CROSS_THROW("Sample interpolation not supported for barycentrics in MSL."); + if (has_member_decoration(type.self, index, DecorationNoPerspective)) quals += "sample_no_perspective"; else quals += "sample_perspective"; } - else if (has_member_decoration(type.self, index, DecorationNoPerspective)) + else if (has_member_decoration(type.self, index, DecorationNoPerspective) || builtin == BuiltInBaryCoordNoPerspKHR) { if (!quals.empty()) quals += ", "; quals += "center_no_perspective"; } + else if (builtin == BuiltInBaryCoordKHR) + { + if (!quals.empty()) + quals += ", "; + quals += "center_perspective"; + } } if (!quals.empty()) @@ -13070,6 +13359,12 @@ string CompilerMSL::func_type_decl(SPIRType &type) case ExecutionModelKernel: entry_type = "kernel"; break; + case ExecutionModelMeshEXT: + entry_type = "[[mesh]]"; + break; + case ExecutionModelTaskEXT: + entry_type = "[[object]]"; + break; default: entry_type = "unknown"; break; @@ -13088,6 +13383,11 @@ bool CompilerMSL::is_tese_shader() const return get_execution_model() == ExecutionModelTessellationEvaluation; } +bool CompilerMSL::is_mesh_shader() const +{ + return get_execution_model() == spv::ExecutionModelMeshEXT; +} + bool CompilerMSL::uses_explicit_early_fragment_test() { auto &ep_flags = get_entry_point().flags; @@ -13203,6 +13503,16 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo if (!addr_space) addr_space = "device"; } + + if (is_mesh_shader()) + addr_space = "threadgroup"; + break; + + case StorageClassTaskPayloadWorkgroupEXT: + if (is_mesh_shader()) + addr_space = "const object_data"; + else + addr_space = "object_data"; break; default: @@ -13215,7 +13525,10 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : ""; } - return join(decoration_flags_signal_volatile(flags) ? 
"volatile " : "", addr_space); + if (decoration_flags_signal_volatile(flags) && 0 != strcmp(addr_space, "thread")) + return join("volatile ", addr_space); + else + return addr_space; } const char *CompilerMSL::to_restrict(uint32_t id, bool space) @@ -13601,6 +13914,20 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]"); } } + + if (is_mesh_shader()) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("spvMesh_t spvMesh"); + } + + if (get_execution_model() == ExecutionModelTaskEXT) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("mesh_grid_properties spvMgp"); + } } string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) @@ -13872,6 +14199,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) } else { + add_spv_func_and_recompile(SPVFuncImplVariableDescriptor); ep_args += "const device spvDescriptor<" + get_argument_address_space(var) + " " + type_to_glsl(type) + "*>* "; } @@ -14038,6 +14366,14 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); } + if (is_mesh_shader()) + { + // If shader doesn't call SetMeshOutputsEXT, nothing should be rendered. + // No need to barrier after this, because only thread 0 writes to this later. + entry_func.fixup_hooks_in.push_back([this]() { statement("if (gl_LocalInvocationIndex == 0) spvMeshSizes.y = 0u;"); }); + entry_func.fixup_hooks_out.push_back([this]() { emit_mesh_outputs(); }); + } + // Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths. ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = get_variable_data_type(var); @@ -14838,7 +15174,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl = join(cv_qualifier, type_to_glsl(type, arg.id)); - else if (builtin) + else if (builtin && builtin_type != spv::BuiltInPrimitiveTriangleIndicesEXT && + builtin_type != spv::BuiltInPrimitiveLineIndicesEXT && builtin_type != spv::BuiltInPrimitivePointIndicesEXT) { // Only use templated array for Clip/Cull distance when feasible. 
// In other scenarios, we need need to override array length for tess levels (if used as outputs), @@ -15252,6 +15589,7 @@ const std::unordered_set &CompilerMSL::get_illegal_func_names() "fmin3", "fmax3", "divide", + "fmod", "median3", "VARIABLE_TRACEPOINT", "STATIC_DATA_TRACEPOINT", @@ -15474,6 +15812,9 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id) auto *var = maybe_get(id); auto &type = expression_type(id); + if (type.storage == StorageClassTaskPayloadWorkgroupEXT) + quals += "object_data "; + if (type.storage == StorageClassWorkgroup || (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))) quals += "threadgroup "; @@ -15658,6 +15999,8 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member) break; case SPIRType::RayQuery: return "raytracing::intersection_query"; + case SPIRType::MeshGridProperties: + return "mesh_grid_properties"; default: return "unknown_type"; @@ -15772,6 +16115,9 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable return true; } + if (is_mesh_shader()) + return variable.storage == StorageClassOutput; + return variable.storage == StorageClassOutput && is_tesc_shader() && is_stage_output_variable_masked(variable); } else if (storage == StorageClassStorageBuffer) @@ -16541,6 +16887,8 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInLayer: if (is_tesc_shader()) break; + if (is_mesh_shader()) + break; if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && !is_stage_output_builtin_masked(builtin)) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); @@ -16598,6 +16946,9 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) // In SPIR-V 1.6 with Volatile HelperInvocation, we cannot emit a fixup early. return "simd_is_helper_thread()"; + case BuiltInPrimitiveId: + return "gl_PrimitiveID"; + default: break; } @@ -16631,6 +16982,8 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Vertex function out case BuiltInClipDistance: return "clip_distance"; + case BuiltInCullDistance: + return "cull_distance"; case BuiltInPointSize: return "point_size"; case BuiltInPosition: @@ -16678,6 +17031,8 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2."); return "primitive_id"; + case ExecutionModelMeshEXT: + return "primitive_id"; default: SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model."); } @@ -16707,7 +17062,7 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Shouldn't be reached. SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL."); case BuiltInViewIndex: - if (execution.model != ExecutionModelFragment) + if (execution.model != ExecutionModelFragment && execution.model != ExecutionModelMeshEXT) SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders."); // The ViewIndex was implicitly used in the prior stages to set the render_target_array_index, // so we can get it from there. 
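// Worked example for the barycentric rework in the next hunk: an undecorated
// gl_BaryCoordEXT input now comes out as
//   float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]];
// while gl_BaryCoordNoPerspEXT yields [[barycentric_coord, center_no_perspective]],
// with the perspective qualifier supplied by member_attribute_qualifier() above.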
@@ -16805,18 +17160,15 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); case BuiltInBaryCoordKHR: - if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) - SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); - else if (!msl_options.supports_msl_version(2, 2)) - SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); - return "barycentric_coord, center_perspective"; - case BuiltInBaryCoordNoPerspKHR: if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); - return "barycentric_coord, center_no_perspective"; + return "barycentric_coord"; + + case BuiltInCullPrimitiveEXT: + return "primitive_culled"; default: return "unsupported-built-in"; @@ -16934,6 +17286,13 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInDeviceIndex: return "int"; + case BuiltInPrimitivePointIndicesEXT: + return "uint"; + case BuiltInPrimitiveLineIndicesEXT: + return "uint2"; + case BuiltInPrimitiveTriangleIndicesEXT: + return "uint3"; + default: return "unsupported-built-in-type"; } @@ -17704,6 +18063,10 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpSUDotAccSat: return SPVFuncImplReduceAdd; + case OpSMulExtended: + case OpUMulExtended: + return SPVFuncImplMulExtended; + default: break; } @@ -18441,6 +18804,7 @@ void CompilerMSL::analyze_argument_buffers() uint32_t member_index = 0; uint32_t next_arg_buff_index = 0; + uint32_t prev_was_scalar_on_array_offset = 0; for (auto &resource : resources) { auto &var = *resource.var; @@ -18453,7 +18817,9 @@ void CompilerMSL::analyze_argument_buffers() // member_index and next_arg_buff_index are incremented when padding members are added. if (msl_options.pad_argument_buffer_resources && resource.plane == 0 && resource.overlapping_var_id == 0) { - auto rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); + auto rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index - prev_was_scalar_on_array_offset); + rez_bind.count -= prev_was_scalar_on_array_offset; + while (resource.index > next_arg_buff_index) { switch (rez_bind.basetype) @@ -18492,12 +18858,19 @@ void CompilerMSL::analyze_argument_buffers() // After padding, retrieve the resource again. It will either be more padding, or the actual resource. rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); + prev_was_scalar_on_array_offset = 0; } + uint32_t count = rez_bind.count; + + // If the current resource is an array in the descriptor, but is a scalar + // in the shader, only the first element will be consumed. The next pass + // will add a padding member to consume the remaining array elements. + if (count > 1 && type.array.empty()) + count = prev_was_scalar_on_array_offset = 1; + // Adjust the number of slots consumed by current member itself. - // Use the count value from the app, instead of the shader, in case the - // shader is only accessing part, or even one element, of the array. 
- next_arg_buff_index += resource.plane_count * rez_bind.count;
+ next_arg_buff_index += resource.plane_count * count;
 }
 string mbr_name = ensure_valid_name(resource.name, "m");
@@ -18788,6 +19161,224 @@ void CompilerMSL::emit_block_hints(const SPIRBlock &)
 {
 }
+void CompilerMSL::emit_mesh_entry_point()
+{
+ auto &ep = get_entry_point();
+ auto &f = get<SPIRFunction>(ir.default_entry_point);
+
+ const uint32_t func_id = ir.increase_bound_by(3);
+ const uint32_t block_id = func_id + 1;
+ const uint32_t ret_id = func_id + 2;
+ auto &wrapped_main = set<SPIRFunction>(func_id, f.return_type, f.function_type);
+
+ wrapped_main.blocks.push_back(block_id);
+ wrapped_main.entry_block = block_id;
+
+ auto &wrapped_entry = set<SPIRBlock>(block_id);
+ wrapped_entry.terminator = SPIRBlock::Return;
+
+ // Push call to original 'main'
+ Instruction ix = {};
+ ix.op = OpFunctionCall;
+ ix.offset = uint32_t(ir.spirv.size());
+ ix.length = 3;
+
+ ir.spirv.push_back(f.return_type);
+ ir.spirv.push_back(ret_id);
+ ir.spirv.push_back(ep.self);
+
+ wrapped_entry.ops.push_back(ix);
+
+ // Replace the entry point with the new one.
+ SPIREntryPoint proxy_ep = ep;
+ proxy_ep.self = func_id;
+ ir.entry_points.insert(std::make_pair(func_id, proxy_ep));
+ ir.meta[func_id] = ir.meta[ir.default_entry_point];
+ ir.meta[ir.default_entry_point].decoration.alias.clear();
+
+ ir.default_entry_point = func_id;
+}
+
+void CompilerMSL::emit_mesh_outputs()
+{
+ auto &mode = get_entry_point();
+
+ // Predefined thread count, or zero if a specialization constant is in use.
+ uint32_t num_invocations = 0;
+ if (mode.workgroup_size.id_x == 0 && mode.workgroup_size.id_y == 0 && mode.workgroup_size.id_z == 0)
+ num_invocations = mode.workgroup_size.x * mode.workgroup_size.y * mode.workgroup_size.z;
+
+ statement("threadgroup_barrier(mem_flags::mem_threadgroup);");
+ statement("if (spvMeshSizes.y == 0)");
+ begin_scope();
+ statement("return;");
+ end_scope();
+ statement("spvMesh.set_primitive_count(spvMeshSizes.y);");
+
+ statement("const uint spvThreadCount [[maybe_unused]] = (gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z);");
+
+ if (mesh_out_per_vertex != 0)
+ {
+ auto &type_vert = get<SPIRType>(mesh_out_per_vertex);
+
+ if (num_invocations < mode.output_vertices)
+ {
+ statement("for (uint spvVI = gl_LocalInvocationIndex; spvVI < spvMeshSizes.x; spvVI += spvThreadCount)");
+ }
+ else
+ {
+ statement("const uint spvVI = gl_LocalInvocationIndex;");
+ statement("if (gl_LocalInvocationIndex < spvMeshSizes.x)");
+ }
+
+ begin_scope();
+
+ statement("spvPerVertex spvV = {};");
+ for (uint32_t index = 0; index < uint32_t(type_vert.member_types.size()); ++index)
+ {
+ uint32_t orig_var = get_extended_member_decoration(type_vert.self, index, SPIRVCrossDecorationInterfaceOrigID);
+ uint32_t orig_id = get_extended_member_decoration(type_vert.self, index, SPIRVCrossDecorationInterfaceMemberIndex);
+
+ // Clip/cull distances are a special case.
+ if (orig_var == 0 && orig_id == (~0u))
+ continue;
+
+ auto &orig = get<SPIRVariable>(orig_var);
+ auto &orig_type = get<SPIRType>(orig.basetype);
+
+ // FIXME: Need to deal with complex composite IO types. These may need extra unroll, etc.
+
+ BuiltIn builtin = BuiltInMax;
+ std::string access;
+ if (orig_type.basetype == SPIRType::Struct)
+ {
+ if (has_member_decoration(orig_type.self, orig_id, DecorationBuiltIn))
+ builtin = BuiltIn(get_member_decoration(orig_type.self, orig_id, DecorationBuiltIn));
+
+ switch (builtin)
+ {
+ case BuiltInPosition:
+ case BuiltInPointSize:
+ case BuiltInClipDistance:
+ case BuiltInCullDistance:
+ access = "."
+ builtin_to_glsl(builtin, StorageClassOutput); + break; + default: + access = "." + to_member_name(orig_type, orig_id); + break; + } + + if (has_member_decoration(type_vert.self, index, DecorationIndex)) + { + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. + const uint32_t orig_index = get_member_decoration(type_vert.self, index, DecorationIndex); + access += "[" + to_string(orig_index) + "]"; + statement("spvV.", builtin_to_glsl(builtin, StorageClassOutput), "[", orig_index, "] = ", to_name(orig_var), "[spvVI]", access, ";"); + } + } + + statement("spvV.", to_member_name(type_vert, index), " = ", to_name(orig_var), "[spvVI]", access, ";"); + if (options.vertex.flip_vert_y && builtin == BuiltInPosition) + { + statement("spvV.", to_member_name(type_vert, index), ".y = -(", "spvV.", + to_member_name(type_vert, index), ".y);", " // Invert Y-axis for Metal"); + } + } + statement("spvMesh.set_vertex(spvVI, spvV);"); + end_scope(); + } + + if (mesh_out_per_primitive != 0 || builtin_mesh_primitive_indices_id != 0) + { + if (num_invocations < mode.output_primitives) + { + statement("for (uint spvPI = gl_LocalInvocationIndex; spvPI < spvMeshSizes.y; spvPI += spvThreadCount)"); + } + else + { + statement("const uint spvPI = gl_LocalInvocationIndex;"); + statement("if (gl_LocalInvocationIndex < spvMeshSizes.y)"); + } + + // FIXME: Need to deal with complex composite IO types. These may need extra unroll, etc. + + begin_scope(); + + if (builtin_mesh_primitive_indices_id != 0) + { + if (mode.flags.get(ExecutionModeOutputTrianglesEXT)) + { + statement("spvMesh.set_index(spvPI * 3u + 0u, gl_PrimitiveTriangleIndicesEXT[spvPI].x);"); + statement("spvMesh.set_index(spvPI * 3u + 1u, gl_PrimitiveTriangleIndicesEXT[spvPI].y);"); + statement("spvMesh.set_index(spvPI * 3u + 2u, gl_PrimitiveTriangleIndicesEXT[spvPI].z);"); + } + else if (mode.flags.get(ExecutionModeOutputLinesEXT)) + { + statement("spvMesh.set_index(spvPI * 2u + 0u, gl_PrimitiveLineIndicesEXT[spvPI].x);"); + statement("spvMesh.set_index(spvPI * 2u + 1u, gl_PrimitiveLineIndicesEXT[spvPI].y);"); + } + else + { + statement("spvMesh.set_index(spvPI, gl_PrimitivePointIndicesEXT[spvPI]);"); + } + } + + if (mesh_out_per_primitive != 0) + { + auto &type_prim = get(mesh_out_per_primitive); + statement("spvPerPrimitive spvP = {};"); + for (uint32_t index = 0; index < uint32_t(type_prim.member_types.size()); ++index) + { + uint32_t orig_var = + get_extended_member_decoration(type_prim.self, index, SPIRVCrossDecorationInterfaceOrigID); + uint32_t orig_id = + get_extended_member_decoration(type_prim.self, index, SPIRVCrossDecorationInterfaceMemberIndex); + auto &orig = get(orig_var); + auto &orig_type = get(orig.basetype); + + BuiltIn builtin = BuiltInMax; + std::string access; + if (orig_type.basetype == SPIRType::Struct) + { + if (has_member_decoration(orig_type.self, orig_id, DecorationBuiltIn)) + builtin = BuiltIn(get_member_decoration(orig_type.self, orig_id, DecorationBuiltIn)); + + switch (builtin) + { + case BuiltInPrimitiveId: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + access = "." + builtin_to_glsl(builtin, StorageClassOutput); + break; + default: + access = "." 
+ to_member_name(orig_type, orig_id);
+ }
+ }
+ statement("spvP.", to_member_name(type_prim, index), " = ", to_name(orig_var), "[spvPI]", access, ";");
+ }
+ statement("spvMesh.set_primitive(spvPI, spvP);");
+ }
+
+ end_scope();
+ }
+}
+
+void CompilerMSL::emit_mesh_tasks(SPIRBlock &block)
+{
+ // GLSL: Once this instruction is called, the workgroup must be terminated immediately, and the mesh shaders are launched.
+ // TODO: find a reliable and clean way of terminating the shader.
+ flush_variable_declaration(builtin_task_grid_id);
+ statement("spvMgp.set_threadgroups_per_grid(uint3(", to_unpacked_expression(block.mesh.groups[0]), ", ",
+ to_unpacked_expression(block.mesh.groups[1]), ", ", to_unpacked_expression(block.mesh.groups[2]), "));");
+ // This is correct if EmitMeshTasks is called in the entry function of the shader.
+ // The only viable solutions would be:
+ // - Caller ensures the SPIR-V is inlined, then this always holds true.
+ // - Pass down a "should terminate" bool to leaf functions and chain return (horrible and disgusting, let's not).
+ statement("return;");
+}
+
 string CompilerMSL::additional_fixed_sample_mask_str() const
 {
 char print_buffer[32];
diff --git a/thirdparty/spirv-cross/spirv_msl.hpp b/thirdparty/spirv-cross/spirv_msl.hpp
index 2d970c0da5b8..4aaad01a8921 100644
--- a/thirdparty/spirv-cross/spirv_msl.hpp
+++ b/thirdparty/spirv-cross/spirv_msl.hpp
@@ -838,7 +838,9 @@ class CompilerMSL : public CompilerGLSL
 SPVFuncImplPaddedStd140,
 SPVFuncImplReduceAdd,
 SPVFuncImplImageFence,
- SPVFuncImplTextureCast
+ SPVFuncImplTextureCast,
+ SPVFuncImplMulExtended,
+ SPVFuncImplSetMeshOutputsEXT,
 };
 // If the underlying resource has been used for comparison then duplicate loads of that resource must be too
@@ -867,6 +869,9 @@
 std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member);
 std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 void emit_block_hints(const SPIRBlock &block) override;
+ void emit_mesh_entry_point();
+ void emit_mesh_outputs();
+ void emit_mesh_tasks(SPIRBlock &block) override;
 // Allow Metal to use the array template to make arrays a value type
 std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override;
@@ -918,6 +923,7 @@
 bool is_tesc_shader() const;
 bool is_tese_shader() const;
+ bool is_mesh_shader() const;
 void preprocess_op_codes();
 void localize_global_variables();
@@ -932,6 +938,7 @@
 std::unordered_set<uint32_t> &processed_func_ids);
 uint32_t add_interface_block(spv::StorageClass storage, bool patch = false);
 uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage);
+ uint32_t add_meshlet_block(bool per_primitive);
 struct InterfaceBlockMeta
 {
@@ -1103,12 +1110,17 @@
 uint32_t builtin_stage_input_size_id = 0;
 uint32_t builtin_local_invocation_index_id = 0;
 uint32_t builtin_workgroup_size_id = 0;
+ uint32_t builtin_mesh_primitive_indices_id = 0;
+ uint32_t builtin_mesh_sizes_id = 0;
+ uint32_t builtin_task_grid_id = 0;
 uint32_t builtin_frag_depth_id = 0;
 uint32_t swizzle_buffer_id = 0;
 uint32_t buffer_size_buffer_id = 0;
 uint32_t view_mask_buffer_id = 0;
 uint32_t dynamic_offsets_buffer_id = 0;
 uint32_t uint_type_id = 0;
+ uint32_t shared_uint_type_id = 0;
+ uint32_t meshlet_type_id = 0;
 uint32_t argument_buffer_padding_buffer_type_id = 0;
 uint32_t argument_buffer_padding_image_type_id = 0;
 uint32_t
argument_buffer_padding_sampler_type_id = 0;
@@ -1173,6 +1185,8 @@ class CompilerMSL : public CompilerGLSL
 VariableID stage_out_ptr_var_id = 0;
 VariableID tess_level_inner_var_id = 0;
 VariableID tess_level_outer_var_id = 0;
+ VariableID mesh_out_per_vertex = 0;
+ VariableID mesh_out_per_primitive = 0;
 VariableID stage_out_masked_builtin_type_id = 0;
 // Handle HLSL-style 0-based vertex/instance index.
diff --git a/thirdparty/spirv-cross/spirv_reflect.cpp b/thirdparty/spirv-cross/spirv_reflect.cpp
index 633983bd30de..552d671a649b 100644
--- a/thirdparty/spirv-cross/spirv_reflect.cpp
+++ b/thirdparty/spirv-cross/spirv_reflect.cpp
@@ -637,6 +637,8 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resource> &resources)
 if (mask.get(DecorationWeightTextureQCOM))
 json_stream->emit_json_key_value("WeightTextureQCOM", get_decoration(res.id, DecorationWeightTextureQCOM));
 if (mask.get(DecorationBlockMatchTextureQCOM))
 json_stream->emit_json_key_value("BlockMatchTextureQCOM", get_decoration(res.id, DecorationBlockMatchTextureQCOM));
+ if (mask.get(DecorationBlockMatchSamplerQCOM))
+ json_stream->emit_json_key_value("BlockMatchSamplerQCOM", get_decoration(res.id, DecorationBlockMatchSamplerQCOM));
 // For images, the type itself adds a layout qualifier.
 // Only emit the format for storage images.