diff --git a/taichi/rhi/device.h b/taichi/rhi/device.h index 2ac8d80a8cb2e..2bbb75ff9a8db 100644 --- a/taichi/rhi/device.h +++ b/taichi/rhi/device.h @@ -17,6 +17,7 @@ enum class RhiResult { error = -1, invalid_usage = -2, not_supported = -3, + out_of_memory = -4, }; constexpr size_t kBufferSizeEntireSize = size_t(-1); @@ -54,7 +55,7 @@ struct LLVMRuntime; // TODO: Figure out how to support images. Temporary solutions is to have all // opque types such as images work as an allocation -using DeviceAllocationId = uint32_t; +using DeviceAllocationId = uint64_t; struct TI_DLL_EXPORT DeviceAllocation { Device *device{nullptr}; diff --git a/taichi/rhi/impl_support.h b/taichi/rhi/impl_support.h index 0f559884d90bb..38d66ef824c07 100644 --- a/taichi/rhi/impl_support.h +++ b/taichi/rhi/impl_support.h @@ -2,6 +2,9 @@ #include "taichi/rhi/device.h" #include +#include +#include +#include namespace taichi::lang { @@ -89,5 +92,61 @@ struct BidirMap { } }; +// A synchronized list of objects that is pointer stable & reuse objects +template +class SyncedPtrStableObjectList { + using storage_block = std::array; + + public: + template + T &acquire(Params &&...args) { + std::lock_guard _(lock_); + + void *storage = nullptr; + if (free_nodes_.empty()) { + storage = objects_.emplace_front().data(); + } else { + storage = free_nodes_.back(); + free_nodes_.pop_back(); + } + return *new (storage) T(std::forward(args)...); + } + + void release(T *ptr) { + std::lock_guard _(lock_); + + ptr->~T(); + free_nodes_.push_back(ptr); + } + + void clear() { + std::lock_guard _(lock_); + + // Transfer to quick look-up + std::unordered_set free_nodes_set(free_nodes_.begin(), + free_nodes_.end()); + free_nodes_.clear(); + // Destroy live objects + for (auto &storage : objects_) { + T *obj = reinterpret_cast(storage.data()); + // Call destructor if object is not in the free list (thus live) + if (free_nodes_set.find(obj) == free_nodes_set.end()) { + obj->~T(); + } + } + // Clear the storage + objects_.clear(); + } + + ~SyncedPtrStableObjectList() { + clear(); + } + + private: + std::mutex lock_; + std::forward_list objects_; + std::vector free_nodes_; +}; + } // namespace rhi_impl } // namespace taichi::lang diff --git a/taichi/rhi/opengl/opengl_device.cpp b/taichi/rhi/opengl/opengl_device.cpp index 683184858f0c2..87db5b8af42c9 100644 --- a/taichi/rhi/opengl/opengl_device.cpp +++ b/taichi/rhi/opengl/opengl_device.cpp @@ -562,7 +562,8 @@ DeviceAllocation GLDevice::allocate_memory(const AllocParams ¶ms) { } void GLDevice::dealloc_memory(DeviceAllocation handle) { - glDeleteBuffers(1, &handle.alloc_id); + GLuint buffer = GLuint(handle.alloc_id); + glDeleteBuffers(1, &buffer); check_opengl_error("glDeleteBuffers"); } @@ -714,7 +715,8 @@ DeviceAllocation GLDevice::create_image(const ImageParams ¶ms) { } void GLDevice::destroy_image(DeviceAllocation handle) { - glDeleteTextures(1, &handle.alloc_id); + GLuint texture = GLuint(handle.alloc_id); + glDeleteTextures(1, &texture); check_opengl_error("glDeleteTextures"); image_to_dims_.erase(handle.alloc_id); image_to_int_format_.erase(handle.alloc_id); diff --git a/taichi/rhi/vulkan/vulkan_api.cpp b/taichi/rhi/vulkan/vulkan_api.cpp index 95652ea24c953..4615590159362 100644 --- a/taichi/rhi/vulkan/vulkan_api.cpp +++ b/taichi/rhi/vulkan/vulkan_api.cpp @@ -369,6 +369,38 @@ IVkPipeline create_graphics_pipeline(VkDevice device, return obj; } +IVkPipeline create_graphics_pipeline_dynamic( + VkDevice device, + VkGraphicsPipelineCreateInfo *create_info, + VkPipelineRenderingCreateInfoKHR *rendering_info, + IVkPipelineLayout layout, + IVkPipelineCache cache, + IVkPipeline base_pipeline) { + IVkPipeline obj = std::make_shared(); + obj->device = device; + obj->ref_layout = layout; + obj->ref_cache = cache; + obj->ref_renderpass = nullptr; + + create_info->pNext = rendering_info; + create_info->layout = layout->layout; + + if (base_pipeline) { + create_info->basePipelineHandle = base_pipeline->pipeline; + create_info->basePipelineIndex = -1; + } else { + create_info->basePipelineHandle = VK_NULL_HANDLE; + create_info->basePipelineIndex = 0; + } + + VkResult res = + vkCreateGraphicsPipelines(device, cache ? cache->cache : VK_NULL_HANDLE, + 1, create_info, nullptr, &obj->pipeline); + BAIL_ON_VK_BAD_RESULT_NO_RETURN(res, "failed to create graphics pipeline"); + + return obj; +} + IVkPipeline create_raytracing_pipeline( VkDevice device, VkRayTracingPipelineCreateInfoKHR *create_info, diff --git a/taichi/rhi/vulkan/vulkan_api.h b/taichi/rhi/vulkan/vulkan_api.h index c092528bd1474..840a5b114ba14 100644 --- a/taichi/rhi/vulkan/vulkan_api.h +++ b/taichi/rhi/vulkan/vulkan_api.h @@ -167,6 +167,13 @@ IVkPipeline create_graphics_pipeline(VkDevice device, IVkPipelineLayout layout, IVkPipelineCache cache = nullptr, IVkPipeline base_pipeline = nullptr); +IVkPipeline create_graphics_pipeline_dynamic( + VkDevice device, + VkGraphicsPipelineCreateInfo *create_info, + VkPipelineRenderingCreateInfoKHR *rendering_info, + IVkPipelineLayout layout, + IVkPipelineCache cache = nullptr, + IVkPipeline base_pipeline = nullptr); IVkPipeline create_raytracing_pipeline( VkDevice device, VkRayTracingPipelineCreateInfoKHR *create_info, diff --git a/taichi/rhi/vulkan/vulkan_device.cpp b/taichi/rhi/vulkan/vulkan_device.cpp index 764a86d28f01d..df9bdb64a4ce3 100644 --- a/taichi/rhi/vulkan/vulkan_device.cpp +++ b/taichi/rhi/vulkan/vulkan_device.cpp @@ -206,6 +206,37 @@ vkapi::IVkPipeline VulkanPipeline::graphics_pipeline( return pipeline; } +vkapi::IVkPipeline VulkanPipeline::graphics_pipeline_dynamic( + const VulkanRenderPassDesc &renderpass_desc) { + if (graphics_pipeline_dynamic_.find(renderpass_desc) != + graphics_pipeline_dynamic_.end()) { + return graphics_pipeline_dynamic_.at(renderpass_desc); + } + + std::vector color_attachment_formats; + for (const auto &color_attachment : renderpass_desc.color_attachments) { + color_attachment_formats.push_back(color_attachment.first); + } + + VkPipelineRenderingCreateInfoKHR rendering_info; + rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR; + rendering_info.pNext = nullptr; + rendering_info.viewMask = 0; + rendering_info.colorAttachmentCount = + renderpass_desc.color_attachments.size(); + rendering_info.pColorAttachmentFormats = color_attachment_formats.data(); + rendering_info.depthAttachmentFormat = renderpass_desc.depth_attachment; + rendering_info.stencilAttachmentFormat = VK_FORMAT_UNDEFINED; + + vkapi::IVkPipeline pipeline = vkapi::create_graphics_pipeline_dynamic( + device_, &graphics_pipeline_template_->pipeline_info, &rendering_info, + pipeline_layout_); + + graphics_pipeline_dynamic_[renderpass_desc] = pipeline; + + return pipeline; +} + void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { std::unordered_set sets_used; @@ -827,8 +858,11 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { return; if (pipeline->is_graphics()) { - vkapi::IVkPipeline vk_pipeline = pipeline->graphics_pipeline( - current_renderpass_desc_, current_renderpass_); + vkapi::IVkPipeline vk_pipeline = + ti_device_->vk_caps().dynamic_rendering + ? pipeline->graphics_pipeline_dynamic(current_renderpass_desc_) + : pipeline->graphics_pipeline(current_renderpass_desc_, + current_renderpass_); vkCmdBindPipeline(buffer_->buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline->pipeline); @@ -1021,6 +1055,89 @@ void VulkanCommandList::begin_renderpass(int x0, current_renderpass_desc_.color_attachments.clear(); rp_desc.clear_depth = depth_clear; + VkRect2D render_area; + render_area.offset = {x0, y0}; + render_area.extent = {uint32_t(x1 - x0), uint32_t(y1 - y0)}; + + viewport_width_ = render_area.extent.width; + viewport_height_ = render_area.extent.height; + + // Dynamic rendering codepath + if (ti_device_->vk_caps().dynamic_rendering) { + current_dynamic_targets_.clear(); + + std::vector color_attachment_infos( + num_color_attachments); + for (uint32_t i = 0; i < num_color_attachments; i++) { + auto [image, view, format] = + ti_device_->get_vk_image(color_attachments[i]); + bool clear = color_clear[i]; + rp_desc.color_attachments.emplace_back(format, clear); + + VkRenderingAttachmentInfoKHR &attachment_info = color_attachment_infos[i]; + attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + attachment_info.pNext = nullptr; + attachment_info.imageView = view->view; + attachment_info.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachment_info.resolveMode = VK_RESOLVE_MODE_NONE; + attachment_info.resolveImageView = VK_NULL_HANDLE; + attachment_info.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + attachment_info.loadOp = + clear ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + if (clear) { + attachment_info.clearValue.color = { + {clear_colors[i][0], clear_colors[i][1], clear_colors[i][2], + clear_colors[i][3]}}; + } + + current_dynamic_targets_.push_back(image); + } + + VkRenderingInfoKHR render_info{}; + render_info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR; + render_info.pNext = nullptr; + render_info.flags = 0; + render_info.renderArea = render_area; + render_info.layerCount = 1; + render_info.viewMask = 0; + render_info.colorAttachmentCount = num_color_attachments; + render_info.pColorAttachments = color_attachment_infos.data(); + render_info.pDepthAttachment = nullptr; + render_info.pStencilAttachment = nullptr; + + VkRenderingAttachmentInfo depth_attachment_info; + if (depth_attachment) { + auto [image, view, format] = ti_device_->get_vk_image(*depth_attachment); + rp_desc.depth_attachment = format; + + depth_attachment_info.sType = + VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + depth_attachment_info.pNext = nullptr; + depth_attachment_info.imageView = view->view; + depth_attachment_info.imageLayout = + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + depth_attachment_info.resolveMode = VK_RESOLVE_MODE_NONE; + depth_attachment_info.resolveImageView = VK_NULL_HANDLE; + depth_attachment_info.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + depth_attachment_info.loadOp = depth_clear ? VK_ATTACHMENT_LOAD_OP_CLEAR + : VK_ATTACHMENT_LOAD_OP_LOAD; + depth_attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_attachment_info.clearValue.depthStencil = {0.0, 0}; + + render_info.pDepthAttachment = &depth_attachment_info; + + current_dynamic_targets_.push_back(image); + } else { + rp_desc.depth_attachment = VK_FORMAT_UNDEFINED; + } + + vkCmdBeginRenderingKHR(buffer_->buffer, &render_info); + + return; + } + + // VkRenderpass & VkFramebuffer codepath bool has_depth = false; if (depth_attachment) { @@ -1059,15 +1176,8 @@ void VulkanCommandList::begin_renderpass(int x0, fb_desc.height = y1 - y0; fb_desc.renderpass = current_renderpass_; - viewport_width_ = fb_desc.width; - viewport_height_ = fb_desc.height; - current_framebuffer_ = ti_device_->get_framebuffer(fb_desc); - VkRect2D render_area; - render_area.offset = {x0, y0}; - render_area.extent = {uint32_t(x1 - x0), uint32_t(y1 - y0)}; - VkRenderPassBeginInfo begin_info{}; begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; begin_info.pNext = nullptr; @@ -1084,6 +1194,47 @@ void VulkanCommandList::begin_renderpass(int x0, } void VulkanCommandList::end_renderpass() { + if (ti_device_->vk_caps().dynamic_rendering) { + vkCmdEndRenderingKHR(buffer_->buffer); + + if (0) { + std::vector memory_barriers( + current_dynamic_targets_.size()); + for (int i = 0; i < current_dynamic_targets_.size(); i++) { + VkImageMemoryBarrier &barrier = memory_barriers[i]; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + // FIXME: Change this spec to stay in color attachment + barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = current_dynamic_targets_[i]->image; + barrier.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + } + + vkCmdPipelineBarrier(buffer_->buffer, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + /*dependencyFlags=*/0, /*memoryBarrierCount=*/0, + /*pMemoryBarriers=*/nullptr, + /*bufferMemoryBarrierCount=*/0, + /*pBufferMemoryBarriers=*/nullptr, + /*imageMemoryBarrierCount=*/memory_barriers.size(), + /*pImageMemoryBarriers=*/memory_barriers.data()); + } + current_dynamic_targets_.clear(); + + return; + } + vkCmdEndRenderPass(buffer_->buffer); current_renderpass_ = VK_NULL_HANDLE; @@ -1325,6 +1476,12 @@ void VulkanCommandList::set_line_width(float width) { } vkapi::IVkRenderPass VulkanCommandList::current_renderpass() { + if (ti_device_->vk_caps().dynamic_rendering) { + vkapi::IVkRenderPass rp = + ti_device_->get_renderpass(current_renderpass_desc_); + buffer_->refs.push_back(rp); + return rp; + } return current_renderpass_; } @@ -1377,12 +1534,6 @@ VulkanDevice::~VulkanDevice() { // be properly deallocated before VulkanDevice destruction. This isn't // the most proper fix but is less intrusive compared to other // approaches. - for (auto &alloc : allocations_) { - alloc.second.buffer.reset(); - } - for (auto &alloc : image_allocations_) { - alloc.second.image.reset(); - } allocations_.clear(); image_allocations_.clear(); @@ -1424,13 +1575,7 @@ std::unique_ptr VulkanDevice::create_event() { } DeviceAllocation VulkanDevice::allocate_memory(const AllocParams ¶ms) { - DeviceAllocation handle; - - handle.device = this; - handle.alloc_id = alloc_cnt_++; - - allocations_[handle.alloc_id] = {}; - AllocationInternal &alloc = allocations_[handle.alloc_id]; + AllocationInternal &alloc = allocations_.acquire(); VkBufferCreateInfo buffer_info{}; buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; @@ -1526,21 +1671,7 @@ DeviceAllocation VulkanDevice::allocate_memory(const AllocParams ¶ms) { alloc.addr = vkGetBufferDeviceAddressKHR(device_, &info); } - return handle; -} - -VulkanDevice::AllocationInternal &VulkanDevice::get_alloc_internal( - const DeviceAllocation &alloc) { - auto map_pair = allocations_.find(alloc.alloc_id); - assert(map_pair != allocations_.end() && "Invalid memory handle"); - return map_pair->second; -} - -const VulkanDevice::AllocationInternal &VulkanDevice::get_alloc_internal( - const DeviceAllocation &alloc) const { - const auto &map_pair = allocations_.find(alloc.alloc_id); - assert(map_pair != allocations_.cend() && "Invalid memory handle"); - return map_pair->second; + return DeviceAllocation{this, (uint64_t)&alloc}; } RhiResult VulkanDevice::map_internal(AllocationInternal &alloc_int, @@ -1552,7 +1683,7 @@ RhiResult VulkanDevice::map_internal(AllocationInternal &alloc_int, return RhiResult::invalid_usage; } - if (alloc_int.alloc_info.size < offset + size) { + if (size != VK_WHOLE_SIZE && alloc_int.alloc_info.size < offset + size) { RHI_LOG_ERROR("Mapping out of range"); return RhiResult::invalid_usage; } @@ -1589,28 +1720,21 @@ RhiResult VulkanDevice::map_internal(AllocationInternal &alloc_int, } void VulkanDevice::dealloc_memory(DeviceAllocation handle) { - get_alloc_internal(handle); - - allocations_.erase(handle.alloc_id); + allocations_.release(&get_alloc_internal(handle)); } uint64_t VulkanDevice::get_memory_physical_pointer(DeviceAllocation handle) { - const AllocationInternal &alloc_int = get_alloc_internal(handle); - return uint64_t(alloc_int.addr); + return uint64_t(get_alloc_internal(handle).addr); } RhiResult VulkanDevice::map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) { - AllocationInternal &alloc_int = get_alloc_internal(ptr); - - return map_internal(alloc_int, ptr.offset, size, mapped_ptr); + return map_internal(get_alloc_internal(ptr), ptr.offset, size, mapped_ptr); } RhiResult VulkanDevice::map(DeviceAllocation alloc, void **mapped_ptr) { - AllocationInternal &alloc_int = get_alloc_internal(alloc); - - return map_internal(alloc_int, 0, alloc_int.alloc_info.size, mapped_ptr); + return map_internal(get_alloc_internal(alloc), 0, VK_WHOLE_SIZE, mapped_ptr); } void VulkanDevice::unmap(DevicePtr ptr) { @@ -1827,18 +1951,10 @@ std::unique_ptr VulkanDevice::create_surface( std::tuple VulkanDevice::get_vkmemory_offset_size(const DeviceAllocation &alloc) const { - auto buffer_alloc = allocations_.find(alloc.alloc_id); - if (buffer_alloc != allocations_.end()) { - return std::make_tuple(buffer_alloc->second.alloc_info.deviceMemory, - buffer_alloc->second.alloc_info.offset, - buffer_alloc->second.alloc_info.size); - } else { - const ImageAllocInternal &image_alloc = - image_allocations_.at(alloc.alloc_id); - return std::make_tuple(image_alloc.alloc_info.deviceMemory, - image_alloc.alloc_info.offset, - image_alloc.alloc_info.size); - } + auto &buffer_alloc = get_alloc_internal(alloc); + return std::make_tuple(buffer_alloc.alloc_info.deviceMemory, + buffer_alloc.alloc_info.offset, + buffer_alloc.alloc_info.size); } vkapi::IVkBuffer VulkanDevice::get_vkbuffer( @@ -1850,7 +1966,7 @@ vkapi::IVkBuffer VulkanDevice::get_vkbuffer( std::tuple VulkanDevice::get_vk_image(const DeviceAllocation &alloc) const { - const ImageAllocInternal &alloc_int = image_allocations_.at(alloc.alloc_id); + const ImageAllocInternal &alloc_int = get_image_alloc_internal(alloc); return std::make_tuple(alloc_int.image, alloc_int.view, alloc_int.image->format); @@ -1874,7 +1990,8 @@ DeviceAllocation VulkanDevice::import_vkbuffer(vkapi::IVkBuffer buffer, size_t size, VkDeviceMemory memory, VkDeviceSize offset) { - AllocationInternal alloc_int{}; + AllocationInternal &alloc_int = allocations_.acquire(); + alloc_int.external = true; alloc_int.buffer = buffer; alloc_int.mapped = nullptr; @@ -1890,31 +2007,20 @@ DeviceAllocation VulkanDevice::import_vkbuffer(vkapi::IVkBuffer buffer, alloc_int.alloc_info.deviceMemory = memory; alloc_int.alloc_info.offset = offset; - DeviceAllocation alloc; - alloc.device = this; - alloc.alloc_id = alloc_cnt_++; - - allocations_[alloc.alloc_id] = alloc_int; - - return alloc; + return DeviceAllocation{this, reinterpret_cast(&alloc_int)}; } DeviceAllocation VulkanDevice::import_vk_image(vkapi::IVkImage image, vkapi::IVkImageView view, VkImageLayout layout) { - ImageAllocInternal alloc_int; + ImageAllocInternal &alloc_int = image_allocations_.acquire(); + alloc_int.external = true; alloc_int.image = image; alloc_int.view = view; alloc_int.view_lods.emplace_back(view); - DeviceAllocation alloc; - alloc.device = this; - alloc.alloc_id = alloc_cnt_++; - - image_allocations_[alloc.alloc_id] = alloc_int; - - return alloc; + return DeviceAllocation{this, reinterpret_cast(&alloc_int)}; } vkapi::IVkImageView VulkanDevice::get_vk_imageview( @@ -1925,16 +2031,11 @@ vkapi::IVkImageView VulkanDevice::get_vk_imageview( vkapi::IVkImageView VulkanDevice::get_vk_lod_imageview( const DeviceAllocation &alloc, int lod) const { - return image_allocations_.at(alloc.alloc_id).view_lods[lod]; + return get_image_alloc_internal(alloc).view_lods[lod]; } DeviceAllocation VulkanDevice::create_image(const ImageParams ¶ms) { - DeviceAllocation handle; - handle.device = this; - handle.alloc_id = alloc_cnt_++; - - image_allocations_[handle.alloc_id] = {}; - ImageAllocInternal &alloc = image_allocations_[handle.alloc_id]; + ImageAllocInternal &alloc = image_allocations_.acquire(); int num_mip_levels = 1; @@ -2056,25 +2157,17 @@ DeviceAllocation VulkanDevice::create_image(const ImageParams ¶ms) { vkapi::create_image_view(device_, alloc.image, &view_info)); } + DeviceAllocation handle{this, reinterpret_cast(&alloc)}; + if (params.initial_layout != ImageLayout::undefined) { image_transition(handle, ImageLayout::undefined, params.initial_layout); } -#ifdef TI_VULKAN_DEBUG_ALLOCATIONS - TI_TRACE("Allocate VK image {}, alloc_id={}", (void *)alloc.image, - handle.alloc_id); -#endif - return handle; } void VulkanDevice::destroy_image(DeviceAllocation handle) { - auto map_pair = image_allocations_.find(handle.alloc_id); - - RHI_ASSERT(map_pair != image_allocations_.end() && - "Invalid handle (double free?) {}"); - - image_allocations_.erase(handle.alloc_id); + image_allocations_.release(&get_image_alloc_internal(handle)); } vkapi::IVkRenderPass VulkanDevice::get_renderpass( diff --git a/taichi/rhi/vulkan/vulkan_device.h b/taichi/rhi/vulkan/vulkan_device.h index 94dbbadcd41e9..6941eb4272c11 100644 --- a/taichi/rhi/vulkan/vulkan_device.h +++ b/taichi/rhi/vulkan/vulkan_device.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -284,6 +285,9 @@ class VulkanPipeline : public Pipeline { const VulkanRenderPassDesc &renderpass_desc, vkapi::IVkRenderPass renderpass); + vkapi::IVkPipeline graphics_pipeline_dynamic( + const VulkanRenderPassDesc &renderpass_desc); + const std::string &name() const { return name_; } @@ -334,6 +338,12 @@ class VulkanPipeline : public Pipeline { std::unordered_map graphics_pipeline_; + // For KHR_dynamic_rendering + std::unordered_map + graphics_pipeline_dynamic_; + VulkanResourceBinder resource_binder_; std::vector set_layouts_; std::vector shader_modules_; @@ -443,6 +453,7 @@ class VulkanCommandList : public CommandList { currently_used_sets_; // Renderpass & raster pipeline + std::vector current_dynamic_targets_; VulkanRenderPassDesc current_renderpass_desc_; vkapi::IVkRenderPass current_renderpass_{VK_NULL_HANDLE}; vkapi::IVkFramebuffer current_framebuffer_{VK_NULL_HANDLE}; @@ -559,6 +570,7 @@ struct VulkanCapabilities { bool wide_line{false}; bool surface{false}; bool present{false}; + bool dynamic_rendering{false}; }; class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { @@ -710,27 +722,31 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { // Memory allocation struct AllocationInternal { - bool external{false}; + // Allocation info from VMA or set by `import_vkbuffer` VmaAllocationInfo alloc_info; - vkapi::IVkBuffer buffer; - void *mapped{nullptr}; + // VkBuffer handle (reference counted) + vkapi::IVkBuffer buffer{nullptr}; + // Buffer Device Address VkDeviceAddress addr{0}; + // If mapped, the currently mapped address + void *mapped{nullptr}; + // Is the allocation external (imported) or not (VMA) + bool external{false}; }; - unordered_map allocations_; - - uint32_t alloc_cnt_ = 0; - // Images / Image views struct ImageAllocInternal { bool external{false}; VmaAllocationInfo alloc_info; - vkapi::IVkImage image; - vkapi::IVkImageView view; + vkapi::IVkImage image{nullptr}; + vkapi::IVkImageView view{nullptr}; std::vector view_lods; }; - unordered_map image_allocations_; + // Since we use the pointer to AllocationInternal as the `alloc_id`, + // **pointer stability** is important. + rhi_impl::SyncedPtrStableObjectList allocations_; + rhi_impl::SyncedPtrStableObjectList image_allocations_; // Renderpass unordered_map(alloc.alloc_id); + } + + inline static ImageAllocInternal &get_image_alloc_internal( + const DeviceAllocation &alloc) { + return *reinterpret_cast(alloc.alloc_id); + } RhiResult map_internal(AllocationInternal &alloc_int, size_t offset, diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 63b745bf6bbce..8f1c036570994 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -563,6 +563,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME) { enabled_extensions.push_back(ext.extensionName); + } else if (name == VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME) { + enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME && params_.enable_validation_layer) { // VK_KHR_shader_non_semantic_info isn't supported on molten-vk. @@ -664,6 +666,9 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { buffer_device_address_feature{}; buffer_device_address_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR; + VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamic_rendering_feature{}; + dynamic_rendering_feature.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR; if (ti_device_->vk_caps().physical_device_features2) { VkPhysicalDeviceFeatures2KHR features2{}; @@ -772,6 +777,23 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { pNextEnd = &buffer_device_address_feature.pNext; } + // Dynamic rendering + // TODO: Figure out how to integrate this correctly with ImGui, + // and then figure out the layout & barrier stuff + /* + if (CHECK_EXTENSION(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME)) { + features2.pNext = &dynamic_rendering_feature; + vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); + + if (dynamic_rendering_feature.dynamicRendering) { + ti_device_->vk_caps().dynamic_rendering = true; + } + + *pNextEnd = &dynamic_rendering_feature; + pNextEnd = &dynamic_rendering_feature.pNext; + } + */ + // TODO: add atomic min/max feature } diff --git a/taichi/runtime/gfx/runtime.cpp b/taichi/runtime/gfx/runtime.cpp index f306af7a4d39c..7d629570cb307 100644 --- a/taichi/runtime/gfx/runtime.cpp +++ b/taichi/runtime/gfx/runtime.cpp @@ -434,26 +434,21 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { ti_kernel->ti_kernel_attribs().ctx_attribs.arr_access.at(i)); // Alloc ext arr - if (ext_array_size[i]) { - bool host_write = - access & uint32_t(irpass::ExternalPtrAccess::READ); + size_t alloc_size = std::max(size_t(32), ext_array_size.at(i)); + bool host_write = access & uint32_t(irpass::ExternalPtrAccess::READ); + auto allocated = device_->allocate_memory_unique( + {alloc_size, host_write, false, /*export_sharing=*/false, + AllocUsage::Storage}); + any_arrays[i] = *allocated.get(); + allocated_buffers.push_back(std::move(allocated)); + + bool host_read = access & uint32_t(irpass::ExternalPtrAccess::WRITE); + if (host_read) { auto allocated = device_->allocate_memory_unique( - {ext_array_size[i], host_write, false, - /*export_sharing=*/false, AllocUsage::Storage}); - any_arrays[i] = *allocated.get(); + {alloc_size, false, true, /*export_sharing=*/false, + AllocUsage::None}); + any_array_shadows[i] = *allocated.get(); allocated_buffers.push_back(std::move(allocated)); - - bool host_read = - access & uint32_t(irpass::ExternalPtrAccess::WRITE); - if (host_read) { - auto allocated = device_->allocate_memory_unique( - {ext_array_size[i], false, true, - /*export_sharing=*/false, AllocUsage::None}); - any_array_shadows[i] = *allocated.get(); - allocated_buffers.push_back(std::move(allocated)); - } - } else { - any_arrays[i] = kDeviceNullAllocation; } } } diff --git a/taichi/ui/backends/vulkan/swap_chain.cpp b/taichi/ui/backends/vulkan/swap_chain.cpp index 0d93c5fde433c..579e2ce991592 100644 --- a/taichi/ui/backends/vulkan/swap_chain.cpp +++ b/taichi/ui/backends/vulkan/swap_chain.cpp @@ -32,6 +32,7 @@ void SwapChain::create_depth_resources() { params.x = curr_width_; params.y = curr_height_; params.export_sharing = false; + params.usage = ImageAllocUsage::Attachment | ImageAllocUsage::Sampled; depth_allocation_ = app_context_->device().create_image(params); }