diff --git a/taichi/backends/device.cpp b/taichi/backends/device.cpp index 0c36ed4cd06cb..76de52e8a1792 100644 --- a/taichi/backends/device.cpp +++ b/taichi/backends/device.cpp @@ -25,25 +25,28 @@ void Device::memcpy(DevicePtr dst, DevicePtr src, uint64_t size) { void GraphicsDevice::image_transition(DeviceAllocation img, ImageLayout old_layout, ImageLayout new_layout) { - auto cmd_list = new_command_list({CommandListType::Graphics}); + Stream *stream = get_graphics_stream(); + auto cmd_list = stream->new_command_list(); cmd_list->image_transition(img, old_layout, new_layout); - submit_synced(cmd_list.get()); + stream->submit_synced(cmd_list.get()); } void GraphicsDevice::buffer_to_image(DeviceAllocation dst_img, DevicePtr src_buf, ImageLayout img_layout, const BufferImageCopyParams ¶ms) { - auto cmd_list = new_command_list({CommandListType::Graphics}); + Stream *stream = get_graphics_stream(); + auto cmd_list = stream->new_command_list(); cmd_list->buffer_to_image(dst_img, src_buf, img_layout, params); - submit_synced(cmd_list.get()); + stream->submit_synced(cmd_list.get()); } void GraphicsDevice::image_to_buffer(DevicePtr dst_buf, DeviceAllocation src_img, ImageLayout img_layout, const BufferImageCopyParams ¶ms) { - auto cmd_list = new_command_list({CommandListType::Graphics}); + Stream *stream = get_graphics_stream(); + auto cmd_list = stream->new_command_list(); cmd_list->image_to_buffer(dst_buf, src_img, img_layout, params); - submit_synced(cmd_list.get()); + stream->submit_synced(cmd_list.get()); } } // namespace lang diff --git a/taichi/backends/device.h b/taichi/backends/device.h index 3aafdea6ce008..1000691fef04d 100644 --- a/taichi/backends/device.h +++ b/taichi/backends/device.h @@ -203,12 +203,6 @@ class Pipeline { virtual ResourceBinder *resource_binder() = 0; }; -enum class CommandListType { Graphics, Compute }; - -struct CommandListConfig { - CommandListType type; -}; - enum class ImageDimension { d1D, d2D, d3D }; enum class ImageLayout { @@ -327,6 
+321,18 @@ inline bool operator&(AllocUsage a, AllocUsage b) { return static_cast(a) & static_cast(b); } +class Stream { + public: + virtual ~Stream(){}; + + virtual std::unique_ptr new_command_list() = 0; + virtual void dealloc_command_list(CommandList *cmdlist) = 0; + virtual void submit(CommandList *cmdlist) = 0; + virtual void submit_synced(CommandList *cmdlist) = 0; + + virtual void command_sync() = 0; +}; + class Device { public: virtual ~Device(){}; @@ -378,14 +384,8 @@ class Device { // Copy memory inter or intra devices (synced) static void memcpy(DevicePtr dst, DevicePtr src, uint64_t size); - // TODO: Add a flag to select graphics / compute pool - virtual std::unique_ptr new_command_list( - CommandListConfig config) = 0; - virtual void dealloc_command_list(CommandList *cmdlist) = 0; - virtual void submit(CommandList *cmdlist) = 0; - virtual void submit_synced(CommandList *cmdlist) = 0; - - virtual void command_sync() = 0; + // Each thread will acquire its own stream + virtual Stream *get_compute_stream() = 0; private: std::unordered_map caps_; @@ -448,6 +448,8 @@ class GraphicsDevice : public Device { const std::vector &vertex_attrs, std::string name = "Pipeline") = 0; + virtual Stream *get_graphics_stream() = 0; + virtual std::unique_ptr create_surface( const SurfaceConfig &config) = 0; virtual DeviceAllocation create_image(const ImageParams &params) = 0; diff --git a/taichi/backends/vulkan/runtime.cpp b/taichi/backends/vulkan/runtime.cpp index c4dda910a7fb4..dcc395d2e1b52 100644 --- a/taichi/backends/vulkan/runtime.cpp +++ b/taichi/backends/vulkan/runtime.cpp @@ -133,6 +133,22 @@ class HostDeviceContextBlitter { return; } + bool require_sync = ctx_attribs_->rets().size() > 0; + if (!require_sync) { + for (int i = 0; i < ctx_attribs_->args().size(); ++i) { + const auto &arg = ctx_attribs_->args()[i]; + if (arg.is_array) { + require_sync = true; + } + } + } + + if (require_sync) { + device_->get_compute_stream()->command_sync(); + } else { + return; + } + 
char *const device_base = reinterpret_cast(device_->map(*host_shadow_buffer_)); @@ -231,8 +247,9 @@ class CompiledTaichiKernel { }; CompiledTaichiKernel(const Params &ti_params) - : ti_kernel_attribs_(*ti_params.ti_kernel_attribs) { - InputBuffersMap input_buffers = { + : ti_kernel_attribs_(*ti_params.ti_kernel_attribs), + device_(ti_params.device) { + input_buffers_ = { {BufferEnum::Root, ti_params.root_buffer}, {BufferEnum::GlobalTmps, ti_params.global_tmps_buffer}, }; @@ -241,44 +258,27 @@ class CompiledTaichiKernel { Device::AllocParams params; ctx_buffer_ = ti_params.device->allocate_memory_unique( {size_t(ctx_sz), - /*host_write*/ true, /*host_read*/ false}); + /*host_write=*/true, /*host_read=*/false, + /*export_sharing=*/false, AllocUsage::Storage}); ctx_buffer_host_ = ti_params.device->allocate_memory_unique( {size_t(ctx_sz), - /*host_write*/ false, /*host_read*/ true}); - input_buffers[BufferEnum::Context] = ctx_buffer_.get(); + /*host_write=*/false, /*host_read=*/true, + /*export_sharing=*/false, AllocUsage::Storage}); + input_buffers_[BufferEnum::Context] = ctx_buffer_.get(); } const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; const auto &spirv_bins = ti_params.spirv_bins; TI_ASSERT(task_attribs.size() == spirv_bins.size()); - cmdlist_ = ti_params.device->new_command_list({CommandListType::Compute}); for (int i = 0; i < task_attribs.size(); ++i) { - const auto &attribs = task_attribs[i]; PipelineSourceDesc source_desc{PipelineSourceType::spirv_binary, (void *)spirv_bins[i].data(), spirv_bins[i].size() * sizeof(uint32_t)}; auto vp = ti_params.device->create_pipeline(source_desc, ti_kernel_attribs_.name); - const int group_x = (attribs.advisory_total_num_threads + - attribs.advisory_num_threads_per_group - 1) / - attribs.advisory_num_threads_per_group; - ResourceBinder *binder = vp->resource_binder(); - for (auto &pair : input_buffers) { - binder->rw_buffer(0, uint32_t(pair.first), *pair.second); - } - cmdlist_->bind_pipeline(vp.get()); - 
cmdlist_->bind_resources(binder); - cmdlist_->dispatch(group_x); - cmdlist_->memory_barrier(); pipelines_.push_back(std::move(vp)); } - - if (!ti_kernel_attribs_.ctx_attribs.empty()) { - cmdlist_->buffer_copy(ctx_buffer_host_->get_ptr(0), - ctx_buffer_->get_ptr(0), ctx_sz); - cmdlist_->buffer_barrier(*ctx_buffer_host_); - } } const TaichiKernelAttributes &ti_kernel_attribs() const { @@ -297,12 +297,40 @@ class CompiledTaichiKernel { return ctx_buffer_host_.get(); } - CommandList *command_list() const { - return cmdlist_.get(); + void command_list(CommandList *cmdlist) const { + const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; + + for (int i = 0; i < task_attribs.size(); ++i) { + const auto &attribs = task_attribs[i]; + auto vp = pipelines_[i].get(); + const int group_x = (attribs.advisory_total_num_threads + + attribs.advisory_num_threads_per_group - 1) / + attribs.advisory_num_threads_per_group; + ResourceBinder *binder = vp->resource_binder(); + for (auto &pair : input_buffers_) { + binder->rw_buffer(0, uint32_t(pair.first), *pair.second); + } + cmdlist->bind_pipeline(vp); + cmdlist->bind_resources(binder); + cmdlist->dispatch(group_x); + cmdlist->memory_barrier(); + } + + const auto ctx_sz = ti_kernel_attribs_.ctx_attribs.total_bytes(); + if (!ti_kernel_attribs_.ctx_attribs.empty()) { + cmdlist->buffer_copy(ctx_buffer_host_->get_ptr(0), + ctx_buffer_->get_ptr(0), ctx_sz); + cmdlist->buffer_barrier(*ctx_buffer_host_); + } } private: TaichiKernelAttributes ti_kernel_attribs_; + std::vector tasks_attribs_; + + Device *device_; + + InputBuffersMap input_buffers_; // Right now |ctx_buffer_| is allocated from a HOST_VISIBLE|COHERENT // memory, because we do not do computation on this buffer anyway, and it may @@ -312,8 +340,6 @@ class CompiledTaichiKernel { std::unique_ptr ctx_buffer_{nullptr}; std::unique_ptr ctx_buffer_host_{nullptr}; std::vector> pipelines_; - - std::unique_ptr cmdlist_; }; } // namespace @@ -376,15 +402,22 @@ class VkRuntime ::Impl { 
ctx_blitter->host_to_device(); } - device_->submit(ti_kernel->command_list()); + if (!current_cmdlist_) { + current_cmdlist_ = device_->get_compute_stream()->new_command_list(); + } + + ti_kernel->command_list(current_cmdlist_.get()); + if (ctx_blitter) { - synchronize(); + device_->get_compute_stream()->submit(current_cmdlist_.get()); ctx_blitter->device_to_host(); + + current_cmdlist_ = nullptr; } } void synchronize() { - device_->command_sync(); + device_->get_compute_stream()->command_sync(); } Device *get_ti_device() const { @@ -397,16 +430,23 @@ class VkRuntime ::Impl { size_t root_buffer_size = 64 * 1024 * 1024; size_t gtmp_buffer_size = 1024 * 1024; - root_buffer_ = device_->allocate_memory_unique({root_buffer_size}); - global_tmps_buffer_ = device_->allocate_memory_unique({gtmp_buffer_size}); + root_buffer_ = device_->allocate_memory_unique( + {root_buffer_size, + /*host_write=*/false, /*host_read=*/false, + /*export_sharing=*/false, AllocUsage::Storage}); + global_tmps_buffer_ = device_->allocate_memory_unique( + {gtmp_buffer_size, + /*host_write=*/false, /*host_read=*/false, + /*export_sharing=*/false, AllocUsage::Storage}); // Need to zero fill the buffers, otherwise there could be NaN. 
- auto cmdlist = device_->new_command_list({CommandListType::Compute}); + Stream *stream = device_->get_compute_stream(); + auto cmdlist = stream->new_command_list(); cmdlist->buffer_fill(root_buffer_->get_ptr(0), root_buffer_size, /*data=*/0); cmdlist->buffer_fill(global_tmps_buffer_->get_ptr(0), gtmp_buffer_size, /*data=*/0); - device_->submit_synced(cmdlist.get()); + stream->submit_synced(cmdlist.get()); } const SNodeDescriptorsMap *const snode_descriptors_; @@ -419,6 +459,8 @@ class VkRuntime ::Impl { Device *device_; + std::unique_ptr current_cmdlist_{nullptr}; + std::vector> ti_kernels_; }; diff --git a/taichi/backends/vulkan/spirv_ir_builder.cpp b/taichi/backends/vulkan/spirv_ir_builder.cpp index ce593f5b7704b..91d0c12e89c1e 100644 --- a/taichi/backends/vulkan/spirv_ir_builder.cpp +++ b/taichi/backends/vulkan/spirv_ir_builder.cpp @@ -314,6 +314,14 @@ SType IRBuilder::get_struct_array_type(const SType &value_type, TI_ERROR("buffer type must be primitive or snode struct"); } + if (nbytes == 0) { + if (value_type.flag == TypeKind::kPrimitive) { + TI_WARN("Invalid primitive bit size"); + } else { + TI_WARN("Invalid container stride"); + } + } + // decorate the array type this->decorate(spv::OpDecorate, arr_type, spv::DecorationArrayStride, nbytes); // declare struct of array diff --git a/taichi/backends/vulkan/vulkan_api.cpp b/taichi/backends/vulkan/vulkan_api.cpp index 70984a6830e27..c796b44edbbf0 100644 --- a/taichi/backends/vulkan/vulkan_api.cpp +++ b/taichi/backends/vulkan/vulkan_api.cpp @@ -184,8 +184,6 @@ EmbeddedVulkanDevice::EmbeddedVulkanDevice( } pick_physical_device(); create_logical_device(); - create_command_pool(); - create_debug_swapchain(); // TODO: Change the ownership hierarchy, the taichi Device class should be at // the top level @@ -195,11 +193,9 @@ EmbeddedVulkanDevice::EmbeddedVulkanDevice( params.physical_device = physical_device_; params.device = device_; params.compute_queue = compute_queue_; - params.compute_pool = command_pool_; 
params.compute_queue_family_index = queue_family_indices_.compute_family.value(); params.graphics_queue = graphics_queue_; - params.graphics_pool = command_pool_; // FIXME: This is potentially wrong params.graphics_queue_family_index = queue_family_indices_.graphics_family.value(); ti_device_->init_vulkan_structs(params); @@ -207,18 +203,6 @@ EmbeddedVulkanDevice::EmbeddedVulkanDevice( } EmbeddedVulkanDevice::~EmbeddedVulkanDevice() { -#ifdef TI_VULKAN_DEBUG - if (capability_.has_presentation) { - vkDestroySemaphore(device_, debug_struct_.image_available, - kNoVkAllocCallbacks); - vkDestroySwapchainKHR(device_, debug_struct_.swapchain, - kNoVkAllocCallbacks); - vkDestroySurfaceKHR(instance_, debug_struct_.surface, kNoVkAllocCallbacks); - glfwDestroyWindow(debug_struct_.window); - glfwTerminate(); - } -#endif - ti_device_.reset(); if (surface_ != VK_NULL_HANDLE) { vkDestroySurfaceKHR(instance_, surface_, kNoVkAllocCallbacks); @@ -227,7 +211,6 @@ EmbeddedVulkanDevice::~EmbeddedVulkanDevice() { destroy_debug_utils_messenger_ext(instance_, debug_messenger_, kNoVkAllocCallbacks); } - vkDestroyCommandPool(device_, command_pool_, kNoVkAllocCallbacks); vkDestroyDevice(device_, kNoVkAllocCallbacks); vkDestroyInstance(instance_, kNoVkAllocCallbacks); } @@ -282,15 +265,6 @@ void EmbeddedVulkanDevice::create_instance() { extensions.insert(std::string(ext)); } -#ifdef TI_VULKAN_DEBUG - glfwInit(); - uint32_t count; - const char **glfw_extensions = glfwGetRequiredInstanceExtensions(&count); - for (uint32_t i = 0; i < count; i++) { - extensions.insert(glfw_extensions[i]); - } -#endif - uint32_t num_instance_extensions; vkEnumerateInstanceExtensionProperties(nullptr, &num_instance_extensions, nullptr); @@ -580,118 +554,11 @@ void EmbeddedVulkanDevice::create_logical_device() { &graphics_queue_); vkGetDeviceQueue(device_, queue_family_indices_.graphics_family.value(), 0, &present_queue_); - } else { - vkGetDeviceQueue(device_, queue_family_indices_.compute_family.value(), 0, - 
&compute_queue_); - } -} // namespace vulkan - -void EmbeddedVulkanDevice::create_command_pool() { - VkCommandPoolCreateInfo pool_info{}; - pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - if (params_.is_for_ui) { - pool_info.queueFamilyIndex = queue_family_indices_.graphics_family.value(); - } else { - pool_info.queueFamilyIndex = queue_family_indices_.compute_family.value(); } - BAIL_ON_VK_BAD_RESULT( - vkCreateCommandPool(device_, &pool_info, kNoVkAllocCallbacks, - &command_pool_), - "failed to create command pool"); -} - -void EmbeddedVulkanDevice::create_debug_swapchain() { -#ifdef TI_VULKAN_DEBUG - TI_TRACE("Creating debug swapchian"); - if (capability_.has_presentation) { - glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); - debug_struct_.window = - glfwCreateWindow(640, 480, "Taichi Debug Swapchain", NULL, NULL); - VkResult err = glfwCreateWindowSurface(instance_, debug_struct_.window, - NULL, &debug_struct_.surface); - if (err) { - TI_ERROR("Failed to create debug window ({})", err); - return; - } - - auto choose_surface_format = - [](const std::vector &availableFormats) { - for (const auto &availableFormat : availableFormats) { - if (availableFormat.format == VK_FORMAT_B8G8R8A8_SRGB && - availableFormat.colorSpace == - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { - return availableFormat; - } - } - return availableFormats[0]; - }; - - VkSurfaceCapabilitiesKHR capabilities; - vkGetPhysicalDeviceSurfaceCapabilitiesKHR( - physical_device_, debug_struct_.surface, &capabilities); - - VkBool32 supported = false; - vkGetPhysicalDeviceSurfaceSupportKHR( - physical_device_, queue_family_indices_.compute_family.value(), - debug_struct_.surface, &supported); - - if (!supported) { - TI_ERROR("Selected queue does not support presenting", err); - return; - } - uint32_t formatCount; - vkGetPhysicalDeviceSurfaceFormatsKHR( - physical_device_, debug_struct_.surface, &formatCount, nullptr); - 
std::vector surface_formats(formatCount); - vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device_, - debug_struct_.surface, &formatCount, - surface_formats.data()); - - VkSurfaceFormatKHR surface_format = choose_surface_format(surface_formats); - - int width, height; - glfwGetFramebufferSize(debug_struct_.window, &width, &height); - - VkExtent2D extent = {uint32_t(width), uint32_t(height)}; - - VkSwapchainCreateInfoKHR createInfo; - createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - createInfo.pNext = nullptr; - createInfo.flags = 0; - createInfo.surface = debug_struct_.surface; - createInfo.minImageCount = capabilities.minImageCount; - createInfo.imageFormat = surface_format.format; - createInfo.imageColorSpace = surface_format.colorSpace; - createInfo.imageExtent = extent; - createInfo.imageArrayLayers = 1; - createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - createInfo.queueFamilyIndexCount = 0; - createInfo.pQueueFamilyIndices = nullptr; - createInfo.preTransform = capabilities.currentTransform; - createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - createInfo.presentMode = VK_PRESENT_MODE_MAILBOX_KHR; - createInfo.clipped = VK_TRUE; - createInfo.oldSwapchain = nullptr; - - if (vkCreateSwapchainKHR(device_, &createInfo, kNoVkAllocCallbacks, - &debug_struct_.swapchain) != VK_SUCCESS) { - TI_ERROR("Failed to create debug swapchain"); - return; - } - - VkSemaphoreCreateInfo sema_create_info; - sema_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - sema_create_info.pNext = nullptr; - sema_create_info.flags = 0; - vkCreateSemaphore(device_, &sema_create_info, kNoVkAllocCallbacks, - &debug_struct_.image_available); - TI_TRACE("Creating debug swapchian3"); - } -#endif -} + vkGetDeviceQueue(device_, queue_family_indices_.compute_family.value(), 0, + &compute_queue_); +} // namespace vulkan } // namespace vulkan } // namespace lang diff --git 
a/taichi/backends/vulkan/vulkan_api.h b/taichi/backends/vulkan/vulkan_api.h index 56dd3ba6958bb..4b70bac35f2f9 100644 --- a/taichi/backends/vulkan/vulkan_api.h +++ b/taichi/backends/vulkan/vulkan_api.h @@ -17,12 +17,6 @@ #include #include -// #define TI_VULKAN_DEBUG - -#ifdef TI_VULKAN_DEBUG -#include -#endif - namespace taichi { namespace lang { namespace vulkan { @@ -47,15 +41,6 @@ struct VulkanQueueFamilyIndices { } }; -#ifdef TI_VULKAN_DEBUG -struct VulkanDeviceDebugStruct { - GLFWwindow *window{nullptr}; - VkSurfaceKHR surface; - VkSwapchainKHR swapchain; - VkSemaphore image_available; -}; -#endif - /** * This class creates a VulkanDevice instance. The underlying Vk* resources are * embedded directly inside the class. @@ -112,12 +97,6 @@ class EmbeddedVulkanDevice { void create_surface(); void pick_physical_device(); void create_logical_device(); - void create_command_pool(); - void create_debug_swapchain(); - -#ifdef TI_VULKAN_DEBUG - VulkanDeviceDebugStruct debug_struct_; -#endif VkInstance instance_{VK_NULL_HANDLE}; VkDebugUtilsMessengerEXT debug_messenger_{VK_NULL_HANDLE}; diff --git a/taichi/backends/vulkan/vulkan_device.cpp b/taichi/backends/vulkan/vulkan_device.cpp index 4e7b7ecfb6197..ee98cb120f695 100644 --- a/taichi/backends/vulkan/vulkan_device.cpp +++ b/taichi/backends/vulkan/vulkan_device.cpp @@ -664,12 +664,12 @@ void VulkanResourceBinder::lock_layout() { } VulkanCommandList::VulkanCommandList(VulkanDevice *ti_device, - VkCommandBuffer buffer, - CommandListConfig config) + VulkanStream *stream, + VkCommandBuffer buffer) : ti_device_(ti_device), + stream_(stream), device_(ti_device->vk_device()), - buffer_(buffer), - config_(config) { + buffer_(buffer) { VkCommandBufferBeginInfo info{}; info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; info.pNext = nullptr; @@ -679,15 +679,11 @@ VulkanCommandList::VulkanCommandList(VulkanDevice *ti_device, vkBeginCommandBuffer(buffer, &info); } -const CommandListConfig &VulkanCommandList::config() const { 
- return config_; -} - VulkanCommandList::~VulkanCommandList() { for (auto pair : desc_sets_) { ti_device_->dealloc_desc_set(pair.first, pair.second); } - ti_device_->dealloc_command_list(this); + stream_->dealloc_command_list(this); } void VulkanCommandList::bind_pipeline(Pipeline *p) { @@ -1057,35 +1053,25 @@ void VulkanDevice::init_vulkan_structs(Params ¶ms) { device_ = params.device; physical_device_ = params.physical_device; compute_queue_ = params.compute_queue; - compute_pool_ = params.compute_pool; compute_queue_family_index_ = params.compute_queue_family_index; graphics_queue_ = params.graphics_queue; - graphics_pool_ = params.graphics_pool; graphics_queue_family_index_ = params.graphics_queue_family_index; create_vma_allocator(); - - VkFenceCreateInfo fence_info{VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, 0}; - BAIL_ON_VK_BAD_RESULT(vkCreateFence(device_, &fence_info, kNoVkAllocCallbacks, - &cmd_sync_fence_), - "failed to create fence"); } VulkanDevice::~VulkanDevice() { - command_sync(); + vkDeviceWaitIdle(device_); - TI_TRACE("Total #{} descriptor pools created", desc_set_pools_.size()); + TI_TRACE("Total #{} descriptor pools created", layout_to_pools_.size()); size_t desc_count = 0; - for (auto &pair : desc_set_pools_) { - vkResetDescriptorPool(device_, pair.second.pool, 0); - vkDestroyDescriptorPool(device_, pair.second.pool, kNoVkAllocCallbacks); - desc_count += pair.second.free_sets.size(); + for (auto &pair : layout_to_pools_) { + vkResetDescriptorPool(device_, pair.second->pool, 0); + vkDestroyDescriptorPool(device_, pair.second->pool, kNoVkAllocCallbacks); } - TI_TRACE("Total #{} descriptors allocated", desc_count); - for (auto &pair : desc_set_layouts_) { vkDestroyDescriptorSetLayout(device_, pair.second, kNoVkAllocCallbacks); } @@ -1100,7 +1086,6 @@ VulkanDevice::~VulkanDevice() { vmaDestroyPool(allocator_, export_pool_.pool); vmaDestroyAllocator(allocator_); - vkDestroyFence(device_, cmd_sync_fence_, kNoVkAllocCallbacks); } std::unique_ptr 
VulkanDevice::create_pipeline(PipelineSourceDesc &src, @@ -1165,10 +1150,9 @@ DeviceAllocation VulkanDevice::allocate_memory(const AllocParams ¶ms) { VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; #endif - buffer_info.pNext = &external_mem_buffer_create_info; - VmaAllocationCreateInfo alloc_info{}; if (params.export_sharing) { + buffer_info.pNext = &external_mem_buffer_create_info; alloc_info.pool = export_pool_.pool; } @@ -1264,62 +1248,65 @@ void VulkanDevice::memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) { // TODO: always create a queue specifically for transfer - CommandListConfig config; - if (compute_queue() != VK_NULL_HANDLE) { - config.type = CommandListType::Compute; - } else if (graphics_queue() != VK_NULL_HANDLE) { - config.type = CommandListType::Graphics; - } else { - TI_ERROR("cannot find a queue"); - } - std::unique_ptr cmd = new_command_list(config); + Stream *stream = get_compute_stream(); + std::unique_ptr cmd = stream->new_command_list(); cmd->buffer_copy(dst, src, size); - submit_synced(cmd.get()); + stream->submit_synced(cmd.get()); } -std::unique_ptr VulkanDevice::new_command_list( - CommandListConfig config) { - VkCommandBuffer buffer = VK_NULL_HANDLE; +Stream *VulkanDevice::get_compute_stream() { + auto tid = std::this_thread::get_id(); + auto iter = compute_stream_.find(tid); + if (iter == compute_stream_.end()) { + compute_stream_[tid] = std::make_unique( + *this, compute_queue_, compute_queue_family_index_); + return compute_stream_.at(tid).get(); + } else { + return iter->second.get(); + } +} - if (free_cmdbuffers_.size()) { - buffer = free_cmdbuffers_.back(); - free_cmdbuffers_.pop_back(); +Stream *VulkanDevice::get_graphics_stream() { + auto tid = std::this_thread::get_id(); + auto iter = graphics_stream_.find(tid); + if (iter == graphics_stream_.end()) { + graphics_stream_[tid] = std::make_unique( + *this, graphics_queue_, graphics_queue_family_index_); + return graphics_stream_.at(tid).get(); } else { + return 
iter->second.get(); + } +} + +std::unique_ptr VulkanStream::new_command_list() { + VkCommandBuffer buffer = cmdbuffer_pool_.gc_pop_one(VK_NULL_HANDLE); + + if (buffer == VK_NULL_HANDLE) { VkCommandBufferAllocateInfo alloc_info{}; alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - if (config.type == CommandListType::Compute) { - alloc_info.commandPool = compute_cmd_pool(); - } else if (config.type == CommandListType::Graphics) { - alloc_info.commandPool = graphics_cmd_pool(); - } else { - TI_ERROR("unrecognized cmd list type"); - } - + alloc_info.commandPool = command_pool_; alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; alloc_info.commandBufferCount = 1; + BAIL_ON_VK_BAD_RESULT( - vkAllocateCommandBuffers(device_, &alloc_info, &buffer), + vkAllocateCommandBuffers(device_.vk_device(), &alloc_info, &buffer), "failed to allocate command buffer"); } - return std::make_unique(this, buffer, config); + cmdbuffer_pool_.inc(buffer); + + return std::make_unique(&device_, this, buffer); } -void VulkanDevice::dealloc_command_list(CommandList *cmdlist) { +void VulkanStream::dealloc_command_list(CommandList *cmdlist) { VkCommandBuffer buffer = static_cast(cmdlist)->finalize(); - if (in_flight_cmdlists_.find(buffer) == in_flight_cmdlists_.end()) { - // Not in flight - free_cmdbuffers_.push_back(buffer); - } else { - // In flight - dealloc_cmdlists_.push_back(buffer); - } + cmdbuffer_pool_.dec(buffer); } -void VulkanDevice::submit(CommandList *cmdlist) { - VkCommandBuffer buffer = - static_cast(cmdlist)->finalize(); +void VulkanStream::submit(CommandList *cmdlist_) { + VulkanCommandList *cmdlist = static_cast(cmdlist_); + VkCommandBuffer buffer = cmdlist->finalize(); /* if (in_flight_cmdlists_.find(buffer) != in_flight_cmdlists_.end()) { @@ -1333,32 +1320,21 @@ void VulkanDevice::submit(CommandList *cmdlist) { submit_info.commandBufferCount = 1; submit_info.pCommandBuffers = &buffer; - // FIXME: Reuse fences as well? 
- VkFence fence; - VkFenceCreateInfo fence_info{VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, 0}; - BAIL_ON_VK_BAD_RESULT( - vkCreateFence(device_, &fence_info, kNoVkAllocCallbacks, &fence), - "failed to create fence"); - - in_flight_cmdlists_.insert({buffer, fence}); - - VkQueue queue; - const CommandListConfig &config = - static_cast(cmdlist)->config(); - if (config.type == CommandListType::Compute) { - queue = compute_queue(); - } else if (config.type == CommandListType::Graphics) { - queue = graphics_queue(); - } else { - TI_ERROR("unrecognized cmd list type"); + cmdbuffer_pool_.inc(buffer); + submitted_cmdbuffers_.push_back(buffer); + + for (auto pair : cmdlist->desc_sets()) { + auto pool = device_.get_pool_from_layout(pair.first); + pool->sets.inc(pair.second); + submitted_desc_sets_.push_back(std::make_pair(pool, pair.second)); } - BAIL_ON_VK_BAD_RESULT(vkQueueSubmit(queue, /*submitCount=*/1, &submit_info, - /*fence=*/fence), + BAIL_ON_VK_BAD_RESULT(vkQueueSubmit(queue_, /*submitCount=*/1, &submit_info, + /*fence=*/VK_NULL_HANDLE), "failed to submit command buffer"); } -void VulkanDevice::submit_synced(CommandList *cmdlist) { +void VulkanStream::submit_synced(CommandList *cmdlist) { VkCommandBuffer buffer = static_cast(cmdlist)->finalize(); @@ -1367,55 +1343,28 @@ void VulkanDevice::submit_synced(CommandList *cmdlist) { submit_info.commandBufferCount = 1; submit_info.pCommandBuffers = &buffer; - VkQueue queue; - const CommandListConfig &config = - static_cast(cmdlist)->config(); - if (config.type == CommandListType::Compute) { - queue = compute_queue(); - } else if (config.type == CommandListType::Graphics) { - queue = graphics_queue(); - } else { - TI_ERROR("unrecognized cmd list type"); - } - - BAIL_ON_VK_BAD_RESULT(vkQueueSubmit(queue, /*submitCount=*/1, &submit_info, + BAIL_ON_VK_BAD_RESULT(vkQueueSubmit(queue_, /*submitCount=*/1, &submit_info, /*fence=*/cmd_sync_fence_), "failed to submit command buffer"); // Timeout is in nanoseconds, 60s = 60,000ms = 
60,000,000ns - vkWaitForFences(device_, 1, &cmd_sync_fence_, true, (60 * 1000 * 1000)); - vkResetFences(device_, 1, &cmd_sync_fence_); -} - -void VulkanDevice::command_sync() { - if (!in_flight_cmdlists_.size()) { - return; - } - - std::vector fences; - fences.reserve(in_flight_cmdlists_.size()); - - for (auto &pair : in_flight_cmdlists_) { - fences.push_back(pair.second); - } - - vkWaitForFences(device_, fences.size(), fences.data(), true, + vkWaitForFences(device_.vk_device(), 1, &cmd_sync_fence_, true, (60 * 1000 * 1000)); + vkResetFences(device_.vk_device(), 1, &cmd_sync_fence_); +} - for (auto &pair : in_flight_cmdlists_) { - vkDestroyFence(device_, pair.second, kNoVkAllocCallbacks); - } - - in_flight_cmdlists_.clear(); - in_flight_desc_sets_.clear(); +void VulkanStream::command_sync() { + vkQueueWaitIdle(queue_); - for (auto buf : dealloc_cmdlists_) { - free_cmdbuffers_.push_back(buf); + for (VkCommandBuffer buf : submitted_cmdbuffers_) { + cmdbuffer_pool_.dec(buf); } + submitted_cmdbuffers_.clear(); - for (auto &pair : dealloc_desc_sets_) { - pair.first->free_sets.push_back(pair.second); + for (auto &pair : submitted_desc_sets_) { + pair.first->sets.dec(pair.second); } + submitted_desc_sets_.clear(); } std::unique_ptr VulkanDevice::create_raster_pipeline( @@ -1795,7 +1744,7 @@ VkDescriptorSetLayout VulkanDevice::get_desc_set_layout( } desc_set_layouts_[set] = layout; - desc_set_pools_[layout] = {pool, {}}; + layout_to_pools_[layout] = std::move(std::make_unique(pool)); TI_TRACE("New descriptor set layout {}", (void *)layout); @@ -1809,13 +1758,11 @@ VkDescriptorSet VulkanDevice::alloc_desc_set(VkDescriptorSetLayout layout) { // TODO: Currently we assume the calling code has called get_desc_set_layout // before allocating a desc set. 
Either we should guard against this or // maintain this assumption in other parts of the VulkanBackend - DescPool &desc_pool = desc_set_pools_.at(layout); + DescPool &desc_pool = *layout_to_pools_.at(layout); - if (desc_pool.free_sets.size()) { - VkDescriptorSet set = desc_pool.free_sets.back(); - desc_pool.free_sets.pop_back(); - return set; - } else { + VkDescriptorSet set = desc_pool.sets.gc_pop_one(VK_NULL_HANDLE); + + if (set == VK_NULL_HANDLE) { VkDescriptorSetAllocateInfo alloc_info{}; alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; alloc_info.pNext = nullptr; @@ -1823,24 +1770,23 @@ VkDescriptorSet VulkanDevice::alloc_desc_set(VkDescriptorSetLayout layout) { alloc_info.descriptorSetCount = 1; alloc_info.pSetLayouts = &layout; - VkDescriptorSet set; BAIL_ON_VK_BAD_RESULT(vkAllocateDescriptorSets(device_, &alloc_info, &set), "Alloc descriptor set from pool failed"); - - return set; } + + desc_pool.sets.inc(set); + + return set; } void VulkanDevice::dealloc_desc_set(VkDescriptorSetLayout layout, VkDescriptorSet set) { - DescPool *pool = &desc_set_pools_.at(layout); - if (in_flight_desc_sets_.find(set) == in_flight_desc_sets_.end()) { - // Not in-flight - pool->free_sets.push_back(set); - } else { - // Still in-flight - dealloc_desc_sets_.push_back(std::make_pair(pool, set)); - } + DescPool *pool = layout_to_pools_.at(layout).get(); + pool->sets.dec(set); +} + +DescPool *VulkanDevice::get_pool_from_layout(VkDescriptorSetLayout layout) { + return layout_to_pools_.at(layout).get(); } void VulkanDevice::create_vma_allocator() { @@ -2160,6 +2106,32 @@ void VulkanSurface::present_image() { vkQueuePresentKHR(device_->graphics_queue(), &presentInfo); } +VulkanStream::VulkanStream(VulkanDevice &device, + VkQueue queue, + uint32_t queue_family_index) + : device_(device), queue_(queue), queue_family_index_(queue_family_index) { + VkCommandPoolCreateInfo create_info{}; + create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + create_info.pNext 
= nullptr; + create_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + create_info.queueFamilyIndex = queue_family_index; + + BAIL_ON_VK_BAD_RESULT( + vkCreateCommandPool(device_.vk_device(), &create_info, + kNoVkAllocCallbacks, &command_pool_), + "Failed to create command pool"); + + VkFenceCreateInfo fence_info{VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, 0}; + BAIL_ON_VK_BAD_RESULT(vkCreateFence(device_.vk_device(), &fence_info, + kNoVkAllocCallbacks, &cmd_sync_fence_), + "failed to create fence"); +} + +VulkanStream::~VulkanStream() { + vkDestroyCommandPool(device_.vk_device(), command_pool_, kNoVkAllocCallbacks); + vkDestroyFence(device_.vk_device(), cmd_sync_fence_, kNoVkAllocCallbacks); +} + } // namespace vulkan } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/vulkan_device.h b/taichi/backends/vulkan/vulkan_device.h index 656406b3c0ec3..4cac92d6071c6 100644 --- a/taichi/backends/vulkan/vulkan_device.h +++ b/taichi/backends/vulkan/vulkan_device.h @@ -14,6 +14,7 @@ #include #include +#include namespace taichi { namespace lang { @@ -21,6 +22,7 @@ namespace vulkan { class VulkanDevice; class VulkanResourceBinder; +class VulkanStream; struct SpirvCodeView { const uint32_t *data = nullptr; @@ -258,8 +260,8 @@ class VulkanPipeline : public Pipeline { class VulkanCommandList : public CommandList { public: VulkanCommandList(VulkanDevice *ti_device, - VkCommandBuffer buffer, - CommandListConfig config); + VulkanStream *stream, + VkCommandBuffer buffer); ~VulkanCommandList(); void bind_pipeline(Pipeline *p) override; @@ -302,15 +304,17 @@ class VulkanCommandList : public CommandList { // Vulkan specific functions VkCommandBuffer finalize(); - const CommandListConfig &config() const; VkCommandBuffer vk_command_buffer(); - private: - CommandListConfig config_; + std::vector> &desc_sets() { + return desc_sets_; + } + private: bool finalized_{false}; VulkanDevice *ti_device_; + VulkanStream *stream_; VkDevice device_; 
VkCommandBuffer buffer_; VulkanPipeline *current_pipeline_{nullptr}; @@ -367,6 +371,42 @@ struct VulkanMemoryPool { #endif }; +struct DescPool { + VkDescriptorPool pool; + // Threads share descriptor sets + RefCountedPool sets; + + DescPool(VkDescriptorPool pool) : pool(pool) { + } +}; + +class VulkanStream : public Stream { + public: + VulkanStream(VulkanDevice &device, + VkQueue queue, + uint32_t queue_family_index); + ~VulkanStream(); + + std::unique_ptr new_command_list() override; + void dealloc_command_list(CommandList *cmdlist) override; + void submit(CommandList *cmdlist) override; + void submit_synced(CommandList *cmdlist) override; + + void command_sync() override; + + private: + VkFence cmd_sync_fence_; + VulkanDevice &device_; + VkQueue queue_; + VkCommandPool command_pool_; + uint32_t queue_family_index_; + + // Command pools are per-thread + RefCountedPool cmdbuffer_pool_; + std::vector submitted_cmdbuffers_; + std::vector> submitted_desc_sets_; +}; + class VulkanDevice : public GraphicsDevice { public: struct Params { @@ -374,10 +414,8 @@ class VulkanDevice : public GraphicsDevice { VkPhysicalDevice physical_device; VkDevice device; VkQueue compute_queue; - VkCommandPool compute_pool; uint32_t compute_queue_family_index; VkQueue graphics_queue; - VkCommandPool graphics_pool; uint32_t graphics_queue_family_index; }; @@ -401,13 +439,8 @@ class VulkanDevice : public GraphicsDevice { // Strictly intra device copy void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override; - std::unique_ptr new_command_list( - CommandListConfig config) override; - void dealloc_command_list(CommandList *cmdlist) override; - void submit(CommandList *cmdlist) override; - void submit_synced(CommandList *cmdlist) override; - - void command_sync() override; + Stream *get_compute_stream() override; + Stream *get_graphics_stream() override; std::unique_ptr create_raster_pipeline( const std::vector &src, @@ -450,14 +483,6 @@ class VulkanDevice : public 
GraphicsDevice { return compute_queue_; } - VkCommandPool graphics_cmd_pool() const { - return graphics_pool_; - } - - VkCommandPool compute_cmd_pool() const { - return compute_pool_; - } - std::tuple get_vkmemory_offset_size( const DeviceAllocation &alloc) const; @@ -478,6 +503,7 @@ class VulkanDevice : public GraphicsDevice { VkDescriptorSetLayout get_desc_set_layout(VulkanResourceBinder::Set &set); VkDescriptorSet alloc_desc_set(VkDescriptorSetLayout layout); void dealloc_desc_set(VkDescriptorSetLayout layout, VkDescriptorSet set); + DescPool *get_pool_from_layout(VkDescriptorSetLayout layout); static constexpr size_t kMemoryBlockSize = 128ull * 1024 * 1024; @@ -491,13 +517,16 @@ class VulkanDevice : public GraphicsDevice { VulkanMemoryPool export_pool_; VkQueue compute_queue_; - VkCommandPool compute_pool_; uint32_t compute_queue_family_index_; VkQueue graphics_queue_; - VkCommandPool graphics_pool_; uint32_t graphics_queue_family_index_; + std::unordered_map> + compute_stream_; + std::unordered_map> + graphics_stream_; + // Memory allocation struct AllocationInternal { VmaAllocation allocation; @@ -522,13 +551,6 @@ class VulkanDevice : public GraphicsDevice { std::unordered_map image_allocations_; - // Command buffer tracking & allocation - VkFence cmd_sync_fence_; - - std::unordered_multimap in_flight_cmdlists_; - std::vector dealloc_cmdlists_; - std::vector free_cmdbuffers_; - // Renderpass std::unordered_map renderpass_pools_; @@ -537,20 +559,12 @@ class VulkanDevice : public GraphicsDevice { framebuffer_pools_; // Descriptors / Layouts / Pools - struct DescPool { - VkDescriptorPool pool; - std::vector free_sets; - }; - std::unordered_map desc_set_layouts_; - - std::unordered_map desc_set_pools_; - - std::unordered_multimap in_flight_desc_sets_; - std::vector> dealloc_desc_sets_; + std::unordered_map> + layout_to_pools_; }; VkFormat buffer_format_ti_to_vk(BufferFormat f); diff --git a/taichi/common/ref_counted_pool.h b/taichi/common/ref_counted_pool.h new 
file mode 100644 index 0000000000000..d8c94ba61d722 --- /dev/null +++ b/taichi/common/ref_counted_pool.h @@ -0,0 +1,104 @@ +#pragma once + +#include +#include +#include + +#include "taichi/common/core.h" + +namespace taichi { + +class RefCount { + public: + void inc() { + ref_count++; + } + int dec() { + return --ref_count; + } + int count() { + return ref_count; + } + + private: + int ref_count{1}; +}; + +template +class RefCountedPool { + public: + void inc(T obj) { + if constexpr (sync) { + gc_pool_lock_.lock(); + } + + auto iter = counts_.find(obj); + + if (iter == counts_.end()) { + counts_[obj] = RefCount(); + } else { + iter->second.inc(); + } + + if constexpr (sync) { + gc_pool_lock_.unlock(); + } + } + + void dec(T obj) { + if constexpr (sync) { + gc_pool_lock_.lock(); + } + + auto iter = counts_.find(obj); + + if (iter == counts_.end()) { + TI_ERROR("Can not find counted reference"); + } else { + int c = iter->second.dec(); + if (c == 0) { + gc_pool_.push_back(iter->first); + counts_.erase(iter); + } + } + + if constexpr (sync) { + gc_pool_lock_.unlock(); + } + } + + T gc_pop_one(T null) { + if constexpr (sync) { + gc_pool_lock_.lock(); + } + + T obj = null; + + if (gc_pool_.size()) { + obj = gc_pool_.back(); + gc_pool_.pop_back(); + } + + if constexpr (sync) { + gc_pool_lock_.unlock(); + } + + return obj; + } + + void gc_remove_all(std::function deallocator) { + std::lock_guard lg(gc_pool_lock_); + + for (T obj : gc_pool_) { + deallocator(obj); + } + gc_pool_.clear(); + } + + private: + std::unordered_map counts_; + std::vector gc_pool_; + std::mutex gc_pool_lock_; +}; + +} // namespace taichi diff --git a/taichi/ui/backends/vulkan/gui.cpp b/taichi/ui/backends/vulkan/gui.cpp index b309dc2fbab8c..babad7b824e40 100644 --- a/taichi/ui/backends/vulkan/gui.cpp +++ b/taichi/ui/backends/vulkan/gui.cpp @@ -51,14 +51,14 @@ void Gui::init_render_resources(VkRenderPass render_pass) { // Upload Fonts { - std::unique_ptr cmd_list = - 
device.new_command_list({CommandListType::Graphics}); + auto stream = device.get_graphics_stream(); + std::unique_ptr cmd_list = stream->new_command_list(); VkCommandBuffer command_buffer = static_cast(cmd_list.get())->vk_command_buffer(); ImGui_ImplVulkan_CreateFontsTexture(command_buffer); - device.submit_synced(cmd_list.get()); + stream->submit_synced(cmd_list.get()); ImGui_ImplVulkan_DestroyFontUploadObjects(); } prepare_for_next_frame(); diff --git a/taichi/ui/backends/vulkan/renderable.cpp b/taichi/ui/backends/vulkan/renderable.cpp index 05e4fedf7eff9..1c1113169ff05 100644 --- a/taichi/ui/backends/vulkan/renderable.cpp +++ b/taichi/ui/backends/vulkan/renderable.cpp @@ -157,15 +157,15 @@ void Renderable::update_data(const RenderableInfo &info) { } app_context_->device().unmap(staging_index_buffer_); } - auto cmd_list = - app_context_->device().new_command_list({CommandListType::Graphics}); + auto stream = app_context_->device().get_graphics_stream(); + auto cmd_list = stream->new_command_list(); cmd_list->buffer_copy(vertex_buffer_.get_ptr(0), staging_vertex_buffer_.get_ptr(0), config_.vertices_count * sizeof(Vertex)); cmd_list->buffer_copy(index_buffer_.get_ptr(0), staging_index_buffer_.get_ptr(0), config_.indices_count * sizeof(int)); - app_context_->device().submit_synced(cmd_list.get()); + stream->submit_synced(cmd_list.get()); } else { throw std::runtime_error("unsupported field source"); } diff --git a/taichi/ui/backends/vulkan/renderables/set_image.cpp b/taichi/ui/backends/vulkan/renderables/set_image.cpp index 5c1d757b00be8..cdbc15aa70881 100644 --- a/taichi/ui/backends/vulkan/renderables/set_image.cpp +++ b/taichi/ui/backends/vulkan/renderables/set_image.cpp @@ -59,15 +59,14 @@ void SetImage::update_data(const SetImageInfo &info) { throw std::runtime_error("for set image, dtype must be u8 or f32"); } - auto cmd_list = - app_context_->device().new_command_list({CommandListType::Graphics}); + auto stream = app_context_->device().get_graphics_stream(); + 
auto cmd_list = stream->new_command_list(); cmd_list->buffer_to_image(texture_, gpu_staging_buffer_.get_ptr(0), ImageLayout::transfer_dst, copy_params); cmd_list->image_transition(texture_, ImageLayout::transfer_dst, ImageLayout::shader_read); - app_context_->device().submit_synced(cmd_list.get()); - + stream->submit_synced(cmd_list.get()); } else if (img.field_source == FieldSource::TaichiX64) { unsigned char *mapped = (unsigned char *)app_context_->device().map(cpu_staging_buffer_); @@ -85,15 +84,14 @@ void SetImage::update_data(const SetImageInfo &info) { app_context_->device().unmap(cpu_staging_buffer_); - auto cmd_list = - app_context_->device().new_command_list({CommandListType::Graphics}); + auto stream = app_context_->device().get_graphics_stream(); + auto cmd_list = stream->new_command_list(); cmd_list->buffer_to_image(texture_, cpu_staging_buffer_.get_ptr(0), ImageLayout::transfer_dst, copy_params); cmd_list->image_transition(texture_, ImageLayout::transfer_dst, ImageLayout::shader_read); - app_context_->device().submit_synced(cmd_list.get()); - + stream->submit_synced(cmd_list.get()); } else { throw std::runtime_error("unsupported field source"); }