diff --git a/taichi/rhi/cpu/cpu_device.cpp b/taichi/rhi/cpu/cpu_device.cpp index 4c6fb6778e762..7835723261335 100644 --- a/taichi/rhi/cpu/cpu_device.cpp +++ b/taichi/rhi/cpu/cpu_device.cpp @@ -17,16 +17,19 @@ CpuDevice::CpuDevice() { RhiResult CpuDevice::allocate_memory(const AllocParams ¶ms, DeviceAllocation *out_devalloc) { AllocInfo info; - - info.ptr = HostMemoryPool::get_instance().allocate( - params.size, HostMemoryPool::page_size, true /*exclusive*/); info.size = params.size; info.use_cached = false; - if (info.ptr == nullptr) { - return RhiResult::out_of_memory; - } + if (info.size == 0) { + info.ptr = nullptr; + } else { + info.ptr = HostMemoryPool::get_instance().allocate( + params.size, HostMemoryPool::page_size, true /*exclusive*/); + if (info.ptr == nullptr) { + return RhiResult::out_of_memory; + } + } *out_devalloc = DeviceAllocation{}; out_devalloc->alloc_id = allocations_.size(); out_devalloc->device = this; @@ -48,6 +51,9 @@ DeviceAllocation CpuDevice::allocate_memory_runtime( void CpuDevice::dealloc_memory(DeviceAllocation handle) { validate_device_alloc(handle); AllocInfo &info = allocations_[handle.alloc_id]; + if (info.size == 0) { + return; + } if (info.ptr == nullptr) { TI_ERROR("the DeviceAllocation is already deallocated"); } diff --git a/taichi/rhi/cuda/cuda_device.cpp b/taichi/rhi/cuda/cuda_device.cpp index 36068805ba67a..dc8716be77e80 100644 --- a/taichi/rhi/cuda/cuda_device.cpp +++ b/taichi/rhi/cuda/cuda_device.cpp @@ -47,7 +47,9 @@ DeviceAllocation CudaDevice::allocate_memory_runtime( const LlvmRuntimeAllocParams ¶ms) { AllocInfo info; info.size = taichi::iroundup(params.size, taichi_page_size); - if (params.use_cached) { + if (info.size == 0) { + info.ptr = nullptr; + } else if (params.use_cached) { info.ptr = DeviceMemoryPool::get_instance().allocate_with_cache(this, params); @@ -76,6 +78,9 @@ void CudaDevice::dealloc_memory(DeviceAllocation handle) { validate_device_alloc(handle); AllocInfo &info = allocations_[handle.alloc_id]; + if (info.size == 0) { + return; + } if (info.ptr == nullptr) { TI_ERROR("the DeviceAllocation is already deallocated"); } diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp index 0de7742e06115..cfced95954c1c 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.cpp +++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp @@ -397,9 +397,8 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes( TI_TRACE("Allocating data structure of size {} bytes", root_size); std::size_t rounded_size = taichi::iroundup(root_size, taichi_page_size); - Ptr root_buffer = snode_tree_buffer_manager_->allocate( - runtime_jit, llvm_runtime_, rounded_size, taichi_page_size, tree_id, - result_buffer); + Ptr root_buffer = snode_tree_buffer_manager_->allocate(rounded_size, tree_id, + result_buffer); if (config_.arch == Arch::cuda) { #if defined(TI_WITH_CUDA) CUDADriver::get_instance().memset(root_buffer, 0, rounded_size); diff --git a/taichi/runtime/llvm/snode_tree_buffer_manager.cpp b/taichi/runtime/llvm/snode_tree_buffer_manager.cpp index 8498ddb736892..939fb906ce45f 100644 --- a/taichi/runtime/llvm/snode_tree_buffer_manager.cpp +++ b/taichi/runtime/llvm/snode_tree_buffer_manager.cpp @@ -9,74 +9,18 @@ SNodeTreeBufferManager::SNodeTreeBufferManager( TI_TRACE("SNode tree buffer manager created."); } -void SNodeTreeBufferManager::merge_and_insert(Ptr ptr, std::size_t size) { - // merge with right block - if (ptr_map_[ptr + size]) { - std::size_t tmp = ptr_map_[ptr + size]; - size_set_.erase(std::make_pair(tmp, ptr + size)); - ptr_map_.erase(ptr + size); - size += tmp; - } - // merge with left block - auto map_it = ptr_map_.lower_bound(ptr); - if (map_it != ptr_map_.begin()) { - auto x = *--map_it; - if (x.first + x.second == ptr) { - size_set_.erase(std::make_pair(x.second, x.first)); - ptr_map_.erase(x.first); - ptr = x.first; - size += x.second; - } - } - size_set_.insert(std::make_pair(size, ptr)); - ptr_map_[ptr] = size; -} - -Ptr SNodeTreeBufferManager::allocate(JITModule *runtime_jit, - void *runtime, - std::size_t size, - std::size_t alignment, +Ptr SNodeTreeBufferManager::allocate(std::size_t size, const int snode_tree_id, uint64 *result_buffer) { - TI_TRACE("allocating memory for SNode Tree {}", snode_tree_id); - TI_ASSERT_INFO(snode_tree_id < kMaxNumSnodeTreesLlvm, - "LLVM backend supports up to {} snode trees", - kMaxNumSnodeTreesLlvm); - auto set_it = size_set_.lower_bound(std::make_pair(size, nullptr)); - if (set_it == size_set_.end()) { - runtime_jit->call( - "runtime_memory_allocate_aligned", runtime, size, alignment, - result_buffer); - auto ptr = runtime_exec_->fetch_result(0, result_buffer); - roots_[snode_tree_id] = ptr; - sizes_[snode_tree_id] = size; - return ptr; - } else { - auto x = *set_it; - size_set_.erase(x); - ptr_map_.erase(x.second); - if (x.first - size > 0) { - size_set_.insert(std::make_pair(x.first - size, x.second + size)); - ptr_map_[x.second + size] = x.first - size; - } - TI_ASSERT(x.second); - roots_[snode_tree_id] = x.second; - sizes_[snode_tree_id] = size; - return x.second; - } + auto devalloc = runtime_exec_->allocate_memory_ndarray(size, result_buffer); + snode_tree_id_to_device_alloc_[snode_tree_id] = devalloc; + return (Ptr)runtime_exec_->get_ndarray_alloc_info_ptr(devalloc); } void SNodeTreeBufferManager::destroy(SNodeTree *snode_tree) { - int snode_tree_id = snode_tree->id(); - TI_TRACE("Destroying SNode tree {}.", snode_tree_id); - std::size_t size = sizes_[snode_tree_id]; - if (size == 0) { - TI_DEBUG("SNode tree {} destroy failed.", snode_tree_id); - return; - } - Ptr ptr = roots_[snode_tree_id]; - merge_and_insert(ptr, size); - TI_DEBUG("SNode tree {} destroyed.", snode_tree_id); + auto devalloc = snode_tree_id_to_device_alloc_[snode_tree->id()]; + runtime_exec_->deallocate_memory_ndarray(devalloc); + snode_tree_id_to_device_alloc_.erase(snode_tree->id()); } } // namespace taichi::lang diff --git a/taichi/runtime/llvm/snode_tree_buffer_manager.h b/taichi/runtime/llvm/snode_tree_buffer_manager.h index fd116ad84d022..1eb4096356899 100644 --- a/taichi/runtime/llvm/snode_tree_buffer_manager.h +++ b/taichi/runtime/llvm/snode_tree_buffer_manager.h @@ -1,6 +1,7 @@ #pragma once #include "taichi/inc/constants.h" #include "taichi/struct/snode_tree.h" +#include "taichi/rhi/public_device.h" #define TI_RUNTIME_HOST #include @@ -16,23 +17,15 @@ class SNodeTreeBufferManager { public: explicit SNodeTreeBufferManager(LlvmRuntimeExecutor *runtime_exec); - void merge_and_insert(Ptr ptr, std::size_t size); - - Ptr allocate(JITModule *runtime_jit, - void *runtime, - std::size_t size, - std::size_t alignment, + Ptr allocate(std::size_t size, const int snode_tree_id, uint64 *result_buffer); void destroy(SNodeTree *snode_tree); private: - std::set> size_set_; - std::map ptr_map_; LlvmRuntimeExecutor *runtime_exec_; - Ptr roots_[kMaxNumSnodeTreesLlvm]; - std::size_t sizes_[kMaxNumSnodeTreesLlvm]; + std::map snode_tree_id_to_device_alloc_; }; } // namespace taichi::lang