From 524b6fbd26fe2a0b2d3ce5ff375a326615775db7 Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Tue, 21 Mar 2023 18:03:40 +0800 Subject: [PATCH] [lang] Removed cpu_device(), cuda_device(), and amdgpu_device() from LlvmRuntimeExecutor (#7544) Issue: #7300 ### Brief Summary --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- taichi/rhi/amdgpu/amdgpu_device.h | 2 +- taichi/rhi/cpu/cpu_device.h | 2 +- taichi/rhi/cuda/cuda_device.h | 2 +- taichi/rhi/llvm/llvm_device.h | 11 +++ taichi/runtime/llvm/llvm_runtime_executor.cpp | 83 ++++++------------- taichi/runtime/llvm/llvm_runtime_executor.h | 4 - .../runtime/program_impls/llvm/llvm_program.h | 8 -- 7 files changed, 41 insertions(+), 71 deletions(-) diff --git a/taichi/rhi/amdgpu/amdgpu_device.h b/taichi/rhi/amdgpu/amdgpu_device.h index ef2f257bb1225..0c540d1d700d2 100644 --- a/taichi/rhi/amdgpu/amdgpu_device.h +++ b/taichi/rhi/amdgpu/amdgpu_device.h @@ -96,7 +96,7 @@ class AmdgpuDevice : public LlvmDevice { void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override; - DeviceAllocation import_memory(void *ptr, size_t size); + DeviceAllocation import_memory(void *ptr, size_t size) override; Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED}; diff --git a/taichi/rhi/cpu/cpu_device.h b/taichi/rhi/cpu/cpu_device.h index c0aecbc401e83..c3a03a9a6017e 100644 --- a/taichi/rhi/cpu/cpu_device.h +++ b/taichi/rhi/cpu/cpu_device.h @@ -106,7 +106,7 @@ class CpuDevice : public LlvmDevice { void unmap(DevicePtr ptr) final{TI_NOT_IMPLEMENTED}; void unmap(DeviceAllocation alloc) final; - DeviceAllocation import_memory(void *ptr, size_t size); + DeviceAllocation import_memory(void *ptr, size_t size) override; void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override; diff --git a/taichi/rhi/cuda/cuda_device.h b/taichi/rhi/cuda/cuda_device.h index c1e0185af879b..b9a1213ead94c 100644 --- a/taichi/rhi/cuda/cuda_device.h +++ 
b/taichi/rhi/cuda/cuda_device.h @@ -123,7 +123,7 @@ class CudaDevice : public LlvmDevice { void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override; - DeviceAllocation import_memory(void *ptr, size_t size); + DeviceAllocation import_memory(void *ptr, size_t size) override; Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED}; diff --git a/taichi/rhi/llvm/llvm_device.h b/taichi/rhi/llvm/llvm_device.h index 5995a2d36dbb0..5fa9535b64724 100644 --- a/taichi/rhi/llvm/llvm_device.h +++ b/taichi/rhi/llvm/llvm_device.h @@ -20,6 +20,17 @@ class LlvmDevice : public Device { TI_NOT_IMPLEMENTED } + template <typename DEVICE> + DEVICE *as() { + auto *device = dynamic_cast<DEVICE *>(this); + TI_ASSERT(device != nullptr); + return device; + } + + virtual DeviceAllocation import_memory(void *ptr, size_t size) { + TI_NOT_IMPLEMENTED + } + virtual DeviceAllocation allocate_memory_runtime( const LlvmRuntimeAllocParams &params) { TI_NOT_IMPLEMENTED; diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp index 98502570b337e..0025070933660 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.cpp +++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp @@ -415,23 +415,8 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes( std::memset(root_buffer, 0, rounded_size); } - DeviceAllocation alloc{kDeviceNullAllocation}; - - if (config_.arch == Arch::cuda) { -#if defined(TI_WITH_CUDA) - alloc = cuda_device()->import_memory(root_buffer, rounded_size); -#else - TI_NOT_IMPLEMENTED -#endif - } else if (config_.arch == Arch::amdgpu) { -#if defined(TI_WITH_AMDGPU) - alloc = amdgpu_device()->import_memory(root_buffer, rounded_size); -#else - TI_NOT_IMPLEMENTED -#endif - } else { - alloc = cpu_device()->import_memory(root_buffer, rounded_size); - } + DeviceAllocation alloc = + llvm_device()->import_memory(root_buffer, rounded_size); snode_tree_allocs_[tree_id] = alloc; @@ -474,25 +459,6 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes( } } 
-cuda::CudaDevice *LlvmRuntimeExecutor::cuda_device() { - if (config_.arch != Arch::cuda) { - TI_ERROR("arch is not cuda"); - } - return static_cast<cuda::CudaDevice *>(device_.get()); -} - -amdgpu::AmdgpuDevice *LlvmRuntimeExecutor::amdgpu_device() { - if (config_.arch != Arch::amdgpu) { - TI_ERROR("arch is not amdgpu"); - } - return static_cast<amdgpu::AmdgpuDevice *>(device_.get()); -} - -cpu::CpuDevice *LlvmRuntimeExecutor::cpu_device() { - TI_ERROR_IF(!arch_is_cpu(config_.arch), "arch is not cpu"); - return static_cast<cpu::CpuDevice *>(device_.get()); -} - LlvmDevice *LlvmRuntimeExecutor::llvm_device() { TI_ASSERT(dynamic_cast<LlvmDevice *>(device_.get())); return static_cast<LlvmDevice *>(device_.get()); @@ -511,7 +477,7 @@ DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray( } void LlvmRuntimeExecutor::deallocate_memory_ndarray(DeviceAllocation handle) { - cuda_device()->dealloc_memory(handle); + llvm_device()->dealloc_memory(handle); } void LlvmRuntimeExecutor::fill_ndarray(const DeviceAllocation &alloc, @@ -539,32 +505,35 @@ uint64_t *LlvmRuntimeExecutor::get_ndarray_alloc_info_ptr( const DeviceAllocation &alloc) { if (config_.arch == Arch::cuda) { #if defined(TI_WITH_CUDA) - return (uint64_t *)cuda_device()->get_alloc_info(alloc).ptr; + return (uint64_t *)llvm_device() + ->as<cuda::CudaDevice>() + ->get_alloc_info(alloc) + .ptr; #else TI_NOT_IMPLEMENTED #endif } else if (config_.arch == Arch::amdgpu) { #if defined(TI_WITH_AMDGPU) - return (uint64_t *)amdgpu_device()->get_alloc_info(alloc).ptr; + return (uint64_t *)llvm_device() + ->as<amdgpu::AmdgpuDevice>() + ->get_alloc_info(alloc) + .ptr; #else - TI_NOT_IMPLEMENTED + TI_NOT_IMPLEMENTED; #endif - } else { - return (uint64_t *)cpu_device()->get_alloc_info(alloc).ptr; } + + return (uint64_t *)llvm_device() + ->as<cpu::CpuDevice>() + ->get_alloc_info(alloc) + .ptr; } void LlvmRuntimeExecutor::finalize() { profiler_ = nullptr; if (preallocated_device_buffer_ != nullptr) { - if (config_.arch == Arch::cuda) { -#if defined(TI_WITH_CUDA) - cuda_device()->dealloc_memory(preallocated_device_buffer_alloc_); -#endif - } else if (config_.arch == 
Arch::amdgpu) { -#if defined(TI_WITH_AMDGPU) - amdgpu_device()->dealloc_memory(preallocated_device_buffer_alloc_); -#endif + if (config_.arch == Arch::cuda || config_.arch == Arch::amdgpu) { + llvm_device()->dealloc_memory(preallocated_device_buffer_alloc_); } } finalized_ = true; @@ -603,11 +572,12 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool, Device::AllocParams preallocated_device_buffer_alloc_params; preallocated_device_buffer_alloc_params.size = prealloc_size; RhiResult res = - cuda_device()->allocate_memory(preallocated_device_buffer_alloc_params, + llvm_device()->allocate_memory(preallocated_device_buffer_alloc_params, &preallocated_device_buffer_alloc_); TI_ASSERT(res == RhiResult::success); cuda::CudaDevice::AllocInfo preallocated_device_buffer_alloc_info = - cuda_device()->get_alloc_info(preallocated_device_buffer_alloc_); + llvm_device()->as<cuda::CudaDevice>()->get_alloc_info( + preallocated_device_buffer_alloc_); preallocated_device_buffer_ = preallocated_device_buffer_alloc_info.ptr; CUDADriver::get_instance().memset(preallocated_device_buffer_, 0, @@ -636,12 +606,13 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool, Device::AllocParams preallocated_device_buffer_alloc_params; preallocated_device_buffer_alloc_params.size = prealloc_size; - RhiResult res = amdgpu_device()->allocate_memory( - preallocated_device_buffer_alloc_params, - &preallocated_device_buffer_alloc_); + RhiResult res = + llvm_device()->allocate_memory(preallocated_device_buffer_alloc_params, + &preallocated_device_buffer_alloc_); TI_ASSERT(res == RhiResult::success); amdgpu::AmdgpuDevice::AllocInfo preallocated_device_buffer_alloc_info = - amdgpu_device()->get_alloc_info(preallocated_device_buffer_alloc_); + llvm_device()->as<amdgpu::AmdgpuDevice>()->get_alloc_info( + preallocated_device_buffer_alloc_); preallocated_device_buffer_ = preallocated_device_buffer_alloc_info.ptr; AMDGPUDriver::get_instance().memset(preallocated_device_buffer_, 0, diff --git 
a/taichi/runtime/llvm/llvm_runtime_executor.h b/taichi/runtime/llvm/llvm_runtime_executor.h index 501c8428cc98a..a25e06c3efcc2 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.h +++ b/taichi/runtime/llvm/llvm_runtime_executor.h @@ -122,10 +122,6 @@ class LlvmRuntimeExecutor { /* -------------------------- */ /* ------ Member Access ----- */ /* -------------------------- */ - cuda::CudaDevice *cuda_device(); - cpu::CpuDevice *cpu_device(); - amdgpu::AmdgpuDevice *amdgpu_device(); - void finalize(); uint64 fetch_result_uint64(int i, uint64 *result_buffer); diff --git a/taichi/runtime/program_impls/llvm/llvm_program.h b/taichi/runtime/program_impls/llvm/llvm_program.h index 05fe9d71cfb3d..678c64f01c484 100644 --- a/taichi/runtime/program_impls/llvm/llvm_program.h +++ b/taichi/runtime/program_impls/llvm/llvm_program.h @@ -254,14 +254,6 @@ class LlvmProgramImpl : public ProgramImpl { return runtime_exec_->get_snode_tree_device_ptr(tree_id); } - cuda::CudaDevice *cuda_device() { - return runtime_exec_->cuda_device(); - } - - cpu::CpuDevice *cpu_device() { - return runtime_exec_->cpu_device(); - } - LlvmDevice *llvm_device() { return runtime_exec_->llvm_device(); }