Skip to content

Commit

Permalink
[lang] Removed cpu_device(), cuda_device(), and amdgpu_device() from …
Browse files Browse the repository at this point in the history
…LlvmRuntimeExecutor (#7544)

Issue: #7300

### Brief Summary

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
jim19930609 and pre-commit-ci[bot] authored Mar 21, 2023
1 parent 8c78b36 commit 524b6fb
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 71 deletions.
2 changes: 1 addition & 1 deletion taichi/rhi/amdgpu/amdgpu_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class AmdgpuDevice : public LlvmDevice {

void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override;

DeviceAllocation import_memory(void *ptr, size_t size);
DeviceAllocation import_memory(void *ptr, size_t size) override;

Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED};

Expand Down
2 changes: 1 addition & 1 deletion taichi/rhi/cpu/cpu_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class CpuDevice : public LlvmDevice {
void unmap(DevicePtr ptr) final{TI_NOT_IMPLEMENTED};
void unmap(DeviceAllocation alloc) final;

DeviceAllocation import_memory(void *ptr, size_t size);
DeviceAllocation import_memory(void *ptr, size_t size) override;

void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override;

Expand Down
2 changes: 1 addition & 1 deletion taichi/rhi/cuda/cuda_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class CudaDevice : public LlvmDevice {

void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override;

DeviceAllocation import_memory(void *ptr, size_t size);
DeviceAllocation import_memory(void *ptr, size_t size) override;

Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED};

Expand Down
11 changes: 11 additions & 0 deletions taichi/rhi/llvm/llvm_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ class LlvmDevice : public Device {
TI_NOT_IMPLEMENTED
}

// Checked downcast of this LlvmDevice to a concrete backend type DEVICE.
// The conversion uses dynamic_cast, and TI_ASSERT verifies the result is
// non-null, so requesting a type this object is not fails the assertion
// rather than handing a null pointer back to the caller.
template <typename DEVICE>
DEVICE *as() {
auto *device = dynamic_cast<DEVICE *>(this);
TI_ASSERT(device != nullptr);
return device;
}

// Virtual hook taking a raw pointer and a byte size and returning a
// DeviceAllocation. The base implementation only hits TI_NOT_IMPLEMENTED;
// concrete backends are expected to override it.
// NOTE(review): presumably wraps externally owned memory at `ptr` without
// copying -- confirm against the backend implementations.
virtual DeviceAllocation import_memory(void *ptr, size_t size) {
TI_NOT_IMPLEMENTED
}

virtual DeviceAllocation allocate_memory_runtime(
const LlvmRuntimeAllocParams &params) {
TI_NOT_IMPLEMENTED;
Expand Down
83 changes: 27 additions & 56 deletions taichi/runtime/llvm/llvm_runtime_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,23 +415,8 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes(
std::memset(root_buffer, 0, rounded_size);
}

DeviceAllocation alloc{kDeviceNullAllocation};

if (config_.arch == Arch::cuda) {
#if defined(TI_WITH_CUDA)
alloc = cuda_device()->import_memory(root_buffer, rounded_size);
#else
TI_NOT_IMPLEMENTED
#endif
} else if (config_.arch == Arch::amdgpu) {
#if defined(TI_WITH_AMDGPU)
alloc = amdgpu_device()->import_memory(root_buffer, rounded_size);
#else
TI_NOT_IMPLEMENTED
#endif
} else {
alloc = cpu_device()->import_memory(root_buffer, rounded_size);
}
DeviceAllocation alloc =
llvm_device()->import_memory(root_buffer, rounded_size);

snode_tree_allocs_[tree_id] = alloc;

Expand Down Expand Up @@ -474,25 +459,6 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes(
}
}

// Returns device_ viewed as a cuda::CudaDevice.
// Reports "arch is not cuda" through TI_ERROR when config_.arch is not
// Arch::cuda. The cast itself is a static_cast, so the arch check is the only
// guard on the pointee's actual type.
cuda::CudaDevice *LlvmRuntimeExecutor::cuda_device() {
if (config_.arch != Arch::cuda) {
TI_ERROR("arch is not cuda");
}
return static_cast<cuda::CudaDevice *>(device_.get());
}

// Returns device_ viewed as an amdgpu::AmdgpuDevice.
// Reports "arch is not amdgpu" through TI_ERROR when config_.arch is not
// Arch::amdgpu. The cast itself is a static_cast, so the arch check is the
// only guard on the pointee's actual type.
amdgpu::AmdgpuDevice *LlvmRuntimeExecutor::amdgpu_device() {
if (config_.arch != Arch::amdgpu) {
TI_ERROR("arch is not amdgpu");
}
return static_cast<amdgpu::AmdgpuDevice *>(device_.get());
}

// Returns device_ viewed as a cpu::CpuDevice.
// TI_ERROR_IF reports "arch is not cpu" when arch_is_cpu(config_.arch) is
// false. The cast itself is a static_cast, so the arch check is the only
// guard on the pointee's actual type.
cpu::CpuDevice *LlvmRuntimeExecutor::cpu_device() {
TI_ERROR_IF(!arch_is_cpu(config_.arch), "arch is not cpu");
return static_cast<cpu::CpuDevice *>(device_.get());
}

LlvmDevice *LlvmRuntimeExecutor::llvm_device() {
TI_ASSERT(dynamic_cast<LlvmDevice *>(device_.get()));
return static_cast<LlvmDevice *>(device_.get());
Expand All @@ -511,7 +477,7 @@ DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray(
}

void LlvmRuntimeExecutor::deallocate_memory_ndarray(DeviceAllocation handle) {
cuda_device()->dealloc_memory(handle);
llvm_device()->dealloc_memory(handle);
}

void LlvmRuntimeExecutor::fill_ndarray(const DeviceAllocation &alloc,
Expand Down Expand Up @@ -539,32 +505,35 @@ uint64_t *LlvmRuntimeExecutor::get_ndarray_alloc_info_ptr(
const DeviceAllocation &alloc) {
if (config_.arch == Arch::cuda) {
#if defined(TI_WITH_CUDA)
return (uint64_t *)cuda_device()->get_alloc_info(alloc).ptr;
return (uint64_t *)llvm_device()
->as<cuda::CudaDevice>()
->get_alloc_info(alloc)
.ptr;
#else
TI_NOT_IMPLEMENTED
#endif
} else if (config_.arch == Arch::amdgpu) {
#if defined(TI_WITH_AMDGPU)
return (uint64_t *)amdgpu_device()->get_alloc_info(alloc).ptr;
return (uint64_t *)llvm_device()
->as<amdgpu::AmdgpuDevice>()
->get_alloc_info(alloc)
.ptr;
#else
TI_NOT_IMPLEMENTED
TI_NOT_IMPLEMENTED;
#endif
} else {
return (uint64_t *)cpu_device()->get_alloc_info(alloc).ptr;
}

return (uint64_t *)llvm_device()
->as<cpu::CpuDevice>()
->get_alloc_info(alloc)
.ptr;
}

void LlvmRuntimeExecutor::finalize() {
profiler_ = nullptr;
if (preallocated_device_buffer_ != nullptr) {
if (config_.arch == Arch::cuda) {
#if defined(TI_WITH_CUDA)
cuda_device()->dealloc_memory(preallocated_device_buffer_alloc_);
#endif
} else if (config_.arch == Arch::amdgpu) {
#if defined(TI_WITH_AMDGPU)
amdgpu_device()->dealloc_memory(preallocated_device_buffer_alloc_);
#endif
if (config_.arch == Arch::cuda || config_.arch == Arch::amdgpu) {
llvm_device()->dealloc_memory(preallocated_device_buffer_alloc_);
}
}
finalized_ = true;
Expand Down Expand Up @@ -603,11 +572,12 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool,
Device::AllocParams preallocated_device_buffer_alloc_params;
preallocated_device_buffer_alloc_params.size = prealloc_size;
RhiResult res =
cuda_device()->allocate_memory(preallocated_device_buffer_alloc_params,
llvm_device()->allocate_memory(preallocated_device_buffer_alloc_params,
&preallocated_device_buffer_alloc_);
TI_ASSERT(res == RhiResult::success);
cuda::CudaDevice::AllocInfo preallocated_device_buffer_alloc_info =
cuda_device()->get_alloc_info(preallocated_device_buffer_alloc_);
llvm_device()->as<cuda::CudaDevice>()->get_alloc_info(
preallocated_device_buffer_alloc_);
preallocated_device_buffer_ = preallocated_device_buffer_alloc_info.ptr;

CUDADriver::get_instance().memset(preallocated_device_buffer_, 0,
Expand Down Expand Up @@ -636,12 +606,13 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool,

Device::AllocParams preallocated_device_buffer_alloc_params;
preallocated_device_buffer_alloc_params.size = prealloc_size;
RhiResult res = amdgpu_device()->allocate_memory(
preallocated_device_buffer_alloc_params,
&preallocated_device_buffer_alloc_);
RhiResult res =
llvm_device()->allocate_memory(preallocated_device_buffer_alloc_params,
&preallocated_device_buffer_alloc_);
TI_ASSERT(res == RhiResult::success);
amdgpu::AmdgpuDevice::AllocInfo preallocated_device_buffer_alloc_info =
amdgpu_device()->get_alloc_info(preallocated_device_buffer_alloc_);
llvm_device()->as<amdgpu::AmdgpuDevice>()->get_alloc_info(
preallocated_device_buffer_alloc_);
preallocated_device_buffer_ = preallocated_device_buffer_alloc_info.ptr;

AMDGPUDriver::get_instance().memset(preallocated_device_buffer_, 0,
Expand Down
4 changes: 0 additions & 4 deletions taichi/runtime/llvm/llvm_runtime_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,6 @@ class LlvmRuntimeExecutor {
/* -------------------------- */
/* ------ Member Access ----- */
/* -------------------------- */
cuda::CudaDevice *cuda_device();
cpu::CpuDevice *cpu_device();
amdgpu::AmdgpuDevice *amdgpu_device();

void finalize();

uint64 fetch_result_uint64(int i, uint64 *result_buffer);
Expand Down
8 changes: 0 additions & 8 deletions taichi/runtime/program_impls/llvm/llvm_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,6 @@ class LlvmProgramImpl : public ProgramImpl {
return runtime_exec_->get_snode_tree_device_ptr(tree_id);
}

// Thin forwarder: returns whatever runtime_exec_->cuda_device() returns.
cuda::CudaDevice *cuda_device() {
return runtime_exec_->cuda_device();
}

// Thin forwarder: returns whatever runtime_exec_->cpu_device() returns.
cpu::CpuDevice *cpu_device() {
return runtime_exec_->cpu_device();
}

// Thin forwarder: returns the LlvmDevice pointer obtained from
// runtime_exec_->llvm_device().
LlvmDevice *llvm_device() {
return runtime_exec_->llvm_device();
}
Expand Down

0 comments on commit 524b6fb

Please sign in to comment.