diff --git a/c_api/src/taichi_llvm_impl.cpp b/c_api/src/taichi_llvm_impl.cpp
index c39bf619a7ed6..a8b2fb8d8c6d2 100644
--- a/c_api/src/taichi_llvm_impl.cpp
+++ b/c_api/src/taichi_llvm_impl.cpp
@@ -52,14 +52,13 @@ taichi::lang::Device &LlvmRuntime::get() {
 
 TiMemory LlvmRuntime::allocate_memory(
     const taichi::lang::Device::AllocParams &params) {
-  const taichi::lang::CompileConfig &config = executor_->get_config();
   taichi::lang::LLVMRuntime *llvm_runtime = executor_->get_llvm_runtime();
   taichi::lang::LlvmDevice *llvm_device = executor_->llvm_device();
 
   taichi::lang::DeviceAllocation devalloc =
-      llvm_device->allocate_memory_runtime(
-          {params, config.ndarray_use_cached_allocator,
-           executor_->get_runtime_jit_module(), llvm_runtime, result_buffer});
+      llvm_device->allocate_memory_runtime({params,
+                                            executor_->get_runtime_jit_module(),
+                                            llvm_runtime, result_buffer});
 
   return devalloc2devmem(*this, devalloc);
 }
diff --git a/taichi/program/compile_config.cpp b/taichi/program/compile_config.cpp
index 64c04371a44a2..bb6c102041d7d 100644
--- a/taichi/program/compile_config.cpp
+++ b/taichi/program/compile_config.cpp
@@ -42,7 +42,6 @@ CompileConfig::CompileConfig() {
   make_thread_local = true;
   make_block_local = true;
   detect_read_only = true;
-  ndarray_use_cached_allocator = true;
   real_matrix_scalarize = true;
   half2_vectorization = false;
   make_cpu_multithreading_loop = true;
diff --git a/taichi/program/compile_config.h b/taichi/program/compile_config.h
index df1650ec163a3..619109f4e86d8 100644
--- a/taichi/program/compile_config.h
+++ b/taichi/program/compile_config.h
@@ -37,7 +37,6 @@ struct CompileConfig {
   bool make_thread_local;
   bool make_block_local;
   bool detect_read_only;
-  bool ndarray_use_cached_allocator;
   bool real_matrix_scalarize;
   bool half2_vectorization;
   bool make_cpu_multithreading_loop;
diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp
index 04301f154e5d6..fe3e26dd213b4 100644
--- a/taichi/python/export_lang.cpp
+++ b/taichi/python/export_lang.cpp
@@ -193,8 +193,6 @@ void export_lang(py::module &m) {
       .def_readwrite("make_thread_local", &CompileConfig::make_thread_local)
       .def_readwrite("make_block_local", &CompileConfig::make_block_local)
       .def_readwrite("detect_read_only", &CompileConfig::detect_read_only)
-      .def_readwrite("ndarray_use_cached_allocator",
-                     &CompileConfig::ndarray_use_cached_allocator)
       .def_readwrite("real_matrix_scalarize",
                      &CompileConfig::real_matrix_scalarize)
       .def_readwrite("half2_vectorization", &CompileConfig::half2_vectorization)
diff --git a/taichi/rhi/amdgpu/amdgpu_device.cpp b/taichi/rhi/amdgpu/amdgpu_device.cpp
index f2d94a9c06883..6107904c9f644 100644
--- a/taichi/rhi/amdgpu/amdgpu_device.cpp
+++ b/taichi/rhi/amdgpu/amdgpu_device.cpp
@@ -56,17 +56,15 @@ DeviceAllocation AmdgpuDevice::allocate_memory_runtime(
   info.size = taichi::iroundup(params.size, taichi_page_size);
   if (params.host_read || params.host_write) {
     TI_NOT_IMPLEMENTED
-  } else if (params.use_cached) {
+  } else {
     info.ptr =
         DeviceMemoryPool::get_instance().allocate_with_cache(this, params);
 
     TI_ASSERT(info.ptr != nullptr);
     AMDGPUDriver::get_instance().memset((void *)info.ptr, 0, info.size);
-  } else {
-    info.ptr = allocate_llvm_runtime_memory_jit(params);
   }
   info.is_imported = false;
-  info.use_cached = params.use_cached;
+  info.use_cached = true;
   info.use_preallocated = true;
 
   DeviceAllocation alloc;
diff --git a/taichi/rhi/cuda/cuda_device.cpp b/taichi/rhi/cuda/cuda_device.cpp
index bb4bded239eb2..f75f657972cbf 100644
--- a/taichi/rhi/cuda/cuda_device.cpp
+++ b/taichi/rhi/cuda/cuda_device.cpp
@@ -51,18 +51,16 @@ DeviceAllocation CudaDevice::allocate_memory_runtime(
   info.size = taichi::iroundup(params.size, taichi_page_size);
   if (info.size == 0) {
     info.ptr = nullptr;
-  } else if (params.use_cached) {
+  } else {
     info.ptr =
         DeviceMemoryPool::get_instance().allocate_with_cache(this, params);
 
     TI_ASSERT(info.ptr != nullptr);
 
     CUDADriver::get_instance().memset((void *)info.ptr, 0, info.size);
-  } else {
-    info.ptr = allocate_llvm_runtime_memory_jit(params);
   }
   info.is_imported = false;
-  info.use_cached = params.use_cached;
+  info.use_cached = true;
   info.use_preallocated = true;
 
   DeviceAllocation alloc;
diff --git a/taichi/rhi/llvm/llvm_device.h b/taichi/rhi/llvm/llvm_device.h
index e7104f94a20fb..7997a5c3fe964 100644
--- a/taichi/rhi/llvm/llvm_device.h
+++ b/taichi/rhi/llvm/llvm_device.h
@@ -10,7 +10,6 @@ struct LLVMRuntime;
 class LlvmDevice : public Device {
  public:
   struct LlvmRuntimeAllocParams : AllocParams {
-    bool use_cached{true};
     JITModule *runtime_jit{nullptr};
     LLVMRuntime *runtime{nullptr};
     uint64 *result_buffer{nullptr};
diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp
index c7ad98b5201e6..ffb35055618a0 100644
--- a/taichi/runtime/llvm/llvm_runtime_executor.cpp
+++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp
@@ -475,7 +475,6 @@ DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray(
   return llvm_device()->allocate_memory_runtime(
       {{alloc_size, /*host_write=*/false, /*host_read=*/false,
         /*export_sharing=*/false, AllocUsage::Storage},
-       config_.ndarray_use_cached_allocator,
       get_runtime_jit_module(),
       get_llvm_runtime(),
       result_buffer});
diff --git a/tests/python/test_ndarray.py b/tests/python/test_ndarray.py
index e3c89fa3e0f00..f139d44917c75 100644
--- a/tests/python/test_ndarray.py
+++ b/tests/python/test_ndarray.py
@@ -278,7 +278,7 @@ def test_ndarray_deepcopy():
     assert y[4][1, 0] == 9
 
 
-@test_utils.test(arch=[ti.cuda], ndarray_use_cached_allocator=True)
+@test_utils.test(arch=[ti.cuda])
 def test_ndarray_caching_allocator():
     n = 8
     a = ti.ndarray(ti.i32, shape=(n))
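With this change, ndarray allocations on CUDA and AMDGPU always go through the cached path (`DeviceMemoryPool::get_instance().allocate_with_cache(...)`) rather than branching on `use_cached`. For readers unfamiliar with the pattern, below is a minimal, self-contained sketch of a size-bucketed caching allocator. It is only an illustration under assumed names (`CachedPool`, `raw_alloc`, `raw_free` are invented here) and is not the actual Taichi `DeviceMemoryPool` implementation.

```cpp
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>

// Hypothetical stand-ins for the underlying device allocator calls.
static void *raw_alloc(std::size_t size) { return std::malloc(size); }
static void raw_free(void *ptr) { std::free(ptr); }

// A minimal size-bucketed caching allocator: released blocks are parked in a
// free list keyed by their rounded-up size and handed back to later requests
// of the same size, avoiding another trip to the underlying allocator.
class CachedPool {
 public:
  void *allocate_with_cache(std::size_t size) {
    size = round_up(size, kPageSize);
    auto it = free_blocks_.find(size);
    if (it != free_blocks_.end()) {
      void *ptr = it->second;
      free_blocks_.erase(it);
      std::memset(ptr, 0, size);  // zero the reused block, mirroring the
                                  // memset done after allocate_with_cache
      return ptr;
    }
    return raw_alloc(size);
  }

  // Releasing a block returns it to the cache instead of freeing it.
  void release(std::size_t size, void *ptr) {
    free_blocks_.emplace(round_up(size, kPageSize), ptr);
  }

  ~CachedPool() {
    for (auto &entry : free_blocks_)
      raw_free(entry.second);
  }

 private:
  static constexpr std::size_t kPageSize = 4096;
  static std::size_t round_up(std::size_t x, std::size_t n) {
    return (x + n - 1) / n * n;
  }
  std::multimap<std::size_t, void *> free_blocks_;
};

int main() {
  CachedPool pool;
  void *a = pool.allocate_with_cache(1000);  // rounded up to one 4 KiB page
  pool.release(1000, a);
  void *b = pool.allocate_with_cache(512);   // same bucket: block is reused
  std::cout << (a == b ? "reused cached block\n" : "fresh allocation\n");
}
```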