[aot] Import CPU and CUDA memory for Taichi AOT
Routhleck committed Oct 14, 2023
1 parent 6b563a2 commit 1b3f2f9
Showing 7 changed files with 134 additions and 0 deletions.
4 changes: 4 additions & 0 deletions c_api/include/taichi/taichi_cpu.h
@@ -20,6 +20,10 @@ ti_export_cpu_memory(TiRuntime runtime,
                     TiMemory memory,
                     TiCpuMemoryInteropInfo *interop_info);

TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cpu_memory(TiRuntime runtime,
                                                        void *ptr,
                                                        size_t memory_size);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
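
A minimal call sketch for the new CPU entry point (hedged: it simply mirrors the TestCPUImport test added later in this commit and assumes the ti::Runtime wrapper from taichi.hpp, which converts implicitly to TiRuntime):

ti::Runtime runtime(TI_ARCH_X64);
float host_buf[4] = {1.0f, 2.0f, 3.0f, 4.0f};
// Wrap a caller-owned host buffer as a TiMemory.
TiMemory memory = ti_import_cpu_memory(runtime, host_buf, sizeof(host_buf));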
4 changes: 4 additions & 0 deletions c_api/include/taichi/taichi_cuda.h
@@ -20,6 +20,10 @@ ti_export_cuda_memory(TiRuntime runtime,
                      TiMemory memory,
                      TiCudaMemoryInteropInfo *interop_info);

TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime,
                                                         void *ptr,
                                                         size_t memory_size);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
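
The CUDA variant takes a device pointer instead of a host pointer. A hedged sketch, again mirroring the TestCUDAImport test below and using the cudaMalloc/cudaMemcpy test helpers added in this commit:

ti::Runtime runtime(TI_ARCH_CUDA);
float host_buf[4] = {1.0f, 2.0f, 3.0f, 4.0f};
void *device_buf = nullptr;
capi::utils::cudaMalloc(&device_buf, sizeof(host_buf));
capi::utils::cudaMemcpy(device_buf, host_buf, sizeof(host_buf));
// Wrap the caller-owned device allocation as a TiMemory.
TiMemory memory = ti_import_cuda_memory(runtime, device_buf, sizeof(host_buf));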
12 changes: 12 additions & 0 deletions c_api/src/c_api_test_utils.cpp
@@ -28,6 +28,18 @@ bool check_cuda_value_impl(void *ptr, T value) {
  return false;
}

void cudaMalloc(void **ptr, size_t size) {
#ifdef TI_WITH_CUDA
  taichi::lang::CUDADriver::get_instance().malloc(ptr, size);
#endif
}

void cudaMemcpy(void *ptr, void *data, size_t size) {
#ifdef TI_WITH_CUDA
  taichi::lang::CUDADriver::get_instance().memcpy_host_to_device(ptr, data, size);
#endif
}

bool check_cuda_value(void *ptr, float value) {
  return check_cuda_value_impl(ptr, value);
}
3 changes: 3 additions & 0 deletions c_api/src/c_api_test_utils.h
@@ -12,5 +12,8 @@ TI_DLL_EXPORT bool TI_API_CALL check_cuda_value(void *ptr, double value);
TI_DLL_EXPORT uint16_t to_float16(float in);
TI_DLL_EXPORT float to_float32(uint16_t in);

TI_DLL_EXPORT void TI_API_CALL cudaMalloc(void **ptr, size_t size);
TI_DLL_EXPORT void TI_API_CALL cudaMemcpy(void *ptr, void *data, size_t size);

} // namespace utils
} // namespace capi
36 changes: 36 additions & 0 deletions c_api/src/taichi_llvm_impl.cpp
@@ -167,6 +167,42 @@ void ti_export_cpu_memory(TiRuntime runtime,
#endif // TI_WITH_LLVM
}

TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cpu_memory(TiRuntime runtime,
                                                        void *ptr,
                                                        size_t memory_size) {
  capi::LlvmRuntime *llvm_runtime =
      static_cast<capi::LlvmRuntime *>((Runtime *)runtime);

  auto &device = llvm_runtime->get();
  auto &cpu_device = static_cast<taichi::lang::cpu::CpuDevice &>(device);

  taichi::lang::DeviceAllocation device_alloc =
      cpu_device.import_memory(ptr, memory_size);

  // prepare memory object
  TiMemory memory = devalloc2devmem(*llvm_runtime, device_alloc);

  return memory;
}

TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime,
                                                         void *ptr,
                                                         size_t memory_size) {
  capi::LlvmRuntime *llvm_runtime =
      static_cast<capi::LlvmRuntime *>((Runtime *)runtime);

  auto &device = llvm_runtime->get();
  auto &cuda_device = static_cast<taichi::lang::cuda::CudaDevice &>(device);

  taichi::lang::DeviceAllocation device_alloc =
      cuda_device.import_memory(ptr, memory_size);

  // prepare memory object
  TiMemory memory = devalloc2devmem(*llvm_runtime, device_alloc);

  return memory;
}

// function.export_cuda_runtime
void ti_export_cuda_memory(TiRuntime runtime,
                           TiMemory memory,
2 changes: 2 additions & 0 deletions c_api/tests/c_api_behavior_test.cpp
@@ -4,6 +4,8 @@
#include "gtest/gtest.h"
#include "c_api_test_utils.h"
#include "taichi/cpp/taichi.hpp"
#include "taichi/taichi_cpu.h"
#include "taichi/taichi_cuda.h"
#include "c_api/tests/gtest_fixture.h"

TEST_F(CapiTest, TestBehaviorCreateRuntime) {
73 changes: 73 additions & 0 deletions c_api/tests/c_api_interop_test.cpp
@@ -86,4 +86,77 @@ TEST_F(CapiTest, AotTestVulkanTextureInterop) {
}
}

TEST_F(CapiTest, TestCPUImport) {
  TiArch arch = TiArch::TI_ARCH_X64;
  ti::Runtime runtime(arch);

  float data_x[4] = {1.0, 2.0, 3.0, 4.0};

  auto memory = ti_import_cpu_memory(runtime, &data_x[0], sizeof(float) * 4);

  int dim_count = 1;
  int element_count = 4;
  auto elem_type = TI_DATA_TYPE_F32;

  // prepare tiNdArray
  TiNdArray tiNdArray;
  tiNdArray.memory = memory;
  tiNdArray.shape.dim_count = dim_count;
  tiNdArray.shape.dims[0] = element_count;
  tiNdArray.elem_shape.dim_count = 0;
  tiNdArray.elem_type = elem_type;

  auto ti_memory = ti::Memory(runtime, memory, sizeof(float) * 4, false);
  // prepare ndarray
  auto ndarray = ti::NdArray<float>(std::move(ti_memory), tiNdArray);

  std::vector<float> data_out(4);
  ndarray.read(data_out);

  std::cout << data_out[0] << std::endl;
  std::cout << data_out[1] << std::endl;
  std::cout << data_out[2] << std::endl;
  std::cout << data_out[3] << std::endl;
}

TEST_F(CapiTest, TestCUDAImport) {
  TiArch arch = TiArch::TI_ARCH_CUDA;
  ti::Runtime runtime(arch);

  float data_x[4] = {1.0, 2.0, 3.0, 4.0};

  void *device_array;
  size_t device_array_size = sizeof(data_x);
  capi::utils::cudaMalloc(&device_array, device_array_size);
  capi::utils::cudaMemcpy(device_array, data_x, device_array_size);

  auto memory = ti_import_cuda_memory(runtime, device_array, device_array_size);

  int dim_count = 1;
  int element_count = 4;
  auto elem_type = TI_DATA_TYPE_F32;

  // prepare tiNdArray
  TiNdArray tiNdArray;
  tiNdArray.memory = memory;
  tiNdArray.shape.dim_count = dim_count;
  tiNdArray.shape.dims[0] = element_count;
  tiNdArray.elem_shape.dim_count = 0;
  tiNdArray.elem_type = elem_type;

  auto ti_memory = ti::Memory(runtime, memory, sizeof(float) * 4, false);
  // prepare ndarray
  auto ndarray = ti::NdArray<float>(std::move(ti_memory), tiNdArray);

  std::vector<float> data_out(4);
  ndarray.read(data_out);

  std::cout << data_out[0] << std::endl;
  std::cout << data_out[1] << std::endl;
  std::cout << data_out[2] << std::endl;
  std::cout << data_out[3] << std::endl;
}

#endif // TI_WITH_VULKAN
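
The tests above print the read-back values to stdout. If assertion-based verification is preferred, a small hedged variant using gtest (comparing against the data_x initializer used in both tests) could replace the prints:

for (int i = 0; i < 4; i++) {
  EXPECT_FLOAT_EQ(data_out[i], data_x[i]);
}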

