// Reviewer annotations for this patch. They are placed ahead of the first
// "diff --git" header so that they sit outside every hunk.
//
// taichi/backends/codegen_opengl.cpp
//   * Adds struct SSBO: a host-side buffer of data_size bytes obtained with
//     std::malloc in the constructor and released with std::free in the
//     destructor.
//       - load_arguments_from(ctx): stores ctx.get_arg(i) as a uint64_t in
//         slot i, for i in [0, taichi_max_num_args).
//       - save_returns_to(ctx): hands each of those slots back through
//         ctx.set_arg(i, value).
//       - update(data_r): memcpy of data_size bytes from data_r into the
//         buffer.
//       - operator IOV(): returns IOV{data, data_size}, i.e. just the pointer
//         and the size.
//   * Adds KernelGen::create_root_ssbo(), which returns
//     new SSBO(struct_compiled_->root_size).
//   * Removes the free functions load_data/save_data (int-sized slots) and
//     the fixed 1024-byte buffer in OpenglCodeGen::gen().
//   * gen() now creates the root SSBO once and captures the pointer in the
//     returned lambda. Each invocation allocates an argument SSBO of
//     taichi_max_num_args * sizeof(uint64_t) bytes, loads the arguments,
//     calls launch_glsl_kernel with {argument buffer, root buffer}, copies
//     res[0] back into the argument buffer, writes it to ctx, and finally
//     calls unmap_all_ssbo().
//
// taichi/platform/opengl/opengl_api.cpp
//   * GLSSBO::map(offset, length, access) now calls
//     glBindBuffer(GL_SHADER_STORAGE_BUFFER, id_) before glMapBufferRange.
//   * Adds the overload GLSSBO::map(access), which binds and then calls
//     glMapBuffer.
//   * launch_glsl_kernel now takes a vector of IOV. It creates one GLSSBO per
//     entry (bind_index(i), then bind_data(base, size, GL_DYNAMIC_READ)),
//     and after glDispatchCompute(1, 1, 1) and glMemoryBarrier it maps every
//     buffer and returns the mapped pointers.
//   * Adds unmap_all_ssbo(), a single glUnmapBuffer(GL_SHADER_STORAGE_BUFFER).
//
// taichi/platform/opengl/opengl_api.h
//   * Declares struct IOV { void *base; size_t size; } and the new
//     launch_glsl_kernel / unmap_all_ssbo prototypes.
//
// NOTE(review): arg_sb is allocated with `new` on every lambda invocation and
//   root_sb once per gen(); no matching delete is visible in this patch.
//   Looks like a leak per kernel launch -- confirm, or hold them by value /
//   smart pointer.
// NOTE(review): SSBO frees `data` in its destructor but declares no copy
//   constructor here, so the implicitly generated one would make two objects
//   free the same pointer. Consider deleting copy operations.
// NOTE(review): the std::malloc result is used without a null check.
// NOTE(review): only res[0] is copied back; res[1] (the mapping of the root
//   buffer) is not read in the visible code, and launch_glsl_kernel uploads
//   root_sb's host data into fresh GLSSBO objects on each call. Presumably
//   kernel writes to the root buffer are therefore not carried over to the
//   next launch -- confirm intent.
// NOTE(review): launch_glsl_kernel maps every buffer (binding each in turn),
//   while unmap_all_ssbo issues one glUnmapBuffer on the target. Presumably
//   that only affects the buffer bound last -- confirm against the OpenGL
//   reference.
// NOTE(review): the new map(access) overload forwards GL_MAP_READ_BIT (its
//   default) to glMapBuffer; the OpenGL reference describes that parameter as
//   an access enum such as GL_READ_ONLY -- confirm.
// NOTE(review): the GLSSBO vector is local to launch_glsl_kernel, yet the
//   mapped pointers are returned to the caller. GLSSBO's destructor is not
//   visible here -- confirm the pointers stay valid after return.
// NOTE(review): `int i` is compared against ssbo.size() (signed/unsigned).
// NOTE(review): this copy shows bare "#include" directives and std::vector
//   without element types; it looks like angle-bracket text was lost in
//   transit -- verify against the original patch before applying.
diff --git a/taichi/backends/codegen_opengl.cpp b/taichi/backends/codegen_opengl.cpp index a27984e865e3c7..414b61b03699cb 100644 --- a/taichi/backends/codegen_opengl.cpp +++ b/taichi/backends/codegen_opengl.cpp @@ -3,12 +3,59 @@ #include #include +#include #include TLANG_NAMESPACE_BEGIN namespace opengl { namespace { +struct SSBO +{ + void *data; + const size_t data_size; + + SSBO(size_t data_size) + : data_size(data_size) + { + TI_INFO("[glsl] Allocating {} B SSBO", data_size); + data = std::malloc(data_size); + } + + void load_arguments_from(Context &ctx) + { + uint64_t *data_i = (uint64_t *)data; + for (int i = 0; i < taichi_max_num_args; i++) { + uint64_t value = ctx.get_arg(i); + data_i[i] = value; + } + } + + void save_returns_to(Context &ctx) + { + uint64_t *data_i = (uint64_t *)data; + for (int i = 0; i < taichi_max_num_args; i++) { + uint64_t value = data_i[i]; + ctx.set_arg(i, value); + } + } + + void update(void *data_r) + { + std::memcpy(data, data_r, data_size); + } + + operator IOV() + { + return IOV{data, data_size}; + } + + ~SSBO() + { + std::free(data); + } +}; + class KernelGen : public IRVisitor { Kernel *kernel; @@ -285,7 +332,7 @@ class KernelGen : public IRVisitor void visit(OffloadedStmt *stmt) override { - TI_ASSERT(is_top_level_); + TI_ASSERT(is_top_level_); // TODO(archibate): remove for nested kernel (?) 
is_top_level_ = false; using Type = OffloadedStmt::TaskType; if (stmt->task_type == Type::serial) { @@ -307,6 +354,11 @@ class KernelGen : public IRVisitor return kernel_src_code_; } + SSBO *create_root_ssbo() + { + return new SSBO(struct_compiled_->root_size); + } + void run(const SNode &root_snode) { //TI_INFO("ntm:: {}", root_snode.node_type_name); @@ -437,39 +489,22 @@ void OpenglCodeGen::lower() } } -void load_data(Context &ctx, void *data) -{ - int *data_ = (int *)data; - for (int i = 0; i < taichi_max_num_args; i++) { - int value = ctx.get_arg(i); - data_[i] = value; - } -} - -void save_data(Context &ctx, void *data) -{ - int *data_ = (int *)data; - for (int i = 0; i < taichi_max_num_args; i++) { - int value = data_[i]; - ctx.set_arg(i, value); - } -} - FunctionType OpenglCodeGen::gen(void) { KernelGen codegen(kernel_, kernel_name_, struct_compiled_); codegen.run(*prog_->snode_root); + SSBO *root_sb = codegen.create_root_ssbo(); const std::string kernel_source_code = codegen.kernel_source_code(); - //TI_INFO("\n{}", kernel_source_code); - - return [kernel_source_code](Context &ctx) { - void *data, *data_r; - size_t data_size = 1024; // ... 
- data = malloc(data_size); - load_data(ctx, data); - data_r = launch_glsl_kernel(kernel_source_code, data, data_size); - free(data); - save_data(ctx, data_r); + + return [kernel_source_code, root_sb](Context &ctx) { + // TODO(archibate): find out where get_arg stored, and just new SSBO(ctx) + SSBO *arg_sb = new SSBO(taichi_max_num_args * sizeof(uint64_t)); + arg_sb->load_arguments_from(ctx); + std::vector iov = {*arg_sb, *root_sb}; + std::vector res = launch_glsl_kernel(kernel_source_code, iov); + arg_sb->update(res[0]); + arg_sb->save_returns_to(ctx); + unmap_all_ssbo(); }; } diff --git a/taichi/platform/opengl/opengl_api.cpp b/taichi/platform/opengl/opengl_api.cpp index 53628053f64c06..b5893a90b715d3 100644 --- a/taichi/platform/opengl/opengl_api.cpp +++ b/taichi/platform/opengl/opengl_api.cpp @@ -129,6 +129,7 @@ struct GLProgram } }; + // https://blog.csdn.net/ylbs110/article/details/52074826 // https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object // This is Shader Storage Buffer, we use it to share data between CPU & GPU @@ -191,8 +192,15 @@ struct GLSSBO void *map(size_t offset, size_t length, GLbitfield access = GL_MAP_READ_BIT) { // map GPU memory to CPU address space, offset within SSBO data + glBindBuffer(GL_SHADER_STORAGE_BUFFER, id_); return glMapBufferRange(GL_SHADER_STORAGE_BUFFER, offset, length, access); } + + void *map(GLbitfield access = GL_MAP_READ_BIT) + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, id_); + return glMapBuffer(GL_SHADER_STORAGE_BUFFER, access); + } }; void initialize_opengl() @@ -227,7 +235,7 @@ void initialize_opengl() } } -void *launch_glsl_kernel(std::string source, void *data, size_t data_size) +std::vector launch_glsl_kernel(std::string source, std::vector iov) { static bool gl_inited = false; if (!gl_inited) { @@ -241,9 +249,11 @@ void *launch_glsl_kernel(std::string source, void *data, size_t data_size) program.link(); program.use(); - GLSSBO ssbo; - ssbo.bind_index(0); - ssbo.bind_data(data, data_size, 
GL_DYNAMIC_READ); // input + std::vector ssbo(iov.size()); + for (int i = 0; i < ssbo.size(); i++) { + ssbo[i].bind_index(i); + ssbo[i].bind_data(iov[i].base, iov[i].size, GL_DYNAMIC_READ); // input + } // https://www.khronos.org/opengl/wiki/Compute_Shader // https://community.arm.com/developer/tools-software/graphics/b/blog/posts/get-started-with-compute-shaders @@ -253,10 +263,18 @@ void *launch_glsl_kernel(std::string source, void *data, size_t data_size) // `layout(local_size_x = X) in;` - the X == `Threads` in CUDA // glDispatchCompute(1, 1, 1); - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); // TODO(archibate): move to Program::synchronize() - void *data_r = ssbo.map(0, data_size); // output - return data_r; + std::vector maps(ssbo.size()); + for (int i = 0; i < ssbo.size(); i++) { + maps[i] = ssbo[i].map(0, iov[i].size); // output + } + return maps; +} + +void unmap_all_ssbo() +{ + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); } bool is_opengl_api_available() diff --git a/taichi/platform/opengl/opengl_api.h b/taichi/platform/opengl/opengl_api.h index f8589d59689834..a896c0e9accf28 100644 --- a/taichi/platform/opengl/opengl_api.h +++ b/taichi/platform/opengl/opengl_api.h @@ -10,8 +10,15 @@ TLANG_NAMESPACE_BEGIN namespace opengl { +struct IOV +{ + void *base; + size_t size; +}; + bool is_opengl_api_available(); -void *launch_glsl_kernel(std::string source, void *data, size_t data_size); +std::vector launch_glsl_kernel(std::string source, std::vector iov); +void unmap_all_ssbo(); } // namespace opengl