diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index 803b7c79fc6016..69db6709ddea4b 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -88,6 +88,10 @@ if (TI_WITH_CUDA) target_link_libraries(${LIBRARY_NAME} ${llvm_ptx_libs}) endif() +#target_link_libraries(${LIBRARY_NAME} /usr/lib/libGL.so GL) +target_link_libraries(${LIBRARY_NAME} /usr/lib/libGLEW.so GLEW) +target_link_libraries(${LIBRARY_NAME} /usr/lib/libglfw.so glfw) + # add_executable(runtime runtime/runtime.cpp) # Optional dependencies diff --git a/examples/opengl_backend.py b/examples/opengl_backend.py new file mode 100644 index 00000000000000..77f5d75590a6f1 --- /dev/null +++ b/examples/opengl_backend.py @@ -0,0 +1,12 @@ +import taichi as ti + +ti.init(arch=ti.opengl) + +x = ti.var(ti.f32, shape=()) +x[None] = 1 + +@ti.kernel +def func(): + x[None] = x[None] * 2 + +func() diff --git a/python/taichi/lang/__init__.py b/python/taichi/lang/__init__.py index 62592a39f0cae8..bcd7ef4a6333d4 100644 --- a/python/taichi/lang/__init__.py +++ b/python/taichi/lang/__init__.py @@ -27,6 +27,7 @@ x86_64 = core.x86_64 cuda = core.cuda metal = core.metal +opengl = core.opengl profiler_print = lambda: core.get_current_program().profiler_print() profiler_clear = lambda: core.get_current_program().profiler_clear() @@ -178,6 +179,8 @@ def supported_archs(): archs.append(cuda) if ti.core.with_metal(): archs.append(metal) + if ti.core.with_opengl(): + archs.append(opengl) return archs class _ArchCheckers(object): diff --git a/taichi/backends/codegen_opengl.cpp b/taichi/backends/codegen_opengl.cpp new file mode 100644 index 00000000000000..731ca5c83eef07 --- /dev/null +++ b/taichi/backends/codegen_opengl.cpp @@ -0,0 +1,397 @@ +#include "codegen_opengl.h" +#include + +#include +#include + +TLANG_NAMESPACE_BEGIN +namespace opengl { +namespace { + +std::string opengl_data_type_name(DataType dt) { + switch (dt) { + case DataType::f32: + return "float"; + case DataType::i32: + return "int"; + case DataType::u32: + return "uint"; + default: + TI_NOT_IMPLEMENTED; + break; + } + return ""; +} + +bool is_opengl_binary_op_infix(BinaryOpType type) { + return !((type == BinaryOpType::min) || (type == BinaryOpType::max) || + (type == BinaryOpType::atan2) || (type == BinaryOpType::pow)); +} + +class KernelGen : public IRVisitor +{ + Kernel *kernel; + +public: + KernelGen(Kernel *kernel, std::string kernel_name) + : kernel(kernel), kernel_name_(kernel_name), + glsl_kernel_prefix_(kernel_name) + { + allow_undefined_visitor = true; + invoke_default_visitor = true; + } + +private: // {{{ + std::string kernel_src_code_; + std::string indent_; + bool is_top_level_{true}; + + std::string kernel_name_; + std::string glsl_kernel_name_; + std::string glsl_kernel_prefix_; + int glsl_kernel_count_{0}; + + void push_indent() + { + indent_ += " "; + } + + void pop_indent() + { + indent_.pop_back(); + indent_.pop_back(); + } + + template + void emit(std::string f, Args &&... args) + { + kernel_src_code_ += + indent_ + fmt::format(f, std::forward(args)...) + "\n"; + } + + void generate_header() + { + emit("#version 430 core"); + emit("#extension GL_ARB_compute_shader: enable"); + emit(""); + emit("layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;"); + emit(""); + emit("layout(std430, binding = 0) buffer data"); + emit("{{"); + emit(" float tmp7;"); + emit("}};"); + emit(""); + } + + void generate_bottom() + { + emit("void main()"); + emit("{{"); + emit(" {}();", glsl_kernel_name_); + emit("}}"); + } + + void visit(Block *stmt) override + { + if (!is_top_level_) push_indent(); + for (auto &s : stmt->statements) { + //TI_INFO("visiting sub stmt {}", typeid(*s).name()); + s->accept(this); + } + if (!is_top_level_) pop_indent(); + } + + void visit(UnaryOpStmt *stmt) override { + if (stmt->op_type != UnaryOpType::cast) { + emit("const {} {} = {}({});", opengl_data_type_name(stmt->element_type()), + stmt->raw_name(), metal_unary_op_type_symbol(stmt->op_type), + stmt->operand->raw_name()); + } else { + // cast + if (stmt->cast_by_value) { + emit("const {} {} = {}({});", + opengl_data_type_name(stmt->element_type()), stmt->raw_name(), + opengl_data_type_name(stmt->cast_type), stmt->operand->raw_name()); + } else { + TI_NOT_IMPLEMENTED; + } + } + } + + void visit(BinaryOpStmt *bin) override { + const auto dt_name = opengl_data_type_name(bin->element_type()); + const auto lhs_name = bin->lhs->raw_name(); + const auto rhs_name = bin->rhs->raw_name(); + const auto bin_name = bin->raw_name(); + if (bin->op_type == BinaryOpType::floordiv) { + if (is_integral(bin->element_type())) { + emit("const {} {} = int(floor({} / {}));", dt_name, bin_name, lhs_name, + rhs_name); + } else { + emit("const {} {} = floor({} / {});", dt_name, bin_name, lhs_name, + rhs_name); + } + return; + } + const auto binop = binary_op_type_symbol(bin->op_type); + if (is_opengl_binary_op_infix(bin->op_type)) { + emit("const {} {} = ({} {} {});", dt_name, bin_name, lhs_name, binop, + rhs_name); + } else { + // This is a function call + emit("const {} {} = {}({}, {});", dt_name, bin_name, binop, lhs_name, + rhs_name); + } + } + + void visit(TernaryOpStmt *tri) override { + TI_ASSERT(tri->op_type == TernaryOpType::select); + emit("const {} {} = ({}) ? ({}) : ({});", + opengl_data_type_name(tri->element_type()), tri->raw_name(), + tri->op1->raw_name(), tri->op2->raw_name(), tri->op3->raw_name()); + } + + void visit(GlobalStoreStmt *stmt) override + { + TI_ASSERT(stmt->width() == 1); + emit("{} = {};", stmt->ptr->raw_name(), stmt->data->raw_name()); + } + + void visit(GlobalLoadStmt *stmt) override + { + TI_ASSERT(stmt->width() == 1); + emit("{} {} = {};", metal_data_type_name(stmt->element_type()), + stmt->raw_name(), stmt->ptr->raw_name()); + } + + void visit(LocalLoadStmt *stmt) override + { + bool linear_index = true; + for (int i = 0; i < (int)stmt->ptr.size(); i++) { + if (stmt->ptr[i].offset != i) { + linear_index = false; + } + } + if (stmt->same_source() && linear_index && + stmt->width() == stmt->ptr[0].var->width()) { + auto ptr = stmt->ptr[0].var; + emit("const {} {}({});", opengl_data_type_name(stmt->element_type()), + stmt->raw_name(), ptr->raw_name()); + } else { + TI_NOT_IMPLEMENTED; + } + } + + void visit(LocalStoreStmt *stmt) override + { + emit("{} = {};", stmt->ptr->raw_name(), stmt->data->raw_name()); + } + + void visit(AllocaStmt *alloca) override + { + emit("{} {}(0);", + opengl_data_type_name(alloca->element_type()), + alloca->raw_name()); + } + + void visit(ConstStmt *const_stmt) override + { + TI_ASSERT(const_stmt->width() == 1); + emit("const {} {} = {};", opengl_data_type_name(const_stmt->element_type()), + const_stmt->raw_name(), const_stmt->val[0].stringify()); + } + + std::string make_kernel_name() + { + return fmt::format("{}{}", glsl_kernel_prefix_, glsl_kernel_count_++); + } + + void emit_glsl_kernel_func_sig(const std::string &kernel_name) + { + emit("void {}()", kernel_name); + } + + void generate_serial_kernel(OffloadedStmt *stmt) { + TI_ASSERT(stmt->task_type == OffloadedStmt::TaskType::serial); + const std::string glsl_kernel_name = make_kernel_name(); + this->glsl_kernel_name_ = glsl_kernel_name; + emit_glsl_kernel_func_sig(glsl_kernel_name); + emit("{{ // serial"); + stmt->body->accept(this); + emit("}}\n"); + } + + + void visit(OffloadedStmt *stmt) override { + TI_ASSERT(is_top_level_); + is_top_level_ = false; + using Type = OffloadedStmt::TaskType; + if (stmt->task_type == Type::serial) { + generate_serial_kernel(stmt); + /*} else if (stmt->task_type == Type::range_for) { + generate_range_for_kernel(stmt);*/ + } else { + // struct_for is automatically lowered to ranged_for for dense snodes + // (#378). So we only need to support serial and range_for tasks. + TI_ERROR("Unsupported offload type={} on OpenGL arch", stmt->task_name()); + } + is_top_level_ = true; + } + + +public: + const std::string &kernel_source_code() const + { + return kernel_src_code_; + } + + void run(const SNode &root) + { + generate_header(); + irpass::print(kernel->ir); + kernel->ir->accept(this); + generate_bottom(); + } +}; + +} // namespace + +void OpenglCodeGen::lower() +{ + auto ir = kernel_->ir; + const bool print_ir = prog_->config.print_ir; + if (print_ir) { + TI_TRACE("Initial IR:"); + irpass::print(ir); + } + + if (kernel_->grad) { + irpass::reverse_segments(ir); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("Segment reversed (for autodiff):"); + irpass::print(ir); + } + } + + irpass::lower(ir); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("Lowered:"); + irpass::print(ir); + } + + irpass::typecheck(ir); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("Typechecked:"); + irpass::print(ir); + } + + irpass::demote_dense_struct_fors(ir); + irpass::typecheck(ir); + if (print_ir) { + TI_TRACE("Dense Struct-for demoted:"); + irpass::print(ir); + } + + irpass::constant_fold(ir); + if (prog_->config.simplify_before_lower_access) { + irpass::simplify(ir); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("Simplified I:"); + irpass::print(ir); + } + } + + if (kernel_->grad) { + irpass::demote_atomics(ir); + irpass::full_simplify(ir); + irpass::typecheck(ir); + if (print_ir) { + TI_TRACE("Before make_adjoint:"); + irpass::print(ir); + } + irpass::make_adjoint(ir); + if (print_ir) { + TI_TRACE("After make_adjoint:"); + irpass::print(ir); + } + irpass::typecheck(ir); + } + + irpass::lower_access(ir, prog_->config.use_llvm); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("Access Lowered:"); + irpass::print(ir); + } + + irpass::die(ir); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("DIEd:"); + irpass::print(ir); + } + + irpass::flag_access(ir); + irpass::re_id(ir); + if (print_ir) { + TI_TRACE("Access Flagged:"); + irpass::print(ir); + } + + irpass::constant_fold(ir); + if (print_ir) { + TI_TRACE("Constant folded:"); + irpass::re_id(ir); + irpass::print(ir); + } + + global_tmps_buffer_size_ = + std::max(irpass::offload(ir).total_size, (size_t)(1)); + if (print_ir) { + TI_TRACE("Offloaded:"); + irpass::re_id(ir); + irpass::print(ir); + } + + irpass::full_simplify(ir); + if (print_ir) { + TI_TRACE("Simplified II:"); + irpass::re_id(ir); + irpass::print(ir); + } + + irpass::demote_atomics(ir); + if (print_ir) { + TI_TRACE("Atomics demoted:"); + irpass::re_id(ir); + irpass::print(ir); + } +} + +FunctionType OpenglCodeGen::gen(SNode &root) +{ + KernelGen codegen(kernel_, kernel_name_); + codegen.run(*prog_->snode_root); + const std::string kernel_source_code = codegen.kernel_source_code(); + TI_INFO("\n{}", kernel_source_code); + return [kernel_source_code](Context &ctx) { + launch_glsl_kernel(kernel_source_code); + }; +} + +FunctionType OpenglCodeGen::compile(Program &program, Kernel &kernel) +{ + TI_WARN("OpenGL backend currently WIP, MAY NOT WORK"); + this->prog_ = &program; + this->kernel_ = &kernel; + + lower(); + return gen(*program.snode_root); +} + +} // namespace opengl +TLANG_NAMESPACE_END diff --git a/taichi/backends/codegen_opengl.h b/taichi/backends/codegen_opengl.h new file mode 100644 index 00000000000000..e45281dbec6cc6 --- /dev/null +++ b/taichi/backends/codegen_opengl.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include "base.h" +#include "kernel.h" + +#define OPENGL_NAME_SPACE_BEGIN namespace opengl { +#define OPENGL_NAME_SPACE_END } + +TLANG_NAMESPACE_BEGIN +namespace opengl { + +class OpenglCodeGen { + public: + OpenglCodeGen(const std::string &kernel_name) + : kernel_name_(kernel_name) + {} + + FunctionType compile(Program &program, Kernel &kernel); + + private: + void lower(); + FunctionType gen(SNode &root); + + const std::string kernel_name_; + + Program *prog_; + Kernel *kernel_; + size_t global_tmps_buffer_size_{0}; +}; + +} // namespace opengl +TLANG_NAMESPACE_END diff --git a/taichi/platform/opengl/opengl_api.cpp b/taichi/platform/opengl/opengl_api.cpp new file mode 100644 index 00000000000000..41caf498851122 --- /dev/null +++ b/taichi/platform/opengl/opengl_api.cpp @@ -0,0 +1,217 @@ +#define USE_GLEW +#ifdef USE_GLEW +#define GLEW_STATIC +#include +#else +//#include +#endif +#include +#include "opengl_api.h" + +TLANG_NAMESPACE_BEGIN +namespace opengl { + +void glapi_set_uniform(GLuint loc, float value) +{ + glUniform1f(loc, value); +} + +struct GLShader +{ + GLuint id_; + + GLShader(GLuint type = GL_COMPUTE_SHADER) + { + id_ = glCreateShader(type); + } + + GLShader(std::string source, GLuint type = GL_COMPUTE_SHADER) + : GLShader(type) + { + this->compile(source); + } + + ~GLShader() + { + glDeleteShader(id_); + } + + GLShader &compile(const std::string &source) + { + // https://stackoverflow.com/questions/28527956/get-011-error-syntax-error-unexpected-end-when-trying-to-compile-shader + GLchar *source_cstr = new GLchar[source.size() + 1]; + std::strcpy(source_cstr, source.c_str()); + glShaderSource(id_, 1, &source_cstr, NULL); + TI_INFO("[glsl] compiling shader:\n{}", source); + glCompileShader(id_); + GLint status = GL_TRUE; + glGetShaderiv(id_, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + GLsizei logLength; + glGetShaderiv(id_, GL_INFO_LOG_LENGTH, &logLength); + GLchar *log = new GLchar[logLength]; + glGetShaderInfoLog(id_, logLength, &logLength, log); + TI_ERROR("[glsl] shader compile error:\n{}", log); + } + return *this; + } +}; + +struct GLProgram +{ + GLuint id_; + + GLProgram() + { + id_ = glCreateProgram(); + } + + GLProgram(GLShader &shader) + : GLProgram() + { + this->attach(shader); + } + + ~GLProgram() + { + glDeleteProgram(id_); + } + + GLProgram &attach(GLShader &shader) + { + glAttachShader(id_, shader.id_); + return *this; + } + + GLProgram &link() + { + TI_INFO("[glsl] linking program"); + glLinkProgram(id_); + GLint status = GL_TRUE; + glGetProgramiv(id_, GL_LINK_STATUS, &status); + if (status != GL_TRUE) { + GLsizei logLength; + glGetProgramiv(id_, GL_INFO_LOG_LENGTH, &logLength); + GLchar *log = new GLchar[logLength]; + glGetProgramInfoLog(id_, logLength, &logLength, log); + TI_ERROR("[glsl] program link error:\n{}", log); + } + return *this; + } + + GLProgram &use() + { + glUseProgram(id_); + return *this; + } + + template + void set_uniform(std::string name, T value) + { + GLuint loc = glGetUniformLocation(id_, name.c_str()); + glapi_set_uniform(loc, value); + } +}; + +// https://blog.csdn.net/ylbs110/article/details/52074826 +// https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object +struct GLSSBO +{ + GLuint id_; + + GLSSBO() + { + glGenBuffers(1, &id_); + } + + ~GLSSBO() + { + glDeleteBuffers(1, &id_); + } + + GLSSBO &bind() + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, id_); + return *this; + } + + GLSSBO &bind_data(void *data, size_t size, GLuint usage = GL_STATIC_READ) + { + this->bind(); + glBufferData(GL_SHADER_STORAGE_BUFFER, size, data, usage); + return *this; + } + + GLSSBO &bind_base(size_t index) + { + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, index, id_); + return *this; + } + + void *map(size_t offset, size_t length, GLbitfield access) + { + return glMapBufferRange(GL_SHADER_STORAGE_BUFFER, offset, length, access); + } +}; + +void initialize_opengl() +{ + TI_INFO("[glsl] initializing GLFW (with hint GL 4.3)"); + glfwInit(); + glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); + GLFWwindow *window = glfwCreateWindow(200, 200, "GLFW Window", nullptr, nullptr); + glfwMakeContextCurrent(window); +#ifdef USE_GLEW + TI_INFO("[glsl] initializing GLEW"); + GLint status = glewInit(); + if (status != GLEW_OK) { + TI_ERROR("[glsl] cannot initialize GLEW: {}", glewGetErrorString(status)); + } +#endif + const char *gl_version = (const char *)glGetString(GL_VERSION); + if (!gl_version) { + TI_WARN("[glsl] cannot get GL_VERSION"); + } else { + TI_INFO("[glsl] GL_VERSION: {}", gl_version); + } +} + +void launch_glsl_kernel(std::string source) +{ + static bool gl_inited = false; + if (!gl_inited) { + initialize_opengl(); + gl_inited = true; + } + GLShader shader(source); + GLProgram program(shader); + program.link(); + program.use(); + + struct data { + float number; + } data_i; + data_i.number = 233.3; + GLSSBO ssbo; + ssbo.bind_data(&data_i, sizeof(data_i), GL_DYNAMIC_READ); + ssbo.bind_base(0); + + TI_INFO("[glsl] dispatching compute..."); + // https://www.khronos.org/opengl/wiki/Compute_Shader + // https://community.arm.com/developer/tools-software/graphics/b/blog/posts/get-started-with-compute-shaders + // https://www.khronos.org/assets/uploads/developers/library/2014-siggraph-bof/KITE-BOF_Aug14.pdf + glDispatchCompute(1, 1, 1); + TI_INFO("[glsl] waiting memory barrier..."); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + struct data *data_r = (struct data *)ssbo.map(0, sizeof(data_i), GL_MAP_READ_BIT); + TI_INFO("[glsl] data_r.number = {}", data_r->number); +} + +bool is_opengl_api_available() +{ + return true; +} + +} +TLANG_NAMESPACE_END diff --git a/taichi/platform/opengl/opengl_api.h b/taichi/platform/opengl/opengl_api.h new file mode 100644 index 00000000000000..4387236c739461 --- /dev/null +++ b/taichi/platform/opengl/opengl_api.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +#include + +TLANG_NAMESPACE_BEGIN + +namespace opengl { + +bool is_opengl_api_available(); +void launch_glsl_kernel(std::string source); + +} // namespace opengl + +TLANG_NAMESPACE_END diff --git a/taichi/profiler.cpp b/taichi/profiler.cpp index a69c974db34db7..8fb33d6e526df5 100644 --- a/taichi/profiler.cpp +++ b/taichi/profiler.cpp @@ -149,7 +149,7 @@ class CUDAProfiler : public ProfilerBase { } // namespace std::unique_ptr make_profiler(Arch arch) { - if (arch == Arch::x86_64 || arch == Arch::arm || arch == Arch::metal) { + if (arch == Arch::x86_64 || arch == Arch::arm || arch == Arch::metal || arch == Arch::opengl) { return std::make_unique(arch); } else if (arch == Arch::cuda) { return std::make_unique(); diff --git a/taichi/program.cpp b/taichi/program.cpp index 10a1f4661c6600..9f61a487a6129c 100644 --- a/taichi/program.cpp +++ b/taichi/program.cpp @@ -7,6 +7,7 @@ #include "backends/codegen_cuda.h" #include "backends/codegen_metal.h" +#include "backends/codegen_opengl.h" #include "backends/codegen_x86.h" #include "backends/struct.h" #include "backends/struct_metal.h" @@ -86,6 +87,9 @@ FunctionType Program::compile(Kernel &kernel) { } else if (kernel.arch == Arch::metal) { metal::MetalCodeGen codegen(kernel.name, &metal_struct_compiled_.value()); ret = codegen.compile(*this, kernel, metal_runtime_.get()); + } else if (kernel.arch == Arch::opengl) { + opengl::OpenglCodeGen codegen(kernel.name); + ret = codegen.compile(*this, kernel); } else { TI_NOT_IMPLEMENTED; } diff --git a/taichi/python/export_misc.cpp b/taichi/python/export_misc.cpp index eb2a55e8518c0e..983fc55d55d59b 100644 --- a/taichi/python/export_misc.cpp +++ b/taichi/python/export_misc.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #if defined(TI_WITH_CUDA) #include #endif @@ -164,6 +165,7 @@ void export_misc(py::module &m) { }); m.def("with_cuda", with_cuda); m.def("with_metal", taichi::Tlang::metal::is_metal_api_available); + m.def("with_opengl", taichi::Tlang::opengl::is_opengl_api_available); } TI_NAMESPACE_END