diff --git a/CMakeLists.txt b/CMakeLists.txt index f42705ae7fda5..fd381b9a12e41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ endif() tvm_option(USE_CUDA "Build with CUDA" OFF) tvm_option(USE_OPENCL "Build with OpenCL" OFF) +tvm_option(USE_OPENGL "Build with OpenGL" OFF) tvm_option(USE_METAL "Build with Metal" OFF) tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON) @@ -61,8 +62,8 @@ if(MSVC) else(MSVC) include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) - set(CMAKE_C_FLAGS "-O3 -Wall -std=c++11 -fPIC") - set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS}) + set(CMAKE_C_FLAGS "-O3 -Wall -fPIC") + set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -std=c++11") endif(MSVC) # add source group @@ -87,6 +88,7 @@ file(GLOB RUNTIME_SRCS src/runtime/*.cc) file(GLOB COMPILER_LLVM_SRCS src/codegen/llvm/*.cc) file(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc) file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) +file(GLOB RUNTIME_OPENGL_SRCS src/runtime/opengl/*.cc) file(GLOB RUNTIME_METAL_SRCS src/runtime/metal/*.mm) file(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc) file(GLOB RUNTIME_GRAPH_SRCS src/runtime/graph/*.cc) @@ -135,6 +137,18 @@ else(USE_OPENCL) add_definitions(-DTVM_OPENCL_RUNTIME=0) endif(USE_OPENCL) +if(USE_OPENGL) + find_package(OpenGL QUIET REQUIRED) + find_package(glfw3 QUIET REQUIRED) + message(STATUS "Build with OpenGL support") + include_directories(${OPENGL_INCLUDE_DIRS}) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenGL_LIBRARIES} glfw) + list(APPEND RUNTIME_SRCS ${RUNTIME_OPENGL_SRCS}) + add_definitions(-DTVM_OPENGL_RUNTIME=1) +else(USE_OPENGL) + add_definitions(-DTVM_OPENGL_RUNTIME=0) +endif(USE_OPENGL) + if(USE_METAL) find_package(OpenCL QUIET REQUIRED) message(STATUS "Build with Metal support") diff --git a/Jenkinsfile b/Jenkinsfile index 793bf11f0d4a3..4fc2285f507c9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -88,6 +88,7 @@ stage('Build') { echo USE_CUDNN=1 >> config.mk echo USE_CUDA=1 >> config.mk echo USE_OPENCL=1 >> config.mk + echo USE_OPENGL=1 >> config.mk echo LLVM_CONFIG=llvm-config-4.0 >> config.mk echo USE_RPC=1 >> config.mk echo USE_GRAPH_RUNTIME=1 >> config.mk @@ -120,6 +121,7 @@ stage('Build') { echo USE_CUDA=0 >> config.mk echo USE_OPENCL=0 >> config.mk echo USE_RPC=0 >> config.mk + echo USE_OPENGL=1 >> config.mk echo LLVM_CONFIG=llvm-config-4.0 >> config.mk """ make('cpu', '-j2') diff --git a/Makefile b/Makefile index fef8b2a080514..453415de66346 100644 --- a/Makefile +++ b/Makefile @@ -32,8 +32,8 @@ OBJCFLAGS = -fno-objc-arc EMCC_FLAGS= -std=c++11 -DDMLC_LOG_STACK_TRACE=0\ -Oz -s RESERVED_FUNCTION_POINTERS=2 -s MAIN_MODULE=1 -s NO_EXIT_RUNTIME=1\ -s EXTRA_EXPORTED_RUNTIME_METHODS="['cwrap','getValue','setValue','addFunction']"\ + -s USE_GLFW=3 -s USE_WEBGL2=1 -lglfw\ $(INCLUDE_FLAGS) - # llvm configuration ifdef LLVM_CONFIG LLVM_VERSION=$(shell $(LLVM_CONFIG) --version| cut -b 1,3) @@ -54,6 +54,7 @@ METAL_SRC = $(wildcard src/runtime/metal/*.mm) CUDA_SRC = $(wildcard src/runtime/cuda/*.cc) ROCM_SRC = $(wildcard src/runtime/rocm/*.cc) OPENCL_SRC = $(wildcard src/runtime/opencl/*.cc) +OPENGL_SRC = $(wildcard src/runtime/opengl/*.cc) RPC_SRC = $(wildcard src/runtime/rpc/*.cc) GRAPH_SRC = $(wildcard src/runtime/graph/*.cc) RUNTIME_SRC = $(wildcard src/runtime/*.cc) @@ -65,6 +66,7 @@ METAL_OBJ = $(patsubst src/%.mm, build/%.o, $(METAL_SRC)) CUDA_OBJ = $(patsubst src/%.cc, build/%.o, $(CUDA_SRC)) ROCM_OBJ = $(patsubst src/%.cc, build/%.o, $(ROCM_SRC)) OPENCL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENCL_SRC)) +OPENGL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENGL_SRC)) RPC_OBJ = $(patsubst src/%.cc, build/%.o, $(RPC_SRC)) GRAPH_OBJ = $(patsubst src/%.cc, build/%.o, $(GRAPH_SRC)) CC_OBJ = $(patsubst src/%.cc, build/%.o, $(CC_SRC)) $(LLVM_OBJ) @@ -119,6 +121,19 @@ else CFLAGS += -DTVM_OPENCL_RUNTIME=0 endif +ifeq ($(USE_OPENGL), 1) + CFLAGS += -DTVM_OPENGL_RUNTIME=1 + EMCC_FLAGS += -DTVM_OPENGL_RUNTIME=1 + ifeq ($(UNAME_S), Darwin) + FRAMEWORKS += -framework OpenGL + else + LDFLAGS += -lGL -lglfw + endif + RUNTIME_DEP += $(OPENGL_OBJ) +else + CFLAGS += -DTVM_OPENGL_RUNTIME=0 +endif + ifeq ($(USE_METAL), 1) CFLAGS += -DTVM_METAL_RUNTIME=1 LDFLAGS += -lobjc diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 91175f671a567..6d680330a659f 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -55,9 +55,11 @@ typedef int64_t tvm_index_t; /*! \brief Extension device types in TVM */ typedef enum { + kOpenGL = 11, + // Extension DRAM type, used for quickly test extension device // The device api can differ depending on the xpu driver registered. - kExtDev = 12 + kExtDev = 12, // AddExtraTVMType which is not in DLPack here } TVMDeviceExtType; diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index 318a5363726f9..9ba08fb868255 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -55,11 +55,16 @@ class DeviceAPI { /*! * \brief Allocate a data space on device. * \param ctx The device context to perform operation. - * \param size The size of the memory + * \param nbytes The number of bytes in memory. * \param alignment The alignment of the memory. - * \return The allocated device pointer + * \param type_hint The type of elements. Only needed by certain backends such + * as OpenGL, as nbytes & alignment are sufficient for most backends. + * \return The allocated device pointer. */ - virtual void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) = 0; + virtual void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) = 0; /*! * \brief Free a data space on device. * \param ctx The device context to perform operation. diff --git a/include/tvm/schedule.h b/include/tvm/schedule.h index 3efc31774d400..2f94aedccf3da 100644 --- a/include/tvm/schedule.h +++ b/include/tvm/schedule.h @@ -213,6 +213,11 @@ class Stage : public NodeRef { * \return reference to self. */ Stage& double_buffer(); // NOLINT(*) + /*! + * \brief Schedule for OpenGL fragment shader. + * \return reference to self. + */ + Stage& opengl(); // NOLINT(*) /*! * \brief whether the stage has been scheduled. * \return whether the stage has been scheduled. diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index e23eed7168dce..fe66271dce5e8 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -17,7 +17,7 @@ from . import target from . import ndarray as nd -from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi, rocm, ext_dev +from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi, rocm, opengl, ext_dev from ._ffi.runtime_ctypes import TypeCode from ._ffi.function import Function diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index cfadd18188f51..e2641fbc7e469 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -97,6 +97,7 @@ class TVMContext(ctypes.Structure): 8 : 'metal', 9 : 'vpi', 10: 'rocm', + 11: 'opengl', 12: 'ext_dev', } STR2MASK = { @@ -111,6 +112,7 @@ class TVMContext(ctypes.Structure): 'metal': 8, 'vpi': 9, 'rocm': 10, + 'opengl': 11, 'ext_dev': 12, } def __init__(self, device_type, device_id): diff --git a/python/tvm/contrib/rpc.py b/python/tvm/contrib/rpc.py index 3d14f1eb2b6ea..3448c4c554d10 100644 --- a/python/tvm/contrib/rpc.py +++ b/python/tvm/contrib/rpc.py @@ -285,6 +285,10 @@ def metal(self, dev_id=0): """Construct remote Metal device.""" return self.context(8, dev_id) + def opengl(self, dev_id=0): + """Construct remote OpenGL device.""" + return self.context(11, dev_id) + def ext_dev(self, dev_id=0): """Construct remote extension device.""" return self.context(12, dev_id) diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py index 1556c4912a352..0521a69c5f80e 100644 --- a/python/tvm/ndarray.py +++ b/python/tvm/ndarray.py @@ -120,6 +120,21 @@ def vpi(dev_id=0): """ return TVMContext(9, dev_id) +def opengl(dev_id=0): + """Construct a OpenGL device + + Parameters + ---------- + dev_id : int, optional + The integer device id + + Returns + ------- + ctx : TVMContext + The created context + """ + return TVMContext(11, dev_id) + def ext_dev(dev_id=0): """Construct a extension device diff --git a/python/tvm/schedule.py b/python/tvm/schedule.py index 6abe4aae2f6fc..0fc6692d950ed 100644 --- a/python/tvm/schedule.py +++ b/python/tvm/schedule.py @@ -611,4 +611,11 @@ def double_buffer(self): """ _api_internal._StageDoubleBuffer(self) + def opengl(self): + """The special OpenGL schedule + + Maps each output element to a pixel. + """ + _api_internal._StageOpenGL(self) + _init_api("tvm.schedule") diff --git a/python/tvm/target.py b/python/tvm/target.py index 4584c1aa4d239..94cd5457e44f7 100644 --- a/python/tvm/target.py +++ b/python/tvm/target.py @@ -67,7 +67,7 @@ class Target(object): Parameters ---------- - target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "ext_dev"} + target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev"} The major target name. options : list of str, optional @@ -119,6 +119,8 @@ def __init__(self, elif target_name in ("metal",): self.keys += ("gpu",) self.max_num_threads = 256 + elif target_name in ("opengl",): + self.keys += ("opengl",) elif target_name in ("stackvm", "ext_dev"): # Do not now class for stacvm or ext_dev pass diff --git a/src/api/api_lang.cc b/src/api/api_lang.cc index 94075b6ec0596..37a21cedf3dbb 100644 --- a/src/api/api_lang.cc +++ b/src/api/api_lang.cc @@ -399,6 +399,11 @@ TVM_REGISTER_API("_StageDoubleBuffer") args[0].operator Stage().double_buffer(); }); +TVM_REGISTER_API("_StageOpenGL") + .set_body([](TVMArgs args, TVMRetValue *ret) { + args[0].operator Stage().opengl(); + }); + TVM_REGISTER_API("_ScheduleNormalize") .set_body([](TVMArgs args, TVMRetValue* ret) { *ret = args[0].operator Schedule() diff --git a/src/codegen/build_opengl.cc b/src/codegen/build_opengl.cc new file mode 100644 index 0000000000000..5e13676e81117 --- /dev/null +++ b/src/codegen/build_opengl.cc @@ -0,0 +1,35 @@ +/*! + * Copyright (c) 2017 by Contributors + * Build opengl modules from source. + * \file build_opengl.cc + */ +#include +#include "./codegen_opengl.h" +#include "./build_common.h" + +namespace tvm { +namespace codegen { + +runtime::Module BuildOpenGL(Array funcs) { + bool output_ssa = false; + CodeGenOpenGL cg; + cg.Init(output_ssa); + for (LoweredFunc f : funcs) { + cg.AddFunction(f); + } + auto shaders = cg.Finish(); +#if TVM_OPENGL_RUNTIME + return OpenGLModuleCreate(shaders, "gl", ExtractFuncInfo(funcs)); +#else + LOG(WARNING) << "OpenGL runtime not enabled, return a source module..."; + auto data = ToJSON(shaders); + return DeviceSourceModuleCreate(data, "gl", ExtractFuncInfo(funcs), "opengl"); +#endif // TVM_OPENGL_RUNTIME +} + +TVM_REGISTER_API("codegen.build_opengl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildOpenGL(args[0]); +}); +} // namespace codegen +} // namespace tvm diff --git a/src/codegen/codegen_c.h b/src/codegen/codegen_c.h index 895e94b8198e5..ce882eda4a18e 100644 --- a/src/codegen/codegen_c.h +++ b/src/codegen/codegen_c.h @@ -150,7 +150,7 @@ class CodeGenC : std::string GetStructRef( Type t, const Expr& buffer, const Expr& index, int kind); // print reference to a buffer as type t in index. - std::string GetBufferRef( + virtual std::string GetBufferRef( Type t, const Variable* buffer, Expr index); /*! * \brief If buffer is allocated as type t. diff --git a/src/codegen/codegen_opengl.cc b/src/codegen/codegen_opengl.cc new file mode 100644 index 0000000000000..e645e7f6c7015 --- /dev/null +++ b/src/codegen/codegen_opengl.cc @@ -0,0 +1,264 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file codegen_opengl.cc + * + * We are targeting OpenGL 3.3. The reason of not targeting a recent version + * of OpenGL is to have better compatibility of WebGL 2. + */ +#include +#include +#include +#include +#include "./codegen_opengl.h" +#include "../runtime/thread_storage_scope.h" + +namespace tvm { +namespace codegen { + +CodeGenOpenGL::CodeGenOpenGL() + : output_(nullptr), output_iter_var_(nullptr) {} + +void CodeGenOpenGL::InitFuncState(LoweredFunc f) { + CodeGenC::InitFuncState(f); + output_ = nullptr; + inputs_.clear(); + output_iter_var_ = nullptr; + thread_extent_var_ = ""; +} + +void CodeGenOpenGL::AddFunction(LoweredFunc f) { + // clear previous generated state. + this->InitFuncState(f); + + this->decl_stream << "#version 300 es\n"; + this->decl_stream << "precision highp float;\n"; + + // skip the first underscore, so SSA variable starts from _1 + GetUniqueName("_"); + // add to alloc buffer type. + for (const auto& kv : f->handle_data_type) { + RegisterHandleType(kv.first.get(), kv.second.type()); + } + + // Allocate argument names. Store in `var_idmap_`. + for (auto arg : f->args) { + auto arg_name = GetUniqueName(arg.get()->name_hint); + var_idmap_[arg.get()] = arg_name; + } + + thread_extent_var_ = GetUniqueName("thread_extent"); + this->decl_stream << "uniform int " << thread_extent_var_ << ";\n"; + + this->stream << "void main() {\n"; + + int func_scope = this->BeginScope(); + this->PrintStmt(f->body); + this->EndScope(func_scope); + + this->PrintIndent(); + this->stream << "}\n\n"; + + // Declare arguments. + for (auto arg : f->args) { + if (this->inputs_.find(arg.get()) != this->inputs_.cend()) { + // Declare input texture. + // Format: + // - Float: "uniform sampler2D {name};" + // - Int: "uniform isampler2D {name};" + // - UInt: "uniform usampler2D {name};" + + auto arg_name = GetVarID(arg.get()); + + auto type_it = this->handle_data_type_.find(arg.get()); + CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type."; + auto type = Type2TVMType(type_it->second); + CHECK_EQ(type.lanes, 1) << "Vector type not supported."; + + switch (type.code) { + case kDLInt: + this->decl_stream << "uniform isampler2D " << arg_name << ";\n"; + break; + case kDLUInt: + this->decl_stream << "uniform usampler2D " << arg_name << ";\n"; + break; + case kDLFloat: + this->decl_stream << "uniform sampler2D " << arg_name << ";\n"; + break; + default: + LOG(FATAL) << "Unsupported type code."; + } + + } else if (this->output_ == arg.get()) { + // Declare output texture. + // Format: "out {type} {name};" + + auto arg_name = GetVarID(arg.get()); + + auto type_it = this->handle_data_type_.find(arg.get()); + CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type."; + auto type = type_it->second; + + this->decl_stream << "out "; + PrintType(type, this->decl_stream); + this->decl_stream << " " << arg_name << ";\n"; + + } else { + // Declare uniform value. + // Format: "uniform {type} {name};" + + auto arg_name = GetVarID(arg.get()); + auto type = arg.get()->type; + + this->decl_stream << "uniform "; + PrintType(type, this->decl_stream); + this->decl_stream << " " << arg_name << ";\n"; + } + } + + std::vector arg_names; + std::vector arg_kinds; + for (auto arg : f->args) { + std::string name = GetVarID(arg.get()); + + runtime::OpenGLArgKind kind; + if (inputs_.find(arg.get()) != inputs_.cend()) { + kind = runtime::OpenGLArgKind::kInputTexture; + } else if (output_ == arg.get()) { + kind = runtime::OpenGLArgKind::kOutputTexture; + } else { + kind = runtime::OpenGLArgKind::kUniform; + } + + arg_names.push_back(name); + arg_kinds.push_back(kind); + } + + shaders_[f->name] = runtime::OpenGLShader( + this->decl_stream.str() + this->stream.str(), + std::move(arg_names), std::move(arg_kinds), + this->thread_extent_var_); +} + +std::unordered_map CodeGenOpenGL::Finish() { + return shaders_; +} + +void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) { + CHECK_EQ(iv->thread_tag, "threadIdx.x") << "Must be threadIdx.x"; + CHECK(var_idmap_.find(iv->var.get()) == var_idmap_.end()) + << "Only support one thread iter var"; + CHECK(output_iter_var_ == nullptr) << "Only support one thread iter var"; + + var_idmap_[iv->var.get()] = iv->thread_tag; + output_iter_var_ = iv->var.get(); + + // Declare threadIdx local variable. + this->PrintIndent(); + this->stream << "ivec2 threadIdx = ivec2(gl_FragCoord.xy);\n"; + + // Return directly if threadIdx.x >= thread_extent. + this->PrintIndent(); + this->stream << "if (threadIdx.x >= " << thread_extent_var_ << ") {\n"; + this->PrintIndent(); + this->stream << " return;\n"; + this->PrintIndent(); + this->stream << "}\n"; +} + +// GLSL texture store is special. We can only store to one output texture, and +// we must store to the index that matches the current "thread index". +void CodeGenOpenGL::VisitStmt_(const Store* op) { + auto t = op->value.type(); + auto buffer = op->buffer_var.get(); + auto index = op->index; + + if (t.lanes() == 1) { + // Store to a scalar. + CHECK(inputs_.find(buffer) == inputs_.cend()) + << "Texture has been read from before. Must not store to it."; + if (output_ == nullptr) { + output_ = buffer; // Record that this texture is the output. + } else { + CHECK(output_ == buffer) << "GLSL can only write to 1 texture."; + } + + this->PrintIndent(); + this->stream << GetBufferRef(t, buffer, index) << " = " + << PrintExpr(op->value) << ";\n"; + + } else { + // Store to a vector. + LOG(FATAL) << "Vectorized store not implemented."; + } +} + +// texelFetch(tex, ivec2(idx, 0), 0).r +std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) { + std::ostringstream os; + os << "texelFetch(" << GetVarID(buffer) << ", ivec2("; + PrintExpr(index, os); + os << ", 0), 0).r"; + return os.str(); +} + +// Print a reference expression to a buffer. +// Format: texelFetch(buffer, index, 0).r +std::string CodeGenOpenGL::GetBufferRef( + Type t, const Variable* buffer, Expr index) { + CHECK_EQ(t.lanes(), 1) << "Vector type not supported."; + CHECK(HandleTypeMatch(buffer, t)) << "Type mismatch not supported."; + + if (buffer == this->output_) { + // This is the output texture. + CHECK_EQ(index.get(), output_iter_var_) + << "GLSL must access corresponding elem of output texture."; + return GetVarID(buffer); + } else { + // This is an input texture. + this->inputs_.insert(buffer); + return TexelFetch(buffer, index); + } +} + +void CodeGenOpenGL::PrintType(Type t, std::ostream& os) { + switch (t.code()) { + case halideir_type_int: + CHECK_EQ(t.bits(), 32) << "Only support 32-bit int."; + os << "int"; + break; + case halideir_type_uint: + CHECK_EQ(t.bits(), 32) << "Only support 32-bit uint."; + os << "uint"; + break; + case halideir_type_float: + CHECK_EQ(t.bits(), 32) << "Only support 32-bit float."; + os << "float"; + break; + default: + LOG(FATAL) << "Unsupported type code."; + } +} + +// Codegen for immediate values + +void CodeGenOpenGL::VisitExpr_(const IntImm* op, std::ostream& os) { + CHECK_EQ(op->type, Int(32)) << "GLSL 3.0 only supports 32-bit ints."; + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenGL::VisitExpr_(const UIntImm* op, std::ostream& os) { + CHECK_EQ(op->type, UInt(32)) << "GLSL 3.0 only supports 32-bit uints."; + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenGL::VisitExpr_(const FloatImm* op, std::ostream& os) { + CHECK_EQ(op->type, Float(32)) << "GLSL 3.0 only supports 32-bit floats."; + CodeGenC::VisitExpr_(op, os); +} + +void CodeGenOpenGL::VisitExpr_(const StringImm*, std::ostream& os) { + LOG(FATAL) << "GLSL 3.0 doesn't support strings."; +} + +} // namespace codegen +} // namespace tvm diff --git a/src/codegen/codegen_opengl.h b/src/codegen/codegen_opengl.h new file mode 100644 index 0000000000000..6ff1f7e9ac953 --- /dev/null +++ b/src/codegen/codegen_opengl.h @@ -0,0 +1,48 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file codegen_opengl.h + * \brief Generate OpenGL device code. + */ +#ifndef TVM_CODEGEN_CODEGEN_OPENGL_H_ +#define TVM_CODEGEN_CODEGEN_OPENGL_H_ + +#include +#include +#include +#include "./codegen_c.h" +#include "../runtime/opengl/opengl_module.h" + +namespace tvm { +namespace codegen { + +class CodeGenOpenGL final : public CodeGenC { + public: + CodeGenOpenGL(); + void AddFunction(LoweredFunc f); + std::unordered_map Finish(); + + void InitFuncState(LoweredFunc f) final; + void BindThreadIndex(const IterVar& iv) final; + void VisitStmt_(const Store* op) final; + std::string TexelFetch(const Variable* buffer, Expr index); + std::string GetBufferRef(Type t, const Variable* buffer, Expr index) final; + void PrintType(Type t, std::ostream& os) final; // NOLINT(*) + + // Codegen for immediate values + void VisitExpr_(const IntImm* op, std::ostream& os) final; // NOLINT(*) + void VisitExpr_(const UIntImm* op, std::ostream& os) final; // NOLINT(*) + void VisitExpr_(const FloatImm* op, std::ostream& os) final; // NOLINT(*) + void VisitExpr_(const StringImm* op, std::ostream& os) final; // NOLINT(*) + + private: + const Variable* output_{nullptr}; + std::unordered_set inputs_; + const Variable* output_iter_var_{nullptr}; + std::unordered_map shaders_; + std::string thread_extent_var_; +}; + +} // namespace codegen +} // namespace tvm + +#endif // TVM_CODEGEN_CODEGEN_OPENGL_H_ diff --git a/src/codegen/verilog/vpi_device_api.cc b/src/codegen/verilog/vpi_device_api.cc index 4e0e73eb427b0..8efd65785547b 100644 --- a/src/codegen/verilog/vpi_device_api.cc +++ b/src/codegen/verilog/vpi_device_api.cc @@ -49,7 +49,10 @@ class VPIDeviceAPI final : public runtime::DeviceAPI { *rv = 1; } } - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { + void* AllocDataSpace(TVMContext ctx, + size_t size, + size_t alignment, + TVMType type_hint) final { // always align to 32 bytes at least. CHECK_LE(alignment, runtime::kAllocAlignment); alignment = runtime::kAllocAlignment; diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index dd8f80bcd72f0..0d0e36f239f28 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -31,6 +31,7 @@ inline std::string DeviceName(int type) { case kDLMetal: return "metal"; case kDLVPI: return "vpi"; case kDLROCM: return "rocm"; + case kOpenGL: return "opengl"; case kExtDev: return "ext_dev"; default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; } @@ -95,7 +96,8 @@ DeviceAPI* DeviceAPI::Get(TVMContext ctx, bool allow_missing) { } void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t size) { - return AllocDataSpace(ctx, size, kTempAllocaAlignment); + TVMType type_hint{kDLUInt, 8, 1}; + return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint); } void DeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) { @@ -365,7 +367,7 @@ int TVMArrayAlloc(const tvm_index_t* shape, size_t size = GetDataSize(arr); size_t alignment = GetDataAlignment(arr); arr->data = DeviceAPIManager::Get(arr->ctx)->AllocDataSpace( - arr->ctx, size, alignment); + arr->ctx, size, alignment, arr->dtype); *out = arr; API_END_HANDLE_ERROR(TVMArrayFree_(arr)); } diff --git a/src/runtime/cpu_device_api.cc b/src/runtime/cpu_device_api.cc index 1b2009e98e7fe..30c3bb7d52df1 100644 --- a/src/runtime/cpu_device_api.cc +++ b/src/runtime/cpu_device_api.cc @@ -20,13 +20,16 @@ class CPUDeviceAPI final : public DeviceAPI { *rv = 1; } } - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final { void* ptr; #if _MSC_VER - ptr = _aligned_malloc(size, alignment); + ptr = _aligned_malloc(nbytes, alignment); if (ptr == nullptr) throw std::bad_alloc(); #else - int ret = posix_memalign(&ptr, alignment, size); + int ret = posix_memalign(&ptr, alignment, nbytes); if (ret != 0) throw std::bad_alloc(); #endif return ptr; diff --git a/src/runtime/cuda/cuda_device_api.cc b/src/runtime/cuda/cuda_device_api.cc index fd2c54ffd58d8..69b485a423c0a 100644 --- a/src/runtime/cuda/cuda_device_api.cc +++ b/src/runtime/cuda/cuda_device_api.cc @@ -54,12 +54,15 @@ class CUDADeviceAPI final : public DeviceAPI { } *rv = value; } - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final { CUDA_CALL(cudaSetDevice(ctx.device_id)); CHECK_EQ(256 % alignment, 0U) << "CUDA space is aligned at 256 bytes"; void *ret; - CUDA_CALL(cudaMalloc(&ret, size)); + CUDA_CALL(cudaMalloc(&ret, nbytes)); return ret; } diff --git a/src/runtime/metal/metal_common.h b/src/runtime/metal/metal_common.h index d7980e64c9a9d..7c2975fe7ccc0 100644 --- a/src/runtime/metal/metal_common.h +++ b/src/runtime/metal/metal_common.h @@ -63,7 +63,10 @@ class MetalWorkspace final : public DeviceAPI { // override device API void SetDevice(TVMContext ctx) final; void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final; - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final; + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final; void FreeDataSpace(TVMContext ctx, void* ptr) final; void CopyDataFromTo(const void* from, size_t from_size, diff --git a/src/runtime/metal/metal_device_api.mm b/src/runtime/metal/metal_device_api.mm index f66d5b51e64a8..82c52a23e0365 100644 --- a/src/runtime/metal/metal_device_api.mm +++ b/src/runtime/metal/metal_device_api.mm @@ -123,12 +123,12 @@ int GetWarpSize(id dev) { } void* MetalWorkspace::AllocDataSpace( - TVMContext ctx, size_t size, size_t alignment) { + TVMContext ctx, size_t nbytes, size_t alignment, TVMType type_hint) { this->Init(); id dev = GetDevice(ctx); // allocate buffer in GPU only mode. id buf = [ - dev newBufferWithLength:size + dev newBufferWithLength:nbytes options:MTLResourceStorageModePrivate]; CHECK(buf != nil); return (__bridge void*)([buf retain]); diff --git a/src/runtime/module.cc b/src/runtime/module.cc index 43ad6e5234940..3b95137f4fa46 100644 --- a/src/runtime/module.cc +++ b/src/runtime/module.cc @@ -115,6 +115,8 @@ bool RuntimeEnabled(const std::string& target) { f_name = "device_api.gpu"; } else if (target == "cl" || target == "opencl") { f_name = "device_api.opencl"; + } else if (target == "gl" || target == "opengl") { + f_name = "device_api.opengl"; } else if (target == "mtl" || target == "metal") { f_name = "device_api.metal"; } else if (target == "stackvm") { diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h index e990aeba6a3e3..29e205ced4d7b 100644 --- a/src/runtime/opencl/opencl_common.h +++ b/src/runtime/opencl/opencl_common.h @@ -142,7 +142,10 @@ class OpenCLWorkspace final : public DeviceAPI { // override device API void SetDevice(TVMContext ctx) final; void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final; - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final; + void* AllocDataSpace(TVMContext ctx, + size_t size, + size_t alignment, + TVMType type_hint) final; void FreeDataSpace(TVMContext ctx, void* ptr) final; void CopyDataFromTo(const void* from, size_t from_offset, diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index 23c897e048256..7518e72f9d9b6 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -51,7 +51,7 @@ void OpenCLWorkspace::GetAttr( } void* OpenCLWorkspace::AllocDataSpace( - TVMContext ctx, size_t size, size_t alignment) { + TVMContext ctx, size_t size, size_t alignment, TVMType type_hint) { this->Init(); CHECK(context != nullptr) << "No OpenCL device"; cl_int err_code; diff --git a/src/runtime/opengl/opengl_common.h b/src/runtime/opengl/opengl_common.h new file mode 100644 index 0000000000000..80b1d9f95c8e0 --- /dev/null +++ b/src/runtime/opengl/opengl_common.h @@ -0,0 +1,495 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file opengl_common.h + * \brief OpenGL common header + */ +#ifndef TVM_RUNTIME_OPENGL_OPENGL_COMMON_H_ +#define TVM_RUNTIME_OPENGL_OPENGL_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace gl { + +// This file contains the following classes. +class GLFunctionPointers; +class OpenGLWorkspace; +class Texture; +class Program; + +inline GLFWglproc GetProcAddress(const char* procname) { + GLFWglproc proc = glfwGetProcAddress(procname); + CHECK(proc != nullptr) << "Cannot get function \"" << procname << "\""; + return proc; +} + +#define SetGLFunctionPointer(NAME) \ + NAME(decltype(NAME)(GetProcAddress("gl" #NAME))) + +/*! + * \brief The function pointers of all OpenGL APIs that are used. + * Must be constructed after creating an OpenGL context. + */ +class GLFunctionPointers { + public: + GLFunctionPointers() + : SetGLFunctionPointer(ActiveTexture), + SetGLFunctionPointer(AttachShader), + SetGLFunctionPointer(BindBuffer), + SetGLFunctionPointer(BindFramebuffer), + SetGLFunctionPointer(BindTexture), + SetGLFunctionPointer(BindVertexArray), + SetGLFunctionPointer(BufferData), + SetGLFunctionPointer(CheckFramebufferStatus), + SetGLFunctionPointer(Clear), + SetGLFunctionPointer(CompileShader), + SetGLFunctionPointer(CreateProgram), + SetGLFunctionPointer(CreateShader), + SetGLFunctionPointer(DeleteFramebuffers), + SetGLFunctionPointer(DeleteProgram), + SetGLFunctionPointer(DeleteShader), + SetGLFunctionPointer(DeleteTextures), + SetGLFunctionPointer(DetachShader), + SetGLFunctionPointer(DrawArrays), + SetGLFunctionPointer(DrawBuffers), + SetGLFunctionPointer(EnableVertexAttribArray), + SetGLFunctionPointer(Finish), + SetGLFunctionPointer(FramebufferTexture2D), + SetGLFunctionPointer(GenBuffers), + SetGLFunctionPointer(GenFramebuffers), + SetGLFunctionPointer(GenTextures), + SetGLFunctionPointer(GenVertexArrays), + SetGLFunctionPointer(GetAttribLocation), + SetGLFunctionPointer(GetError), + SetGLFunctionPointer(GetIntegerv), + SetGLFunctionPointer(GetProgramInfoLog), + SetGLFunctionPointer(GetProgramiv), + SetGLFunctionPointer(GetShaderInfoLog), + SetGLFunctionPointer(GetShaderiv), + SetGLFunctionPointer(GetString), + SetGLFunctionPointer(GetUniformLocation), + SetGLFunctionPointer(LinkProgram), + SetGLFunctionPointer(ReadPixels), + SetGLFunctionPointer(ShaderSource), + SetGLFunctionPointer(TexImage2D), + SetGLFunctionPointer(TexParameteri), + SetGLFunctionPointer(TexSubImage2D), + SetGLFunctionPointer(Uniform1f), + SetGLFunctionPointer(Uniform1i), + SetGLFunctionPointer(UseProgram), + SetGLFunctionPointer(VertexAttribPointer), + SetGLFunctionPointer(Viewport) {} + + void (*ActiveTexture)(GLenum texture); + void (*AttachShader)(GLuint program, GLuint shader); + void (*BindBuffer)(GLenum target, GLuint buffer); + void (*BindFramebuffer)(GLenum target, GLuint framebuffer); + void (*BindTexture)(GLenum target, GLuint texture); + void (*BindVertexArray)(GLuint array); + void (*BufferData)(GLenum target, GLsizeiptr size, const GLvoid* data, + GLenum usage); + GLenum (*CheckFramebufferStatus)(GLenum target); + void (*Clear)(GLbitfield mask); + void (*CompileShader)(GLuint shader); + GLuint (*CreateProgram)(); + GLuint (*CreateShader)(GLenum shader_type); + void (*DeleteFramebuffers)(GLsizei n, const GLuint* framebuffers); + void (*DeleteProgram)(GLuint program); + void (*DeleteShader)(GLuint shader); + void (*DeleteTextures)(GLsizei n, const GLuint* textures); + void (*DetachShader)(GLuint program, GLuint shader); + void (*DrawArrays)(GLenum mode, GLint first, GLsizei count); + void (*DrawBuffers)(GLsizei n, const GLenum* bufs); + void (*EnableVertexAttribArray)(GLuint index); + void (*Finish)(); + void (*FramebufferTexture2D)(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, GLint level); + void (*GenBuffers)(GLsizei n, GLuint* buffers); + void (*GenFramebuffers)(GLsizei n, GLuint* ids); + void (*GenTextures)(GLsizei n, GLuint* textures); + void (*GenVertexArrays)(GLsizei n, GLuint* arrays); + GLint (*GetAttribLocation)(GLuint program, const GLchar* name); + GLenum (*GetError)(); + void (*GetIntegerv)(GLenum pname, GLint* data); + void (*GetProgramInfoLog)(GLuint program, GLsizei maxLength, GLsizei* length, + GLchar* info_log); + void (*GetProgramiv)(GLuint program, GLenum pname, GLint* params); + void (*GetShaderInfoLog)(GLuint shader, GLsizei max_length, GLsizei* length, + GLchar* info_log); + void (*GetShaderiv)(GLuint shader, GLenum pname, GLint* params); + const GLubyte *(*GetString)(GLenum name); + GLint (*GetUniformLocation)(GLuint program, const GLchar* name); + void (*LinkProgram)(GLuint program); + void (*ReadPixels)(GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, GLvoid* data); + void (*ShaderSource)(GLuint shader, GLsizei count, const GLchar** string, + const GLint* length); + void (*TexImage2D)(GLenum target, GLint level, GLint internal_format, + GLsizei width, GLsizei height, GLint border, GLenum format, + GLenum type, const GLvoid* data); + void (*TexParameteri)(GLenum target, GLenum pname, GLint param); + void (*TexSubImage2D)(GLenum target, GLint level, GLint xoffset, + GLint yoffset, GLsizei width, GLsizei height, + GLenum format, GLenum type, const GLvoid* data); + void (*Uniform1f)(GLint location, GLfloat v0); + void (*Uniform1i)(GLint location, GLint v0); + void (*UseProgram)(GLuint program); + void (*VertexAttribPointer)(GLuint index, GLint size, GLenum type, + GLboolean normalized, GLsizei stride, + const GLvoid* pointer); + void (*Viewport)(GLint x, GLint y, GLsizei width, GLsizei height); +}; + +/*! + * \brief Process global OpenGL workspace. + */ +class OpenGLWorkspace final : public DeviceAPI { + public: + ~OpenGLWorkspace() final; + + // override device API + void SetDevice(TVMContext ctx) final; + void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final; + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final; + void FreeDataSpace(TVMContext ctx, void* ptr) final; + void CopyDataFromTo(const void* from, + size_t from_offset, + void* to, + size_t to_offset, + size_t size, + TVMContext ctx_from, + TVMContext ctx_to, + TVMStreamHandle stream) final; + void StreamSync(TVMContext ctx, TVMStreamHandle stream) final; + void* AllocWorkspace(TVMContext ctx, size_t size) final; + void FreeWorkspace(TVMContext ctx, void* data) final; + + /*! + * \brief Get the global OpenGL workspace. + * \return The global OpenGL workspace. + */ + static const std::shared_ptr& Global(); + + /*! + * \brief Create an OpenGL program that uses the given fragment shader. + * \param fragment_shader The fragment shader **source**. + * \return The OpenGL program. + */ + Program CreateProgram(const char* fragment_shader_src); + + /*! + * \brief Create an OpenGL texture that stores an array. + * \param type Element type. + * \param nbytes Number of bytes in the array. + * \return The OpenGL texture. + */ + Texture CreateTexture(TVMType type, size_t nbytes); + + /*! + * \brief Upload user data into a sub-region of an OpenGL texture. + * \param texture The texture to be written to. + * \param begin The index of the first element to be written to. + * \param nelems The number of elements to be written to. + * \param data The user data. + */ + void PutTextureData(Texture* texture, + GLint begin, + GLsizei nelems, + const GLvoid* data); + /*! + * \brief Download a sub-region of an OpenGL texture. + * \param texture The texture to download from. + * \param begin The index of first element to download from. + * \param nelems The number of elements to download from. + * \param data The user buffer. + */ + void GetTextureData(const Texture* texture, + GLint begin, + GLsizei nelems, + GLvoid* data); + + /*! + * \brief Set currently used OpenGL program. + */ + void SetCurrentProgram(const Program& program); + + /*! + * \brief Set uniform values for an OpenGL program. + * Must call SetCurrentProgram before calling this. + * \param program The OpenGL program. + * \param name The uniform argument name. + * \param type The type of the uniform. + * \param value The value to pass in. + */ + void SetUniform(const Program& program, + const std::string& name, + TVMType type, + void* value); + + /*! + * \brief Set input texture for an OpenGL program. + * Must call SetCurrentProgram before calling this. + * \param program The OpenGL program. + * \param name The texture uniform argument name. + * \param unit The texture unit to use. Each input texture must occupy a + * different unit. + * \param texture The OpenGL texture to pass in. + */ + void SetInputTexture(const Program& program, + const std::string& name, + GLuint unit, + Texture* texture); + + /*! + * \brief Render to a texture. + * \param output The output texture. + */ + void Render(Texture* output); + + private: + friend class Texture; + friend class Program; + + // Global singleton. Hide constructor. + OpenGLWorkspace(); + + GLFWwindow* window_; + std::unique_ptr gl; + GLuint vertex_shader_; + static const int kWindowWidth = 640; + static const int kWindowHeight = 480; + struct Vertex { + float x, y; + }; + static constexpr size_t kNumVertices = 6; + static const Vertex vertices[kNumVertices]; + static const char* vertex_shader_text_; + + /*! + * \brief Bind a texture to a "texture unit". + * After calling this function, the "texture unit" becomes "active", and the + * texture is bound to GL_TEXTURE_2D in that "texture unit". + * \param unit The texture unit to activate. + * \param texture The texture to bind. + */ + void BindTextureUnit(GLuint unit, GLuint texture); + + /*! + * \brief Callback in Texture's destructor. + */ + void OnDeleteTexture(GLuint texture); + + /*! + * \brief Callback in Program's destructor. + */ + void OnDeleteProgram(GLuint program); + + /*! + * \brief Check if there is any outstanding OpenGL error. If there is, crash. + */ + void CheckOpenGLError(); + + /*! + * \brief Get the maximum number of texture units. + */ + GLuint NumTextureUnits(); + + /*! + * \brief Create and compile a shader from a source string. + * \param shader_kind The kind of shader. + * Could be GL_VERTEX_SHADER or GL_FRAGMENT_SHADER. + * \param shader_src The source string of the shader. + * \return The compiled shader ID. + */ + GLuint CreateShader(GLenum shader_kind, const char* shader_src); + + /*! + * \brief Create an OpenGL program that uses the given fragment shader. + * \param fragment_shader The **compiled** fragment shader. + * \return The OpenGL program. + */ + Program CreateProgram(GLuint fragment_shader); +}; + +/*! + * \brief An OpenGL program, composed of a vertex shader and a fragment shader. + * In TVM, every program has the same vertex shader. + * So a program just corresponds to a fragment shader. + * A program can only be created by the workspace. + * This class is just a wrapper over an OpenGL program ID. + */ +class Program { + public: + // Move constructor. + Program(Program&& other) noexcept + : workspace_(other.workspace_), program_(other.program_) { + other.program_ = kInvalidProgram; + } + + // Move assignment. + Program& operator=(Program&& other) noexcept { + workspace_ = other.workspace_; + program_ = other.program_; + other.program_ = kInvalidProgram; + return *this; + } + + // Disallow copy. + Program(const Program& other) = delete; + Program& operator=(const Program& other) = delete; + + // Destructor. + ~Program() { + if (program_ != kInvalidProgram) { + workspace_->OnDeleteProgram(program_); + program_ = kInvalidProgram; + } + } + + private: + friend class OpenGLWorkspace; + + // Only OpenGLWorkspace can create a Program. + // We enforce this to make sure OpenGL is initialized. + explicit Program(OpenGLWorkspace* workspace, GLuint program) + : workspace_(workspace), program_(program) {} + + // The internal OpenGL program ID. + GLuint program() const { return program_; } + + static constexpr GLuint kInvalidProgram = static_cast(-1); + + OpenGLWorkspace* workspace_; + GLuint program_; +}; + +/*! + * \brief The storage format of a texture. + * The members match the API of glTexImage2D. + */ +struct TextureFormat { + TextureFormat(GLint internal_format, GLenum format, GLenum type) + : internal_format(internal_format), format(format), type(type) {} + + GLsizei elemsz() const { + switch (type) { + case GL_BYTE: case GL_UNSIGNED_BYTE: + return 1; + case GL_SHORT: case GL_UNSIGNED_SHORT: + return 2; + case GL_INT: case GL_UNSIGNED_INT: + return 4; + case GL_FLOAT: + return 4; + default: + LOG(FATAL) << "Unsupported type"; + return -1; + } + } + + bool operator==(const TextureFormat& other) const { + return std::make_tuple(internal_format, format, type) == + std::make_tuple(other.internal_format, other.format, other.type); + } + + GLint internal_format; // OpenGL says this is GLint, not GLenum. + GLenum format; + GLenum type; +}; + +/*! + * \brief An OpenGL texture represents a chunk of GPU memory. + * This is the way we represent tensors. + * We always use 2D textures. + */ +class Texture { + public: + // Move constructor. + Texture(Texture&& other) noexcept + : workspace_(other.workspace_), texture_(other.texture_), + format_(other.format_), width_(other.width_), height_(other.height_) { + other.texture_ = kInvalidTexture; + } + + // Move assignment. + Texture& operator=(Texture&& other) noexcept { + workspace_ = other.workspace_; + texture_ = other.texture_; + format_ = other.format_; + width_ = other.width_; + height_ = other.height_; + other.texture_ = kInvalidTexture; + return *this; + } + + // Disallow copy. + Texture(const Texture& other) = delete; + Texture& operator=(const Texture& other) = delete; + + // Destructor. + ~Texture() { + if (texture_ != kInvalidTexture) { + workspace_->OnDeleteTexture(texture_); + texture_ = kInvalidTexture; + } + } + + /*! + * \brief The width of the texture in number of pixels. + */ + GLsizei width() const { return width_; } + + /*! + * \brief The height of the texture in number of pixels. + */ + GLsizei height() const { return height_; } + + /*! + * \brief The number of bytes of each element in the array. + */ + GLsizei elemsz() const { return format_.elemsz(); } + + private: + friend class OpenGLWorkspace; + + // Only OpenGLWorkspace can create a Texture. + // We enforce this to make sure OpenGL is initialized. + // Always only use the first dimension of a 2D texture. + // The reason is that texelFetch only supports 2D textures. + explicit Texture(OpenGLWorkspace* workspace, GLuint texture, + TextureFormat format, + GLsizei width, GLsizei height) + : workspace_(workspace), texture_(texture), format_(format), + width_(width), height_(height) {} + + // The internal texture ID. + GLuint texture() const { return texture_; } + + static constexpr GLuint kInvalidTexture = static_cast(-1); + + OpenGLWorkspace* workspace_; + GLuint texture_; + TextureFormat format_; + GLsizei width_; + GLsizei height_; +}; + +} // namespace gl +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_OPENGL_OPENGL_COMMON_H_ diff --git a/src/runtime/opengl/opengl_device_api.cc b/src/runtime/opengl/opengl_device_api.cc new file mode 100644 index 0000000000000..798003af902f6 --- /dev/null +++ b/src/runtime/opengl/opengl_device_api.cc @@ -0,0 +1,556 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file opengl_device_api.cc + */ +#include "./opengl_common.h" + +#if TVM_OPENGL_RUNTIME + +#include +#include + +namespace tvm { +namespace runtime { +namespace gl { + +/*! + * \brief Turn OpenGL error enum to string. + */ +static const char* GLGetErrorString(GLenum error) { + switch (error) { + case GL_NO_ERROR: + return "GL_NO_ERROR"; + case GL_INVALID_ENUM: + return "GL_INVALID_ENUM"; + case GL_INVALID_VALUE: + return "GL_INVALID_VALUE"; + case GL_INVALID_OPERATION: + return "GL_INVALID_OPERATION"; + case GL_STACK_OVERFLOW: + return "GL_STACK_OVERFLOW"; + case GL_STACK_UNDERFLOW: + return "GL_STACK_UNDERFLOW"; + case GL_OUT_OF_MEMORY: + return "GL_OUT_OF_MEMORY"; + default: + return "Unknown OpenGL error code"; + } +} + +/*! + * \brief Get the latest error. + */ +void OpenGLWorkspace::CheckOpenGLError() { + GLenum err = gl->GetError(); + CHECK_EQ(err, GL_NO_ERROR) << "OpenGL error, code=" << err << ": " + << gl::GLGetErrorString(err); +} + +/*! + * \brief Protected OpenGL call. + * \param func Expression to call. + */ +#define OPENGL_CALL(func) \ + { \ + (func); \ + CheckOpenGLError(); \ + } + +/*! + * \brief The error handling callback passed to GLFW. + */ +void GlfwErrorCallback(int err, const char* str) { + LOG(FATAL) << "Error: [" << err << "] " << str; +} + +const std::shared_ptr& OpenGLWorkspace::Global() { + static std::shared_ptr inst(new OpenGLWorkspace); + return inst; +} + +void OpenGLWorkspace::SetDevice(TVMContext ctx) { + CHECK_EQ(ctx.device_type, static_cast(kOpenGL)) + << "Device type must be OpenGL."; + CHECK_EQ(ctx.device_id, 0) << "Only support 1 OpenGL \"device\"."; +} + +void OpenGLWorkspace::GetAttr( + TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) { + switch (kind) { + case kExist: { + *rv = static_cast(ctx.device_id == 0); + break; + } + case kMaxThreadsPerBlock: { + GLint max_texture_size; + OPENGL_CALL(gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size)); + break; + } + case kWarpSize: { + *rv = 1; + break; + } + case kComputeVersion: { + break; + } + } +} + +void* OpenGLWorkspace::AllocDataSpace( + TVMContext ctx, size_t nbytes, size_t alignment, TVMType type_hint) { + return reinterpret_cast(new Texture(CreateTexture(type_hint, nbytes))); +} + +void OpenGLWorkspace::FreeDataSpace(TVMContext ctx, void* ptr) { + delete reinterpret_cast(ptr); +} + +void OpenGLWorkspace::CopyDataFromTo(const void* from, + size_t from_offset, + void* to, + size_t to_offset, + size_t size, + TVMContext ctx_from, + TVMContext ctx_to, + TVMStreamHandle stream) { + CHECK(stream == nullptr); + + // TODO(zhixunt): This is a nasty hack to avoid comparison between + // incompatible enums. We should add kOpenGL to dlpack. + constexpr int gl_devtype = kOpenGL; + std::tuple type_from_to(ctx_from.device_type, ctx_to.device_type); + + if (type_from_to == std::make_tuple(gl_devtype, gl_devtype)) { + auto from_texture = static_cast(from); + auto to_texture = static_cast(to); + auto temp_buffer = std::unique_ptr(new char[size]); + CHECK(from_texture->format_ == to_texture->format_); + auto elemsz = from_texture->elemsz(); + auto from_begin = static_cast(from_offset / elemsz); + auto to_begin = static_cast(to_offset / elemsz); + auto nelems = static_cast(size / elemsz); + GetTextureData(from_texture, from_begin, nelems, temp_buffer.get()); + PutTextureData(to_texture, to_begin, nelems, temp_buffer.get()); + + } else if (type_from_to == std::make_tuple(gl_devtype, kDLCPU)) { + auto texture = static_cast(from); + void *data = static_cast(to) + to_offset; + auto elemsz = texture->elemsz(); + auto begin = static_cast(from_offset / elemsz); + auto nelems = static_cast(size / elemsz); + GetTextureData(texture, begin, nelems, data); + + } else if (type_from_to == std::make_tuple(kDLCPU, gl_devtype)) { + auto texture = reinterpret_cast(to); + const void* data = static_cast(from) + from_offset; + auto elemsz = texture->elemsz(); + auto begin = static_cast(to_offset / elemsz); + auto nelems = static_cast(size / elemsz); + PutTextureData(texture, begin, nelems, data); + + } else { + LOG(FATAL) << "Expect copy from/to OpenGL or between OpenGL"; + } +} + +void OpenGLWorkspace::StreamSync(TVMContext ctx, TVMStreamHandle stream) {} + +void* OpenGLWorkspace::AllocWorkspace(TVMContext ctx, size_t size) { + LOG(FATAL) << "Cannot allocate OpenGL workspace."; + return nullptr; +} + +void OpenGLWorkspace::FreeWorkspace(TVMContext ctx, void* data) { + LOG(FATAL) << "Cannot free OpenGL workspace."; +} + +OpenGLWorkspace::OpenGLWorkspace() { + // Set an error handler. + // This can be called before glfwInit(). + glfwSetErrorCallback(&GlfwErrorCallback); + + // Initialize GLFW. + if (glfwInit() != GL_TRUE) { + LOG(FATAL) << "glfwInit() failed!"; + } + + // Create a window. + glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); + glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); + glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); + glfwWindowHint(GLFW_VISIBLE, GL_FALSE); + window_ = glfwCreateWindow(kWindowWidth, kWindowHeight, "", nullptr, nullptr); + if (window_ == nullptr) { + LOG(FATAL) << "glfwCreateWindow() failed!"; + } + + // Before using any OpenGL API, we must specify a context. + glfwMakeContextCurrent(window_); + + // Load all OpenGL API function pointers. + gl = std::unique_ptr(new GLFunctionPointers); + + CheckOpenGLError(); + + // We always render the same vertices and triangles. + GLuint vertex_buffer; + OPENGL_CALL(gl->GenBuffers(1, &vertex_buffer)); + OPENGL_CALL(gl->BindBuffer(GL_ARRAY_BUFFER, vertex_buffer)); + OPENGL_CALL(gl->BufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, + GL_STATIC_DRAW)); + + GLuint vertex_array; + OPENGL_CALL(gl->GenVertexArrays(1, &vertex_array)); + OPENGL_CALL(gl->BindVertexArray(vertex_array)); + OPENGL_CALL(gl->BindBuffer(GL_ARRAY_BUFFER, vertex_buffer)); + + // We always use the same vertex shader. + vertex_shader_ = CreateShader(GL_VERTEX_SHADER, vertex_shader_text_); + + LOG(INFO) << "OpenGL initialized, version = " << gl->GetString(GL_VERSION); +} + +OpenGLWorkspace::~OpenGLWorkspace() { + // Paired with glfwCreateWindow(). + glfwDestroyWindow(window_); + + // Paired with glfwInit(). + glfwTerminate(); +} + +void OpenGLWorkspace::BindTextureUnit(GLuint unit, GLuint texture) { + OPENGL_CALL(gl->ActiveTexture(GL_TEXTURE0 + unit)); + OPENGL_CALL(gl->BindTexture(GL_TEXTURE_2D, texture)); +} + +void OpenGLWorkspace::OnDeleteTexture(GLuint texture) { + OPENGL_CALL(gl->DeleteTextures(1, &texture)); +} + +void OpenGLWorkspace::OnDeleteProgram(GLuint program) { + OPENGL_CALL(gl->DeleteProgram(program)); +} + +GLuint OpenGLWorkspace::NumTextureUnits() { + GLint num_units; + OPENGL_CALL(gl->GetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &num_units)); + return static_cast(num_units); +} + +const OpenGLWorkspace::Vertex OpenGLWorkspace::vertices[OpenGLWorkspace::kNumVertices] = { + {-1.f, -1.f}, + {1.0f, -1.f}, + {1.0f, 1.0f}, + {-1.f, -1.f}, + {-1.f, 1.0f}, + {1.0f, 1.0f}, +}; + +// Don't need to change this. +// The vertex shader only needs to take in the triangle points. +// No need for point transformations. +const char* OpenGLWorkspace::vertex_shader_text_ = "#version 300 es\n" + "in vec2 point; // input to vertex shader\n" + "void main() {\n" + " gl_Position = vec4(point, 0.0, 1.0);\n" + "}\n"; + +Program OpenGLWorkspace::CreateProgram( + const char* fragment_shader_src) { + // Create and compile the shaders. + GLuint fragment_shader = CreateShader(GL_FRAGMENT_SHADER, + fragment_shader_src); + + // Link the shaders and create the program. + Program program = CreateProgram(fragment_shader); + + OPENGL_CALL(gl->DeleteShader(fragment_shader)); + + return program; +} + +GLuint OpenGLWorkspace::CreateShader(GLenum shader_kind, + const char* shader_src) { + // Create the shader. + GLuint shader = gl->CreateShader(shader_kind); + gl->ShaderSource(shader, 1, &shader_src, nullptr); + gl->CompileShader(shader); + + // Check compile errors. + GLint err; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &err); + + GLint info_log_len; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &info_log_len); + + if (err != GL_TRUE) { + std::unique_ptr err_msg(new char[info_log_len + 1]); + gl->GetShaderInfoLog(shader, info_log_len, nullptr, err_msg.get()); + LOG(FATAL) << err_msg.get(); + assert(false); + } + + CheckOpenGLError(); + + return shader; +} + +static TextureFormat GetTextureFormat(TVMType type) { + CHECK_EQ(type.lanes, 1) << "Not supporting multi-lane types."; + + switch (type.code) { + case kDLInt: { + switch (type.bits) { + case 8: + return {GL_R8I, GL_RED_INTEGER, GL_BYTE}; + case 16: + return {GL_R16I, GL_RED_INTEGER, GL_SHORT}; + case 32: + return {GL_R32I, GL_RED_INTEGER, GL_INT}; + default: + LOG(FATAL) << "Unsupported type bits " << type.bits; + } + } + case kDLUInt: { + switch (type.bits) { + case 8: + return {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}; + case 16: + return {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}; + case 32: + return {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}; + default: + LOG(FATAL) << "Unsupported type bits " << type.bits; + } + } + case kDLFloat: { + switch (type.bits) { + case 32: + return {GL_R32F, GL_RED, GL_FLOAT}; + default: + LOG(FATAL) << "Unsupported type bits " << type.bits; + } + } + default: + LOG(FATAL) << "Unsupported type code" << type.code; + } + assert(false); +} + +Texture OpenGLWorkspace::CreateTexture(TVMType type, size_t nbytes) { + // Create a texture. + GLuint texture; + OPENGL_CALL(gl->GenTextures(1, &texture)); + + BindTextureUnit(NumTextureUnits() - 1, texture); + + // Use glTexImage2D with nullptr data to specify GPU data storage. + auto texture_format = GetTextureFormat(type); + auto width = static_cast(nbytes / (type.bits / 8)); + auto height = GLsizei(1); + OPENGL_CALL(gl->TexImage2D(GL_TEXTURE_2D, /*level=*/0, + texture_format.internal_format, + width, height, /*border=*/0, + texture_format.format, texture_format.type, + /*data=*/nullptr)); + + OPENGL_CALL( + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); + OPENGL_CALL( + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); + OPENGL_CALL( + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)); + OPENGL_CALL( + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)); + + return Texture(this, texture, texture_format, width, height); +} + +Program OpenGLWorkspace::CreateProgram(GLuint fragment_shader) { + // Create the program and link the shaders. + GLuint program = gl->CreateProgram(); + gl->AttachShader(program, vertex_shader_); + gl->AttachShader(program, fragment_shader); + gl->LinkProgram(program); + + // Check link errors. + GLint err; + gl->GetProgramiv(program, GL_LINK_STATUS, &err); + + GLint info_log_len; + gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &info_log_len); + + if (err != GL_TRUE) { + std::unique_ptr err_msg(new char[info_log_len + 1]); + gl->GetProgramInfoLog(program, info_log_len, nullptr, err_msg.get()); + LOG(FATAL) << err_msg.get(); + assert(false); + } + + CheckOpenGLError(); + + OPENGL_CALL(gl->DetachShader(program, vertex_shader_)); + OPENGL_CALL(gl->DetachShader(program, fragment_shader)); + + auto point_attrib = GLuint(gl->GetAttribLocation(program, "point")); + OPENGL_CALL(gl->EnableVertexAttribArray(point_attrib)); + + OPENGL_CALL(gl->VertexAttribPointer(point_attrib, 2, GL_FLOAT, GL_FALSE, + sizeof(Vertex), nullptr)); + + return Program(this, program); +} + +void OpenGLWorkspace::PutTextureData(Texture *texture, + GLint begin, + GLsizei nelems, + const GLvoid* data) { + // Bind to temporary unit. + BindTextureUnit(NumTextureUnits() - 1, texture->texture()); + + // Similar to cudaMemcpy. + OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0, + /*xoffset=*/begin, /*yoffset=*/0, + /*width=*/nelems, /*height=*/1, + texture->format_.format, texture->format_.type, + data)); +} + +void OpenGLWorkspace::GetTextureData(const Texture *texture, + GLint begin, + GLsizei nelems, + GLvoid* data) { + BindTextureUnit(NumTextureUnits() - 1, texture->texture()); + + // Create frame buffer. + GLuint frame_buffer; + OPENGL_CALL(gl->GenFramebuffers(1, &frame_buffer)); + OPENGL_CALL(gl->BindFramebuffer(GL_FRAMEBUFFER, frame_buffer)); + + // Bind texture to framebuffer's attachment 0. + OPENGL_CALL(gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, texture->texture(), 0)); + + // Always check that our framebuffer is okay. + if (gl->CheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG(FATAL) << "Framebuffer not complete."; + } + +#ifdef __EMSCRIPTEN__ + // WebGL2's glReadPixels API doesn't allow GL_RED user buffer format. + // Instead, We must use GL_RGBA. This means the data we retrieve has useless + // GBA channels. Here we are applying a dirty hack. + // TODO(zhixunt): We really want to utilize all RGBA channels in textures. + // + // WebGL2's glReadPixels API also doesn't allow GL_RED_INTEGER or + // GL_RGB_INTEGER user buffer format, which means we cannot retrieve integer + // texture data? (need to confirm) + + CHECK_EQ(texture->format_.internal_format, GL_R32F) + << "Retrieving integer texture not supported yet."; + auto elemsz = texture->format_.elemsz(); + auto nchannels = 4; + auto padded_data_size = nchannels * nelems * elemsz; + auto padded_data = std::unique_ptr(new char[padded_data_size]); + OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems, + /*height=*/1, GL_RGBA, GL_FLOAT, + padded_data.get())); + for (GLsizei i = 0; i != nelems; ++i) { + auto dst = reinterpret_cast(data) + i * elemsz; + auto src = padded_data.get() + nchannels * i * elemsz; + std::memcpy(dst, src, elemsz); + } +#else + OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems, + /*height=*/1, texture->format_.format, + texture->format_.type, data)); +#endif + + OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer)); +} + +void OpenGLWorkspace::SetCurrentProgram(const Program& program) { + OPENGL_CALL(gl->UseProgram(program.program())); +} + +void OpenGLWorkspace::SetUniform(const Program& program, + const std::string& name, + TVMType type, + void* value) { + GLint location = gl->GetUniformLocation(program.program(), name.c_str()); + switch (type.code) { + case kDLInt: { + CHECK_EQ(type.bits, 32) << "Only support 32-bit int for uniform."; + GLint uniform_value = *reinterpret_cast(value); + OPENGL_CALL(gl->Uniform1i(location, uniform_value)); + break; + } + case kDLUInt: { + LOG(FATAL) << "Strangely, emcc WebGL does not support glUniform1ui."; + break; + } + case kDLFloat: { + CHECK_EQ(type.bits, 32) << "Only support 32-bit float for uniform."; + GLfloat uniform_value = *reinterpret_cast(value); + OPENGL_CALL(gl->Uniform1f(location, uniform_value)); + break; + } + default: { + LOG(FATAL) << "Unsupported type code for uniform."; + break; + } + } +} + +void OpenGLWorkspace::SetInputTexture(const Program& program, + const std::string& name, + GLuint unit, + Texture* texture) { + // We always use the last texture unit as temporary. + // Therefore, we can have "NumTextureUnits() - 1" input textures. + CHECK_LT(unit, NumTextureUnits() - 1) << "Too many textures."; + + BindTextureUnit(unit, texture->texture()); + GLint location = gl->GetUniformLocation(program.program_, name.c_str()); + OPENGL_CALL(gl->Uniform1i(location, unit)); +} + +void OpenGLWorkspace::Render(Texture* output) { + // Create frame buffer. + GLuint frame_buffer; + OPENGL_CALL(gl->GenFramebuffers(1, &frame_buffer)); + OPENGL_CALL(gl->BindFramebuffer(GL_FRAMEBUFFER, frame_buffer)); + + // Set "renderedTexture" as our colour attachement 0. + OPENGL_CALL(gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, output->texture(), 0)); + + // Specify that we will render to color attachment 0. + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; + OPENGL_CALL(gl->DrawBuffers(1, DrawBuffers)); + + // Always check that our framebuffer is okay. + if (gl->CheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG(FATAL) << "Framebuffer not complete."; + } + + // Perform rendering. + OPENGL_CALL(gl->Viewport(0, 0, output->width(), output->height())); + OPENGL_CALL(gl->Clear(GL_COLOR_BUFFER_BIT)); + OPENGL_CALL(gl->DrawArrays(GL_TRIANGLES, 0, 6)); + + OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer)); +} + +TVM_REGISTER_GLOBAL("device_api.opengl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + DeviceAPI* ptr = OpenGLWorkspace::Global().get(); + *rv = static_cast(ptr); +}); + +} // namespace gl +} // namespace runtime +} // namespace tvm + +#endif // TVM_OPENGL_RUNTIME diff --git a/src/runtime/opengl/opengl_module.cc b/src/runtime/opengl/opengl_module.cc new file mode 100644 index 0000000000000..6793bd6d29b66 --- /dev/null +++ b/src/runtime/opengl/opengl_module.cc @@ -0,0 +1,284 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file opengl_module.cc + */ +#include + +#include "./opengl_common.h" +#include "./opengl_module.h" + +#if TVM_OPENGL_RUNTIME + +#include +#include "../pack_args.h" +#include "../thread_storage_scope.h" +#include "../file_util.h" + +namespace tvm { +namespace runtime { + +class OpenGLModuleNode final : public ModuleNode { + public: + OpenGLModuleNode(std::unordered_map shaders, + std::string fmt, + std::unordered_map fmap); + + ~OpenGLModuleNode() override = default; + + const char* type_key() const final { return "opengl"; } + + PackedFunc GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self) final; + + std::string GetSource(const std::string& format) final; + + void SaveToFile(const std::string& file_name, + const std::string& format) final; + + void SaveToBinary(dmlc::Stream* stream) final; + + const gl::Program& GetProgram(const std::string& func_name) const; + + const OpenGLShader& GetShader(const std::string& func_name) const; + + const FunctionInfo& GetFunctionInfo(const std::string& func_name) const; + + gl::OpenGLWorkspace& workspace() const { return *workspace_; } + + private: + std::shared_ptr workspace_; + std::unordered_map shaders_; + std::string fmt_; + std::unordered_map fmap_; + std::unordered_map programs_; + + DISALLOW_COPY_AND_ASSIGN(OpenGLModuleNode); +}; + +class OpenGLWrappedFunc { + public: + OpenGLWrappedFunc(OpenGLModuleNode* m, + std::shared_ptr sptr, + std::string func_name, + std::vector arg_size, + const std::vector& thread_axis_tags); + + void operator()(TVMArgs args, TVMRetValue* rv, void** void_args) const; + + private: + // The module + OpenGLModuleNode* m_; + // resource handle + std::shared_ptr sptr_; + // The name of the function. + std::string func_name_; + // convert code for void argument + std::vector arg_size_; + // thread axis config + ThreadAxisConfig thread_axis_cfg_; +}; + +OpenGLModuleNode::OpenGLModuleNode( + std::unordered_map shaders, + std::string fmt, + std::unordered_map fmap) + : workspace_(gl::OpenGLWorkspace::Global()), shaders_(std::move(shaders)), + fmt_(std::move(fmt)), fmap_(std::move(fmap)), programs_() { + CHECK_EQ(fmt_, "gl") << "Unknown OpenGL format " << fmt_; + for (auto &pair : shaders_) { + auto &func_name = pair.first; + auto &shader = pair.second; + programs_.emplace(func_name, + workspace_->CreateProgram(shader.source.c_str())); + } +} + +PackedFunc OpenGLModuleNode::GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) { + CHECK_EQ(sptr_to_self.get(), this); + CHECK_NE(name, symbol::tvm_module_main) << "Device function do not have main"; + + auto func_info_it = fmap_.find(name); + if (func_info_it == fmap_.end()) { return PackedFunc(); } + auto &func_info = func_info_it->second; + + std::vector arg_size(func_info.arg_types.size()); + for (size_t i = 0; i < func_info.arg_types.size(); ++i) { + TVMType t = func_info.arg_types[i]; + CHECK_EQ(t.lanes, 1U); + uint32_t bits = t.bits; + CHECK_EQ(bits % 8, 0U); + arg_size[i] = bits / 8; + } + + // Initialize the wrapped func. + OpenGLWrappedFunc f(this, sptr_to_self, name, arg_size, + func_info.thread_axis_tags); + return PackFuncVoidAddr(f, func_info.arg_types); +} + +std::string OpenGLModuleNode::GetSource(const std::string& format) { + if (format != fmt_ && fmt_ != "gl") { return ""; } + + std::ostringstream os; + for (auto &pair : shaders_) { + auto &name = pair.first; + auto &shader = pair.second; + os << "[" << name << "]" << "\n"; + os << shader.source <<"\n"; + } + return os.str(); +} + +void OpenGLModuleNode::SaveToFile(const std::string& file_name, + const std::string& format) { + std::string fmt = GetFileFormat(file_name, format); + CHECK_EQ(fmt, fmt_) << "Can only save to format=" << fmt_; + std::string meta_file = GetMetaFilePath(file_name); + SaveMetaDataToFile(meta_file, fmap_); + SaveBinaryToFile(file_name, ToJSON(shaders_)); +} + +void OpenGLModuleNode::SaveToBinary(dmlc::Stream* stream) { + stream->Write(fmt_); + stream->Write(fmap_); + stream->Write(ToJSON(shaders_)); +} + +const gl::Program& OpenGLModuleNode::GetProgram( + const std::string& func_name) const { + auto it = programs_.find(func_name); + if (it == programs_.end()) { + LOG(FATAL) << "Cannot find program"; + } + return it->second; +} + +const OpenGLShader& OpenGLModuleNode::GetShader( + const std::string& func_name) const { + auto it = shaders_.find(func_name); + if (it == shaders_.end()) { + LOG(FATAL) << "Cannot find shader"; + } + return it->second; +} + +const FunctionInfo& OpenGLModuleNode::GetFunctionInfo( + const std::string& func_name) const { + auto it = fmap_.find(func_name); + if (it == fmap_.end()) { + LOG(FATAL) << "Cannot find shader"; + } + return it->second; +} + +OpenGLWrappedFunc::OpenGLWrappedFunc( + OpenGLModuleNode* m, + std::shared_ptr sptr, + std::string func_name, + std::vector arg_size, + const std::vector& thread_axis_tags) + : m_(m), sptr_(std::move(sptr)), func_name_(std::move(func_name)), + arg_size_(std::move(arg_size)) { + thread_axis_cfg_.Init(arg_size_.size(), thread_axis_tags); +} + +void OpenGLWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, + void** void_args) const { + auto &shader = m_->GetShader(func_name_); + auto &program = m_->GetProgram(func_name_); + auto &func_info = m_->GetFunctionInfo(func_name_); + size_t nargs = shader.arg_kinds.size(); + + // Must call this function before setting uniforms & input textures. + m_->workspace().SetCurrentProgram(program); + + // Set all arguments. + GLuint texture_unit = 0; + gl::Texture* output = nullptr; + for (size_t i = 0; i != nargs; ++i) { + auto &name = shader.arg_names.at(i); + auto kind = shader.arg_kinds.at(i); + auto type = func_info.arg_types.at(i); + switch (kind) { + case OpenGLArgKind::kUniform: { + m_->workspace().SetUniform(program, name, type, void_args[i]); + break; + } + case OpenGLArgKind::kInputTexture: { + CHECK_EQ(type.code, kHandle) << "Type is not handle?"; + auto texture = *static_cast(void_args[i]); + m_->workspace().SetInputTexture(program, name, texture_unit, texture); + ++texture_unit; + break; + } + case OpenGLArgKind::kOutputTexture: { + CHECK_EQ(type.code, kHandle) << "Type is not handle?"; + CHECK(output == nullptr) << "Can only have one output texture."; + output = *static_cast(void_args[i]); + break; + } + } + } + + // Set "thread_extent" uniform. + ThreadWorkLoad wl = thread_axis_cfg_.Extract(args); + std::unique_ptr thread_extent(new GLint(wl.block_dim(0))); + m_->workspace().SetUniform(program, shader.thread_extent_var, + TVMType{kDLInt, 32, 1}, + static_cast(thread_extent.get())); + + m_->workspace().Render(output); +} + +Module OpenGLModuleCreate(std::unordered_map shaders, + std::string fmt, + std::unordered_map fmap) { + auto n = std::make_shared(std::move(shaders), + std::move(fmt), + std::move(fmap)); + return Module(n); +} + +Module OpenGLModuleLoadFile(const std::string& file_name, + const std::string& format) { + std::string data; + std::unordered_map fmap; + std::string fmt = GetFileFormat(file_name, format); + std::string meta_file = GetMetaFilePath(file_name); + LoadBinaryFromFile(file_name, &data); + LoadMetaDataFromFile(meta_file, &fmap); + return OpenGLModuleCreate(FromJSON(data), fmt, fmap); +} + +Module OpenGLModuleLoadBinary(void* strm) { + auto stream = static_cast(strm); + std::string data; + std::unordered_map fmap; + std::string fmt; + stream->Read(&fmt); + stream->Read(&fmap); + stream->Read(&data); + return OpenGLModuleCreate(FromJSON(data), fmt, fmap); +} + +TVM_REGISTER_GLOBAL("module.loadfile_gl") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = OpenGLModuleLoadFile(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL("module.loadfile_glbin") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = OpenGLModuleLoadFile(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL("module.loadbinary_opengl") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = OpenGLModuleLoadBinary(args[0]); + }); + +} // namespace runtime +} // namespace tvm + +#endif // TVM_OPENGL_RUNTIME diff --git a/src/runtime/opengl/opengl_module.h b/src/runtime/opengl/opengl_module.h new file mode 100644 index 0000000000000..1913878c3d680 --- /dev/null +++ b/src/runtime/opengl/opengl_module.h @@ -0,0 +1,148 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file opengl_module.h + * \brief Execution handling of OpenGL kernels + */ +#ifndef TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_ +#define TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_ + +#include +#include +#include +#include +#include +#include +#include "../meta_data.h" + +namespace tvm { +namespace runtime { + +/*! + * \brief Determines how we supply arguments. + */ +enum class OpenGLArgKind { + kInputTexture = 0, // Bind to "gsampler2D" in GLSL. + kOutputTexture = 1, // Bind to "out" in GLSL. + kUniform = 2, // Bind to "uniform" in GLSL. +}; + +std::string OpenGLArgKind2String(OpenGLArgKind kind); +OpenGLArgKind String2OpenGLArgKind(const std::string& str); + +/*! + * \brief The output of OpenGL codegen. + * Contains necessary information to build a fragment shader and bind arguments. + */ +struct OpenGLShader { + OpenGLShader() = default; + OpenGLShader(std::string source, + std::vector arg_names, + std::vector arg_kinds, + std::string thread_extent_var) + : source(std::move(source)), arg_names(std::move(arg_names)), + arg_kinds(std::move(arg_kinds)), + thread_extent_var(std::move(thread_extent_var)) { + CHECK_EQ(this->arg_names.size(), this->arg_kinds.size()) << "Invalid input"; + } + + std::string source; + std::vector arg_names; // Matches FunctionInfo. + std::vector arg_kinds; // Matches FunctionInfo. + std::string thread_extent_var; // Stores the output length. + + void Save(dmlc::JSONWriter* writer) const; + void Load(dmlc::JSONReader* reader); +}; + +std::string ToJSON(const std::unordered_map& shaders); +std::unordered_map FromJSON(const std::string& str); + +/*! + * \brief Create an OpenGL module from data. + * + * \param data The module data. + * \param fmt The format of the data, + * \param fmap The map function information map of each function. + */ +Module OpenGLModuleCreate(std::unordered_map shaders, + std::string fmt, + std::unordered_map fmap); + +inline std::string OpenGLArgKind2String(OpenGLArgKind kind) { + switch (kind) { + case OpenGLArgKind::kOutputTexture: + return "output_texture"; + case OpenGLArgKind::kInputTexture: + return "input_texture"; + case OpenGLArgKind::kUniform: + return "uniform"; + } + assert(false); +} + +inline OpenGLArgKind String2OpenGLArgKind(const std::string& str) { + if (str == "output_texture") { + return OpenGLArgKind::kOutputTexture; + } else if (str == "input_texture") { + return OpenGLArgKind::kInputTexture; + } else if (str == "uniform") { + return OpenGLArgKind::kUniform; + } else { + LOG(FATAL) << "Invalid OpenGL arg kind."; + assert(false); + } +} + +inline void OpenGLShader::Save(dmlc::JSONWriter* writer) const { + std::vector arg_kind_strs; + for (auto kind : arg_kinds) { + arg_kind_strs.push_back(OpenGLArgKind2String(kind)); + } + + writer->BeginObject(); + writer->WriteObjectKeyValue("arg_names", arg_names); + writer->WriteObjectKeyValue("arg_kinds", arg_kind_strs); + writer->WriteObjectKeyValue("source", source); + writer->WriteObjectKeyValue("thread_extent_var", thread_extent_var); + writer->EndObject(); +} + +inline void OpenGLShader::Load(dmlc::JSONReader* reader) { + std::vector arg_kind_strs; + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("arg_names", &arg_names); + helper.DeclareField("arg_kinds", &arg_kind_strs); + helper.DeclareField("source", &source); + helper.DeclareField("thread_extent_var", &thread_extent_var); + helper.ReadAllFields(reader); + + arg_kinds.clear(); + for (auto& str : arg_kind_strs) { + arg_kinds.push_back(String2OpenGLArgKind(str)); + } +} + +inline std::string ToJSON( + const std::unordered_map& shaders) { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + writer.BeginObject(); + writer.WriteObjectKeyValue("shaders", shaders); + writer.EndObject(); + return os.str(); +} + +inline std::unordered_map FromJSON( + const std::string& str) { + std::unordered_map shaders; + std::istringstream is(str); + dmlc::JSONReader reader(&is); + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("shaders", &shaders); + helper.ReadAllFields(&reader); + return shaders; +} + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_ diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc index 1e6154163b35b..443d76b76eb66 100644 --- a/src/runtime/rocm/rocm_device_api.cc +++ b/src/runtime/rocm/rocm_device_api.cc @@ -52,12 +52,15 @@ class ROCMDeviceAPI final : public DeviceAPI { } *rv = value; } - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final { ROCM_CALL(hipSetDevice(ctx.device_id)); CHECK_EQ(256 % alignment, 0U) << "ROCM space is aligned at 256 bytes"; void *ret; - ROCM_CALL(hipMalloc(&ret, size)); + ROCM_CALL(hipMalloc(&ret, nbytes)); return ret; } diff --git a/src/runtime/rpc/rpc_device_api.cc b/src/runtime/rpc/rpc_device_api.cc index 7674fa3e23346..bd222b5c272ea 100644 --- a/src/runtime/rpc/rpc_device_api.cc +++ b/src/runtime/rpc/rpc_device_api.cc @@ -20,10 +20,13 @@ class RPCDeviceAPI final : public DeviceAPI { *rv = GetSess(ctx)->CallRemote( RPCCode::kDevGetAttr, ctx, static_cast(kind)); } - void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final { auto sess = GetSess(ctx); void *data = sess->CallRemote( - RPCCode::kDevAllocData, ctx, size, alignment); + RPCCode::kDevAllocData, ctx, nbytes, alignment, type_hint); RemoteSpace* space = new RemoteSpace(); space->data = data; space->sess = std::move(sess); diff --git a/src/runtime/rpc/rpc_session.cc b/src/runtime/rpc/rpc_session.cc index 0fa021918ed2c..3bff73afe9bd9 100644 --- a/src/runtime/rpc/rpc_session.cc +++ b/src/runtime/rpc/rpc_session.cc @@ -887,9 +887,11 @@ void RPCDevGetAttr(TVMArgs args, TVMRetValue *rv) { void RPCDevAllocData(TVMArgs args, TVMRetValue *rv) { TVMContext ctx = args[0]; - uint64_t size = args[1]; + uint64_t nbytes = args[1]; uint64_t alignment = args[2]; - void* data = DeviceAPI::Get(ctx)->AllocDataSpace(ctx, size, alignment); + TVMType type_hint = args[3]; + void* data = DeviceAPI::Get(ctx)->AllocDataSpace( + ctx, nbytes, alignment, type_hint); *rv = data; } diff --git a/src/runtime/workspace_pool.cc b/src/runtime/workspace_pool.cc index 494927979a0fa..24035faedaa71 100644 --- a/src/runtime/workspace_pool.cc +++ b/src/runtime/workspace_pool.cc @@ -23,28 +23,29 @@ class WorkspacePool::Pool { allocated_.push_back(e); } // allocate from pool - void* Alloc(TVMContext ctx, DeviceAPI* device, size_t size) { + void* Alloc(TVMContext ctx, DeviceAPI* device, size_t nbytes) { // Allocate align to page. - size = (size + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize; - if (size == 0) size = kWorkspacePageSize; + nbytes = (nbytes + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize; + if (nbytes == 0) nbytes = kWorkspacePageSize; Entry e; + TVMType type = {.code = kDLUInt, .bits = 8, .lanes = 1}; if (free_list_.size() == 2) { e = free_list_.back(); free_list_.pop_back(); - if (e.size < size) { + if (e.size < nbytes) { // resize the page device->FreeDataSpace(ctx, e.data); - e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment); - e.size = size; + e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type); + e.size = nbytes; } } else if (free_list_.size() == 1) { - e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment); - e.size = size; + e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type); + e.size = nbytes; } else { - if (free_list_.back().size >= size) { + if (free_list_.back().size >= nbytes) { // find smallest fit auto it = free_list_.end() - 2; - for (; it->size >= size; --it) {} + for (; it->size >= nbytes; --it) {} e = *(it + 1); free_list_.erase(it + 1); } else { @@ -52,8 +53,8 @@ class WorkspacePool::Pool { e = free_list_.back(); free_list_.pop_back(); device->FreeDataSpace(ctx, e.data); - e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment); - e.size = size; + e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type); + e.size = nbytes; } } allocated_.push_back(e); diff --git a/src/schedule/schedule_lang.cc b/src/schedule/schedule_lang.cc index f8fcb8b0c7441..59bc3f242b033 100644 --- a/src/schedule/schedule_lang.cc +++ b/src/schedule/schedule_lang.cc @@ -397,6 +397,45 @@ Stage& Stage::double_buffer() { return *this; } +Stage& Stage::opengl() { + CHECK(!is_scheduled()) << "Must be a fresh schedule"; + StageNode *self = operator->(); + + auto all_iter_vars = self->all_iter_vars; // curr version of all_iter_vars + CHECK(!all_iter_vars.empty()) << "At least one iter var"; + + // Fuse all data parallel dimensions to 1. + IterVar fused = all_iter_vars[0]; + for (size_t i = 1; i != all_iter_vars.size(); ++i) { + auto iter_var = all_iter_vars[i]; + switch (iter_var->iter_type) { + case IterVarType::kDataPar: { + fuse(fused, all_iter_vars[i], &fused); + break; + } + case IterVarType::kThreadIndex: { + LOG(ERROR) << "A fresh schedule shouldn't have thread index iter var"; + break; + } + case IterVarType::kCommReduce: + case IterVarType::kOrdered: + case IterVarType::kOpaque: { + break; + } + default: { + LOG(ERROR) << "Invalid iter var type " + << IterVarType2String(iter_var->iter_type); + break; + } + } + } + + // Bind the only dimension to threadIdx.x. + bind(fused, thread_axis(Range(nullptr), "threadIdx.x")); + + return *this; +} + Stage CopyStage(const Stage& s) { std::shared_ptr n = std::make_shared(*s.operator->()); diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 2c7510365f9dd..57fb7e04552bf 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -20,3 +20,6 @@ RUN bash /install/ubuntu_install_java.sh COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh RUN bash /install/ubuntu_install_llvm.sh + +COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh +RUN bash /install/ubuntu_install_opengl.sh diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index 9dff84e84635a..b71b4cb118ec0 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -37,6 +37,9 @@ RUN bash /install/ubuntu_install_nodejs.sh COPY install/ubuntu_install_rocm.sh /install/ubuntu_install_rocm.sh RUN bash /install/ubuntu_install_rocm.sh +COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh +RUN bash /install/ubuntu_install_opengl.sh + # Enable doxygen for c++ doc build RUN apt-get install -y doxygen graphviz diff --git a/tests/ci_build/install/ubuntu_install_opengl.sh b/tests/ci_build/install/ubuntu_install_opengl.sh new file mode 100644 index 0000000000000..f8be6e3515814 --- /dev/null +++ b/tests/ci_build/install/ubuntu_install_opengl.sh @@ -0,0 +1,4 @@ +apt-get update --fix-missing + +apt-get install -y --no-install-recommends --force-yes \ + libgl1-mesa-dev libglfw3-dev \ No newline at end of file diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py index 698f877d25047..5edf43337de5e 100644 --- a/tests/python/unittest/test_runtime_ndarray.py +++ b/tests/python/unittest/test_runtime_ndarray.py @@ -7,7 +7,8 @@ def enabled_ctx_list(): ('cl', tvm.opencl(0)), ('metal', tvm.metal(0)), ('rocm', tvm.rocm(0)), - ('vpi', tvm.vpi(0))] + ('vpi', tvm.vpi(0)), + ('opengl', tvm.opengl(0))] for k, v in ctx_list: assert tvm.context(k, 0) == v ctx_list = [x[1] for x in ctx_list if x[1].exist] @@ -19,7 +20,8 @@ def enabled_ctx_list(): def test_nd_create(): for ctx in ENABLED_CTX_LIST: - for dtype in ["float32", "int8", "uint16"]: + for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", + "float32"]: x = np.random.randint(0, 10, size=(3, 4)) x = np.array(x, dtype=dtype) y = tvm.nd.array(x, ctx=ctx) diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 757f2429ad323..70c2919f6fd01 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -17,3 +17,5 @@ TVM_FFI=cython python -m nose -v tests/python/integration || exit -1 TVM_FFI=ctypes python3 -m nose -v tests/python/integration || exit -1 TVM_FFI=cython python -m nose -v tests/python/contrib || exit -1 TVM_FFI=ctypes python3 -m nose -v tests/python/contrib || exit -1 +TVM_FFI=cython python -m nose -v tests/webgl || exit -1 +TVM_FFI=ctypes python3 -m nose -v tests/webgl || exit -1 diff --git a/tests/webgl/README.md b/tests/webgl/README.md new file mode 100644 index 0000000000000..c9f2ae3d22728 --- /dev/null +++ b/tests/webgl/README.md @@ -0,0 +1,7 @@ +## Test cases for the WebGL backend + +Any test case with name `test_local_...` tests the C++ OpenGL backend on the +local OS, which can be executed automatically. + +Any test case with name `test_remote_...` tests the WebGL backend within the +browser, which must be run manually. See instruction within the test. diff --git a/tests/webgl/test_local_gemm.py b/tests/webgl/test_local_gemm.py new file mode 100644 index 0000000000000..18d2d1d8bf34e --- /dev/null +++ b/tests/webgl/test_local_gemm.py @@ -0,0 +1,41 @@ +import tvm +import numpy as np + +def test_local_gemm(): + if not tvm.module.enabled("opengl"): + return + if not tvm.module.enabled("llvm"): + return + + nn = 2 + n = tvm.var('n') + n = tvm.convert(nn) + m = n + l = n + A = tvm.placeholder((n, l), name='A', dtype='int32') + B = tvm.placeholder((m, l), name='B', dtype='int32') + k = tvm.reduce_axis((0, l), name='k') + C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + name='CC') + + s = tvm.create_schedule(C.op) + s[C].opengl() + print(tvm.lower(s, [A, B, C], simple_mode=True)) + + f = tvm.build(s, [A, B, C], "opengl", name="gemm") + print("------opengl code------") + print(f.imported_modules[0].get_source(fmt="gl")) + + ctx = tvm.opengl() + n, m, l = nn, nn, nn + a_np = np.random.uniform(low=0, high=10, size=(n, l)).astype(A.dtype) + b_np = np.random.uniform(low=0, high=10, size=(m, l)).astype(B.dtype) + a = tvm.nd.array(a_np, ctx) + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx) + f(a, b, c) + + np.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T)) + +if __name__ == "__main__": + test_local_gemm() diff --git a/tests/webgl/test_local_save_load.py b/tests/webgl/test_local_save_load.py new file mode 100644 index 0000000000000..9dca0d3acfcad --- /dev/null +++ b/tests/webgl/test_local_save_load.py @@ -0,0 +1,35 @@ +import numpy as np +import tvm +from tvm.contrib import rpc, util, emscripten + +def test_local_save_load(): + if not tvm.module.enabled("opengl"): + return + if not tvm.module.enabled("llvm"): + return + + n = tvm.var("n") + A = tvm.placeholder((n,), name='A', dtype='int32') + B = tvm.placeholder((n,), name='B', dtype='int32') + C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = tvm.create_schedule(C.op) + s[C].opengl() + + f = tvm.build(s, [A, B, C], "opengl", target_host="llvm", name="myadd") + + ctx = tvm.opengl(0) + n = 10 + a = tvm.nd.array(np.random.uniform(high=10, size=(n)).astype(A.dtype), ctx) + b = tvm.nd.array(np.random.uniform(high=10, size=(n)).astype(B.dtype), ctx) + c = tvm.nd.array(np.zeros((n), dtype=C.dtype), ctx) + f(a, b, c) + + temp = util.tempdir() + path_so = temp.relpath("myadd.so") + f.export_library(path_so) + f1 = tvm.module.load(path_so) + f1(a, b, c) + np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy()) + +if __name__ == "__main__": + test_local_save_load() diff --git a/tests/webgl/test_remote_save_load.py b/tests/webgl/test_remote_save_load.py new file mode 100644 index 0000000000000..507616ee9f2e6 --- /dev/null +++ b/tests/webgl/test_remote_save_load.py @@ -0,0 +1,78 @@ +""" +The following instruction is based on web/README.md. + +Setup an RPC server: +$ python -m tvm.exec.rpc_proxy --example-rpc=1 + +Go to http://localhost:9190 in browser. + +Click "Connect To Proxy". + +Run this test script: +$ python tests/webgl/test_remote_save_load.py +""" + +import numpy as np +import tvm +from tvm.contrib import rpc, util, emscripten + +proxy_host = "localhost" +proxy_port = 9090 + +def try_remote_save_load(): + if not tvm.module.enabled("rpc"): + return + if not tvm.module.enabled("opengl"): + return + if not tvm.module.enabled("llvm"): + return + + # Build the module. + n = tvm.var("n") + A = tvm.placeholder((n,), name='A') + B = tvm.placeholder((n,), name='B') + C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = tvm.create_schedule(C.op) + s[C].opengl() + target_host = "llvm -target=asmjs-unknown-emscripten -system-lib" + f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd") + + remote = rpc.connect(proxy_host, proxy_port, key="js") + + temp = util.tempdir() + ctx = remote.opengl(0) + path_obj = temp.relpath("myadd.bc") + path_dso = temp.relpath("myadd.js") + path_gl = temp.relpath("myadd.gl") + path_json = temp.relpath("myadd.tvm_meta.json") + + f.save(path_obj) + emscripten.create_js(path_dso, path_obj, side_module=True) + f.imported_modules[0].save(path_gl) + + remote.upload(path_dso, "myadd.dso") + remote.upload(path_gl) + remote.upload(path_json) + + remote.download("myadd.dso") + remote.download("myadd.gl") + remote.download("myadd.tvm_meta.json") + + print('Loading myadd.dso') + fhost = remote.load_module("myadd.dso") + + print('Loading myadd.gl') + fdev = remote.load_module("myadd.gl") + + print('import_module') + fhost.import_module(fdev) + + print('running...') + a = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx) + b = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx) + c = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx) + fhost(a, b, c) + np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy()) + +if __name__ == "__main__": + try_remote_save_load() diff --git a/web/example_rpc.html b/web/example_rpc.html index bcccbef7358f7..b23ecda8e0179 100644 --- a/web/example_rpc.html +++ b/web/example_rpc.html @@ -36,5 +36,9 @@

Options

+ + diff --git a/web/tvm_runtime.js b/web/tvm_runtime.js index f69b1d57a11d2..c23c1f9da7964 100644 --- a/web/tvm_runtime.js +++ b/web/tvm_runtime.js @@ -686,7 +686,8 @@ var tvm_runtime = tvm_runtime || {}; 2 : "gpu", 4 : "opencl", 8 : "metal", - 9 : "vpi" + 9 : "vpi", + 11 : "opengl", }; var CTX_STR2MASK = { "cpu": 1, @@ -695,7 +696,8 @@ var tvm_runtime = tvm_runtime || {}; "cl": 4, "opencl": 4, "metal": 8, - "vpi": 9 + "vpi": 9, + "opengl": 11, }; TVMContext.prototype = { toString : function() { diff --git a/web/web_runtime.cc b/web/web_runtime.cc index 56538733025b2..b8db8cce70062 100644 --- a/web/web_runtime.cc +++ b/web/web_runtime.cc @@ -18,6 +18,8 @@ #include "../src/runtime/rpc/rpc_event_impl.cc" #include "../src/runtime/rpc/rpc_server_env.cc" #include "../src/runtime/graph/graph_runtime.cc" +#include "../src/runtime/opengl/opengl_device_api.cc" +#include "../src/runtime/opengl/opengl_module.cc" namespace tvm { namespace contrib {