
[opengl] Split gles as a taichi backend
Ailing Zhang committed Dec 5, 2022
1 parent 8635695 commit 0a93b3d
Showing 37 changed files with 145 additions and 93 deletions.
4 changes: 2 additions & 2 deletions docs/lang/articles/deployment/ndarray_android.md
@@ -66,7 +66,7 @@ The following Python script defines a Taichi AOT module for generating and saving
```python
import taichi as ti

ti.init(arch=ti.opengl, use_gles=True, allow_nv_shader_extension=False)
ti.init(arch=ti.gles, allow_nv_shader_extension=False)

# Define constants for computation
G = 1
@@ -126,7 +126,7 @@ aot()

**In line 3, you initialize Taichi:**

1. Set `use_gles` to `True` to generate GLES compute shaders for Android.
1. Set `arch=ti.gles` to generate GLES compute shaders for Android.
2. Set `allow_nv_shader_extension` to `False` to prevent the generated GLES compute shaders from using Nvidia GL extensions on Android.

> This setting is needed because Android supports GLES APIs, but GLES does not support `NV_SHADER_EXTENSION`.
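With this split, `use_gles` disappears from `ti.init` and GLES is selected as its own arch. A minimal before/after sketch of the user-facing migration (the `ti.lang.impl.current_cfg()` check is added here only for illustration and is not part of the doc change above):

```python
import taichi as ti

# Before this commit: GLES was a flag on the OpenGL backend.
# ti.init(arch=ti.opengl, use_gles=True, allow_nv_shader_extension=False)

# After this commit: GLES is its own backend.
ti.init(arch=ti.gles, allow_nv_shader_extension=False)

# If GLES is unavailable, Taichi falls back to another backend;
# the arch actually chosen can be inspected at runtime.
print(ti.lang.impl.current_cfg().arch)
```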
3 changes: 2 additions & 1 deletion python/taichi/lang/impl.py
@@ -682,7 +682,8 @@ def create_field_member(dtype, name, needs_grad, needs_dual):
# adjoint checkbit
x_grad_checkbit = Expr(get_runtime().prog.make_id_expr(""))
dtype = u8
if prog.config().arch in (_ti_core.opengl, _ti_core.vulkan):
if prog.config().arch in (_ti_core.opengl, _ti_core.vulkan,
_ti_core.gles):
dtype = i32
x_grad_checkbit.ptr = _ti_core.expr_field(x_grad_checkbit.ptr,
cook_dtype(dtype))
25 changes: 14 additions & 11 deletions python/taichi/lang/misc.py
@@ -129,6 +129,11 @@
"""
# ----------------------

gles = _ti_core.gles
"""The OpenGL ES backend. OpenGL ES 3.1 required.
"""
# ----------------------

# Skip annotating this one because it is barely maintained.
cc = _ti_core.cc

@@ -154,9 +159,9 @@
"""
# ----------------------

gpu = [cuda, metal, vulkan, opengl, dx11, dx12]
gpu = [cuda, metal, vulkan, opengl, dx11, dx12, gles]
"""A list of GPU backends supported on the current system.
Currently contains 'cuda', 'metal', 'opengl', 'vulkan', 'dx11', 'dx12'.
Currently contains 'cuda', 'metal', 'opengl', 'gles', 'vulkan', 'dx11', 'dx12'.
When this is used, Taichi automatically picks the matching GPU backend. If no
GPU is detected, Taichi falls back to the CPU backend.
@@ -454,7 +459,7 @@ def init(arch=None,
if env_arch is not None:
_logging.info(f'Following TI_ARCH setting up for arch={env_arch}')
arch = _ti_core.arch_from_name(env_arch)
cfg.arch = adaptive_arch_select(arch, enable_fallback, cfg.use_gles)
cfg.arch = adaptive_arch_select(arch, enable_fallback)
if cfg.arch == cc:
_ti_core.set_tmp_dir(locale_encode(prepare_sandbox()))
print(f'[Taichi] Starting on arch={_ti_core.arch_name(cfg.arch)}')
@@ -717,14 +722,11 @@ def mesh_patch_idx():
)


def is_arch_supported(arch, use_gles=False):
def is_arch_supported(arch):
"""Checks whether an arch is supported on the machine.
Args:
arch (taichi_python.Arch): Specified arch.
use_gles (bool): If True, check is GLES is available otherwise
check if GLSL is available. Only effective when `arch` is `ti.opengl`.
Default is `False`.
Returns:
bool: Whether `arch` is supported on the machine.
@@ -733,7 +735,8 @@ def is_arch_supported(arch, use_gles=False):
arch_table = {
cuda: _ti_core.with_cuda,
metal: _ti_core.with_metal,
opengl: functools.partial(_ti_core.with_opengl, use_gles),
opengl: functools.partial(_ti_core.with_opengl, False),
gles: functools.partial(_ti_core.with_opengl, True),
cc: _ti_core.with_cc,
vulkan: _ti_core.with_vulkan,
dx11: _ti_core.with_dx11,
@@ -753,13 +756,13 @@ def is_arch_supported(arch, use_gles=False):
return False


def adaptive_arch_select(arch, enable_fallback, use_gles):
def adaptive_arch_select(arch, enable_fallback):
if arch is None:
return cpu
if not isinstance(arch, (list, tuple)):
arch = [arch]
for a in arch:
if is_arch_supported(a, use_gles):
if is_arch_supported(a):
return a
if not enable_fallback:
raise RuntimeError(f'Arch={arch} is not supported')
@@ -778,7 +781,7 @@ def get_compute_stream_device_time_elapsed_us() -> float:
__all__ = [
'i', 'ij', 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'j', 'jk', 'jkl', 'jl',
'k', 'kl', 'l', 'x86_64', 'x64', 'dx11', 'dx12', 'wasm', 'arm64', 'cc',
'cpu', 'cuda', 'gpu', 'metal', 'opengl', 'vulkan', 'extension',
'cpu', 'cuda', 'gles', 'gpu', 'metal', 'opengl', 'vulkan', 'extension',
'loop_config', 'global_thread_idx', 'assume_in_range', 'block_local',
'cache_read_only', 'init', 'mesh_local', 'no_activate', 'reset',
'mesh_patch_idx', 'get_compute_stream_device_time_elapsed_us'
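The `misc.py` change removes the `use_gles` flag from backend probing: `ti.opengl` now maps to `with_opengl(False)`, `ti.gles` maps to `with_opengl(True)`, and `ti.gles` joins the `gpu` fallback list. A short sketch of what this looks like from Python (assuming `is_arch_supported` remains importable from `taichi.lang.misc`):

```python
import taichi as ti
from taichi.lang.misc import is_arch_supported

# OpenGL and GLES are now probed independently; no use_gles flag is
# threaded through is_arch_supported/adaptive_arch_select anymore.
for arch in (ti.opengl, ti.gles, ti.vulkan):
    print(arch, is_arch_supported(arch))

# ti.gpu includes ti.gles, so ti.init(arch=ti.gpu) may resolve to GLES;
# setting TI_ARCH=gles in the environment also selects it (see the
# env_arch handling in init above).
ti.init(arch=ti.gpu)
```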
3 changes: 1 addition & 2 deletions taichi/analysis/offline_cache_util.cpp
@@ -52,9 +52,8 @@ static std::vector<std::uint8_t> get_offline_cache_key_of_compile_config(
if (config->arch == Arch::cc) {
serializer(config->cc_compile_cmd);
serializer(config->cc_link_cmd);
} else if (config->arch == Arch::opengl) {
} else if (config->arch == Arch::opengl || config->arch == Arch::gles) {
serializer(config->allow_nv_shader_extension);
serializer(config->use_gles);
}
serializer(config->make_mesh_block_local);
serializer(config->optimize_mesh_reordered_mapping);
4 changes: 4 additions & 0 deletions taichi/aot/module_loader.cpp
@@ -37,6 +37,10 @@ std::unique_ptr<Module> Module::load(Arch arch, std::any mod_params) {
} else if (arch == Arch::opengl) {
#ifdef TI_WITH_OPENGL
return gfx::make_aot_module(mod_params, arch);
#endif
} else if (arch == Arch::gles) {
#ifdef TI_WITH_OPENGL
return gfx::make_aot_module(mod_params, arch);
#endif
} else if (arch == Arch::dx11) {
#ifdef TI_WITH_DX11
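`Module::load` gains a `gles` branch that reuses the same `gfx` AOT module implementation as OpenGL. On the authoring side, a module targeting the new arch would be built roughly as below (a sketch only, assuming the `ti.aot.Module(arch)` builder API of this Taichi version; the exact `save` signature has varied between releases, so it is left as a comment):

```python
import taichi as ti

ti.init(arch=ti.gles)

x = ti.field(ti.f32, shape=8)

@ti.kernel
def fill():
    for i in x:
        x[i] = float(i)

m = ti.aot.Module(ti.gles)  # build an AOT module for the gles arch
m.add_kernel(fill)
m.add_field('x', x)
# m.save(...) then serializes the module for the runtime to pick up via
# Module::load(Arch::gles, ...).
```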
1 change: 1 addition & 0 deletions taichi/inc/archs.inc.h
@@ -16,3 +16,4 @@ PER_ARCH(dx12) // Microsoft DirectX 12, WIP
PER_ARCH(opencl) // OpenCL, N/A
PER_ARCH(amdgpu) // AMD GPU, N/A
PER_ARCH(vulkan) // Vulkan
PER_ARCH(gles) // OpenGL ES
1 change: 0 additions & 1 deletion taichi/program/compile_config.h
@@ -78,7 +78,6 @@ struct CompileConfig {

// Opengl backend options:
bool allow_nv_shader_extension{true};
bool use_gles{false};

bool quant_opt_store_fusion{true};
bool quant_opt_atomic_demotion{true};
1 change: 1 addition & 0 deletions taichi/program/extension.cpp
@@ -23,6 +23,7 @@ bool is_extension_supported(Arch arch, Extension ext) {
{Arch::metal,
{Extension::adstack, Extension::assertion, Extension::sparse}},
{Arch::opengl, {Extension::extfunc}},
{Arch::gles, {}},
{Arch::cc, {Extension::data64, Extension::extfunc, Extension::adstack}},
};
// if (with_opengl_extension_data64())
14 changes: 13 additions & 1 deletion taichi/program/program.cpp
@@ -123,7 +123,14 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) {
#endif
} else if (config.arch == Arch::opengl) {
#ifdef TI_WITH_OPENGL
TI_ASSERT(opengl::initialize_opengl(config.use_gles));
TI_ASSERT(opengl::initialize_opengl(/*use_gles=*/false));
program_impl_ = std::make_unique<OpenglProgramImpl>(config);
#else
TI_ERROR("This taichi is not compiled with OpenGL");
#endif
} else if (config.arch == Arch::gles) {
#ifdef TI_WITH_OPENGL
TI_ASSERT(opengl::initialize_opengl(/*use_gles=*/true));
program_impl_ = std::make_unique<OpenglProgramImpl>(config);
#else
TI_ERROR("This taichi is not compiled with OpenGL");
@@ -332,6 +339,8 @@ void Program::visualize_layout(const std::string &fn) {
Arch Program::get_accessor_arch() {
if (this_thread_config().arch == Arch::opengl) {
return Arch::opengl;
} else if (this_thread_config().arch == Arch::gles) {
return Arch::gles;
} else if (this_thread_config().arch == Arch::vulkan) {
return Arch::vulkan;
} else if (this_thread_config().arch == Arch::cuda) {
@@ -417,6 +426,8 @@ void Program::finalize() {
finalized_ = true;
num_instances_ -= 1;
program_impl_->dump_cache_data_to_disk();
configs.clear();
configs[main_thread_id_] = default_compile_config;
TI_TRACE("Program ({}) finalized_.", fmt::ptr(this));
}

@@ -550,6 +561,7 @@ std::unique_ptr<AotModuleBuilder> Program::make_aot_module_builder(
this_thread_config().arch == Arch::metal ||
this_thread_config().arch == Arch::vulkan ||
this_thread_config().arch == Arch::opengl ||
this_thread_config().arch == Arch::gles ||
this_thread_config().arch == Arch::dx12) {
return program_impl_->make_aot_module_builder(cfg);
}
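Taken together with the `finalize()` override added to `OpenglProgramImpl` further down, resetting the program now tears down GL state, which should allow re-initializing with the other GL flavour inside the same process. A hedged usage sketch (behaviour inferred from this diff, not verified here):

```python
import taichi as ti

ti.init(arch=ti.opengl)   # desktop GL path: initialize_opengl(use_gles=false)
# ... run kernels ...

ti.reset()                # finalizes the program; OpenglProgramImpl::finalize()
                          # clears the cached GL/GLES probe state

ti.init(arch=ti.gles)     # GLES path: initialize_opengl(use_gles=true)
```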
4 changes: 4 additions & 0 deletions taichi/program/program.h
@@ -138,6 +138,10 @@ class TI_DLL_EXPORT Program {
return configs[thread_id];
}

const CompileConfig &config() {
return configs[main_thread_id_];
}

struct KernelProfilerQueryResult {
int counter{0};
double min{0.0};
4 changes: 1 addition & 3 deletions taichi/python/export_lang.cpp
@@ -210,7 +210,6 @@ void export_lang(py::module &m) {
&CompileConfig::quant_opt_atomic_demotion)
.def_readwrite("allow_nv_shader_extension",
&CompileConfig::allow_nv_shader_extension)
.def_readwrite("use_gles", &CompileConfig::use_gles)
.def_readwrite("make_mesh_block_local",
&CompileConfig::make_mesh_block_local)
.def_readwrite("mesh_localize_to_end_mapping",
@@ -327,8 +326,7 @@ void export_lang(py::module &m) {

py::class_<Program>(m, "Program")
.def(py::init<>())
.def("config", &Program::this_thread_config,
py::return_value_policy::reference)
.def("config", &Program::config)
.def("sync_kernel_profiler",
[](Program *program) { program->profiler->sync(); })
.def("update_kernel_profiler",
27 changes: 20 additions & 7 deletions taichi/rhi/opengl/opengl_api.cpp
@@ -28,13 +28,26 @@ static void glfw_error_callback(int code, const char *description) {
TI_WARN("GLFW Error {}: {}", code, description);
}

bool initialize_opengl(bool use_gles, bool error_tolerance) {
static std::optional<bool> supported; // std::nullopt
bool initialize_opengl(bool use_gles, bool error_tolerance, bool reset) {
// TODO: properly split opengl & gles initialization in a followup PR.
// TODO: properly split initialization and reset in a followup PR.
static std::optional<bool> gles_supported;
static std::optional<bool> gl_supported;

if (reset) {
glfwTerminate();
gles_supported = std::nullopt;
gl_supported = std::nullopt;
kUseGles = false;
return false;
}

TI_TRACE("initialize_opengl({}, {}) called", use_gles, error_tolerance);
// int idx = static_cast<int>(use_gles);
std::optional<bool> *supported = use_gles ? &gles_supported : &gl_supported;

if (supported.has_value()) { // this function has been called before
if (supported.value()) { // detected to be true in last call
if (supported->has_value()) { // this function has been called before
if (supported->value()) { // detected to be true in last call
return true;
} else {
if (!error_tolerance) // not called from with_opengl
@@ -160,7 +173,7 @@ bool initialize_opengl(bool use_gles, bool error_tolerance) {
if (!opengl_version) {
if (error_tolerance) {
TI_WARN("Can not create OpenGL context");
supported = std::make_optional<bool>(false);
supported[(int)use_gles] = std::make_optional<bool>(false);
return false;
}
TI_ERROR("Can not create OpenGL context");
@@ -179,7 +192,7 @@ bool initialize_opengl(bool use_gles, bool error_tolerance) {
if (!use_gles && !opengl_extension_GL_ARB_compute_shader) {
if (error_tolerance) {
TI_INFO("Your OpenGL does not support GL_ARB_compute_shader extension");
supported = std::make_optional<bool>(false);
gl_supported = std::make_optional<bool>(false);
return false;
}
TI_ERROR("Your OpenGL does not support GL_ARB_compute_shader extension");
@@ -192,7 +205,7 @@ bool initialize_opengl(bool use_gles, bool error_tolerance) {
check_opengl_error("glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE)");
TI_TRACE("GL_MAX_COMPUTE_WORK_GROUP_SIZE: {}", opengl_max_grid_dim);

supported = std::make_optional<bool>(true);
*supported = std::make_optional<bool>(true);
kUseGles = use_gles;
return true;
}
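The reworked `initialize_opengl` caches GL and GLES availability separately and adds a `reset` path that tears down the GLFW state. In rough Python-flavoured pseudocode the control flow is (a sketch of the pattern only; `probe_context` is a hypothetical stand-in for the real GLFW/GLAD probing):

```python
from typing import Optional

_gl_supported: Optional[bool] = None
_gles_supported: Optional[bool] = None


def probe_context(use_gles: bool) -> bool:
    # Placeholder for the real GLFW context creation and feature checks.
    return False


def initialize_opengl(use_gles: bool = False,
                      error_tolerance: bool = False,
                      reset: bool = False) -> bool:
    global _gl_supported, _gles_supported
    if reset:
        # Forget both cached probe results (the C++ side also calls glfwTerminate()).
        _gl_supported = _gles_supported = None
        return False
    cached = _gles_supported if use_gles else _gl_supported
    if cached is not None:
        # This mode was probed before: reuse the result, or fail loudly if the
        # caller is not tolerant of errors.
        if cached or error_tolerance:
            return cached
        raise RuntimeError("OpenGL/GLES context creation failed earlier")
    ok = probe_context(use_gles)
    if use_gles:
        _gles_supported = ok
    else:
        _gl_supported = ok
    return ok
```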
4 changes: 3 additions & 1 deletion taichi/rhi/opengl/opengl_api.h
@@ -6,7 +6,9 @@
namespace taichi::lang {
namespace opengl {

bool initialize_opengl(bool use_gles = false, bool error_tolerance = false);
bool initialize_opengl(bool use_gles = false,
bool error_tolerance = false,
bool reset = false);
bool is_opengl_api_available(bool use_gles = false);
bool is_gles();

9 changes: 9 additions & 0 deletions taichi/runtime/program_impls/opengl/opengl_program.cpp
@@ -91,7 +91,16 @@ void OpenglProgramImpl::dump_cache_data_to_disk() {
config->offline_cache_cleaning_factor);
mgr->dump_with_merging();
}
void OpenglProgramImpl::finalize() {
runtime_.reset();
device_.reset();
opengl::initialize_opengl(/*use_gles=*/false, /*error_tolerance=*/false,
/*reset=*/true);
}

OpenglProgramImpl::~OpenglProgramImpl() {
finalize();
}
const std::unique_ptr<gfx::CacheManager>
&OpenglProgramImpl::get_cache_manager() {
if (!cache_manager_) {
4 changes: 2 additions & 2 deletions taichi/runtime/program_impls/opengl/opengl_program.h
@@ -11,7 +11,7 @@ class OpenglProgramImpl : public ProgramImpl {
public:
explicit OpenglProgramImpl(CompileConfig &config);
FunctionType compile(Kernel *kernel, OffloadedStmt *offloaded) override;

~OpenglProgramImpl() override;
std::size_t get_snode_num_dynamically_allocated(
SNode *snode,
uint64 *result_buffer) override {
@@ -29,7 +29,7 @@ class OpenglProgramImpl : public ProgramImpl {
void synchronize() override {
runtime_->synchronize();
}

void finalize() override;
StreamSemaphore flush() override {
return runtime_->flush();
}
6 changes: 4 additions & 2 deletions taichi/transforms/offload.cpp
@@ -112,7 +112,8 @@ class Offloader {
offloaded->const_end = true;
offloaded->end_value = val->val.val_int32();
} else {
if ((arch == Arch::opengl || arch == Arch::vulkan) &&
if ((arch == Arch::opengl || arch == Arch::gles ||
arch == Arch::vulkan) &&
demotable_axis_load(s->end)) {
// TODO: We need to update codegen for each backend gradually so
// let's limit it to opengl backend for now.
@@ -385,7 +386,8 @@ class IdentifyValuesUsedInOtherOffloads : public BasicStmtVisitor {
if (top_level_ptr->is<GlobalPtrStmt>() || stmt->is<ExternalPtrStmt>() ||
(stmt->is<ArgLoadStmt>() && stmt->as<ArgLoadStmt>()->is_ptr))
return;
if ((config_.arch == Arch::opengl || config_.arch == Arch::vulkan) &&
if ((config_.arch == Arch::opengl || config_.arch == Arch::gles ||
config_.arch == Arch::vulkan) &&
demotable_axis_load(stmt))
return;
// Not yet allocated
3 changes: 2 additions & 1 deletion taichi/util/offline_cache.cpp
@@ -22,7 +22,8 @@ std::string get_cache_path_by_arch(const std::string &base_path, Arch arch) {
std::string subdir;
if (arch_uses_llvm(arch)) {
subdir = "llvm";
} else if (arch == Arch::vulkan || arch == Arch::opengl) {
} else if (arch == Arch::vulkan || arch == Arch::opengl ||
arch == Arch::gles) {
subdir = "gfx";
} else if (arch == Arch::metal) {
subdir = "metal";
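With `gles` sharing the SPIR-V based `gfx` cache directory, the cache-path mapping behaves roughly as below (illustrative Python; folding everything else into an `llvm` fallback is a simplification of the real `arch_uses_llvm`/`metal` branching):

```python
import os

def get_cache_path_by_arch(base_path: str, arch: str) -> str:
    # vulkan, opengl and gles share one "gfx" offline-cache subdirectory,
    # so gles entries sit next to opengl and vulkan ones.
    if arch in ("vulkan", "opengl", "gles"):
        subdir = "gfx"
    elif arch == "metal":
        subdir = "metal"
    else:
        subdir = "llvm"  # simplification: LLVM-based archs (cpu, cuda, ...)
    return os.path.join(base_path, subdir)
```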
4 changes: 2 additions & 2 deletions tests/python/test_ad_grad_check.py
@@ -6,7 +6,7 @@


@test_utils.test(default_fp=ti.f64,
exclude=[ti.cc, ti.vulkan, ti.opengl, ti.metal])
exclude=[ti.cc, ti.vulkan, ti.opengl, ti.gles, ti.metal])
def test_general():
x1 = ti.field(dtype=float, shape=(2, 2), needs_grad=True)
y1 = ti.field(dtype=float, shape=(), needs_grad=True)
@@ -71,6 +71,6 @@ def func():
lambda x: ti.atan2(x, 0.4), lambda x: 0.4**x, lambda x: x**0.4
])
@test_utils.test(default_fp=ti.f64,
exclude=[ti.cc, ti.vulkan, ti.opengl, ti.metal])
exclude=[ti.cc, ti.vulkan, ti.opengl, ti.gles, ti.metal])
def test_basics(tifunc):
grad_test(tifunc)
6 changes: 2 additions & 4 deletions tests/python/test_aot.py
@@ -138,10 +138,8 @@ def test_non_dense_snode():
m.add_field('y', y)


@pytest.mark.parametrize('use_gles', [True, False])
@test_utils.test(arch=[ti.opengl, ti.vulkan])
def test_mpm88_aot(use_gles):
ti.init(ti.lang.impl.current_cfg().arch, use_gles=use_gles)
@test_utils.test(arch=[ti.opengl, ti.vulkan, ti.gles])
def test_mpm88_aot():
n_particles = 8192
n_grid = 128
dx = 1 / n_grid
