Commit
Merge branch 'enable_dynamic_struct_for' of github.com:Galeselee/taichi into enable_dynamic_struct_for
galeselee committed Feb 2, 2023
2 parents 8d7edf7 + bd8b192 commit 42f146a
Showing 37 changed files with 131 additions and 238 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/scripts/ci_common/python.py
@@ -69,10 +69,15 @@ def setup_python(version: Optional[str] = None) -> Tuple[Command, Command]:
env = prefix / 'envs' / version
if windows:
exe = env / 'python.exe'
path_prepend('PATH', env, env / 'Scripts', prefix / 'Library' / 'bin')
paths = [
env, env / 'Library' / 'mingw-w64' / 'bin',
env / 'Library' / 'usr' / 'bin', env / 'Library' / 'bin',
env / 'Scripts', env / 'bin', prefix / 'condabin'
]
path_prepend('PATH', *paths)
else:
exe = env / 'bin' / 'python'
path_prepend('PATH', env / 'bin')
path_prepend('PATH', env / 'bin', prefix / 'condabin')

if not exe.exists():
conda.create('-y', '-n', version, f'python={version}')
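For context, a minimal sketch of what a path_prepend helper like the one used in this hunk might do (an assumption for illustration; the actual helper in ci_common may differ):

    import os
    from pathlib import Path

    def path_prepend(var: str, *paths) -> None:
        # Illustrative only: place the given paths ahead of the variable's
        # current value, joined with the platform's path separator.
        new = os.pathsep.join(str(Path(p)) for p in paths)
        current = os.environ.get(var, "")
        os.environ[var] = new + (os.pathsep + current if current else "")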
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -14,12 +14,12 @@ repos:
additional_dependencies: [toml]

- repo: https://github.com/PyCQA/isort
rev: 5.11.4
rev: 5.12.0
hooks:
- id: isort

- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v15.0.6
rev: v15.0.7
hooks:
- id: clang-format
exclude: .*\.json$
3 changes: 1 addition & 2 deletions c_api/src/taichi_llvm_impl.cpp
@@ -55,8 +55,7 @@ taichi::lang::Device &LlvmRuntime::get() {
TiMemory LlvmRuntime::allocate_memory(
const taichi::lang::Device::AllocParams &params) {
const taichi::lang::CompileConfig &config = executor_->get_config();
taichi::lang::TaichiLLVMContext *tlctx =
executor_->get_llvm_context(config.arch);
taichi::lang::TaichiLLVMContext *tlctx = executor_->get_llvm_context();
taichi::lang::LLVMRuntime *llvm_runtime = executor_->get_llvm_runtime();
taichi::lang::LlvmDevice *llvm_device = executor_->llvm_device();

File renamed without changes.
2 changes: 1 addition & 1 deletion docs/lang/articles/get-started/hello_world.md
@@ -9,7 +9,7 @@ Taichi is a domain-specific language embedded in Python and designed specificall

When writing compute-intensive tasks in Python, you can take advantage of Taichi's high performance computation by following a few extra rules. Generally, Taichi provides two decorators `@ti.func` and `@ti.kernel`, which instruct Taichi to take over the tasks. Its just-in-time (JIT) compiler compiles the decorated functions to machine code, and all subsequent calls to these functions are executed on multi-core CPUs or GPUs. In a typical compute-intensive scenario, such as a numerical simulation, Taichi can accelerate performance by 50x~100x compared with native Python code.

Taichi also has a built-in ahead-of-time (AOT) system for exporting your code into binary/shader files, which can then be called in C/C++ and run without the Python environment. See [Tutorial: Run Taichi programs in C++ application](../deployment/ndarray_android.md) for more information.
Taichi also has a built-in ahead-of-time (AOT) system for exporting your code into binary/shader files, which can then be called in C/C++ and run without the Python environment. See [Tutorial: Run Taichi programs in C++ application](../deployment/tutorial.md) for more information.

## Prerequisites

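For readers following the hello_world doc change above, a minimal, self-contained sketch of the two decorators it describes (the field shape and backend choice here are illustrative, not taken from the docs page):

    import taichi as ti

    ti.init(arch=ti.cpu)  # ti.gpu also works when a supported GPU backend is present

    x = ti.field(dtype=ti.f32, shape=16)

    @ti.func
    def square(v):
        return v * v

    @ti.kernel
    def fill():
        for i in x:  # this loop is parallelized by Taichi
            x[i] = square(i * 0.5)

    fill()
    print(x.to_numpy())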
2 changes: 1 addition & 1 deletion docs/lang/articles/reference/language_reference.md
@@ -73,7 +73,7 @@ the remaining expressions will be evaluated to Taichi values at runtime.
A Taichi value has a Taichi type, which is one of the following:
- A primitive type, as described in [Type system](../type_system/type.md)
- A compound type, as described in [Type system](../type_system/type.md)
- An ndarray type, as introduced in [Tutorial: Run Taichi programs in C++ application](../deployment/ndarray_android.md)
- An ndarray type, as introduced in [Tutorial: Run Taichi programs in C++ application](../deployment/tutorial.md)
- A sparse matrix builder type, as introduced in [Sparse
Matrix](../math/sparse_matrix.md)

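As a hedged illustration of the type categories listed in the hunk above (the API names are standard Taichi, but the concrete shapes and sizes are arbitrary examples):

    import taichi as ti

    ti.init(arch=ti.cpu)

    prim = ti.f32                                  # primitive type
    vec3 = ti.types.vector(3, ti.f32)              # compound type
    arr = ti.ndarray(dtype=ti.f32, shape=(4, 4))   # ndarray type
    builder = ti.linalg.SparseMatrixBuilder(4, 4, max_num_triplets=16)  # sparse matrix builder type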
4 changes: 4 additions & 0 deletions python/taichi/__init__.py
@@ -2,6 +2,7 @@

from taichi._funcs import *
from taichi._lib import core as _ti_core
from taichi._lib.utils import warn_restricted_version
from taichi._logging import *
from taichi._snode import *
from taichi.lang import * # pylint: disable=W0622 # TODO(archibate): It's `taichi.lang.core` overriding `taichi.core`
@@ -86,3 +87,6 @@ def __getattr__(attr):

del sys
del _ti_core

warn_restricted_version()
del warn_restricted_version
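The __init__.py hunk above follows a common "call at import time, then delete" pattern; a generic sketch of that pattern (the helper name and version check below are hypothetical, not Taichi's actual logic):

    import sys

    def _warn_if_unsupported():
        # Hypothetical check; Taichi's warn_restricted_version() performs its own logic.
        if sys.version_info < (3, 8):
            print("Warning: this Python version may not be supported.")

    _warn_if_unsupported()      # run once when the package is imported
    del _warn_if_unsupported    # keep the helper out of the public namespace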
8 changes: 4 additions & 4 deletions python/taichi/_lib/utils.py
@@ -2,6 +2,7 @@
import platform
import re
import sys
import warnings

from colorama import Fore, Style

@@ -202,7 +203,9 @@ def try_get_loaded_libc_version():

def try_get_pip_version():
try:
import pip # pylint: disable=import-outside-toplevel
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import pip # pylint: disable=import-outside-toplevel
return tuple([int(v) for v in pip.__version__.split('.')])
except ImportError:
return None
@@ -244,6 +247,3 @@ def warn_restricted_version():
)
except Exception:
pass


warn_restricted_version()
14 changes: 7 additions & 7 deletions taichi/codegen/amdgpu/codegen_amdgpu.cpp
@@ -396,15 +396,15 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
}
}

private:
private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx =
builder->CreateIntrinsic(Intrinsic::amdgcn_workitem_id_x, {}, {});
auto workgroup_dim_ = call(
"__ockl_get_local_size",
llvm::ConstantInt::get(llvm::Type::getInt32Ty(*llvm_context), 0));
auto block_dim = builder->CreateTrunc(workgroup_dim_,
llvm::Type::getInt32Ty(*llvm_context));
builder->CreateIntrinsic(Intrinsic::amdgcn_workitem_id_x, {}, {});
auto workgroup_dim_ =
call("__ockl_get_local_size",
llvm::ConstantInt::get(llvm::Type::getInt32Ty(*llvm_context), 0));
auto block_dim = builder->CreateTrunc(
workgroup_dim_, llvm::Type::getInt32Ty(*llvm_context));
return std::make_tuple(thread_idx, block_dim);
}
};
2 changes: 1 addition & 1 deletion taichi/codegen/cpu/codegen_cpu.cpp
@@ -210,7 +210,7 @@ class TaskCodeGenCPU : public TaskCodeGenLLVM {
}
}

private:
private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx = tlctx->get_constant(0);
auto block_dim = tlctx->get_constant(1);
8 changes: 4 additions & 4 deletions taichi/codegen/cuda/codegen_cuda.cpp
@@ -585,12 +585,12 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
}
}

private:
private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx =
builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {});
auto block_dim = builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_ntid_x,
{}, {});
builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {});
auto block_dim =
builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_ntid_x, {}, {});
return std::make_tuple(thread_idx, block_dim);
}
};
2 changes: 1 addition & 1 deletion taichi/codegen/dx12/codegen_dx12.cpp
@@ -192,7 +192,7 @@ class TaskCodeGenLLVMDX12 : public TaskCodeGenLLVM {
}
}

private:
private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
auto thread_idx = tlctx->get_constant(0);
auto block_dim = tlctx->get_constant(1);
2 changes: 1 addition & 1 deletion taichi/codegen/llvm/codegen_llvm.cpp
@@ -2125,7 +2125,7 @@ void TaskCodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt) {

auto [thread_idx, block_dim] = this->get_spmd_info();
builder->CreateStore(builder->CreateAdd(thread_idx, lower_bound),
loop_index);
loop_index);

auto loop_test_bb = BasicBlock::Create(*llvm_context, "loop_test", func);
auto loop_body_bb = BasicBlock::Create(*llvm_context, "loop_body", func);
1 change: 0 additions & 1 deletion taichi/codegen/llvm/codegen_llvm.h
@@ -412,7 +412,6 @@ class TaskCodeGenLLVM : public IRVisitor, public LLVMModuleBuilder {
std::vector<llvm::Value *> &current_index);

virtual std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() = 0;

};

} // namespace taichi::lang
2 changes: 1 addition & 1 deletion taichi/codegen/wasm/codegen_wasm.cpp
@@ -235,7 +235,7 @@ class TaskCodeGenWASM : public TaskCodeGenLLVM {
return res;
}

private:
private:
std::tuple<llvm::Value *, llvm::Value *> get_spmd_info() override {
TI_NOT_IMPLEMENTED;
}
31 changes: 14 additions & 17 deletions taichi/rhi/vulkan/vulkan_device.cpp
@@ -1624,15 +1624,18 @@ VulkanDevice::~VulkanDevice() {
// be properly deallocated before VulkanDevice destruction. This isn't
// the most proper fix but is less intrusive compared to other
// approaches.
vkDeviceWaitIdle(device_);

allocations_.clear();
image_allocations_.clear();

vkDeviceWaitIdle(device_);

desc_pool_ = nullptr;
compute_streams_.reset();
graphics_streams_.reset();

framebuffer_pools_.clear();
renderpass_pools_.clear();
desc_set_layouts_.clear();
desc_pool_ = nullptr;

vmaDestroyAllocator(allocator_);
vmaDestroyAllocator(allocator_export_);
@@ -2809,12 +2812,6 @@ VulkanSurface::~VulkanSurface() {
}
swapchain_images_.clear();
}
if (depth_buffer_ != kDeviceNullAllocation) {
device_->dealloc_memory(depth_buffer_);
}
if (screenshot_buffer_ != kDeviceNullAllocation) {
device_->dealloc_memory(screenshot_buffer_);
}
}

void VulkanSurface::resize(uint32_t width, uint32_t height) {
@@ -2880,11 +2877,11 @@ DeviceAllocation VulkanSurface::get_depth_data(DeviceAllocation &depth_alloc) {
auto [w, h] = get_size();
size_t size_bytes = size_t(w * h) * sizeof(float);

if (depth_buffer_ == kDeviceNullAllocation) {
if (!depth_buffer_) {
Device::AllocParams params{size_bytes, /*host_wrtie*/ false,
/*host_read*/ true, /*export_sharing*/ false,
AllocUsage::Uniform};
depth_buffer_ = device_->allocate_memory(params);
depth_buffer_ = device_->allocate_memory_unique(params);
}

BufferImageCopyParams copy_params;
@@ -2895,13 +2892,13 @@ DeviceAllocation VulkanSurface::get_depth_data(DeviceAllocation &depth_alloc) {
assert(res == RhiResult::success && "Failed to allocate command list");
cmd_list->image_transition(depth_alloc, ImageLayout::depth_attachment,
ImageLayout::transfer_src);
cmd_list->image_to_buffer(depth_buffer_.get_ptr(), depth_alloc,
cmd_list->image_to_buffer(depth_buffer_->get_ptr(), depth_alloc,
ImageLayout::transfer_src, copy_params);
cmd_list->image_transition(depth_alloc, ImageLayout::transfer_src,
ImageLayout::depth_attachment);
stream->submit_synced(cmd_list.get());

return depth_buffer_;
return *depth_buffer_;
}

DeviceAllocation VulkanSurface::get_image_data() {
@@ -2923,11 +2920,11 @@ DeviceAllocation VulkanSurface::get_image_data() {
}
*/

if (screenshot_buffer_ == kDeviceNullAllocation) {
if (!screenshot_buffer_) {
Device::AllocParams params{size_bytes, /*host_wrtie*/ false,
/*host_read*/ true, /*export_sharing*/ false,
AllocUsage::Uniform};
screenshot_buffer_ = device_->allocate_memory(params);
screenshot_buffer_ = device_->allocate_memory_unique(params);
}

/*
@@ -2952,7 +2949,7 @@ DeviceAllocation VulkanSurface::get_image_data() {
cmd_list->image_transition(img_alloc, ImageLayout::present_src,
ImageLayout::transfer_src);
// TODO: directly map the image to cpu memory
cmd_list->image_to_buffer(screenshot_buffer_.get_ptr(), img_alloc,
cmd_list->image_to_buffer(screenshot_buffer_->get_ptr(), img_alloc,
ImageLayout::transfer_src, copy_params);
cmd_list->image_transition(img_alloc, ImageLayout::transfer_src,
ImageLayout::present_src);
Expand All @@ -2964,7 +2961,7 @@ DeviceAllocation VulkanSurface::get_image_data() {
*/
stream->submit_synced(cmd_list.get());

return screenshot_buffer_;
return *screenshot_buffer_;
}

VulkanStream::VulkanStream(VulkanDevice &device,
4 changes: 2 additions & 2 deletions taichi/rhi/vulkan/vulkan_device.h
@@ -537,8 +537,8 @@ class VulkanSurface : public Surface {
std::vector<DeviceAllocation> swapchain_images_;

// DeviceAllocation screenshot_image_{kDeviceNullAllocation};
DeviceAllocation depth_buffer_{kDeviceNullAllocation};
DeviceAllocation screenshot_buffer_{kDeviceNullAllocation};
DeviceAllocationUnique depth_buffer_{nullptr};
DeviceAllocationUnique screenshot_buffer_{nullptr};
};

struct DescPool {
2 changes: 1 addition & 1 deletion taichi/runtime/cpu/aot_module_loader_impl.cpp
@@ -20,7 +20,7 @@ class AotModuleImpl : public LlvmAotModule {
LlvmOfflineCache::KernelCacheData &&loaded) override {
Arch arch = executor_->get_config().arch;
TI_ASSERT(arch == Arch::x64 || arch == Arch::arm64);
auto *tlctx = executor_->get_llvm_context(arch);
auto *tlctx = executor_->get_llvm_context();

CPUModuleToFunctionConverter converter{tlctx, executor_};
return converter.convert(name, loaded.args,
2 changes: 1 addition & 1 deletion taichi/runtime/cuda/aot_module_loader_impl.cpp
@@ -20,7 +20,7 @@ class AotModuleImpl : public LlvmAotModule {
LlvmOfflineCache::KernelCacheData &&loaded) override {
Arch arch = executor_->get_config().arch;
TI_ASSERT(arch == Arch::cuda);
auto *tlctx = executor_->get_llvm_context(arch);
auto *tlctx = executor_->get_llvm_context();

CUDAModuleToFunctionConverter converter{tlctx, executor_};
return converter.convert(name, loaded.args,
2 changes: 1 addition & 1 deletion taichi/runtime/llvm/llvm_aot_module_loader.cpp
@@ -6,7 +6,7 @@ namespace taichi::lang {
LlvmOfflineCache::KernelCacheData LlvmAotModule::load_kernel_from_cache(
const std::string &name) {
TI_ASSERT(cache_reader_ != nullptr);
auto *tlctx = executor_->get_llvm_context(executor_->get_config().arch);
auto *tlctx = executor_->get_llvm_context();
LlvmOfflineCache::KernelCacheData loaded;
auto ok = cache_reader_->get_kernel_cache(loaded, name,
*tlctx->get_this_thread_context());
6 changes: 6 additions & 0 deletions taichi/runtime/llvm/llvm_context.cpp
@@ -94,6 +94,12 @@ TaichiLLVMContext::TaichiLLVMContext(const CompileConfig &config, Arch arch)
llvm::InitializeNativeTargetAsmParser();
#endif
} else if (arch == Arch::dx12) {
// FIXME: Must initialize these before initializing Arch::dx12
// because it uses the jit of CPU right now.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
llvm::InitializeNativeTargetAsmParser();
// The dx target is used elsewhere, so we need to initialize it too.
#if defined(TI_WITH_DX12)
LLVMInitializeDirectXTarget();
LLVMInitializeDirectXTargetMC();