[gfx] Update Device API: Splitting ResourceBinder into separate Shade…

…rResourceSet & RasterResources (taichi-dev#6954) Issue: taichi-dev#6832 ### Brief Summary ResourceBinder is now split into two structures: one controls the binding of shader-accessible resources, and the other is dedicated to controlling the binding of rasterizer state (vertex buffers, etc.). This makes the mapping onto Vulkan DescriptorSets easier and cleans up the resource-binding implementation throughout the codebase. In addition, these binding states are no longer attached to the program and can thus be pre-filled / pre-allocated to achieve lower overhead. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
feisuzhu · Dec 30, 2022 · 4223bf3 · 4223bf3
1 parent 26b81e7
commit 4223bf3
Show file tree

Hide file tree

Showing 31 changed files with 1,014 additions and 840 deletions.
diff --git a/.github/workflows/scripts/aot-demo.sh b/.github/workflows/scripts/aot-demo.sh
@@ -4,7 +4,7 @@ set -ex
 export TI_SKIP_VERSION_CHECK=ON
 export TI_CI=1
 
-export TAICHI_AOT_DEMO_URL=https://github.com/taichi-dev/taichi-aot-demo
+export TAICHI_AOT_DEMO_URL=https://github.com/bobcao3/taichi-aot-demo
 export TAICHI_AOT_DEMO_BRANCH=master
 
 export TAICHI_UNITY2_URL=https://github.com/taichi-dev/taichi-unity2

diff --git a/cpp_examples/rhi_examples/sample_2_triangle.cpp b/cpp_examples/rhi_examples/sample_2_triangle.cpp
@@ -73,6 +73,12 @@ class SampleApp : public App {
       device->unmap(*vertex_buffer);
     }
 
+    // Define the raster state
+    {
+      raster_resources = device->create_raster_resources_unique();
+      raster_resources->vertex_buffer(vertex_buffer->get_ptr(0), 0);
+    }
+
     TI_INFO("App Init Done");
   }
 
@@ -94,10 +100,7 @@ class SampleApp : public App {
 
     // Bind our triangle pipeline
     cmdlist->bind_pipeline(pipeline.get());
-    // Get the binder and bind our vertex buffer
-    auto resource_binder = pipeline->resource_binder();
-    resource_binder->vertex_buffer(vertex_buffer->get_ptr(0), 0);
-    cmdlist->bind_resources(resource_binder);
+    cmdlist->bind_raster_resources(raster_resources.get());
     // Render the triangle
     cmdlist->draw(3, 0);
     // End rendering
@@ -110,9 +113,10 @@ class SampleApp : public App {
   }
 
  public:
-  std::unique_ptr<Pipeline> pipeline;
+  std::unique_ptr<Pipeline> pipeline{nullptr};
+  std::unique_ptr<RasterResources> raster_resources{nullptr};
 
-  std::unique_ptr<DeviceAllocationGuard> vertex_buffer;
+  std::unique_ptr<DeviceAllocationGuard> vertex_buffer{nullptr};
 };
 
 int main() {

diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp
@@ -91,7 +91,7 @@ class TaskCodegen : public IRVisitor {
 
   void fill_snode_to_root() {
     for (int root = 0; root < compiled_structs_.size(); ++root) {
-      for (auto [node_id, node] : compiled_structs_[root].snode_descriptors) {
+      for (auto &[node_id, node] : compiled_structs_[root].snode_descriptors) {
         snode_to_root_[node_id] = root;
       }
     }
@@ -108,9 +108,6 @@ class TaskCodegen : public IRVisitor {
     kernel_function_ = ir_->new_function();  // void main();
     ir_->debug_name(spv::OpName, kernel_function_, "main");
 
-    compile_args_struct();
-    compile_ret_struct();
-
     if (task_ir_->task_type == OffloadedTaskType::serial) {
       generate_serial_kernel(task_ir_);
     } else if (task_ir_->task_type == OffloadedTaskType::range_for) {
@@ -1749,22 +1746,21 @@ class TaskCodegen : public IRVisitor {
     std::vector<spirv::Value> buffers;
     if (caps_->get(DeviceCapability::spirv_version) > 0x10300) {
       buffers = shared_array_binds_;
-      std::unordered_set<BufferInfo, BufferInfoHasher> unique_bufs;
       // One buffer can be bound to different bind points but has to be unique
       // in OpEntryPoint interface declarations.
       // From Spec: before SPIR-V version 1.4, duplication of these interface id
       // is tolerated. Starting with version 1.4, an interface id must not
       // appear more than once.
+      std::unordered_set<spirv::Value, spirv::ValueHasher> entry_point_values;
       for (const auto &bb : task_attribs_.buffer_binds) {
-        if (unique_bufs.count(bb.buffer) == 0) {
-          for (auto &it : buffer_value_map_) {
-            if (it.first.first == bb.buffer) {
-              buffers.push_back(it.second);
-            }
+        for (auto &it : buffer_value_map_) {
+          if (it.first.first == bb.buffer) {
+            entry_point_values.insert(it.second);
           }
-          unique_bufs.insert(bb.buffer);
         }
       }
+      buffers.insert(buffers.end(), entry_point_values.begin(),
+                     entry_point_values.end());
     }
     ir_->commit_kernel_function(kernel_function_, "main", buffers,
                                 group_size);  // kernel entry
@@ -2248,12 +2244,16 @@ class TaskCodegen : public IRVisitor {
     }
 
     if (buffer.type == BufferType::Args) {
+      compile_args_struct();
+
       buffer_binding_map_[key] = 0;
       buffer_value_map_[key] = args_buffer_value_;
       return args_buffer_value_;
     }
 
     if (buffer.type == BufferType::Rets) {
+      compile_ret_struct();
+
       buffer_binding_map_[key] = 1;
       buffer_value_map_[key] = ret_buffer_value_;
       return ret_buffer_value_;
@@ -2537,7 +2537,7 @@ void KernelCodegen::run(TaichiKernelAttributes &kernel_attribs,
 
     size_t last_size;
     bool success = true;
-    do {
+    {
       last_size = optimized_spv.size();
       bool result = false;
       TI_ERROR_IF(
@@ -2546,9 +2546,8 @@ void KernelCodegen::run(TaichiKernelAttributes &kernel_attribs,
           "SPIRV optimization failed");
       if (result) {
         success = false;
-        break;
       }
-    } while (last_size != optimized_spv.size());
+    }
 
     TI_TRACE("SPIRV-Tools-opt: binary size, before={}, after={}",
              task_res.spirv_code.size(), optimized_spv.size());

diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp
@@ -835,11 +835,7 @@ Value IRBuilder::fetch_texel(Value texture_var,
   // OpImageFetch requires operand with OpImageType
   // We have to extract the underlying OpImage from OpSampledImage here
   SType image_type = get_underlying_image_type(f32_type(), args.size());
-  Value image_val = new_value(image_type, ValueKind::kNormal);
-
-  ib_.begin(spv::OpImage)
-      .add_seq(image_type, image_val, sampled_image)
-      .commit(&function_);
+  Value image_val = make_value(spv::OpImage, image_type, sampled_image);
 
   Value uv;
   if (args.size() == 1) {

diff --git a/taichi/codegen/spirv/spirv_ir_builder.h b/taichi/codegen/spirv/spirv_ir_builder.h
@@ -86,6 +86,16 @@ struct Value {
   SType stype;
   // Additional flags about the value
   ValueKind flag{ValueKind::kNormal};
+
+  bool operator==(const Value &rhs) const {
+    return id == rhs.id;
+  }
+};
+
+struct ValueHasher {
+  size_t operator()(const spirv::Value &v) const {
+    return std::hash<uint32_t>()(v.id);
+  }
 };
 
 // Represent the SPIRV Label

diff --git a/taichi/rhi/cpu/cpu_device.h b/taichi/rhi/cpu/cpu_device.h
@@ -11,33 +11,10 @@
 namespace taichi::lang {
 namespace cpu {
 
-class CpuResourceBinder : public ResourceBinder {
- public:
-  ~CpuResourceBinder() override {
-  }
-
-  void rw_buffer(uint32_t set,
-                 uint32_t binding,
-                 DevicePtr ptr,
-                 size_t size) override{TI_NOT_IMPLEMENTED};
-  void rw_buffer(uint32_t set,
-                 uint32_t binding,
-                 DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED};
-
-  void buffer(uint32_t set,
-              uint32_t binding,
-              DevicePtr ptr,
-              size_t size) override{TI_NOT_IMPLEMENTED};
-  void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override{
-      TI_NOT_IMPLEMENTED};
-};
-
 class CpuPipeline : public Pipeline {
  public:
   ~CpuPipeline() override {
   }
-
-  ResourceBinder *resource_binder() override{TI_NOT_IMPLEMENTED};
 };
 
 class CpuCommandList : public CommandList {
@@ -46,7 +23,11 @@ class CpuCommandList : public CommandList {
   }
 
   void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED};
-  void bind_resources(ResourceBinder *binder) override{TI_NOT_IMPLEMENTED};
+  RhiResult bind_shader_resources(ShaderResourceSet *res,
+                                  int set_index = 0) override{
+      TI_NOT_IMPLEMENTED};
+  RhiResult bind_raster_resources(RasterResources *res) override{
+      TI_NOT_IMPLEMENTED};
   void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED};
   void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED};
   void memory_barrier() override{TI_NOT_IMPLEMENTED};
@@ -91,6 +72,8 @@ class CpuDevice : public LlvmDevice {
       const LlvmRuntimeAllocParams &params) override;
   void dealloc_memory(DeviceAllocation handle) override;
 
+  ShaderResourceSet *create_resource_set() override{TI_NOT_IMPLEMENTED};
+
   std::unique_ptr<Pipeline> create_pipeline(
       const PipelineSourceDesc &src,
       std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED};

diff --git a/taichi/rhi/cuda/cuda_device.h b/taichi/rhi/cuda/cuda_device.h
@@ -11,33 +11,10 @@
 namespace taichi::lang {
 namespace cuda {
 
-class CudaResourceBinder : public ResourceBinder {
- public:
-  ~CudaResourceBinder() override {
-  }
-
-  void rw_buffer(uint32_t set,
-                 uint32_t binding,
-                 DevicePtr ptr,
-                 size_t size) override{TI_NOT_IMPLEMENTED};
-  void rw_buffer(uint32_t set,
-                 uint32_t binding,
-                 DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED};
-
-  void buffer(uint32_t set,
-              uint32_t binding,
-              DevicePtr ptr,
-              size_t size) override{TI_NOT_IMPLEMENTED};
-  void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override{
-      TI_NOT_IMPLEMENTED};
-};
-
 class CudaPipeline : public Pipeline {
  public:
   ~CudaPipeline() override {
   }
-
-  ResourceBinder *resource_binder() override{TI_NOT_IMPLEMENTED};
 };
 
 class CudaCommandList : public CommandList {
@@ -46,7 +23,10 @@ class CudaCommandList : public CommandList {
   }
 
   void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED};
-  void bind_resources(ResourceBinder *binder) override{TI_NOT_IMPLEMENTED};
+  RhiResult bind_shader_resources(ShaderResourceSet *res,
+                                  int set_index = 0) final{TI_NOT_IMPLEMENTED};
+  RhiResult bind_raster_resources(RasterResources *res) final{
+      TI_NOT_IMPLEMENTED};
   void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED};
   void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED};
   void memory_barrier() override{TI_NOT_IMPLEMENTED};
@@ -104,6 +84,8 @@ class CudaDevice : public LlvmDevice {
       const LlvmRuntimeAllocParams &params) override;
   void dealloc_memory(DeviceAllocation handle) override;
 
+  ShaderResourceSet *create_resource_set() final{TI_NOT_IMPLEMENTED};
+
   std::unique_ptr<Pipeline> create_pipeline(
       const PipelineSourceDesc &src,
       std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED};